LLVM 20.0.0git
VPlan.h
Go to the documentation of this file.
1//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains the declarations of the Vectorization Plan base classes:
11/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12/// VPBlockBase, together implementing a Hierarchical CFG;
13/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
14/// within VPBasicBlocks;
15/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
16/// also inherit from VPValue.
17/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
18/// instruction;
19/// 5. The VPlan class holding a candidate for vectorization;
20/// 6. The VPlanPrinter class providing a way to print a plan in dot format;
21/// These are documented in docs/VectorizationPlan.rst.
22//
23//===----------------------------------------------------------------------===//
24
25#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
26#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
27
28#include "VPlanAnalysis.h"
29#include "VPlanValue.h"
30#include "llvm/ADT/DenseMap.h"
34#include "llvm/ADT/Twine.h"
35#include "llvm/ADT/ilist.h"
36#include "llvm/ADT/ilist_node.h"
42#include "llvm/IR/DebugLoc.h"
43#include "llvm/IR/FMF.h"
44#include "llvm/IR/Operator.h"
46#include <algorithm>
47#include <cassert>
48#include <cstddef>
49#include <string>
50
51namespace llvm {
52
53class BasicBlock;
54class DominatorTree;
55class InnerLoopVectorizer;
56class IRBuilderBase;
57class LoopInfo;
58class raw_ostream;
59class RecurrenceDescriptor;
60class SCEV;
61class Type;
62class VPBasicBlock;
63class VPRegionBlock;
64class VPlan;
65class VPReplicateRecipe;
66class VPlanSlp;
67class Value;
68class LoopVectorizationCostModel;
69class LoopVersioning;
70
71struct VPCostContext;
72
73namespace Intrinsic {
74typedef unsigned ID;
75}
76
77/// Returns a calculation for the total number of elements for a given \p VF.
78/// For fixed width vectors this value is a constant, whereas for scalable
79/// vectors it is an expression determined at runtime.
80Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF);
81
82/// Return a value for Step multiplied by VF.
83Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
84 int64_t Step);
85
/// Helper yielding the reciprocal of the assumed execution probability of a
/// predicated block: a result of X means the predicated block is assumed to
/// execute once for every X iterations of the loop header.
///
/// TODO: Use the actual block probability when it is available. For now every
/// predicated block is assumed to have a 50% chance of executing.
inline unsigned getReciprocalPredBlockProb() {
  // Reciprocal of the assumed 50% execution probability.
  const unsigned ReciprocalProb = 2;
  return ReciprocalProb;
}
93
94/// A range of powers-of-2 vectorization factors with fixed start and
95/// adjustable end. The range includes start and excludes end, e.g.,:
96/// [1, 16) = {1, 2, 4, 8}
97struct VFRange {
98 // A power of 2.
100
101 // A power of 2. If End <= Start range is empty.
103
104 bool isEmpty() const {
106 }
107
109 : Start(Start), End(End) {
111 "Both Start and End should have the same scalable flag");
113 "Expected Start to be a power of 2");
115 "Expected End to be a power of 2");
116 }
117
118 /// Iterator to iterate over vectorization factors in a VFRange.
120 : public iterator_facade_base<iterator, std::forward_iterator_tag,
121 ElementCount> {
122 ElementCount VF;
123
124 public:
125 iterator(ElementCount VF) : VF(VF) {}
126
127 bool operator==(const iterator &Other) const { return VF == Other.VF; }
128
129 ElementCount operator*() const { return VF; }
130
132 VF *= 2;
133 return *this;
134 }
135 };
136
140 return iterator(End);
141 }
142};
143
144using VPlanPtr = std::unique_ptr<VPlan>;
145
146/// In what follows, the term "input IR" refers to code that is fed into the
147/// vectorizer whereas the term "output IR" refers to code that is generated by
148/// the vectorizer.
149
150/// VPLane provides a way to access lanes in both fixed width and scalable
151/// vectors, where for the latter the lane index sometimes needs calculating
152/// as a runtime expression.
153class VPLane {
154public:
155 /// Kind describes how to interpret Lane.
156 enum class Kind : uint8_t {
157 /// For First, Lane is the index into the first N elements of a
158 /// fixed-vector <N x <ElTy>> or a scalable vector <vscale x N x <ElTy>>.
159 First,
160 /// For ScalableLast, Lane is the offset from the start of the last
161 /// N-element subvector in a scalable vector <vscale x N x <ElTy>>. For
162 /// example, a Lane of 0 corresponds to lane `(vscale - 1) * N`, a Lane of
163 /// 1 corresponds to `((vscale - 1) * N) + 1`, etc.
165 };
166
167private:
168 /// in [0..VF)
169 unsigned Lane;
170
171 /// Indicates how the Lane should be interpreted, as described above.
172 Kind LaneKind;
173
174public:
175 VPLane(unsigned Lane) : Lane(Lane), LaneKind(VPLane::Kind::First) {}
176 VPLane(unsigned Lane, Kind LaneKind) : Lane(Lane), LaneKind(LaneKind) {}
177
179
180 static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset) {
181 assert(Offset > 0 && Offset <= VF.getKnownMinValue() &&
182 "trying to extract with invalid offset");
183 unsigned LaneOffset = VF.getKnownMinValue() - Offset;
184 Kind LaneKind;
185 if (VF.isScalable())
186 // In this case 'LaneOffset' refers to the offset from the start of the
187 // last subvector with VF.getKnownMinValue() elements.
189 else
190 LaneKind = VPLane::Kind::First;
191 return VPLane(LaneOffset, LaneKind);
192 }
193
195 return getLaneFromEnd(VF, 1);
196 }
197
198 /// Returns a compile-time known value for the lane index and asserts if the
199 /// lane can only be calculated at runtime.
200 unsigned getKnownLane() const {
201 assert(LaneKind == Kind::First);
202 return Lane;
203 }
204
205 /// Returns an expression describing the lane index that can be used at
206 /// runtime.
207 Value *getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const;
208
209 /// Returns the Kind of lane offset.
210 Kind getKind() const { return LaneKind; }
211
212 /// Returns true if this is the first lane of the whole vector.
213 bool isFirstLane() const { return Lane == 0 && LaneKind == Kind::First; }
214
215 /// Maps the lane to a cache index based on \p VF.
216 unsigned mapToCacheIndex(const ElementCount &VF) const {
217 switch (LaneKind) {
219 assert(VF.isScalable() && Lane < VF.getKnownMinValue());
220 return VF.getKnownMinValue() + Lane;
221 default:
222 assert(Lane < VF.getKnownMinValue());
223 return Lane;
224 }
225 }
226
227 /// Returns the maxmimum number of lanes that we are able to consider
228 /// caching for \p VF.
229 static unsigned getNumCachedLanes(const ElementCount &VF) {
230 return VF.getKnownMinValue() * (VF.isScalable() ? 2 : 1);
231 }
232};
233
234/// VPTransformState holds information passed down when "executing" a VPlan,
235/// needed for generating the output IR.
239 InnerLoopVectorizer *ILV, VPlan *Plan, Type *CanonicalIVTy);
240 /// Target Transform Info.
242
243 /// The chosen Vectorization Factor of the loop being vectorized.
245
246 /// Hold the index to generate specific scalar instructions. Null indicates
247 /// that all instances are to be generated, using either scalar or vector
248 /// instructions.
249 std::optional<VPLane> Lane;
250
251 struct DataState {
252 // Each value from the original loop, when vectorized, is represented by a
253 // vector value in the map.
255
258
259 /// Get the generated vector Value for a given VPValue \p Def if \p IsScalar
260 /// is false, otherwise return the generated scalar. \See set.
261 Value *get(VPValue *Def, bool IsScalar = false);
262
263 /// Get the generated Value for a given VPValue \p Def and a given \p Lane.
264 Value *get(VPValue *Def, const VPLane &Lane);
265
266 bool hasVectorValue(VPValue *Def) { return Data.VPV2Vector.contains(Def); }
267
269 auto I = Data.VPV2Scalars.find(Def);
270 if (I == Data.VPV2Scalars.end())
271 return false;
272 unsigned CacheIdx = Lane.mapToCacheIndex(VF);
273 return CacheIdx < I->second.size() && I->second[CacheIdx];
274 }
275
276 /// Set the generated vector Value for a given VPValue, if \p
277 /// IsScalar is false. If \p IsScalar is true, set the scalar in lane 0.
278 void set(VPValue *Def, Value *V, bool IsScalar = false) {
279 if (IsScalar) {
280 set(Def, V, VPLane(0));
281 return;
282 }
283 assert((VF.isScalar() || V->getType()->isVectorTy()) &&
284 "scalar values must be stored as (0, 0)");
285 Data.VPV2Vector[Def] = V;
286 }
287
288 /// Reset an existing vector value for \p Def to \p V.
289 void reset(VPValue *Def, Value *V) {
290 assert(Data.VPV2Vector.contains(Def) && "need to overwrite existing value");
291 Data.VPV2Vector[Def] = V;
292 }
293
294 /// Set the generated scalar \p V for \p Def and the given \p Lane.
295 void set(VPValue *Def, Value *V, const VPLane &Lane) {
296 auto &Scalars = Data.VPV2Scalars[Def];
297 unsigned CacheIdx = Lane.mapToCacheIndex(VF);
298 if (Scalars.size() <= CacheIdx)
299 Scalars.resize(CacheIdx + 1);
300 assert(!Scalars[CacheIdx] && "should overwrite existing value");
301 Scalars[CacheIdx] = V;
302 }
303
304 /// Reset an existing scalar value for \p Def and a given \p Lane.
305 void reset(VPValue *Def, Value *V, const VPLane &Lane) {
306 auto Iter = Data.VPV2Scalars.find(Def);
307 assert(Iter != Data.VPV2Scalars.end() &&
308 "need to overwrite existing value");
309 unsigned CacheIdx = Lane.mapToCacheIndex(VF);
310 assert(CacheIdx < Iter->second.size() &&
311 "need to overwrite existing value");
312 Iter->second[CacheIdx] = V;
313 }
314
315 /// Add additional metadata to \p To that was not present on \p Orig.
316 ///
317 /// Currently this is used to add the noalias annotations based on the
318 /// inserted memchecks. Use this for instructions that are *cloned* into the
319 /// vector loop.
320 void addNewMetadata(Instruction *To, const Instruction *Orig);
321
322 /// Add metadata from one instruction to another.
323 ///
324 /// This includes both the original MDs from \p From and additional ones (\see
325 /// addNewMetadata). Use this for *newly created* instructions in the vector
326 /// loop.
327 void addMetadata(Value *To, Instruction *From);
328
329 /// Set the debug location in the builder using the debug location \p DL.
331
332 /// Construct the vector value of a scalarized value \p V one lane at a time.
333 void packScalarIntoVectorValue(VPValue *Def, const VPLane &Lane);
334
335 /// Hold state information used when constructing the CFG of the output IR,
336 /// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
337 struct CFGState {
338 /// The previous VPBasicBlock visited. Initially set to null.
340
341 /// The previous IR BasicBlock created or used. Initially set to the new
342 /// header BasicBlock.
343 BasicBlock *PrevBB = nullptr;
344
345 /// The last IR BasicBlock in the output IR. Set to the exit block of the
346 /// vector loop.
347 BasicBlock *ExitBB = nullptr;
348
349 /// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case
350 /// of replication, maps the BasicBlock of the last replica created.
352
353 /// Updater for the DominatorTree.
355
357 : DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy) {}
358
359 /// Returns the BasicBlock* mapped to the pre-header of the loop region
360 /// containing \p R.
363
364 /// Hold a pointer to LoopInfo to register new basic blocks in the loop.
366
367 /// Hold a reference to the IRBuilder used to generate output IR code.
369
370 /// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
372
373 /// Pointer to the VPlan code is generated for.
375
376 /// The loop object for the current parent region, or nullptr.
378
379 /// LoopVersioning. It's only set up (non-null) if memchecks were
380 /// used.
381 ///
382 /// This is currently only used to add no-alias metadata based on the
383 /// memchecks. The actual versioning is performed manually.
385
386 /// Map SCEVs to their expanded values. Populated when executing
387 /// VPExpandSCEVRecipes.
389
390 /// VPlan-based type analysis.
392};
393
394/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
395/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
397 friend class VPBlockUtils;
398
399 const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
400
401 /// An optional name for the block.
402 std::string Name;
403
404 /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
405 /// it is a topmost VPBlockBase.
406 VPRegionBlock *Parent = nullptr;
407
408 /// List of predecessor blocks.
410
411 /// List of successor blocks.
413
414 /// VPlan containing the block. Can only be set on the entry block of the
415 /// plan.
416 VPlan *Plan = nullptr;
417
418 /// Add \p Successor as the last successor to this block.
419 void appendSuccessor(VPBlockBase *Successor) {
420 assert(Successor && "Cannot add nullptr successor!");
421 Successors.push_back(Successor);
422 }
423
424 /// Add \p Predecessor as the last predecessor to this block.
425 void appendPredecessor(VPBlockBase *Predecessor) {
426 assert(Predecessor && "Cannot add nullptr predecessor!");
427 Predecessors.push_back(Predecessor);
428 }
429
430 /// Remove \p Predecessor from the predecessors of this block.
431 void removePredecessor(VPBlockBase *Predecessor) {
432 auto Pos = find(Predecessors, Predecessor);
433 assert(Pos && "Predecessor does not exist");
434 Predecessors.erase(Pos);
435 }
436
437 /// Remove \p Successor from the successors of this block.
438 void removeSuccessor(VPBlockBase *Successor) {
439 auto Pos = find(Successors, Successor);
440 assert(Pos && "Successor does not exist");
441 Successors.erase(Pos);
442 }
443
444 /// This function replaces one predecessor with another, useful when
445 /// trying to replace an old block in the CFG with a new one.
446 void replacePredecessor(VPBlockBase *Old, VPBlockBase *New) {
447 auto I = find(Predecessors, Old);
448 assert(I != Predecessors.end());
449 assert(Old->getParent() == New->getParent() &&
450 "replaced predecessor must have the same parent");
451 *I = New;
452 }
453
454 /// This function replaces one successor with another, useful when
455 /// trying to replace an old block in the CFG with a new one.
456 void replaceSuccessor(VPBlockBase *Old, VPBlockBase *New) {
457 auto I = find(Successors, Old);
458 assert(I != Successors.end());
459 assert(Old->getParent() == New->getParent() &&
460 "replaced successor must have the same parent");
461 *I = New;
462 }
463
464protected:
465 VPBlockBase(const unsigned char SC, const std::string &N)
466 : SubclassID(SC), Name(N) {}
467
468public:
469 /// An enumeration for keeping track of the concrete subclass of VPBlockBase
470 /// that are actually instantiated. Values of this enumeration are kept in the
471 /// SubclassID field of the VPBlockBase objects. They are used for concrete
472 /// type identification.
473 using VPBlockTy = enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC };
474
476
477 virtual ~VPBlockBase() = default;
478
479 const std::string &getName() const { return Name; }
480
481 void setName(const Twine &newName) { Name = newName.str(); }
482
483 /// \return an ID for the concrete type of this object.
484 /// This is used to implement the classof checks. This should not be used
485 /// for any other purpose, as the values may change as LLVM evolves.
486 unsigned getVPBlockID() const { return SubclassID; }
487
488 VPRegionBlock *getParent() { return Parent; }
489 const VPRegionBlock *getParent() const { return Parent; }
490
491 /// \return A pointer to the plan containing the current block.
492 VPlan *getPlan();
493 const VPlan *getPlan() const;
494
495 /// Sets the pointer of the plan containing the block. The block must be the
496 /// entry block into the VPlan.
497 void setPlan(VPlan *ParentPlan);
498
499 void setParent(VPRegionBlock *P) { Parent = P; }
500
501 /// \return the VPBasicBlock that is the entry of this VPBlockBase,
502 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
503 /// VPBlockBase is a VPBasicBlock, it is returned.
504 const VPBasicBlock *getEntryBasicBlock() const;
506
507 /// \return the VPBasicBlock that is the exiting block of this VPBlockBase,
508 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
509 /// VPBlockBase is a VPBasicBlock, it is returned.
510 const VPBasicBlock *getExitingBasicBlock() const;
512
513 const VPBlocksTy &getSuccessors() const { return Successors; }
514 VPBlocksTy &getSuccessors() { return Successors; }
515
518
519 const VPBlocksTy &getPredecessors() const { return Predecessors; }
520 VPBlocksTy &getPredecessors() { return Predecessors; }
521
522 /// \return the successor of this VPBlockBase if it has a single successor.
523 /// Otherwise return a null pointer.
525 return (Successors.size() == 1 ? *Successors.begin() : nullptr);
526 }
527
528 /// \return the predecessor of this VPBlockBase if it has a single
529 /// predecessor. Otherwise return a null pointer.
531 return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
532 }
533
534 size_t getNumSuccessors() const { return Successors.size(); }
535 size_t getNumPredecessors() const { return Predecessors.size(); }
536
537 /// An Enclosing Block of a block B is any block containing B, including B
538 /// itself. \return the closest enclosing block starting from "this", which
539 /// has successors. \return the root enclosing block if all enclosing blocks
540 /// have no successors.
542
543 /// \return the closest enclosing block starting from "this", which has
544 /// predecessors. \return the root enclosing block if all enclosing blocks
545 /// have no predecessors.
547
548 /// \return the successors either attached directly to this VPBlockBase or, if
549 /// this VPBlockBase is the exit block of a VPRegionBlock and has no
550 /// successors of its own, search recursively for the first enclosing
551 /// VPRegionBlock that has successors and return them. If no such
552 /// VPRegionBlock exists, return the (empty) successors of the topmost
553 /// VPBlockBase reached.
556 }
557
558 /// \return the hierarchical successor of this VPBlockBase if it has a single
559 /// hierarchical successor. Otherwise return a null pointer.
562 }
563
564 /// \return the predecessors either attached directly to this VPBlockBase or,
565 /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
566 /// predecessors of its own, search recursively for the first enclosing
567 /// VPRegionBlock that has predecessors and return them. If no such
568 /// VPRegionBlock exists, return the (empty) predecessors of the topmost
569 /// VPBlockBase reached.
572 }
573
574 /// \return the hierarchical predecessor of this VPBlockBase if it has a
575 /// single hierarchical predecessor. Otherwise return a null pointer.
578 }
579
580 /// Set a given VPBlockBase \p Successor as the single successor of this
581 /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
582 /// This VPBlockBase must have no successors.
584 assert(Successors.empty() && "Setting one successor when others exist.");
585 assert(Successor->getParent() == getParent() &&
586 "connected blocks must have the same parent");
587 appendSuccessor(Successor);
588 }
589
590 /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
591 /// successors of this VPBlockBase. This VPBlockBase is not added as
592 /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
593 /// successors.
594 void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
595 assert(Successors.empty() && "Setting two successors when others exist.");
596 appendSuccessor(IfTrue);
597 appendSuccessor(IfFalse);
598 }
599
600 /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
601 /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
602 /// as successor of any VPBasicBlock in \p NewPreds.
604 assert(Predecessors.empty() && "Block predecessors already set.");
605 for (auto *Pred : NewPreds)
606 appendPredecessor(Pred);
607 }
608
609 /// Set each VPBasicBlock in \p NewSuccs as successor of this VPBlockBase.
610 /// This VPBlockBase must have no successors. This VPBlockBase is not added
611 /// as predecessor of any VPBasicBlock in \p NewSuccs.
613 assert(Successors.empty() && "Block successors already set.");
614 for (auto *Succ : NewSuccs)
615 appendSuccessor(Succ);
616 }
617
618 /// Remove all the predecessors of this block.
619 void clearPredecessors() { Predecessors.clear(); }
620
621 /// Remove all the successors of this block.
622 void clearSuccessors() { Successors.clear(); }
623
624 /// Swap successors of the block. The block must have exactly 2 successors.
625 // TODO: This should be part of introducing conditional branch recipes rather
626 // than being independent.
628 assert(Successors.size() == 2 && "must have 2 successors to swap");
629 std::swap(Successors[0], Successors[1]);
630 }
631
632 /// The method which generates the output IR that corresponds to this
633 /// VPBlockBase, thereby "executing" the VPlan.
634 virtual void execute(VPTransformState *State) = 0;
635
636 /// Return the cost of the block.
638
639 /// Delete all blocks reachable from a given VPBlockBase, inclusive.
640 static void deleteCFG(VPBlockBase *Entry);
641
642 /// Return true if it is legal to hoist instructions into this block.
644 // There are currently no constraints that prevent an instruction to be
645 // hoisted into a VPBlockBase.
646 return true;
647 }
648
649 /// Replace all operands of VPUsers in the block with \p NewValue and also
650 /// replaces all uses of VPValues defined in the block with NewValue.
651 virtual void dropAllReferences(VPValue *NewValue) = 0;
652
653#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
654 void printAsOperand(raw_ostream &OS, bool PrintType = false) const {
655 OS << getName();
656 }
657
658 /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
659 /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
660 /// consecutive numbers.
661 ///
662 /// Note that the numbering is applied to the whole VPlan, so printing
663 /// individual blocks is consistent with the whole VPlan printing.
664 virtual void print(raw_ostream &O, const Twine &Indent,
665 VPSlotTracker &SlotTracker) const = 0;
666
667 /// Print plain-text dump of this VPBlockBase to \p O.
668 void print(raw_ostream &O) const {
670 print(O, "", SlotTracker);
671 }
672
673 /// Print the successors of this block to \p O, prefixing all lines with \p
674 /// Indent.
675 void printSuccessors(raw_ostream &O, const Twine &Indent) const;
676
677 /// Dump this VPBlockBase to dbgs().
678 LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
679#endif
680
681 /// Clone the current block and its recipes without updating the operands of
682 /// the cloned recipes, including all blocks in the single-entry single-exit
683 /// region for VPRegionBlocks.
684 virtual VPBlockBase *clone() = 0;
685};
686
687/// Struct to hold various analysis needed for cost computations.
695
698 : TTI(TTI), TLI(TLI), Types(CanIVTy), LLVMCtx(CanIVTy->getContext()),
699 CM(CM) {}
700
701 /// Return the cost for \p UI with \p VF using the legacy cost model as
702 /// fallback until computing the cost of all recipes migrates to VPlan.
704
705 /// Return true if the cost for \p UI shouldn't be computed, e.g. because it
706 /// has already been pre-computed.
707 bool skipCostComputation(Instruction *UI, bool IsVector) const;
708
709 /// Returns the OperandInfo for \p V, if it is a live-in.
711};
712
713/// VPRecipeBase is a base class modeling a sequence of one or more output IR
714/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
715/// and is responsible for deleting its defined values. Single-value
716/// recipes must inherit from VPSingleDefRecipe instead of inheriting from both
717/// VPRecipeBase and VPValue separately.
718class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
719 public VPDef,
720 public VPUser {
721 friend VPBasicBlock;
722 friend class VPBlockUtils;
723
724 /// Each VPRecipe belongs to a single VPBasicBlock.
725 VPBasicBlock *Parent = nullptr;
726
727 /// The debug location for the recipe.
728 DebugLoc DL;
729
730public:
732 DebugLoc DL = {})
733 : VPDef(SC), VPUser(Operands), DL(DL) {}
734
735 template <typename IterT>
737 DebugLoc DL = {})
738 : VPDef(SC), VPUser(Operands), DL(DL) {}
739 virtual ~VPRecipeBase() = default;
740
741 /// Clone the current recipe.
742 virtual VPRecipeBase *clone() = 0;
743
744 /// \return the VPBasicBlock which this VPRecipe belongs to.
745 VPBasicBlock *getParent() { return Parent; }
746 const VPBasicBlock *getParent() const { return Parent; }
747
748 /// The method which generates the output IR instructions that correspond to
749 /// this VPRecipe, thereby "executing" the VPlan.
750 virtual void execute(VPTransformState &State) = 0;
751
752 /// Return the cost of this recipe, taking into account if the cost
753 /// computation should be skipped and the ForceTargetInstructionCost flag.
754 /// Also takes care of printing the cost for debugging.
756
757 /// Insert an unlinked recipe into a basic block immediately before
758 /// the specified recipe.
759 void insertBefore(VPRecipeBase *InsertPos);
760 /// Insert an unlinked recipe into \p BB immediately before the insertion
761 /// point \p IP;
763
764 /// Insert an unlinked Recipe into a basic block immediately after
765 /// the specified Recipe.
766 void insertAfter(VPRecipeBase *InsertPos);
767
768 /// Unlink this recipe from its current VPBasicBlock and insert it into
769 /// the VPBasicBlock that MovePos lives in, right after MovePos.
770 void moveAfter(VPRecipeBase *MovePos);
771
772 /// Unlink this recipe and insert into BB before I.
773 ///
774 /// \pre I is a valid iterator into BB.
776
777 /// This method unlinks 'this' from the containing basic block, but does not
778 /// delete it.
779 void removeFromParent();
780
781 /// This method unlinks 'this' from the containing basic block and deletes it.
782 ///
783 /// \returns an iterator pointing to the element after the erased one
785
786 /// Method to support type inquiry through isa, cast, and dyn_cast.
787 static inline bool classof(const VPDef *D) {
788 // All VPDefs are also VPRecipeBases.
789 return true;
790 }
791
792 static inline bool classof(const VPUser *U) { return true; }
793
794 /// Returns true if the recipe may have side-effects.
795 bool mayHaveSideEffects() const;
796
797 /// Returns true for PHI-like recipes.
798 bool isPhi() const {
799 return getVPDefID() >= VPFirstPHISC && getVPDefID() <= VPLastPHISC;
800 }
801
802 /// Returns true if the recipe may read from memory.
803 bool mayReadFromMemory() const;
804
805 /// Returns true if the recipe may write to memory.
806 bool mayWriteToMemory() const;
807
808 /// Returns true if the recipe may read from or write to memory.
809 bool mayReadOrWriteMemory() const {
811 }
812
813 /// Returns the debug location of the recipe.
814 DebugLoc getDebugLoc() const { return DL; }
815
816protected:
817 /// Compute the cost of this recipe either using a recipe's specialized
818 /// implementation or using the legacy cost model and the underlying
819 /// instructions.
821 VPCostContext &Ctx) const;
822};
823
// Helper macro that stamps out the classof overloads shared by recipes. Each
// overload reports whether its argument is a recipe (or, for a VPValue, is
// defined by a recipe) whose VPDef ID equals VPDefID. The VPValue and VPUser
// overloads return false when no recipe can be obtained from the argument.
#define VP_CLASSOF_IMPL(VPDefID) \
  static inline bool classof(const VPDef *Def) { \
    return Def->getVPDefID() == VPDefID; \
  } \
  static inline bool classof(const VPValue *Val) { \
    const auto *DefRecipe = Val->getDefiningRecipe(); \
    return DefRecipe != nullptr && DefRecipe->getVPDefID() == VPDefID; \
  } \
  static inline bool classof(const VPUser *User) { \
    if (const auto *Recipe = dyn_cast<VPRecipeBase>(User)) \
      return Recipe->getVPDefID() == VPDefID; \
    return false; \
  } \
  static inline bool classof(const VPRecipeBase *Recipe) { \
    return Recipe->getVPDefID() == VPDefID; \
  } \
  static inline bool classof(const VPSingleDefRecipe *Recipe) { \
    return Recipe->getVPDefID() == VPDefID; \
  }
843
844/// VPSingleDefRecipe is a base class for recipes modeling a sequence of one or
845/// more output IR instructions that define a single result VPValue.
846/// Note that VPRecipeBase must be inherited from before VPValue.
847class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
848public:
849 template <typename IterT>
850 VPSingleDefRecipe(const unsigned char SC, IterT Operands, DebugLoc DL = {})
851 : VPRecipeBase(SC, Operands, DL), VPValue(this) {}
852
853 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
854 DebugLoc DL = {})
855 : VPRecipeBase(SC, Operands, DL), VPValue(this) {}
856
857 template <typename IterT>
858 VPSingleDefRecipe(const unsigned char SC, IterT Operands, Value *UV,
859 DebugLoc DL = {})
860 : VPRecipeBase(SC, Operands, DL), VPValue(this, UV) {}
861
862 static inline bool classof(const VPRecipeBase *R) {
863 switch (R->getVPDefID()) {
864 case VPRecipeBase::VPDerivedIVSC:
865 case VPRecipeBase::VPEVLBasedIVPHISC:
866 case VPRecipeBase::VPExpandSCEVSC:
867 case VPRecipeBase::VPInstructionSC:
868 case VPRecipeBase::VPReductionEVLSC:
869 case VPRecipeBase::VPReductionSC:
870 case VPRecipeBase::VPReplicateSC:
871 case VPRecipeBase::VPScalarIVStepsSC:
872 case VPRecipeBase::VPVectorPointerSC:
873 case VPRecipeBase::VPReverseVectorPointerSC:
874 case VPRecipeBase::VPWidenCallSC:
875 case VPRecipeBase::VPWidenCanonicalIVSC:
876 case VPRecipeBase::VPWidenCastSC:
877 case VPRecipeBase::VPWidenGEPSC:
878 case VPRecipeBase::VPWidenIntrinsicSC:
879 case VPRecipeBase::VPWidenSC:
880 case VPRecipeBase::VPWidenEVLSC:
881 case VPRecipeBase::VPWidenSelectSC:
882 case VPRecipeBase::VPBlendSC:
883 case VPRecipeBase::VPPredInstPHISC:
884 case VPRecipeBase::VPCanonicalIVPHISC:
885 case VPRecipeBase::VPActiveLaneMaskPHISC:
886 case VPRecipeBase::VPFirstOrderRecurrencePHISC:
887 case VPRecipeBase::VPWidenPHISC:
888 case VPRecipeBase::VPWidenIntOrFpInductionSC:
889 case VPRecipeBase::VPWidenPointerInductionSC:
890 case VPRecipeBase::VPReductionPHISC:
891 case VPRecipeBase::VPScalarCastSC:
892 return true;
893 case VPRecipeBase::VPBranchOnMaskSC:
894 case VPRecipeBase::VPInterleaveSC:
895 case VPRecipeBase::VPIRInstructionSC:
896 case VPRecipeBase::VPWidenLoadEVLSC:
897 case VPRecipeBase::VPWidenLoadSC:
898 case VPRecipeBase::VPWidenStoreEVLSC:
899 case VPRecipeBase::VPWidenStoreSC:
900 case VPRecipeBase::VPHistogramSC:
901 // TODO: Widened stores don't define a value, but widened loads do. Split
902 // the recipes to be able to make widened loads VPSingleDefRecipes.
903 return false;
904 }
905 llvm_unreachable("Unhandled VPDefID");
906 }
907
908 static inline bool classof(const VPUser *U) {
909 auto *R = dyn_cast<VPRecipeBase>(U);
910 return R && classof(R);
911 }
912
913 virtual VPSingleDefRecipe *clone() override = 0;
914
915 /// Returns the underlying instruction.
917 return cast<Instruction>(getUnderlyingValue());
918 }
920 return cast<Instruction>(getUnderlyingValue());
921 }
922
923#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
924 /// Print this VPSingleDefRecipe to dbgs() (for debugging).
925 LLVM_DUMP_METHOD void dump() const;
926#endif
927};
928
929/// Class to record LLVM IR flag for a recipe along with it.
931 enum class OperationType : unsigned char {
932 Cmp,
933 OverflowingBinOp,
934 DisjointOp,
935 PossiblyExactOp,
936 GEPOp,
937 FPMathOp,
938 NonNegOp,
939 Other
940 };
941
942public:
943 struct WrapFlagsTy {
944 char HasNUW : 1;
945 char HasNSW : 1;
946
948 };
949
951 char IsDisjoint : 1;
953 };
954
955private:
956 struct ExactFlagsTy {
957 char IsExact : 1;
958 };
959 struct NonNegFlagsTy {
960 char NonNeg : 1;
961 };
962 struct FastMathFlagsTy {
963 char AllowReassoc : 1;
964 char NoNaNs : 1;
965 char NoInfs : 1;
966 char NoSignedZeros : 1;
967 char AllowReciprocal : 1;
968 char AllowContract : 1;
969 char ApproxFunc : 1;
970
971 FastMathFlagsTy(const FastMathFlags &FMF);
972 };
973
974 OperationType OpType;
975
976 union {
980 ExactFlagsTy ExactFlags;
982 NonNegFlagsTy NonNegFlags;
983 FastMathFlagsTy FMFs;
984 unsigned AllFlags;
985 };
986
987protected:
989 OpType = Other.OpType;
990 AllFlags = Other.AllFlags;
991 }
992
993public:
994 template <typename IterT>
995 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DebugLoc DL = {})
996 : VPSingleDefRecipe(SC, Operands, DL) {
997 OpType = OperationType::Other;
998 AllFlags = 0;
999 }
1000
1001 template <typename IterT>
1002 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, Instruction &I)
1004 if (auto *Op = dyn_cast<CmpInst>(&I)) {
1005 OpType = OperationType::Cmp;
1006 CmpPredicate = Op->getPredicate();
1007 } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
1008 OpType = OperationType::DisjointOp;
1009 DisjointFlags.IsDisjoint = Op->isDisjoint();
1010 } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
1011 OpType = OperationType::OverflowingBinOp;
1012 WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
1013 } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
1014 OpType = OperationType::PossiblyExactOp;
1015 ExactFlags.IsExact = Op->isExact();
1016 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
1017 OpType = OperationType::GEPOp;
1018 GEPFlags = GEP->getNoWrapFlags();
1019 } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
1020 OpType = OperationType::NonNegOp;
1021 NonNegFlags.NonNeg = PNNI->hasNonNeg();
1022 } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
1023 OpType = OperationType::FPMathOp;
1024 FMFs = Op->getFastMathFlags();
1025 } else {
1026 OpType = OperationType::Other;
1027 AllFlags = 0;
1028 }
1029 }
1030
1031 template <typename IterT>
1032 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1033 CmpInst::Predicate Pred, DebugLoc DL = {})
1034 : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::Cmp),
1035 CmpPredicate(Pred) {}
1036
1037 template <typename IterT>
1038 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1040 : VPSingleDefRecipe(SC, Operands, DL),
1041 OpType(OperationType::OverflowingBinOp), WrapFlags(WrapFlags) {}
1042
1043 template <typename IterT>
1044 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1045 FastMathFlags FMFs, DebugLoc DL = {})
1046 : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::FPMathOp),
1047 FMFs(FMFs) {}
1048
1049 template <typename IterT>
1050 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1052 : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::DisjointOp),
1054
1055protected:
1056 template <typename IterT>
1057 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1059 : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::GEPOp),
1060 GEPFlags(GEPFlags) {}
1061
1062public:
1063 static inline bool classof(const VPRecipeBase *R) {
1064 return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
1065 R->getVPDefID() == VPRecipeBase::VPWidenSC ||
1066 R->getVPDefID() == VPRecipeBase::VPWidenEVLSC ||
1067 R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
1068 R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
1069 R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
1070 R->getVPDefID() == VPRecipeBase::VPReverseVectorPointerSC ||
1071 R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
1072 }
1073
1074 static inline bool classof(const VPUser *U) {
1075 auto *R = dyn_cast<VPRecipeBase>(U);
1076 return R && classof(R);
1077 }
1078
1079 /// Drop all poison-generating flags.
1081 // NOTE: This needs to be kept in-sync with
1082 // Instruction::dropPoisonGeneratingFlags.
1083 switch (OpType) {
1084 case OperationType::OverflowingBinOp:
1085 WrapFlags.HasNUW = false;
1086 WrapFlags.HasNSW = false;
1087 break;
1088 case OperationType::DisjointOp:
1089 DisjointFlags.IsDisjoint = false;
1090 break;
1091 case OperationType::PossiblyExactOp:
1092 ExactFlags.IsExact = false;
1093 break;
1094 case OperationType::GEPOp:
1096 break;
1097 case OperationType::FPMathOp:
1098 FMFs.NoNaNs = false;
1099 FMFs.NoInfs = false;
1100 break;
1101 case OperationType::NonNegOp:
1102 NonNegFlags.NonNeg = false;
1103 break;
1104 case OperationType::Cmp:
1105 case OperationType::Other:
1106 break;
1107 }
1108 }
1109
1110 /// Set the IR flags for \p I.
1111 void setFlags(Instruction *I) const {
1112 switch (OpType) {
1113 case OperationType::OverflowingBinOp:
1114 I->setHasNoUnsignedWrap(WrapFlags.HasNUW);
1115 I->setHasNoSignedWrap(WrapFlags.HasNSW);
1116 break;
1117 case OperationType::DisjointOp:
1118 cast<PossiblyDisjointInst>(I)->setIsDisjoint(DisjointFlags.IsDisjoint);
1119 break;
1120 case OperationType::PossiblyExactOp:
1121 I->setIsExact(ExactFlags.IsExact);
1122 break;
1123 case OperationType::GEPOp:
1124 cast<GetElementPtrInst>(I)->setNoWrapFlags(GEPFlags);
1125 break;
1126 case OperationType::FPMathOp:
1127 I->setHasAllowReassoc(FMFs.AllowReassoc);
1128 I->setHasNoNaNs(FMFs.NoNaNs);
1129 I->setHasNoInfs(FMFs.NoInfs);
1130 I->setHasNoSignedZeros(FMFs.NoSignedZeros);
1131 I->setHasAllowReciprocal(FMFs.AllowReciprocal);
1132 I->setHasAllowContract(FMFs.AllowContract);
1133 I->setHasApproxFunc(FMFs.ApproxFunc);
1134 break;
1135 case OperationType::NonNegOp:
1136 I->setNonNeg(NonNegFlags.NonNeg);
1137 break;
1138 case OperationType::Cmp:
1139 case OperationType::Other:
1140 break;
1141 }
1142 }
1143
1145 assert(OpType == OperationType::Cmp &&
1146 "recipe doesn't have a compare predicate");
1147 return CmpPredicate;
1148 }
1149
1151
1152 /// Returns true if the recipe has fast-math flags.
1153 bool hasFastMathFlags() const { return OpType == OperationType::FPMathOp; }
1154
1156
1157 bool hasNoUnsignedWrap() const {
1158 assert(OpType == OperationType::OverflowingBinOp &&
1159 "recipe doesn't have a NUW flag");
1160 return WrapFlags.HasNUW;
1161 }
1162
1163 bool hasNoSignedWrap() const {
1164 assert(OpType == OperationType::OverflowingBinOp &&
1165 "recipe doesn't have a NSW flag");
1166 return WrapFlags.HasNSW;
1167 }
1168
1169 bool isDisjoint() const {
1170 assert(OpType == OperationType::DisjointOp &&
1171 "recipe cannot have a disjoing flag");
1173 }
1174
1175#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1176 void printFlags(raw_ostream &O) const;
1177#endif
1178};
1179
1180/// Helper to access the operand that contains the unroll part for this recipe
1181/// after unrolling.
1182template <unsigned PartOpIdx> class VPUnrollPartAccessor {
1183protected:
1184 /// Return the VPValue operand containing the unroll part or null if there is
1185 /// no such operand.
1187
1188 /// Return the unroll part.
1189 unsigned getUnrollPart(VPUser &U) const;
1190};
1191
1192/// This is a concrete Recipe that models a single VPlan-level instruction.
1193/// While as any Recipe it may generate a sequence of IR instructions when
1194/// executed, these instructions would always form a single-def expression as
1195/// the VPInstruction is also a single def-use vertex.
1197 public VPUnrollPartAccessor<1> {
1198 friend class VPlanSlp;
1199
1200public:
/// VPlan opcodes, extending LLVM IR with idiomatic instructions.
1202 enum {
1204 Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
1205 // values of a first-order recurrence.
1211 /// Creates a scalar phi in a leaf VPBB with a single predecessor in VPlan.
1212 /// The first operand is the incoming value from the predecessor in VPlan,
1213 /// the second operand is the incoming value for all other predecessors
1214 /// (which are currently not modeled in VPlan).
1217 // Increment the canonical IV separately for each unrolled part.
1222 // Takes the VPValue to extract from as first operand and the lane or part
1223 // to extract as second operand, counting from the end starting with 1 for
1224 // last. The second operand must be a positive constant and <= VF.
1226 LogicalAnd, // Non-poison propagating logical And.
1227 // Add an offset in bytes (second operand) to a base pointer (first
1228 // operand). Only generates scalar values (either for the first lane only or
1229 // for all lanes, depending on its uses).
1231 // Returns a scalar boolean value, which is true if any lane of its single
1232 // operand is true.
1234 };
1235
1236private:
1237 typedef unsigned char OpcodeTy;
1238 OpcodeTy Opcode;
1239
1240 /// An optional name that can be used for the generated IR instruction.
1241 const std::string Name;
1242
1243 /// Returns true if this VPInstruction generates scalar values for all lanes.
1244 /// Most VPInstructions generate a single value per part, either vector or
1245 /// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
1246 /// values per all lanes, stemming from an original ingredient. This method
1247 /// identifies the (rare) cases of VPInstructions that do so as well, w/o an
1248 /// underlying ingredient.
1249 bool doesGeneratePerAllLanes() const;
1250
1251 /// Returns true if we can generate a scalar for the first lane only if
1252 /// needed.
1253 bool canGenerateScalarForFirstLane() const;
1254
/// Utility methods serving execute(): generates a single vector instance of
/// the modeled instruction. \returns the generated value. In some cases an
/// existing value is returned rather than a generated one.
1258 Value *generate(VPTransformState &State);
1259
1260 /// Utility methods serving execute(): generates a scalar single instance of
1261 /// the modeled instruction for a given lane. \returns the scalar generated
1262 /// value for lane \p Lane.
1263 Value *generatePerLane(VPTransformState &State, const VPLane &Lane);
1264
1265#if !defined(NDEBUG)
1266 /// Return true if the VPInstruction is a floating point math operation, i.e.
1267 /// has fast-math flags.
1268 bool isFPMathOp() const;
1269#endif
1270
1271public:
1273 const Twine &Name = "")
1274 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DL),
1275 Opcode(Opcode), Name(Name.str()) {}
1276
1277 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1278 DebugLoc DL = {}, const Twine &Name = "")
1280
1281 VPInstruction(unsigned Opcode, CmpInst::Predicate Pred, VPValue *A,
1282 VPValue *B, DebugLoc DL = {}, const Twine &Name = "");
1283
1284 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1285 WrapFlagsTy WrapFlags, DebugLoc DL = {}, const Twine &Name = "")
1286 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, WrapFlags, DL),
1287 Opcode(Opcode), Name(Name.str()) {}
1288
1289 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1290 DisjointFlagsTy DisjointFlag, DebugLoc DL = {},
1291 const Twine &Name = "")
1292 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DisjointFlag, DL),
1293 Opcode(Opcode), Name(Name.str()) {
1294 assert(Opcode == Instruction::Or && "only OR opcodes can be disjoint");
1295 }
1296
1298 DebugLoc DL = {}, const Twine &Name = "")
1299 : VPRecipeWithIRFlags(VPDef::VPInstructionSC,
1300 ArrayRef<VPValue *>({Ptr, Offset}), Flags, DL),
1301 Opcode(VPInstruction::PtrAdd), Name(Name.str()) {}
1302
1303 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1304 FastMathFlags FMFs, DebugLoc DL = {}, const Twine &Name = "");
1305
1306 VP_CLASSOF_IMPL(VPDef::VPInstructionSC)
1307
1308 VPInstruction *clone() override {
1310 auto *New = new VPInstruction(Opcode, Operands, getDebugLoc(), Name);
1311 New->transferFlags(*this);
1312 return New;
1313 }
1314
1315 unsigned getOpcode() const { return Opcode; }
1316
1317 /// Generate the instruction.
1318 /// TODO: We currently execute only per-part unless a specific instance is
1319 /// provided.
1320 void execute(VPTransformState &State) override;
1321
1322 /// Return the cost of this VPInstruction.
1324 VPCostContext &Ctx) const override {
1325 // TODO: Compute accurate cost after retiring the legacy cost model.
1326 return 0;
1327 }
1328
1329#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1330 /// Print the VPInstruction to \p O.
1331 void print(raw_ostream &O, const Twine &Indent,
1332 VPSlotTracker &SlotTracker) const override;
1333
1334 /// Print the VPInstruction to dbgs() (for debugging).
1335 LLVM_DUMP_METHOD void dump() const;
1336#endif
1337
1338 bool hasResult() const {
1339 // CallInst may or may not have a result, depending on the called function.
1340 // Conservatively return calls have results for now.
1341 switch (getOpcode()) {
1342 case Instruction::Ret:
1343 case Instruction::Br:
1344 case Instruction::Store:
1345 case Instruction::Switch:
1346 case Instruction::IndirectBr:
1347 case Instruction::Resume:
1348 case Instruction::CatchRet:
1349 case Instruction::Unreachable:
1350 case Instruction::Fence:
1351 case Instruction::AtomicRMW:
1354 return false;
1355 default:
1356 return true;
1357 }
1358 }
1359
1360 /// Returns true if the recipe only uses the first lane of operand \p Op.
1361 bool onlyFirstLaneUsed(const VPValue *Op) const override;
1362
1363 /// Returns true if the recipe only uses the first part of operand \p Op.
1364 bool onlyFirstPartUsed(const VPValue *Op) const override;
1365
1366 /// Returns true if this VPInstruction produces a scalar value from a vector,
1367 /// e.g. by performing a reduction or extracting a lane.
1368 bool isVectorToScalar() const;
1369
1370 /// Returns true if this VPInstruction's operands are single scalars and the
1371 /// result is also a single scalar.
1372 bool isSingleScalar() const;
1373
1374 /// Returns the symbolic name assigned to the VPInstruction.
1375 StringRef getName() const { return Name; }
1376};
1377
/// A recipe to wrap an original IR instruction not to be modified during
/// execution, except for PHIs. For PHIs, a single VPValue operand is allowed,
/// and it is used to add a new incoming value for the single predecessor VPBB.
/// Except for PHIs, VPIRInstructions cannot have any operands.
1383 Instruction &I;
1384
1385public:
1387 : VPRecipeBase(VPDef::VPIRInstructionSC, ArrayRef<VPValue *>()), I(I) {}
1388
1389 ~VPIRInstruction() override = default;
1390
1391 VP_CLASSOF_IMPL(VPDef::VPIRInstructionSC)
1392
1394 auto *R = new VPIRInstruction(I);
1395 for (auto *Op : operands())
1396 R->addOperand(Op);
1397 return R;
1398 }
1399
1400 void execute(VPTransformState &State) override;
1401
1402 /// Return the cost of this VPIRInstruction.
1404 VPCostContext &Ctx) const override;
1405
1406 Instruction &getInstruction() const { return I; }
1407
1408#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1409 /// Print the recipe.
1410 void print(raw_ostream &O, const Twine &Indent,
1411 VPSlotTracker &SlotTracker) const override;
1412#endif
1413
1414 bool usesScalars(const VPValue *Op) const override {
1416 "Op must be an operand of the recipe");
1417 return true;
1418 }
1419
1420 bool onlyFirstPartUsed(const VPValue *Op) const override {
1422 "Op must be an operand of the recipe");
1423 return true;
1424 }
1425};
1426
1427/// VPWidenRecipe is a recipe for producing a widened instruction using the
1428/// opcode and operands of the recipe. This recipe covers most of the
1429/// traditional vectorization cases where each recipe transforms into a
1430/// vectorized version of itself.
1432 unsigned Opcode;
1433
1434protected:
1435 template <typename IterT>
1436 VPWidenRecipe(unsigned VPDefOpcode, Instruction &I,
1438 : VPRecipeWithIRFlags(VPDefOpcode, Operands, I), Opcode(I.getOpcode()) {}
1439
1440public:
1441 template <typename IterT>
1443 : VPWidenRecipe(VPDef::VPWidenSC, I, Operands) {}
1444
1445 ~VPWidenRecipe() override = default;
1446
1447 VPWidenRecipe *clone() override {
1448 auto *R = new VPWidenRecipe(*getUnderlyingInstr(), operands());
1449 R->transferFlags(*this);
1450 return R;
1451 }
1452
1453 static inline bool classof(const VPRecipeBase *R) {
1454 return R->getVPDefID() == VPRecipeBase::VPWidenSC ||
1455 R->getVPDefID() == VPRecipeBase::VPWidenEVLSC;
1456 }
1457
1458 static inline bool classof(const VPUser *U) {
1459 auto *R = dyn_cast<VPRecipeBase>(U);
1460 return R && classof(R);
1461 }
1462
1463 /// Produce a widened instruction using the opcode and operands of the recipe,
1464 /// processing State.VF elements.
1465 void execute(VPTransformState &State) override;
1466
1467 /// Return the cost of this VPWidenRecipe.
1469 VPCostContext &Ctx) const override;
1470
1471 unsigned getOpcode() const { return Opcode; }
1472
1473#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1474 /// Print the recipe.
1475 void print(raw_ostream &O, const Twine &Indent,
1476 VPSlotTracker &SlotTracker) const override;
1477#endif
1478};
1479
1480/// A recipe for widening operations with vector-predication intrinsics with
1481/// explicit vector length (EVL).
1484
1485public:
1486 template <typename IterT>
1488 : VPWidenRecipe(VPDef::VPWidenEVLSC, I, Operands) {
1489 addOperand(&EVL);
1490 }
1492 : VPWidenEVLRecipe(*W.getUnderlyingInstr(), W.operands(), EVL) {
1493 transferFlags(W);
1494 }
1495
1496 ~VPWidenEVLRecipe() override = default;
1497
1498 VPWidenRecipe *clone() override final {
1499 llvm_unreachable("VPWidenEVLRecipe cannot be cloned");
1500 return nullptr;
1501 }
1502
1503 VP_CLASSOF_IMPL(VPDef::VPWidenEVLSC);
1504
1506 const VPValue *getEVL() const { return getOperand(getNumOperands() - 1); }
1507
1508 /// Produce a vp-intrinsic using the opcode and operands of the recipe,
1509 /// processing EVL elements.
1510 void execute(VPTransformState &State) override final;
1511
1512 /// Returns true if the recipe only uses the first lane of operand \p Op.
1513 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1515 "Op must be an operand of the recipe");
1516 // EVL in that recipe is always the last operand, thus any use before means
1517 // the VPValue should be vectorized.
1518 return getEVL() == Op;
1519 }
1520
1521#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1522 /// Print the recipe.
1523 void print(raw_ostream &O, const Twine &Indent,
1524 VPSlotTracker &SlotTracker) const override final;
1525#endif
1526};
1527
1528/// VPWidenCastRecipe is a recipe to create vector cast instructions.
1530 /// Cast instruction opcode.
1531 Instruction::CastOps Opcode;
1532
1533 /// Result type for the cast.
1534 Type *ResultTy;
1535
1536public:
1538 CastInst &UI)
1539 : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), Opcode(Opcode),
1540 ResultTy(ResultTy) {
1541 assert(UI.getOpcode() == Opcode &&
1542 "opcode of underlying cast doesn't match");
1543 }
1544
1546 : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op), Opcode(Opcode),
1547 ResultTy(ResultTy) {}
1548
1549 ~VPWidenCastRecipe() override = default;
1550
1552 if (auto *UV = getUnderlyingValue())
1553 return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy,
1554 *cast<CastInst>(UV));
1555
1556 return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy);
1557 }
1558
1559 VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)
1560
1561 /// Produce widened copies of the cast.
1562 void execute(VPTransformState &State) override;
1563
1564 /// Return the cost of this VPWidenCastRecipe.
1566 VPCostContext &Ctx) const override;
1567
1568#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1569 /// Print the recipe.
1570 void print(raw_ostream &O, const Twine &Indent,
1571 VPSlotTracker &SlotTracker) const override;
1572#endif
1573
1574 Instruction::CastOps getOpcode() const { return Opcode; }
1575
1576 /// Returns the result type of the cast.
1577 Type *getResultType() const { return ResultTy; }
1578};
1579
1580/// VPScalarCastRecipe is a recipe to create scalar cast instructions.
1582 Instruction::CastOps Opcode;
1583
1584 Type *ResultTy;
1585
1586 Value *generate(VPTransformState &State);
1587
1588public:
1590 : VPSingleDefRecipe(VPDef::VPScalarCastSC, {Op}), Opcode(Opcode),
1591 ResultTy(ResultTy) {}
1592
1593 ~VPScalarCastRecipe() override = default;
1594
1596 return new VPScalarCastRecipe(Opcode, getOperand(0), ResultTy);
1597 }
1598
1599 VP_CLASSOF_IMPL(VPDef::VPScalarCastSC)
1600
1601 void execute(VPTransformState &State) override;
1602
1603 /// Return the cost of this VPScalarCastRecipe.
1605 VPCostContext &Ctx) const override {
1606 // TODO: Compute accurate cost after retiring the legacy cost model.
1607 return 0;
1608 }
1609
1610#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1611 void print(raw_ostream &O, const Twine &Indent,
1612 VPSlotTracker &SlotTracker) const override;
1613#endif
1614
1615 /// Returns the result type of the cast.
1616 Type *getResultType() const { return ResultTy; }
1617
1618 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1619 // At the moment, only uniform codegen is implemented.
1621 "Op must be an operand of the recipe");
1622 return true;
1623 }
1624};
1625
1626/// A recipe for widening vector intrinsics.
1628 /// ID of the vector intrinsic to widen.
1629 Intrinsic::ID VectorIntrinsicID;
1630
1631 /// Scalar return type of the intrinsic.
1632 Type *ResultTy;
1633
1634 /// True if the intrinsic may read from memory.
1635 bool MayReadFromMemory;
1636
/// True if the intrinsic may write to memory.
1638 bool MayWriteToMemory;
1639
1640 /// True if the intrinsic may have side-effects.
1641 bool MayHaveSideEffects;
1642
1643public:
1645 ArrayRef<VPValue *> CallArguments, Type *Ty,
1646 DebugLoc DL = {})
1647 : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, CI),
1648 VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty),
1649 MayReadFromMemory(CI.mayReadFromMemory()),
1650 MayWriteToMemory(CI.mayWriteToMemory()),
1651 MayHaveSideEffects(CI.mayHaveSideEffects()) {}
1652
1654 ArrayRef<VPValue *> CallArguments, Type *Ty,
1655 DebugLoc DL = {})
1656 : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, DL),
1657 VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty) {
1658 LLVMContext &Ctx = Ty->getContext();
1659 AttributeList Attrs = Intrinsic::getAttributes(Ctx, VectorIntrinsicID);
1660 MemoryEffects ME = Attrs.getMemoryEffects();
1661 MayReadFromMemory = ME.onlyWritesMemory();
1662 MayWriteToMemory = ME.onlyReadsMemory();
1663 MayHaveSideEffects = MayWriteToMemory ||
1664 !Attrs.hasFnAttr(Attribute::NoUnwind) ||
1665 !Attrs.hasFnAttr(Attribute::WillReturn);
1666 }
1667
1669 std::initializer_list<VPValue *> CallArguments,
1670 Type *Ty, DebugLoc DL = {})
1671 : VPWidenIntrinsicRecipe(VectorIntrinsicID,
1672 ArrayRef<VPValue *>(CallArguments), Ty, DL) {}
1673
1674 ~VPWidenIntrinsicRecipe() override = default;
1675
1677 return new VPWidenIntrinsicRecipe(*cast<CallInst>(getUnderlyingValue()),
1678 VectorIntrinsicID, {op_begin(), op_end()},
1679 ResultTy, getDebugLoc());
1680 }
1681
1682 VP_CLASSOF_IMPL(VPDef::VPWidenIntrinsicSC)
1683
1684 /// Produce a widened version of the vector intrinsic.
1685 void execute(VPTransformState &State) override;
1686
1687 /// Return the cost of this vector intrinsic.
1689 VPCostContext &Ctx) const override;
1690
1691 /// Return the ID of the intrinsic.
1692 Intrinsic::ID getVectorIntrinsicID() const { return VectorIntrinsicID; }
1693
1694 /// Return the scalar return type of the intrinsic.
1695 Type *getResultType() const { return ResultTy; }
1696
/// Return the name of the intrinsic as a string.
1699
1700 /// Returns true if the intrinsic may read from memory.
1701 bool mayReadFromMemory() const { return MayReadFromMemory; }
1702
1703 /// Returns true if the intrinsic may write to memory.
1704 bool mayWriteToMemory() const { return MayWriteToMemory; }
1705
1706 /// Returns true if the intrinsic may have side-effects.
1707 bool mayHaveSideEffects() const { return MayHaveSideEffects; }
1708
1709#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1710 /// Print the recipe.
1711 void print(raw_ostream &O, const Twine &Indent,
1712 VPSlotTracker &SlotTracker) const override;
1713#endif
1714
1715 bool onlyFirstLaneUsed(const VPValue *Op) const override;
1716};
1717
1718/// A recipe for widening Call instructions using library calls.
1720 /// Variant stores a pointer to the chosen function. There is a 1:1 mapping
1721 /// between a given VF and the chosen vectorized variant, so there will be a
1722 /// different VPlan for each VF with a valid variant.
1723 Function *Variant;
1724
1725public:
1727 ArrayRef<VPValue *> CallArguments, DebugLoc DL = {})
1728 : VPRecipeWithIRFlags(VPDef::VPWidenCallSC, CallArguments,
1729 *cast<Instruction>(UV)),
1730 Variant(Variant) {
1731 assert(
1732 isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
1733 "last operand must be the called function");
1734 }
1735
1736 ~VPWidenCallRecipe() override = default;
1737
1739 return new VPWidenCallRecipe(getUnderlyingValue(), Variant,
1740 {op_begin(), op_end()}, getDebugLoc());
1741 }
1742
1743 VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)
1744
1745 /// Produce a widened version of the call instruction.
1746 void execute(VPTransformState &State) override;
1747
1748 /// Return the cost of this VPWidenCallRecipe.
1750 VPCostContext &Ctx) const override;
1751
1753 return cast<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue());
1754 }
1755
1757 return make_range(op_begin(), op_begin() + getNumOperands() - 1);
1758 }
1760 return make_range(op_begin(), op_begin() + getNumOperands() - 1);
1761 }
1762
1763#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1764 /// Print the recipe.
1765 void print(raw_ostream &O, const Twine &Indent,
1766 VPSlotTracker &SlotTracker) const override;
1767#endif
1768};
1769
1770/// A recipe representing a sequence of load -> update -> store as part of
1771/// a histogram operation. This means there may be aliasing between vector
1772/// lanes, which is handled by the llvm.experimental.vector.histogram family
1773/// of intrinsics. The only update operations currently supported are
1774/// 'add' and 'sub' where the other term is loop-invariant.
1776 /// Opcode of the update operation, currently either add or sub.
1777 unsigned Opcode;
1778
1779public:
1780 template <typename IterT>
1781 VPHistogramRecipe(unsigned Opcode, iterator_range<IterT> Operands,
1782 DebugLoc DL = {})
1783 : VPRecipeBase(VPDef::VPHistogramSC, Operands, DL), Opcode(Opcode) {}
1784
1785 ~VPHistogramRecipe() override = default;
1786
1788 return new VPHistogramRecipe(Opcode, operands(), getDebugLoc());
1789 }
1790
1791 VP_CLASSOF_IMPL(VPDef::VPHistogramSC);
1792
1793 /// Produce a vectorized histogram operation.
1794 void execute(VPTransformState &State) override;
1795
1796 /// Return the cost of this VPHistogramRecipe.
1798 VPCostContext &Ctx) const override;
1799
1800 unsigned getOpcode() const { return Opcode; }
1801
1802 /// Return the mask operand if one was provided, or a null pointer if all
1803 /// lanes should be executed unconditionally.
1804 VPValue *getMask() const {
1805 return getNumOperands() == 3 ? getOperand(2) : nullptr;
1806 }
1807
1808#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1809 /// Print the recipe
1810 void print(raw_ostream &O, const Twine &Indent,
1811 VPSlotTracker &SlotTracker) const override;
1812#endif
1813};
1814
1815/// A recipe for widening select instructions.
1817 template <typename IterT>
1819 : VPSingleDefRecipe(VPDef::VPWidenSelectSC, Operands, &I,
1820 I.getDebugLoc()) {}
1821
1822 ~VPWidenSelectRecipe() override = default;
1823
1825 return new VPWidenSelectRecipe(*cast<SelectInst>(getUnderlyingInstr()),
1826 operands());
1827 }
1828
1829 VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC)
1830
1831 /// Produce a widened version of the select instruction.
1832 void execute(VPTransformState &State) override;
1833
1834 /// Return the cost of this VPWidenSelectRecipe.
1836 VPCostContext &Ctx) const override;
1837
1838#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1839 /// Print the recipe.
1840 void print(raw_ostream &O, const Twine &Indent,
1841 VPSlotTracker &SlotTracker) const override;
1842#endif
1843
1844 VPValue *getCond() const {
1845 return getOperand(0);
1846 }
1847
1848 bool isInvariantCond() const {
1850 }
1851};
1852
1853/// A recipe for handling GEP instructions.
1855 bool isPointerLoopInvariant() const {
1857 }
1858
1859 bool isIndexLoopInvariant(unsigned I) const {
1861 }
1862
1863 bool areAllOperandsInvariant() const {
1864 return all_of(operands(), [](VPValue *Op) {
1865 return Op->isDefinedOutsideLoopRegions();
1866 });
1867 }
1868
1869public:
1870 template <typename IterT>
1872 : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, *GEP) {}
1873
1874 ~VPWidenGEPRecipe() override = default;
1875
1877 return new VPWidenGEPRecipe(cast<GetElementPtrInst>(getUnderlyingInstr()),
1878 operands());
1879 }
1880
1881 VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC)
1882
1883 /// Generate the gep nodes.
1884 void execute(VPTransformState &State) override;
1885
1886 /// Return the cost of this VPWidenGEPRecipe.
1888 VPCostContext &Ctx) const override {
1889 // TODO: Compute accurate cost after retiring the legacy cost model.
1890 return 0;
1891 }
1892
1893#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1894 /// Print the recipe.
1895 void print(raw_ostream &O, const Twine &Indent,
1896 VPSlotTracker &SlotTracker) const override;
1897#endif
1898};
1899
1900/// A recipe to compute the pointers for widened memory accesses of IndexTy
1901/// in reverse order.
1903 public VPUnrollPartAccessor<2> {
1904 Type *IndexedTy;
1905
1906public:
1909 : VPRecipeWithIRFlags(VPDef::VPReverseVectorPointerSC,
1910 ArrayRef<VPValue *>({Ptr, VF}), GEPFlags, DL),
1911 IndexedTy(IndexedTy) {}
1912
1913 VP_CLASSOF_IMPL(VPDef::VPReverseVectorPointerSC)
1914
1916 const VPValue *getVFValue() const { return getOperand(1); }
1917
1918 void execute(VPTransformState &State) override;
1919
1920 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1922 "Op must be an operand of the recipe");
1923 return true;
1924 }
1925
/// Return the cost of this VPReverseVectorPointerRecipe.
1928 VPCostContext &Ctx) const override {
1929 // TODO: Compute accurate cost after retiring the legacy cost model.
1930 return 0;
1931 }
1932
1933 /// Returns true if the recipe only uses the first part of operand \p Op.
1934 bool onlyFirstPartUsed(const VPValue *Op) const override {
1936 "Op must be an operand of the recipe");
1937 assert(getNumOperands() <= 2 && "must have at most two operands");
1938 return true;
1939 }
1940
1943 IndexedTy, getGEPNoWrapFlags(),
1944 getDebugLoc());
1945 }
1946
1947#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1948 /// Print the recipe.
1949 void print(raw_ostream &O, const Twine &Indent,
1950 VPSlotTracker &SlotTracker) const override;
1951#endif
1952};
1953
1954/// A recipe to compute the pointers for widened memory accesses of IndexTy.
1956 public VPUnrollPartAccessor<1> {
1957 Type *IndexedTy;
1958
1959public:
1961 DebugLoc DL)
1962 : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
1963 GEPFlags, DL),
1964 IndexedTy(IndexedTy) {}
1965
1966 VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
1967
1968 void execute(VPTransformState &State) override;
1969
1970 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1972 "Op must be an operand of the recipe");
1973 return true;
1974 }
1975
1976 /// Returns true if the recipe only uses the first part of operand \p Op.
1977 bool onlyFirstPartUsed(const VPValue *Op) const override {
1979 "Op must be an operand of the recipe");
1980 assert(getNumOperands() <= 2 && "must have at most two operands");
1981 return true;
1982 }
1983
1985 return new VPVectorPointerRecipe(getOperand(0), IndexedTy,
1987 }
1988
/// Return the cost of this VPVectorPointerRecipe.
1991 VPCostContext &Ctx) const override {
1992 // TODO: Compute accurate cost after retiring the legacy cost model.
1993 return 0;
1994 }
1995
1996#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1997 /// Print the recipe.
1998 void print(raw_ostream &O, const Twine &Indent,
1999 VPSlotTracker &SlotTracker) const override;
2000#endif
2001};
2002
2003/// A pure virtual base class for all recipes modeling header phis, including
2004/// phis for first order recurrences, pointer inductions and reductions. The
2005/// start value is the first operand of the recipe and the incoming value from
2006/// the backedge is the second operand.
2007///
2008/// Inductions are modeled using the following sub-classes:
2009/// * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
2010/// starting at a specified value (zero for the main vector loop, the resume
2011/// value for the epilogue vector loop) and stepping by 1. The induction
2012/// controls exiting of the vector loop by comparing against the vector trip
2013/// count. Produces a single scalar PHI for the induction value per
2014/// iteration.
2015/// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
2016/// floating point inductions with arbitrary start and step values. Produces
2017/// a vector PHI per-part.
2018/// * VPDerivedIVRecipe: Converts the canonical IV value to the corresponding
2019/// value of an IV with different start and step values. Produces a single
2020/// scalar value per iteration
2021/// * VPScalarIVStepsRecipe: Generates scalar values per-lane based on a
2022/// canonical or derived induction.
2023/// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
2024/// pointer induction. Produces either a vector PHI per-part or scalar values
2025/// per-lane based on the canonical induction.
2027protected:
/// Create a header phi recipe with ID \p VPDefID for \p UnderlyingInstr. The
/// recipe is created without operands; \p Start, when non-null, is then added
/// as its first operand.
2028 VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr,
2029 VPValue *Start = nullptr, DebugLoc DL = {})
2030 : VPSingleDefRecipe(VPDefID, ArrayRef<VPValue *>(), UnderlyingInstr, DL) {
2031 if (Start)
2032 addOperand(Start);
2033 }
2034
2035public:
2036 ~VPHeaderPHIRecipe() override = default;
2037
2038 /// Method to support type inquiry through isa, cast, and dyn_cast.
2039 static inline bool classof(const VPRecipeBase *B) {
2040 return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
2041 B->getVPDefID() <= VPDef::VPLastHeaderPHISC;
2042 }
/// Overload for VPValues: true only when \p V has a defining recipe whose ID
/// lies in the [VPFirstHeaderPHISC, VPLastHeaderPHISC] range.
2043 static inline bool classof(const VPValue *V) {
2044 auto *B = V->getDefiningRecipe();
2045 return B && B->getVPDefID() >= VPRecipeBase::VPFirstHeaderPHISC &&
2046 B->getVPDefID() <= VPRecipeBase::VPLastHeaderPHISC;
2047 }
2048
2049 /// Generate the phi nodes.
2050 void execute(VPTransformState &State) override = 0;
2051
2052 /// Return the cost of this header phi recipe.
2054 VPCostContext &Ctx) const override;
2055
2056#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2057 /// Print the recipe.
2058 void print(raw_ostream &O, const Twine &Indent,
2059 VPSlotTracker &SlotTracker) const override = 0;
2060#endif
2061
2062 /// Returns the start value of the phi, if one is set.
2064 return getNumOperands() == 0 ? nullptr : getOperand(0);
2065 }
2067 return getNumOperands() == 0 ? nullptr : getOperand(0);
2068 }
2069
2070 /// Update the start value of the recipe.
2072
2073 /// Returns the incoming value from the loop backedge.
2075 return getOperand(1);
2076 }
2077
2078 /// Returns the backedge value as a recipe. The backedge value is guaranteed
2079 /// to be a recipe.
2082 }
2083};
2084
2085/// Base class for widened induction (VPWidenIntOrFpInductionRecipe and
2086/// VPWidenPointerInductionRecipe), providing shared functionality, including
2087/// retrieving the step value, induction descriptor and original phi node.
2089 const InductionDescriptor &IndDesc;
2090
2091public:
/// Create a widened induction recipe of kind \p Kind for the phi \p IV.
/// \p Start is forwarded to VPHeaderPHIRecipe (which adds it as an operand
/// when non-null) and \p Step is appended after it. \p IndDesc is stored by
/// reference, not copied.
2092 VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start,
2093 VPValue *Step, const InductionDescriptor &IndDesc,
2094 DebugLoc DL)
2095 : VPHeaderPHIRecipe(Kind, IV, Start, DL), IndDesc(IndDesc) {
2096 addOperand(Step);
2097 }
2098
/// Method to support type inquiry through isa, cast, and dyn_cast. Matches
/// recipes with ID VPWidenIntOrFpInductionSC or VPWidenPointerInductionSC.
2099 static inline bool classof(const VPRecipeBase *R) {
2100 return R->getVPDefID() == VPDef::VPWidenIntOrFpInductionSC ||
2101 R->getVPDefID() == VPDef::VPWidenPointerInductionSC;
2102 }
2103
2104 virtual void execute(VPTransformState &State) override = 0;
2105
2106 /// Returns the step value of the induction.
2108 const VPValue *getStepValue() const { return getOperand(1); }
2109
/// Returns the underlying value of this recipe, cast to a PHINode.
2110 PHINode *getPHINode() const { return cast<PHINode>(getUnderlyingValue()); }
2111
2112 /// Returns the induction descriptor for the recipe.
2113 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
2114
2116 // TODO: All operands of base recipe must exist and be at same index in
2117 // derived recipe.
2119 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2120 }
2121
2123 // TODO: All operands of base recipe must exist and be at same index in
2124 // derived recipe.
2126 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2127 }
2128};
2129
2130/// A recipe for handling phi nodes of integer and floating-point inductions,
2131/// producing their vector values.
2133 TruncInst *Trunc;
2134
2135public:
2137 VPValue *VF, const InductionDescriptor &IndDesc,
2138 DebugLoc DL)
2139 : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
2140 Step, IndDesc, DL),
2141 Trunc(nullptr) {
2142 addOperand(VF);
2143 }
2144
2146 VPValue *VF, const InductionDescriptor &IndDesc,
2147 TruncInst *Trunc, DebugLoc DL)
2148 : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
2149 Step, IndDesc, DL),
2150 Trunc(Trunc) {
2151 addOperand(VF);
2152 }
2153
2155
2160 }
2161
2162 VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
2163
2164 /// Generate the vectorized and scalarized versions of the phi node as
2165 /// needed by their users.
2166 void execute(VPTransformState &State) override;
2167
2168#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2169 /// Print the recipe.
2170 void print(raw_ostream &O, const Twine &Indent,
2171 VPSlotTracker &SlotTracker) const override;
2172#endif
2173
2175 const VPValue *getVFValue() const { return getOperand(2); }
2176
2178 // If the recipe has been unrolled (5 operands), return the VPValue for the
2179 // induction increment.
2180 return getNumOperands() == 5 ? getOperand(3) : nullptr;
2181 }
2182
2183 /// Returns the first defined value as TruncInst, if it is one or nullptr
2184 /// otherwise.
2185 TruncInst *getTruncInst() { return Trunc; }
2186 const TruncInst *getTruncInst() const { return Trunc; }
2187
2188 /// Returns true if the induction is canonical, i.e. starting at 0 and
2189 /// incremented by UF * VF (= the original IV is incremented by 1) and has the
2190 /// same type as the canonical induction.
2191 bool isCanonical() const;
2192
2193 /// Returns the scalar type of the induction.
2195 return Trunc ? Trunc->getType() : getPHINode()->getType();
2196 }
2197
2198 /// Returns the VPValue representing the value of this induction at
2199 /// the last unrolled part, if it exists. Returns itself if unrolling did not
2200 /// take place.
2202 return getNumOperands() == 5 ? getOperand(4) : this;
2203 }
2204};
2205
2207 public VPUnrollPartAccessor<3> {
2208 bool IsScalarAfterVectorization;
2209
2210public:
2211 /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
2212 /// Start.
2214 const InductionDescriptor &IndDesc,
2215 bool IsScalarAfterVectorization, DebugLoc DL)
2216 : VPWidenInductionRecipe(VPDef::VPWidenPointerInductionSC, Phi, Start,
2217 Step, IndDesc, DL),
2218 IsScalarAfterVectorization(IsScalarAfterVectorization) {}
2219
2221
2224 cast<PHINode>(getUnderlyingInstr()), getOperand(0), getOperand(1),
2225 getInductionDescriptor(), IsScalarAfterVectorization, getDebugLoc());
2226 }
2227
2228 VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)
2229
2230 /// Generate vector values for the pointer induction.
2231 void execute(VPTransformState &State) override;
2232
2233 /// Returns true if only scalar values will be generated.
2234 bool onlyScalarsGenerated(bool IsScalable);
2235
2236 /// Returns the VPValue representing the value of this induction at
2237 /// the first unrolled part, if it exists. Returns itself if unrolling did not
2238 /// take place.
2240 return getUnrollPart(*this) == 0 ? this : getOperand(2);
2241 }
2242
2243#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2244 /// Print the recipe.
2245 void print(raw_ostream &O, const Twine &Indent,
2246 VPSlotTracker &SlotTracker) const override;
2247#endif
2248};
2249
2250/// Recipe to generate a scalar PHI. Used to generate code for recipes that
2251/// produce scalar header phis, including VPCanonicalIVPHIRecipe and
2252/// VPEVLBasedIVPHIRecipe.
2254 std::string Name;
2255
2256public:
/// Create a scalar phi recipe without an underlying instruction. \p Start is
/// forwarded to VPHeaderPHIRecipe (which adds it as an operand when non-null)
/// and \p BackedgeValue is appended after it. \p Name is copied into the
/// recipe.
2257 VPScalarPHIRecipe(VPValue *Start, VPValue *BackedgeValue, DebugLoc DL,
2258 StringRef Name)
2259 : VPHeaderPHIRecipe(VPDef::VPScalarPHISC, nullptr, Start, DL),
2260 Name(Name.str()) {
2261 addOperand(BackedgeValue);
2262 }
2263
2264 ~VPScalarPHIRecipe() override = default;
2265
2267 llvm_unreachable("cloning not implemented yet");
2268 }
2269
2270 VP_CLASSOF_IMPL(VPDef::VPScalarPHISC)
2271
2272 /// Generate the phi/select nodes.
2273 void execute(VPTransformState &State) override;
2274
2275 /// Returns true if the recipe only uses the first lane of operand \p Op.
2276 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2278 "Op must be an operand of the recipe");
2279 return true;
2280 }
2281
2282#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2283 /// Print the recipe.
2284 void print(raw_ostream &O, const Twine &Indent,
2285 VPSlotTracker &SlotTracker) const override;
2286#endif
2287};
2288
2289/// A recipe for handling phis that are widened in the vector loop.
2290/// In the VPlan native path, all incoming VPValues & VPBasicBlock pairs are
2291/// managed in the recipe directly.
2293 /// List of incoming blocks. Only used in the VPlan native path.
2294 SmallVector<VPBasicBlock *, 2> IncomingBlocks;
2295
2296public:
2297 /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start.
2298 VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr)
2299 : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef<VPValue *>(), Phi) {
2300 if (Start)
2301 addOperand(Start);
2302 }
2303
2305 llvm_unreachable("cloning not implemented yet");
2306 }
2307
2308 ~VPWidenPHIRecipe() override = default;
2309
2310 VP_CLASSOF_IMPL(VPDef::VPWidenPHISC)
2311
2312 /// Generate the phi/select nodes.
2313 void execute(VPTransformState &State) override;
2314
2315#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2316 /// Print the recipe.
2317 void print(raw_ostream &O, const Twine &Indent,
2318 VPSlotTracker &SlotTracker) const override;
2319#endif
2320
2321 /// Adds a pair (\p IncomingV, \p IncomingBlock) to the phi.
2322 void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock) {
2323 addOperand(IncomingV);
2324 IncomingBlocks.push_back(IncomingBlock);
2325 }
2326
2327 /// Returns the \p I th incoming VPBasicBlock.
2328 VPBasicBlock *getIncomingBlock(unsigned I) { return IncomingBlocks[I]; }
2329
2330 /// Returns the \p I th incoming VPValue.
2331 VPValue *getIncomingValue(unsigned I) { return getOperand(I); }
2332};
2333
2334/// A recipe for handling first-order recurrence phis. The start value is the
2335/// first operand of the recipe and the incoming value from the backedge is the
2336/// second operand.
2339 : VPHeaderPHIRecipe(VPDef::VPFirstOrderRecurrencePHISC, Phi, &Start) {}
2340
2341 VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC)
2342
2344 return R->getVPDefID() == VPDef::VPFirstOrderRecurrencePHISC;
2345 }
2346
2349 cast<PHINode>(getUnderlyingInstr()), *getOperand(0));
2350 }
2351
2352 void execute(VPTransformState &State) override;
2353
2354 /// Return the cost of this first-order recurrence phi recipe.
2356 VPCostContext &Ctx) const override;
2357
2358#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2359 /// Print the recipe.
2360 void print(raw_ostream &O, const Twine &Indent,
2361 VPSlotTracker &SlotTracker) const override;
2362#endif
2363};
2364
2365/// A recipe for handling reduction phis. The start value is the first operand
2366/// of the recipe and the incoming value from the backedge is the second
2367/// operand.
2369 public VPUnrollPartAccessor<2> {
2370 /// Descriptor for the reduction.
2371 const RecurrenceDescriptor &RdxDesc;
2372
2373 /// The phi is part of an in-loop reduction.
2374 bool IsInLoop;
2375
2376 /// The phi is part of an ordered reduction. Requires IsInLoop to be true.
2377 bool IsOrdered;
2378
2379public:
2380 /// Create a new VPReductionPHIRecipe for the reduction \p Phi described by \p
2381 /// RdxDesc.
2383 VPValue &Start, bool IsInLoop = false,
2384 bool IsOrdered = false)
2385 : VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start),
2386 RdxDesc(RdxDesc), IsInLoop(IsInLoop), IsOrdered(IsOrdered) {
2387 assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
2388 }
2389
2390 ~VPReductionPHIRecipe() override = default;
2391
2393 auto *R =
2394 new VPReductionPHIRecipe(cast<PHINode>(getUnderlyingInstr()), RdxDesc,
2395 *getOperand(0), IsInLoop, IsOrdered);
2396 R->addOperand(getBackedgeValue());
2397 return R;
2398 }
2399
2400 VP_CLASSOF_IMPL(VPDef::VPReductionPHISC)
2401
2403 return R->getVPDefID() == VPDef::VPReductionPHISC;
2404 }
2405
2406 /// Generate the phi/select nodes.
2407 void execute(VPTransformState &State) override;
2408
2409#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2410 /// Print the recipe.
2411 void print(raw_ostream &O, const Twine &Indent,
2412 VPSlotTracker &SlotTracker) const override;
2413#endif
2414
2416 return RdxDesc;
2417 }
2418
2419 /// Returns true, if the phi is part of an ordered reduction.
2420 bool isOrdered() const { return IsOrdered; }
2421
2422 /// Returns true, if the phi is part of an in-loop reduction.
2423 bool isInLoop() const { return IsInLoop; }
2424};
2425
2426/// A recipe for vectorizing a phi-node as a sequence of mask-based select
2427/// instructions.
2429public:
2430 /// The blend operation is a User of the incoming values and of their
2431 /// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
2432 /// be omitted (implied by passing an odd number of operands) in which case
2433 /// all other incoming values are merged into it.
2435 : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, Phi->getDebugLoc()) {
2436 assert(Operands.size() > 0 && "Expected at least one operand!");
2437 }
2438
2439 VPBlendRecipe *clone() override {
2441 return new VPBlendRecipe(cast<PHINode>(getUnderlyingValue()), Ops);
2442 }
2443
2444 VP_CLASSOF_IMPL(VPDef::VPBlendSC)
2445
2446 /// A normalized blend is one that has an odd number of operands, whereby the
2447 /// first operand does not have an associated mask.
2448 bool isNormalized() const { return getNumOperands() % 2; }
2449
2450 /// Return the number of incoming values, taking into account when normalized
2451 /// the first incoming value will have no mask.
2452 unsigned getNumIncomingValues() const {
2453 return (getNumOperands() + isNormalized()) / 2;
2454 }
2455
2456 /// Return incoming value number \p Idx.
2457 VPValue *getIncomingValue(unsigned Idx) const {
2458 return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - isNormalized());
2459 }
2460
2461 /// Return mask number \p Idx. A normalized blend has no mask for its first
/// incoming value, so \p Idx must then be greater than 0 (asserted).
2462 VPValue *getMask(unsigned Idx) const {
2463 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2464 return Idx == 0 ? getOperand(1) : getOperand(Idx * 2 + !isNormalized());
2465 }
2466
2467 /// Generate the phi/select nodes.
2468 void execute(VPTransformState &State) override;
2469
2470 /// Return the cost of this VPBlendRecipe.
2472 VPCostContext &Ctx) const override;
2473
2474#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2475 /// Print the recipe.
2476 void print(raw_ostream &O, const Twine &Indent,
2477 VPSlotTracker &SlotTracker) const override;
2478#endif
2479
2480 /// Returns true if the recipe only uses the first lane of operand \p Op.
2481 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2483 "Op must be an operand of the recipe");
2484 // Recursing through Blend recipes only; this must terminate at header phis
2485 // at the latest.
2486 return all_of(users(),
2487 [this](VPUser *U) { return U->onlyFirstLaneUsed(this); });
2488 }
2489};
2490
2491/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
2492/// or stores into one wide load/store and shuffles. The first operand of a
2493/// VPInterleave recipe is the address, followed by the stored values, followed
2494/// by an optional mask.
2497
2498 /// Indicates if the interleave group is in a conditional block and requires a
2499 /// mask.
2500 bool HasMask = false;
2501
2502 /// Indicates if gaps between members of the group need to be masked out or if
2503 /// unused gaps can be loaded speculatively.
2504 bool NeedsMaskForGaps = false;
2505
2506public:
2508 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2509 bool NeedsMaskForGaps)
2510 : VPRecipeBase(VPDef::VPInterleaveSC, {Addr}), IG(IG),
2511 NeedsMaskForGaps(NeedsMaskForGaps) {
2512 for (unsigned i = 0; i < IG->getFactor(); ++i)
2513 if (Instruction *I = IG->getMember(i)) {
2514 if (I->getType()->isVoidTy())
2515 continue;
2516 new VPValue(I, this);
2517 }
2518
2519 for (auto *SV : StoredValues)
2520 addOperand(SV);
2521 if (Mask) {
2522 HasMask = true;
2523 addOperand(Mask);
2524 }
2525 }
2526 ~VPInterleaveRecipe() override = default;
2527
2529 return new VPInterleaveRecipe(IG, getAddr(), getStoredValues(), getMask(),
2530 NeedsMaskForGaps);
2531 }
2532
2533 VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)
2534
2535 /// Return the address accessed by this recipe.
2536 VPValue *getAddr() const {
2537 return getOperand(0); // Address is the 1st, mandatory operand.
2538 }
2539
2540 /// Return the mask used by this recipe. Note that a full mask is represented
2541 /// by a nullptr.
2542 VPValue *getMask() const {
2543 // Mask is optional; when present it is always the last operand.
2544 return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
2545 }
2546
2547 /// Return the VPValues stored by this interleave group. If it is a load
2548 /// interleave group, return an empty ArrayRef.
2550 // The first operand is the address, followed by the stored values, followed
2551 // by an optional mask.
2554 }
2555
2556 /// Generate the wide load or store, and shuffles.
2557 void execute(VPTransformState &State) override;
2558
2559 /// Return the cost of this VPInterleaveRecipe.
2561 VPCostContext &Ctx) const override;
2562
2563#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2564 /// Print the recipe.
2565 void print(raw_ostream &O, const Twine &Indent,
2566 VPSlotTracker &SlotTracker) const override;
2567#endif
2568
2570
2571 /// Returns the number of stored operands of this interleave group. Returns 0
2572 /// for load interleave groups.
2573 unsigned getNumStoreOperands() const {
2574 return getNumOperands() - (HasMask ? 2 : 1);
2575 }
2576
2577 /// The recipe only uses the first lane of the address.
2578 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2580 "Op must be an operand of the recipe");
2581 return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
2582 }
2583
/// Returns the insert position of the interleave group, as reported by
/// IG->getInsertPos().
2584 Instruction *getInsertPos() const { return IG->getInsertPos(); }
2585};
2586
2587/// A recipe to represent inloop reduction operations, performing a reduction on
2588/// a vector operand into a scalar value, and adding the result to a chain.
2589/// The Operands are {ChainOp, VecOp, [Condition]}.
2591 /// The recurrence descriptor for the reduction in question.
2592 const RecurrenceDescriptor &RdxDesc;
2593 bool IsOrdered;
2594 /// Whether the reduction is conditional.
2595 bool IsConditional = false;
2596
2597protected:
2598 VPReductionRecipe(const unsigned char SC, const RecurrenceDescriptor &R,
2600 VPValue *CondOp, bool IsOrdered, DebugLoc DL)
2601 : VPSingleDefRecipe(SC, Operands, I, DL), RdxDesc(R),
2602 IsOrdered(IsOrdered) {
2603 if (CondOp) {
2604 IsConditional = true;
2605 addOperand(CondOp);
2606 }
2607 }
2608
2609public:
2611 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2612 bool IsOrdered, DebugLoc DL = {})
2613 : VPReductionRecipe(VPDef::VPReductionSC, R, I,
2614 ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
2615 IsOrdered, DL) {}
2616
2617 ~VPReductionRecipe() override = default;
2618
2620 return new VPReductionRecipe(RdxDesc, getUnderlyingInstr(), getChainOp(),
2621 getVecOp(), getCondOp(), IsOrdered,
2622 getDebugLoc());
2623 }
2624
/// Method to support type inquiry through isa, cast, and dyn_cast. Matches
/// recipes with ID VPReductionSC or VPReductionEVLSC.
2625 static inline bool classof(const VPRecipeBase *R) {
2626 return R->getVPDefID() == VPRecipeBase::VPReductionSC ||
2627 R->getVPDefID() == VPRecipeBase::VPReductionEVLSC;
2628 }
2629
/// Overload for VPUsers: true only when \p U is a VPRecipeBase that the
/// VPRecipeBase overload of classof accepts.
2630 static inline bool classof(const VPUser *U) {
2631 auto *R = dyn_cast<VPRecipeBase>(U);
2632 return R && classof(R);
2633 }
2634
2635 /// Generate the reduction in the loop
2636 void execute(VPTransformState &State) override;
2637
2638 /// Return the cost of VPReductionRecipe.
2640 VPCostContext &Ctx) const override;
2641
2642#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2643 /// Print the recipe.
2644 void print(raw_ostream &O, const Twine &Indent,
2645 VPSlotTracker &SlotTracker) const override;
2646#endif
2647
2648 /// Return the recurrence descriptor for the in-loop reduction.
2650 return RdxDesc;
2651 }
2652 /// Return true if the in-loop reduction is ordered.
2653 bool isOrdered() const { return IsOrdered; };
2654 /// Return true if the in-loop reduction is conditional.
2655 bool isConditional() const { return IsConditional; };
2656 /// The VPValue of the scalar Chain being accumulated.
2657 VPValue *getChainOp() const { return getOperand(0); }
2658 /// The VPValue of the vector value to be reduced.
2659 VPValue *getVecOp() const { return getOperand(1); }
2660 /// The VPValue of the condition for the block.
2662 return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
2663 }
2664};
2665
2666/// A recipe to represent inloop reduction operations with vector-predication
2667/// intrinsics, performing a reduction on a vector operand with the explicit
2668/// vector length (EVL) into a scalar value, and adding the result to a chain.
2669/// The Operands are {ChainOp, VecOp, EVL, [Condition]}.
2671public:
2674 VPDef::VPReductionEVLSC, R.getRecurrenceDescriptor(),
2676 ArrayRef<VPValue *>({R.getChainOp(), R.getVecOp(), &EVL}), CondOp,
2677 R.isOrdered(), R.getDebugLoc()) {}
2678
2679 ~VPReductionEVLRecipe() override = default;
2680
2682 llvm_unreachable("cloning not implemented yet");
2683 }
2684
2685 VP_CLASSOF_IMPL(VPDef::VPReductionEVLSC)
2686
2687 /// Generate the reduction in the loop
2688 void execute(VPTransformState &State) override;
2689
2690#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2691 /// Print the recipe.
2692 void print(raw_ostream &O, const Twine &Indent,
2693 VPSlotTracker &SlotTracker) const override;
2694#endif
2695
2696 /// The VPValue of the explicit vector length.
2697 VPValue *getEVL() const { return getOperand(2); }
2698
2699 /// Returns true if the recipe only uses the first lane of operand \p Op.
2700 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2702 "Op must be an operand of the recipe");
2703 return Op == getEVL();
2704 }
2705};
2706
2707/// VPReplicateRecipe replicates a given instruction producing multiple scalar
2708/// copies of the original scalar type, one per lane, instead of producing a
2709/// single copy of widened type for all lanes. If the instruction is known to be
2710/// uniform only one copy, per lane zero, will be generated.
2712 /// Indicator if only a single replica per lane is needed.
2713 bool IsUniform;
2714
2715 /// Indicator if the replicas are also predicated.
2716 bool IsPredicated;
2717
2718public:
2719 template <typename IterT>
2721 bool IsUniform, VPValue *Mask = nullptr)
2722 : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, *I),
2723 IsUniform(IsUniform), IsPredicated(Mask) {
2724 if (Mask)
2725 addOperand(Mask);
2726 }
2727
2728 ~VPReplicateRecipe() override = default;
2729
2731 auto *Copy =
2732 new VPReplicateRecipe(getUnderlyingInstr(), operands(), IsUniform,
2733 isPredicated() ? getMask() : nullptr);
2734 Copy->transferFlags(*this);
2735 return Copy;
2736 }
2737
2738 VP_CLASSOF_IMPL(VPDef::VPReplicateSC)
2739
2740 /// Generate replicas of the desired Ingredient. Replicas will be generated
2741 /// for all parts and lanes unless a specific part and lane are specified in
2742 /// the \p State.
2743 void execute(VPTransformState &State) override;
2744
2745 /// Return the cost of this VPReplicateRecipe.
2747 VPCostContext &Ctx) const override;
2748
2749#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2750 /// Print the recipe.
2751 void print(raw_ostream &O, const Twine &Indent,
2752 VPSlotTracker &SlotTracker) const override;
2753#endif
2754
/// Returns the IsUniform flag this recipe was created with.
2755 bool isUniform() const { return IsUniform; }
2756
/// Returns true if the recipe is predicated, i.e. it was created with a
/// non-null mask.
2757 bool isPredicated() const { return IsPredicated; }
2758
2759 /// Returns true if the recipe only uses the first lane of operand \p Op.
2760 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2762 "Op must be an operand of the recipe");
2763 return isUniform();
2764 }
2765
2766 /// Returns true if the recipe uses scalars of operand \p Op.
2767 bool usesScalars(const VPValue *Op) const override {
2769 "Op must be an operand of the recipe");
2770 return true;
2771 }
2772
2773 /// Returns true if the recipe is used by a widened recipe via an intervening
2774 /// VPPredInstPHIRecipe. In this case, the scalar values should also be packed
2775 /// in a vector.
2776 bool shouldPack() const;
2777
2778 /// Return the mask of a predicated VPReplicateRecipe.
2780 assert(isPredicated() && "Trying to get the mask of a unpredicated recipe");
2781 return getOperand(getNumOperands() - 1);
2782 }
2783
/// Returns the opcode of the underlying instruction.
2784 unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); }
2785};
2786
2787/// A recipe for generating conditional branches on the bits of a mask.
2789public:
2791 : VPRecipeBase(VPDef::VPBranchOnMaskSC, {}) {
2792 if (BlockInMask) // nullptr means all-one mask.
2793 addOperand(BlockInMask);
2794 }
2795
2797 return new VPBranchOnMaskRecipe(getOperand(0));
2798 }
2799
2800 VP_CLASSOF_IMPL(VPDef::VPBranchOnMaskSC)
2801
2802 /// Generate the extraction of the appropriate bit from the block mask and the
2803 /// conditional branch.
2804 void execute(VPTransformState &State) override;
2805
2806 /// Return the cost of this VPBranchOnMaskRecipe.
2808 VPCostContext &Ctx) const override;
2809
2810#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2811 /// Print the recipe.
2812 void print(raw_ostream &O, const Twine &Indent,
2813 VPSlotTracker &SlotTracker) const override {
2814 O << Indent << "BRANCH-ON-MASK ";
2815 if (VPValue *Mask = getMask())
2816 Mask->printAsOperand(O, SlotTracker);
2817 else
2818 O << " All-One";
2819 }
2820#endif
2821
2822 /// Return the mask used by this recipe. Note that a full mask is represented
2823 /// by a nullptr.
2824 VPValue *getMask() const {
2825 assert(getNumOperands() <= 1 && "should have either 0 or 1 operands");
2826 // Mask is optional.
2827 return getNumOperands() == 1 ? getOperand(0) : nullptr;
2828 }
2829
2830 /// Returns true if the recipe uses scalars of operand \p Op.
2831 bool usesScalars(const VPValue *Op) const override {
2833 "Op must be an operand of the recipe");
2834 return true;
2835 }
2836};
2837
2838/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
2839/// control converges back from a Branch-on-Mask. The phi nodes are needed in
2840/// order to merge values that are set under such a branch and feed their uses.
2841/// The phi nodes can be scalar or vector depending on the users of the value.
2842/// This recipe works in concert with VPBranchOnMaskRecipe.
2844public:
2845 /// Construct a VPPredInstPHIRecipe given \p PredV whose value needs phi
2846 /// nodes after merging back from a Branch-on-Mask.
2848 : VPSingleDefRecipe(VPDef::VPPredInstPHISC, PredV, DL) {}
2849 ~VPPredInstPHIRecipe() override = default;
2850
2852 return new VPPredInstPHIRecipe(getOperand(0), getDebugLoc());
2853 }
2854
2855 VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC)
2856
2857 /// Generates phi nodes for live-outs (from a replicate region) as needed to
2858 /// retain SSA form.
2859 void execute(VPTransformState &State) override;
2860
2861 /// Return the cost of this VPPredInstPHIRecipe.
2863 VPCostContext &Ctx) const override {
2864 // TODO: Compute accurate cost after retiring the legacy cost model.
2865 return 0;
2866 }
2867
2868#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2869 /// Print the recipe.
2870 void print(raw_ostream &O, const Twine &Indent,
2871 VPSlotTracker &SlotTracker) const override;
2872#endif
2873
2874 /// Returns true if the recipe uses scalars of operand \p Op.
2875 bool usesScalars(const VPValue *Op) const override {
2877 "Op must be an operand of the recipe");
2878 return true;
2879 }
2880};
2881
2882/// A common base class for widening memory operations. An optional mask can be
2883/// provided as the last operand.
2885protected:
2887
2888 /// Whether the accessed addresses are consecutive.
2890
2891 /// Whether the consecutive accessed addresses are in reverse order.
2893
2894 /// Whether the memory access is masked.
2895 bool IsMasked = false;
2896
/// Record \p Mask as the recipe's mask by appending it as the last operand
/// and setting IsMasked. A null \p Mask leaves the recipe unmasked. Must not
/// be called once a mask has been set (asserted).
2897 void setMask(VPValue *Mask) {
2898 assert(!IsMasked && "cannot re-set mask");
2899 if (!Mask)
2900 return;
2901 addOperand(Mask);
2902 IsMasked = true;
2903 }
2904
2905 VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
2906 std::initializer_list<VPValue *> Operands,
2907 bool Consecutive, bool Reverse, DebugLoc DL)
2909 Reverse(Reverse) {
2910 assert((Consecutive || !Reverse) && "Reverse implies consecutive");
2911 }
2912
2913public:
2915 llvm_unreachable("cloning not supported");
2916 }
2917
/// Method to support type inquiry through isa, cast, and dyn_cast. Matches
/// recipes with ID VPWidenLoadSC, VPWidenStoreSC, VPWidenLoadEVLSC or
/// VPWidenStoreEVLSC.
2918 static inline bool classof(const VPRecipeBase *R) {
2919 return R->getVPDefID() == VPRecipeBase::VPWidenLoadSC ||
2920 R->getVPDefID() == VPRecipeBase::VPWidenStoreSC ||
2921 R->getVPDefID() == VPRecipeBase::VPWidenLoadEVLSC ||
2922 R->getVPDefID() == VPRecipeBase::VPWidenStoreEVLSC;
2923 }
2924
/// Overload for VPUsers: true only when \p U is a VPRecipeBase that the
/// VPRecipeBase overload of classof accepts.
2925 static inline bool classof(const VPUser *U) {
2926 auto *R = dyn_cast<VPRecipeBase>(U);
2927 return R && classof(R);
2928 }
2929
2930 /// Return whether the loaded-from / stored-to addresses are consecutive.
2931 bool isConsecutive() const { return Consecutive; }
2932
2933 /// Return whether the consecutive loaded/stored addresses are in reverse
2934 /// order.
2935 bool isReverse() const { return Reverse; }
2936
2937 /// Return the address accessed by this recipe.
2938 VPValue *getAddr() const { return getOperand(0); }
2939
2940 /// Returns true if the recipe is masked.
2941 bool isMasked() const { return IsMasked; }
2942
2943 /// Return the mask used by this recipe. Note that a full mask is represented
2944 /// by a nullptr.
2945 VPValue *getMask() const {
2946 // Mask is optional and therefore the last operand.
2947 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
2948 }
2949
2950 /// Generate the wide load/store.
2951 void execute(VPTransformState &State) override {
2952 llvm_unreachable("VPWidenMemoryRecipe should not be instantiated.");
2953 }
2954
2955 /// Return the cost of this VPWidenMemoryRecipe.
2957 VPCostContext &Ctx) const override;
2958
2960};
2961
2962/// A recipe for widening load operations, using the address to load from and an
2963/// optional mask.
2964struct VPWidenLoadRecipe final : public VPWidenMemoryRecipe, public VPValue {
2966 bool Consecutive, bool Reverse, DebugLoc DL)
2967 : VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
2968 Reverse, DL),
2969 VPValue(this, &Load) {
2970 setMask(Mask);
2971 }
2972
2974 return new VPWidenLoadRecipe(cast<LoadInst>(Ingredient), getAddr(),
2976 getDebugLoc());
2977 }
2978
2979 VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC);
2980
2981 /// Generate a wide load or gather.
2982 void execute(VPTransformState &State) override;
2983
2984#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2985 /// Print the recipe.
2986 void print(raw_ostream &O, const Twine &Indent,
2987 VPSlotTracker &SlotTracker) const override;
2988#endif
2989
2990 /// Returns true if the recipe only uses the first lane of operand \p Op.
2991 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2993 "Op must be an operand of the recipe");
2994 // Widened, consecutive load operations only demand the first lane of
2995 // their address.
2996 return Op == getAddr() && isConsecutive();
2997 }
2998};
2999
3000/// A recipe for widening load operations with vector-predication intrinsics,
3001/// using the address to load from, the explicit vector length and an optional
3002/// mask.
3003struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
3005 : VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L.getIngredient(),
3006 {L.getAddr(), &EVL}, L.isConsecutive(),
3007 L.isReverse(), L.getDebugLoc()),
3008 VPValue(this, &getIngredient()) {
3009 setMask(Mask);
3010 }
3011
3012 VP_CLASSOF_IMPL(VPDef::VPWidenLoadEVLSC)
3013
3014 /// Return the EVL operand.
3015 VPValue *getEVL() const { return getOperand(1); }
3016
3017 /// Generate the wide load or gather.
3018 void execute(VPTransformState &State) override;
3019
3020 /// Return the cost of this VPWidenLoadEVLRecipe.
3022 VPCostContext &Ctx) const override;
3023
3024#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3025 /// Print the recipe.
3026 void print(raw_ostream &O, const Twine &Indent,
3027 VPSlotTracker &SlotTracker) const override;
3028#endif
3029
3030 /// Returns true if the recipe only uses the first lane of operand \p Op.
3031 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3033 "Op must be an operand of the recipe");
3034 // Widened loads only demand the first lane of EVL and consecutive loads
3035 // only demand the first lane of their address.
3036 return Op == getEVL() || (Op == getAddr() && isConsecutive());
3037 }
3038};
3039
3040/// A recipe for widening store operations, using the stored value, the address
3041/// to store to and an optional mask.
3044 VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
3045 : VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal},
3047 setMask(Mask);
3048 }
3049
3051 return new VPWidenStoreRecipe(cast<StoreInst>(Ingredient), getAddr(),
3053 Reverse, getDebugLoc());
3054 }
3055
3056 VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);
3057
3058 /// Return the value stored by this recipe.
3059 VPValue *getStoredValue() const { return getOperand(1); }
3060
3061 /// Generate a wide store or scatter.
3062 void execute(VPTransformState &State) override;
3063
3064#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3065 /// Print the recipe.
3066 void print(raw_ostream &O, const Twine &Indent,
3067 VPSlotTracker &SlotTracker) const override;
3068#endif
3069
3070 /// Returns true if the recipe only uses the first lane of operand \p Op.
3071 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3073 "Op must be an operand of the recipe");
3074 // Widened, consecutive stores only demand the first lane of their address,
3075 // unless the same operand is also stored.
3076 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3077 }
3078};
3079
3080/// A recipe for widening store operations with vector-predication intrinsics,
3081/// using the value to store, the address to store to, the explicit vector
3082/// length and an optional mask.
3085 : VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S.getIngredient(),
3086 {S.getAddr(), S.getStoredValue(), &EVL},
3087 S.isConsecutive(), S.isReverse(), S.getDebugLoc()) {
3088 setMask(Mask);
3089 }
3090
3091 VP_CLASSOF_IMPL(VPDef::VPWidenStoreEVLSC)
3092
3093 /// Return the value stored by this recipe.
3094 VPValue *getStoredValue() const { return getOperand(1); }
3095
3096 /// Return the EVL operand.
3097 VPValue *getEVL() const { return getOperand(2); }
3098
3099 /// Generate the wide store or scatter.
3100 void execute(VPTransformState &State) override;
3101
3102 /// Return the cost of this VPWidenStoreEVLRecipe.
3104 VPCostContext &Ctx) const override;
3105
3106#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3107 /// Print the recipe.
3108 void print(raw_ostream &O, const Twine &Indent,
3109 VPSlotTracker &SlotTracker) const override;
3110#endif
3111
3112 /// Returns true if the recipe only uses the first lane of operand \p Op.
3113 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3115 "Op must be an operand of the recipe");
3116 if (Op == getEVL()) {
3117 assert(getStoredValue() != Op && "unexpected store of EVL");
3118 return true;
3119 }
3120 // Widened, consecutive memory operations only demand the first lane of
3121 // their address, unless the same operand is also stored. The latter can
3122 // happen with opaque pointers.
3123 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3124 }
3125};
3126
3127/// Recipe to expand a SCEV expression.
3129 const SCEV *Expr;
3130 ScalarEvolution &SE;
3131
3132public:
3134 : VPSingleDefRecipe(VPDef::VPExpandSCEVSC, {}), Expr(Expr), SE(SE) {}
3135
3136 ~VPExpandSCEVRecipe() override = default;
3137
3139 return new VPExpandSCEVRecipe(Expr, SE);
3140 }
3141
3142 VP_CLASSOF_IMPL(VPDef::VPExpandSCEVSC)
3143
3144 /// Generate the IR that expands the SCEV expression of this recipe.
3145 void execute(VPTransformState &State) override;
3146
3147 /// Return the cost of this VPExpandSCEVRecipe.
3149 VPCostContext &Ctx) const override {
3150 // TODO: Compute accurate cost after retiring the legacy cost model.
3151 return 0;
3152 }
3153
3154#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3155 /// Print the recipe.
3156 void print(raw_ostream &O, const Twine &Indent,
3157 VPSlotTracker &SlotTracker) const override;
3158#endif
3159
/// Return the SCEV expression held by this recipe.
3160 const SCEV *getSCEV() const { return Expr; }
3161};
3162
3163 /// Canonical scalar induction phi of the vector loop, starting at the specified
3164/// start value (either 0 or the resume value when vectorizing the epilogue
3165/// loop). VPWidenCanonicalIVRecipe represents the vector version of the
3166/// canonical induction variable.
3168public:
3170 : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV, DL) {}
3171
3172 ~VPCanonicalIVPHIRecipe() override = default;
3173
3175 auto *R = new VPCanonicalIVPHIRecipe(getOperand(0), getDebugLoc());
3176 R->addOperand(getBackedgeValue());
3177 return R;
3178 }
3179
3180 VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC)
3181
3183 return D->getVPDefID() == VPDef::VPCanonicalIVPHISC;
3184 }
3185
3186 void execute(VPTransformState &State) override {
3188 "cannot execute this recipe, should be replaced by VPScalarPHIRecipe");
3189 }
3190
3191#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3192 /// Print the recipe.
3193 void print(raw_ostream &O, const Twine &Indent,
3194 VPSlotTracker &SlotTracker) const override;
3195#endif
3196
3197 /// Returns the scalar type of the induction.
3199 return getStartValue()->getLiveInIRValue()->getType();
3200 }
3201
3202 /// Returns true if the recipe only uses the first lane of operand \p Op.
3203 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3205 "Op must be an operand of the recipe");
3206 return true;
3207 }
3208
3209 /// Returns true if the recipe only uses the first part of operand \p Op.
3210 bool onlyFirstPartUsed(const VPValue *Op) const override {
3212 "Op must be an operand of the recipe");
3213 return true;
3214 }
3215
3216 /// Return the cost of this VPCanonicalIVPHIRecipe.
3218 VPCostContext &Ctx) const override {
3219 // For now, match the behavior of the legacy cost model.
3220 return 0;
3221 }
3222};
3223
3224/// A recipe for generating the active lane mask for the vector loop that is
3225/// used to predicate the vector operations.
3226/// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
3227/// remove VPActiveLaneMaskPHIRecipe.
3229public:
3231 : VPHeaderPHIRecipe(VPDef::VPActiveLaneMaskPHISC, nullptr, StartMask,
3232 DL) {}
3233
3234 ~VPActiveLaneMaskPHIRecipe() override = default;
3235
3238 if (getNumOperands() == 2)
3239 R->addOperand(getOperand(1));
3240 return R;
3241 }
3242
3243 VP_CLASSOF_IMPL(VPDef::VPActiveLaneMaskPHISC)
3244
3246 return D->getVPDefID() == VPDef::VPActiveLaneMaskPHISC;
3247 }
3248
3249 /// Generate the active lane mask phi of the vector loop.
3250 void execute(VPTransformState &State) override;
3251
3252#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3253 /// Print the recipe.
3254 void print(raw_ostream &O, const Twine &Indent,
3255 VPSlotTracker &SlotTracker) const override;
3256#endif
3257};
3258
3259/// A recipe for generating the phi node for the current index of elements,
3260/// adjusted in accordance with EVL value. It starts at the start value of the
3261/// canonical induction and gets incremented by EVL in each iteration of the
3262/// vector loop.
3264public:
3266 : VPHeaderPHIRecipe(VPDef::VPEVLBasedIVPHISC, nullptr, StartIV, DL) {}
3267
3268 ~VPEVLBasedIVPHIRecipe() override = default;
3269
3271 llvm_unreachable("cloning not implemented yet");
3272 }
3273
3274 VP_CLASSOF_IMPL(VPDef::VPEVLBasedIVPHISC)
3275
3277 return D->getVPDefID() == VPDef::VPEVLBasedIVPHISC;
3278 }
3279
3280 void execute(VPTransformState &State) override {
3282 "cannot execute this recipe, should be replaced by VPScalarPHIRecipe");
3283 }
3284
3285 /// Return the cost of this VPEVLBasedIVPHIRecipe.
3287 VPCostContext &Ctx) const override {
3288 // For now, match the behavior of the legacy cost model.
3289 return 0;
3290 }
3291
3292 /// Returns true if the recipe only uses the first lane of operand \p Op.
3293 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3295 "Op must be an operand of the recipe");
3296 return true;
3297 }
3298
3299#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3300 /// Print the recipe.
3301 void print(raw_ostream &O, const Twine &Indent,
3302 VPSlotTracker &SlotTracker) const override;
3303#endif
3304};
3305
3306/// A Recipe for widening the canonical induction variable of the vector loop.
3308 public VPUnrollPartAccessor<1> {
3309public:
3311 : VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}) {}
3312
3313 ~VPWidenCanonicalIVRecipe() override = default;
3314
3316 return new VPWidenCanonicalIVRecipe(
3317 cast<VPCanonicalIVPHIRecipe>(getOperand(0)));
3318 }
3319
3320 VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC)
3321
3322 /// Generate a canonical vector induction variable of the vector loop, with
3323 /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
3324 /// step = <VF*UF, VF*UF, ..., VF*UF>.
3325 void execute(VPTransformState &State) override;
3326
3327 /// Return the cost of this VPWidenCanonicalIVRecipe.
3329 VPCostContext &Ctx) const override {
3330 // TODO: Compute accurate cost after retiring the legacy cost model.
3331 return 0;
3332 }
3333
3334#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3335 /// Print the recipe.
3336 void print(raw_ostream &O, const Twine &Indent,
3337 VPSlotTracker &SlotTracker) const override;
3338#endif
3339};
3340
3341/// A recipe for converting the input value \p IV value to the corresponding
3342/// value of an IV with different start and step values, using Start + IV *
3343/// Step.
3345 /// Kind of the induction.
3347 /// If not nullptr, the floating point induction binary operator. Must be set
3348 /// for floating point inductions.
3349 const FPMathOperator *FPBinOp;
3350
3351 /// Name to use for the generated IR instruction for the derived IV.
3352 std::string Name;
3353
3354public:
3356 VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
3357 const Twine &Name = "")
3359 IndDesc.getKind(),
3360 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
3361 Start, CanonicalIV, Step, Name) {}
3362
3364 const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV,
3365 VPValue *Step, const Twine &Name = "")
3366 : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, IV, Step}), Kind(Kind),
3367 FPBinOp(FPBinOp), Name(Name.str()) {}
3368
3369 ~VPDerivedIVRecipe() override = default;
3370
3372 return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(1),
3373 getStepValue());
3374 }
3375
3376 VP_CLASSOF_IMPL(VPDef::VPDerivedIVSC)
3377
3378 /// Generate the transformed value of the induction at offset StartValue (1.
3379 /// operand) + IV (2. operand) * StepValue (3. operand).
3380 void execute(VPTransformState &State) override;
3381
3382 /// Return the cost of this VPDerivedIVRecipe.
3384 VPCostContext &Ctx) const override {
3385 // TODO: Compute accurate cost after retiring the legacy cost model.
3386 return 0;
3387 }
3388
3389#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3390 /// Print the recipe.
3391 void print(raw_ostream &O, const Twine &Indent,
3392 VPSlotTracker &SlotTracker) const override;
3393#endif
3394
3396 return getStartValue()->getLiveInIRValue()->getType();
3397 }
3398
/// Return the start value of the derived IV, i.e. operand 0.
3399 VPValue *getStartValue() const { return getOperand(0); }
/// Return the step value of the derived IV, i.e. operand 2.
3400 VPValue *getStepValue() const { return getOperand(2); }
3401
3402 /// Returns true if the recipe only uses the first lane of operand \p Op.
3403 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3405 "Op must be an operand of the recipe");
3406 return true;
3407 }
3408};
3409
3410/// A recipe for handling phi nodes of integer and floating-point inductions,
3411/// producing their scalar values.
3413 public VPUnrollPartAccessor<2> {
3414 Instruction::BinaryOps InductionOpcode;
3415
3416public:
3419 : VPRecipeWithIRFlags(VPDef::VPScalarIVStepsSC,
3420 ArrayRef<VPValue *>({IV, Step}), FMFs),
3421 InductionOpcode(Opcode) {}
3422
3424 VPValue *Step)
3426 IV, Step, IndDesc.getInductionOpcode(),
3427 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
3428 ? IndDesc.getInductionBinOp()->getFastMathFlags()
3429 : FastMathFlags()) {}
3430
3431 ~VPScalarIVStepsRecipe() override = default;
3432
3434 return new VPScalarIVStepsRecipe(
3435 getOperand(0), getOperand(1), InductionOpcode,
3437 }
3438
3439 VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)
3440
3441 /// Generate the scalarized versions of the phi node as needed by their users.
3442 void execute(VPTransformState &State) override;
3443
3444 /// Return the cost of this VPScalarIVStepsRecipe.
3446 VPCostContext &Ctx) const override {
3447 // TODO: Compute accurate cost after retiring the legacy cost model.
3448 return 0;
3449 }
3450
3451#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3452 /// Print the recipe.
3453 void print(raw_ostream &O, const Twine &Indent,
3454 VPSlotTracker &SlotTracker) const override;
3455#endif
3456
/// Return the step value, i.e. operand 1 (the constructor passes the operands
/// as {IV, Step}).
3457 VPValue *getStepValue() const { return getOperand(1); }
3458
3459 /// Returns true if the recipe only uses the first lane of operand \p Op.
3460 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3462 "Op must be an operand of the recipe");
3463 return true;
3464 }
3465};
3466
3467/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
3468/// holds a sequence of zero or more VPRecipe's each representing a sequence of
3469/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
3471public:
3473
3474protected:
3475 /// The VPRecipes held in the order of output instructions to generate.
3477
/// Construct an empty block with block ID \p BlockSC and name \p Name. This
/// overload lets a subclass pass its own block ID; VPIRBasicBlock uses it
/// with VPIRBasicBlockSC.
3478 VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
3479 : VPBlockBase(BlockSC, Name.str()) {}
3480
3481public:
/// Construct a VPBasicBlock named \p Name. If \p Recipe is non-null, it is
/// appended as the block's first recipe.
3482 VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
3483 : VPBlockBase(VPBasicBlockSC, Name.str()) {
3484 if (Recipe)
3485 appendRecipe(Recipe);
3486 }
3487
/// Empty the recipe list by popping recipes from the back until none remain.
/// NOTE(review): whether pop_back() also deletes the recipe depends on the
/// type of Recipes, whose declaration is not visible here -- confirm.
3488 ~VPBasicBlock() override {
3489 while (!Recipes.empty())
3490 Recipes.pop_back();
3491 }
3492
3493 /// Instruction iterators...
3498
3499 //===--------------------------------------------------------------------===//
3500 /// Recipe iterator methods
3501 ///
3502 inline iterator begin() { return Recipes.begin(); }
3503 inline const_iterator begin() const { return Recipes.begin(); }
3504 inline iterator end() { return Recipes.end(); }
3505 inline const_iterator end() const { return Recipes.end(); }
3506
3507 inline reverse_iterator rbegin() { return Recipes.rbegin(); }
3508 inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
3509 inline reverse_iterator rend() { return Recipes.rend(); }
3510 inline const_reverse_iterator rend() const { return Recipes.rend(); }
3511
3512 inline size_t size() const { return Recipes.size(); }
3513 inline bool empty() const { return Recipes.empty(); }
3514 inline const VPRecipeBase &front() const { return Recipes.front(); }
3515 inline VPRecipeBase &front() { return Recipes.front(); }
3516 inline const VPRecipeBase &back() const { return Recipes.back(); }
3517 inline VPRecipeBase &back() { return Recipes.back(); }
3518
3519 /// Returns a reference to the list of recipes.
3521
3522 /// Returns a pointer to a member of the recipe list.
3524 return &VPBasicBlock::Recipes;
3525 }
3526
3527 /// Method to support type inquiry through isa, cast, and dyn_cast.
3528 static inline bool classof(const VPBlockBase *V) {
3529 return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC ||
3530 V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
3531 }
3532
/// Insert \p Recipe into this block at position \p InsertPt and make this
/// block the recipe's parent. \p Recipe must be non-null and must not already
/// have a parent.
3533 void insert(VPRecipeBase *Recipe, iterator InsertPt) {
3534 assert(Recipe && "No recipe to append.");
3535 assert(!Recipe->Parent && "Recipe already in VPlan");
3536 Recipe->Parent = this;
3537 Recipes.insert(InsertPt, Recipe);
3538 }
3539
3540 /// Augment the existing recipes of a VPBasicBlock with an additional
3541 /// \p Recipe as the last recipe.
3542 void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
3543
3544 /// The method which generates the output IR instructions that correspond to
3545 /// this VPBasicBlock, thereby "executing" the VPlan.
3546 void execute(VPTransformState *State) override;
3547
3548 /// Return the cost of this VPBasicBlock.
3550
3551 /// Return the position of the first non-phi node recipe in the block.
3553
3554 /// Returns an iterator range over the PHI-like recipes in the block.
3556 return make_range(begin(), getFirstNonPhi());
3557 }
3558
3559 void dropAllReferences(VPValue *NewValue) override;
3560
3561 /// Split current block at \p SplitAt by inserting a new block between the
3562 /// current block and its successors and moving all recipes starting at
3563 /// SplitAt to the new block. Returns the new block.
3564 VPBasicBlock *splitAt(iterator SplitAt);
3565
3568
3569#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3570 /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
3571 /// SlotTracker is used to print unnamed VPValue's using consecutive numbers.
3572 ///
3573 /// Note that the numbering is applied to the whole VPlan, so printing
3574 /// individual blocks is consistent with the whole VPlan printing.
3575 void print(raw_ostream &O, const Twine &Indent,
3576 VPSlotTracker &SlotTracker) const override;
3577 using VPBlockBase::print; // Get the print(raw_stream &O) version.
3578#endif
3579
3580 /// If the block has multiple successors, return the branch recipe terminating
3581 /// the block. If there is no successor or only a single one, return nullptr.
3583 const VPRecipeBase *getTerminator() const;
3584
3585 /// Returns true if the block is exiting its parent region.
3586 bool isExiting() const;
3587
3588 /// Clone the current block and its recipes, without updating the operands of
3589 /// the cloned recipes.
3590 VPBasicBlock *clone() override {
// The new block reuses this block's name.
3591 auto *NewBlock = new VPBasicBlock(getName());
// Append a clone of every recipe, preserving the original order.
3592 for (VPRecipeBase &R : *this)
3593 NewBlock->appendRecipe(R.clone());
3594 return NewBlock;
3595 }
3596
3597protected:
3598 /// Execute the recipes in the IR basic block \p BB.
3599 void executeRecipes(VPTransformState *State, BasicBlock *BB);
3600
3601 /// Connect the VPBB's predecessors in the VPlan CFG to the IR basic block
3602 /// generated for this VPBB.
3604
3605private:
3606 /// Create an IR BasicBlock to hold the output instructions generated by this
3607 /// VPBasicBlock, and return it. Update the CFGState accordingly.
3608 BasicBlock *createEmptyBasicBlock(VPTransformState::CFGState &CFG);
3609};
3610
3611/// A special type of VPBasicBlock that wraps an existing IR basic block.
3612/// Recipes of the block get added before the first non-phi instruction in the
3613/// wrapped block.
3614/// Note: At the moment, VPIRBasicBlock can only be used to wrap VPlan's
3615/// preheader block.
3617 BasicBlock *IRBB;
3618
3619public:
3621 : VPBasicBlock(VPIRBasicBlockSC,
3622 (Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()),
3623 IRBB(IRBB) {}
3624
3625 ~VPIRBasicBlock() override {}
3626
/// Method to support type inquiry through isa, cast, and dyn_cast. Matches
/// only blocks whose block ID is VPIRBasicBlockSC.
3627 static inline bool classof(const VPBlockBase *V) {
3628 return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
3629 }
3630
3631 /// Create a VPIRBasicBlock from \p IRBB containing VPIRInstructions for all
3632 /// instructions in \p IRBB, except its terminator which is managed in VPlan.
3634
3635 /// The method which generates the output IR instructions that correspond to
3636 /// this VPBasicBlock, thereby "executing" the VPlan.
3637 void execute(VPTransformState *State) override;
3638
/// Clone this block: create a new VPIRBasicBlock wrapping the same IR basic
/// block and append a clone of each recipe, in order.
3639 VPIRBasicBlock *clone() override {
3640 auto *NewBlock = new VPIRBasicBlock(IRBB);
3641 for (VPRecipeBase &R : Recipes)
3642 NewBlock->appendRecipe(R.clone());
3643 return NewBlock;
3644 }
3645
/// Return the IR basic block wrapped by this VPIRBasicBlock.
3646 BasicBlock *getIRBasicBlock() const { return IRBB; }
3647};
3648
3649/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
3650/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
3651/// A VPRegionBlock may indicate that its contents are to be replicated several
3652/// times. This is designed to support predicated scalarization, in which a
3653/// scalar if-then code structure needs to be generated VF * UF times. Having
3654/// this replication indicator helps to keep a single model for multiple
3655/// candidate VF's. The actual replication takes place only once the desired VF
3656/// and UF have been determined.
3658 /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
3659 VPBlockBase *Entry;
3660
3661 /// Hold the Single Exiting block of the SESE region modelled by the
3662 /// VPRegionBlock.
3663 VPBlockBase *Exiting;
3664
3665 /// An indicator whether this region is to generate multiple replicated
3666 /// instances of output IR corresponding to its VPBlockBases.
3667 bool IsReplicator;
3668
3669public:
3671 const std::string &Name = "", bool IsReplicator = false)
3672 : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
3673 IsReplicator(IsReplicator) {
3674 assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
3675 assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
3676 Entry->setParent(this);
3677 Exiting->setParent(this);
3678 }
3679 VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
3680 : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
3681 IsReplicator(IsReplicator) {}
3682
/// Tear down the region's contents. Nothing is done when no entry block is
/// set. Otherwise dropAllReferences() is called on the entry with a temporary
/// dummy value before deleteCFG() is called on it.
/// NOTE(review): presumably the dummy value replaces remaining operand uses
/// so blocks can be deleted in any order -- confirm against
/// dropAllReferences() and deleteCFG().
3683 ~VPRegionBlock() override {
3684 if (Entry) {
3685 VPValue DummyValue;
3686 Entry->dropAllReferences(&DummyValue);
3687 deleteCFG(Entry);
3688 }
3689 }
3690
3691 /// Method to support type inquiry through isa, cast, and dyn_cast.
3692 static inline bool classof(const VPBlockBase *V) {
3693 return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
3694 }
3695
3696 const VPBlockBase *getEntry() const { return Entry; }
3697 VPBlockBase *getEntry() { return Entry; }
3698
3699 /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
3700 /// EntryBlock must have no predecessors.
3701 void setEntry(VPBlockBase *EntryBlock) {
3702 assert(EntryBlock->getPredecessors().empty() &&
3703 "Entry block cannot have predecessors.");
3704 Entry = EntryBlock;
3705 EntryBlock->setParent(this);
3706 }
3707
3708 const VPBlockBase *getExiting() const { return Exiting; }
3709 VPBlockBase *getExiting() { return Exiting; }
3710
3711 /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
3712 /// ExitingBlock must have no successors.
3713 void setExiting(VPBlockBase *ExitingBlock) {
3714 assert(ExitingBlock->getSuccessors().empty() &&
3715 "Exit block cannot have successors.");
3716 Exiting = ExitingBlock;
3717 ExitingBlock->setParent(this);
3718 }
3719
3720 /// Returns the pre-header VPBasicBlock of the loop region.
3722 assert(!isReplicator() && "should only get pre-header of loop regions");
3724 }
3725
3726 /// An indicator whether this region is to generate multiple replicated
3727 /// instances of output IR corresponding to its VPBlockBases.
3728 bool isReplicator() const { return IsReplicator; }
3729
3730 /// The method which generates the output IR instructions that correspond to
3731 /// this VPRegionBlock, thereby "executing" the VPlan.
3732 void execute(VPTransformState *State) override;
3733
3734 /// Return the cost of this region.
3736
3737 void dropAllReferences(VPValue *NewValue) override;
3738
3739#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3740 /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
3741 /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
3742 /// consecutive numbers.
3743 ///
3744 /// Note that the numbering is applied to the whole VPlan, so printing
3745 /// individual regions is consistent with the whole VPlan printing.
3746 void print(raw_ostream &O, const Twine &Indent,
3747 VPSlotTracker &SlotTracker) const override;
3748 using VPBlockBase::print; // Get the print(raw_stream &O) version.
3749#endif
3750
3751 /// Clone all blocks in the single-entry single-exit region of the block and
3752 /// their recipes without updating the operands of the cloned recipes.
3753 VPRegionBlock *clone() override;
3754};
3755
3756 /// VPlan models a candidate for vectorization, encoding various decisions taken
3757/// to produce efficient output IR, including which branches, basic-blocks and
3758/// output IR instructions to generate, and their cost. VPlan holds a
3759/// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
3760/// VPBasicBlock.
3761class VPlan {
3762 friend class VPlanPrinter;
3763 friend class VPSlotTracker;
3764
3765 /// VPBasicBlock corresponding to the original preheader. Used to place
3766 /// VPExpandSCEV recipes for expressions used during skeleton creation and the
3767 /// rest of VPlan execution.
3768 /// When this VPlan is used for the epilogue vector loop, the entry will be
3769 /// replaced by a new entry block created during skeleton creation.
3770 VPBasicBlock *Entry;
3771
3772 /// VPIRBasicBlock wrapping the header of the original scalar loop.
3773 VPIRBasicBlock *ScalarHeader;
3774
3775 /// Holds the VFs applicable to this VPlan.
3777
3778 /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
3779 /// any UF.
3781
3782 /// Holds the name of the VPlan, for printing.
3783 std::string Name;
3784
3785 /// Represents the trip count of the original loop, for folding
3786 /// the tail.
3787 VPValue *TripCount = nullptr;
3788
3789 /// Represents the backedge taken count of the original loop, for folding
3790 /// the tail. It equals TripCount - 1.
3791 VPValue *BackedgeTakenCount = nullptr;
3792
3793 /// Represents the vector trip count.
3794 VPValue VectorTripCount;
3795
3796 /// Represents the vectorization factor of the loop.
3797 VPValue VF;
3798
3799 /// Represents the loop-invariant VF * UF of the vector loop region.
3800 VPValue VFxUF;
3801
3802 /// Holds a mapping between Values and their corresponding VPValue inside
3803 /// VPlan.
3804 Value2VPValueTy Value2VPValue;
3805
3806 /// Contains all the external definitions created for this VPlan. External
3807 /// definitions are VPValues that hold a pointer to their underlying IR.
3808 SmallVector<VPValue *, 16> VPLiveInsToFree;
3809
3810 /// Mapping from SCEVs to the VPValues representing their expansions.
3811 /// NOTE: This mapping is temporary and will be removed once all users have
3812 /// been modeled in VPlan directly.
3813 DenseMap<const SCEV *, VPValue *> SCEVToExpansion;
3814
3815 /// Construct a VPlan with \p Entry to the plan and with \p ScalarHeader
3816 /// wrapping the original header of the scalar loop.
3817 VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader)
3818 : Entry(Entry), ScalarHeader(ScalarHeader) {
3819 Entry->setPlan(this);
3820 assert(ScalarHeader->getNumSuccessors() == 0 &&
3821 "scalar header must be a leaf node");
3822 }
3823
3824public:
3825 /// Construct a VPlan for \p L. This will create VPIRBasicBlocks wrapping the
3826 /// original preheader and scalar header of \p L, to be used as entry and
3827 /// scalar header blocks of the new VPlan.
3828 VPlan(Loop *L);
3829
3830 /// Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock
3831 /// wrapping \p ScalarHeaderBB and a trip count of \p TC.
3832 VPlan(BasicBlock *ScalarHeaderBB, VPValue *TC) {
3833 setEntry(new VPBasicBlock("preheader"));
3834 ScalarHeader = VPIRBasicBlock::fromBasicBlock(ScalarHeaderBB);
3835 TripCount = TC;
3836 }
3837
3838 ~VPlan();
3839
3841 Entry = VPBB;
3842 VPBB->setPlan(this);
3843 }
3844
3845 /// Create initial VPlan, having an "entry" VPBasicBlock (wrapping
3846 /// original scalar pre-header) which contains SCEV expansions that need
3847 /// to happen before the CFG is modified (when executing a VPlan for the
3848 /// epilogue vector loop, the original entry needs to be replaced by a new
3849 /// one); a VPBasicBlock for the vector pre-header, followed by a region for
3850 /// the vector loop, followed by the middle VPBasicBlock. If a check is needed
3851 /// to guard executing the scalar epilogue loop, it will be added to the
3852 /// middle block, together with VPBasicBlocks for the scalar preheader and
3853 /// exit blocks. \p InductionTy is the type of the canonical induction and
3854 /// used for related values, like the trip count expression.
3855 static VPlanPtr createInitialVPlan(Type *InductionTy,
3857 bool RequiresScalarEpilogueCheck,
3858 bool TailFolded, Loop *TheLoop);
3859
3860 /// Prepare the plan for execution, setting up the required live-in values.
3861 void prepareToExecute(Value *TripCount, Value *VectorTripCount,
3862 VPTransformState &State);
3863
3864 /// Generate the IR code for this VPlan.
3865 void execute(VPTransformState *State);
3866
3867 /// Return the cost of this plan.
3869
3870 VPBasicBlock *getEntry() { return Entry; }
3871 const VPBasicBlock *getEntry() const { return Entry; }
3872
3873 /// Returns the preheader of the vector loop region.
3875 return cast<VPBasicBlock>(getVectorLoopRegion()->getSinglePredecessor());
3876 }
3877
3878 /// Returns the VPRegionBlock of the vector loop.
3880 const VPRegionBlock *getVectorLoopRegion() const;
3881
3882 /// Returns the 'middle' block of the plan, that is the block that selects
3883 /// whether to execute the scalar tail loop or the exit block from the loop
3884 /// latch.
3886 return cast<VPBasicBlock>(getScalarPreheader()->getPredecessors().front());
3887 }
3889 return cast<VPBasicBlock>(getScalarPreheader()->getPredecessors().front());
3890 }
3891
3892 /// Return the VPBasicBlock for the preheader of the scalar loop.
3894 return cast<VPBasicBlock>(getScalarHeader()->getSinglePredecessor());
3895 }
3896
3897 /// Return the VPIRBasicBlock wrapping the header of the scalar loop.
3898 VPIRBasicBlock *getScalarHeader() const { return ScalarHeader; }
3899
3900 /// Return an iterator range over the VPIRBasicBlock wrapping the exit blocks
3901 /// of the VPlan, that is leaf nodes except the scalar header. Defined in
3902 /// VPlanHCFG, as the definition of the type needs access to the definitions
3903 /// of VPBlockShallowTraversalWrapper.
3904 auto getExitBlocks();
3905
3906 /// The trip count of the original loop.
3908 assert(TripCount && "trip count needs to be set before accessing it");
3909 return TripCount;
3910 }
3911
3912 /// Resets the trip count for the VPlan. The caller must make sure all uses of
3913 /// the original trip count have been replaced.
3914 void resetTripCount(VPValue *NewTripCount) {
3915 assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 &&
3916 "TripCount always must be set");
3917 TripCount = NewTripCount;
3918 }
3919
3920 /// The backedge taken count of the original loop.
3922 if (!BackedgeTakenCount)
3923 BackedgeTakenCount = new VPValue();
3924 return BackedgeTakenCount;
3925 }
3926
3927 /// The vector trip count.
3928 VPValue &getVectorTripCount() { return VectorTripCount; }
3929
3930 /// Returns the VF of the vector loop region.
/// The result is a reference to \c VF, not a copy.
3931 VPValue &getVF() { return VF; }
3932
3933 /// Returns VF * UF of the vector loop region.
3934 VPValue &getVFxUF() { return VFxUF; }
3935
/// Insert \p VF into \c VFs, the collection of VFs recorded for this plan.
3936 void addVF(ElementCount VF) { VFs.insert(VF); }
3937
3939 assert(hasVF(VF) && "Cannot set VF not already in plan");
3940 VFs.clear();
3941 VFs.insert(VF);
3942 }
3943
/// Return true if \p VF is contained in \c VFs. Const-qualified, like hasUF()
/// and hasScalarVFOnly(), so it can be queried through a const VPlan.
3944 bool hasVF(ElementCount VF) const { return VFs.count(VF); }
3946 return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
3947 }
3948
3949 /// Returns an iterator range over all VFs of the plan.
3952 return {VFs.begin(), VFs.end()};
3953 }
3954
/// Return true if \c VFs holds exactly one VF and that VF is scalar.
3955 bool hasScalarVFOnly() const { return VFs.size() == 1 && VFs[0].isScalar(); }
3956
/// Return true if \p UF is contained in \c UFs. Also returns true when
/// \c UFs is empty, i.e. as long as no UF has been recorded every UF is
/// accepted.
3957 bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }
3958
/// Return the single UF stored in \c UFs. Asserts that \c UFs contains
/// exactly one entry.
3959 unsigned getUF() const {
3960 assert(UFs.size() == 1 && "Expected a single UF");
3961 return UFs[0];
3962 }
3963
/// Make \p UF the only entry of \c UFs. Asserts hasUF(UF), which also holds
/// while \c UFs is still empty.
3964 void setUF(unsigned UF) {
3965 assert(hasUF(UF) && "Cannot set the UF not already in plan");
// Drop every previously recorded UF so that only \p UF remains.
3966 UFs.clear();
3967 UFs.insert(UF);
3968 }
3969
3970 /// Return a string with the name of the plan and the applicable VFs and UFs.
3971 std::string getName() const;
3972
/// Set \c Name to the string obtained from \p newName via Twine::str().
3973 void setName(const Twine &newName) { Name = newName.str(); }
3974
3975 /// Gets the live-in VPValue for \p V or adds a new live-in (if none exists
3976 /// yet) for \p V.
3978 assert(V && "Trying to get or add the VPValue of a null Value");
3979 if (!Value2VPValue.count(V)) {
3980 VPValue *VPV = new VPValue(V);
3981 VPLiveInsToFree.push_back(VPV);
3982 assert(VPV->isLiveIn() && "VPV must be a live-in.");
3983 assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
3984 Value2VPValue[V] = VPV;
3985 }
3986
3987 assert(Value2VPValue.count(V) && "Value does not exist in VPlan");
3988 assert(Value2VPValue[V]->isLiveIn() &&
3989 "Only live-ins should be in mapping");
3990 return Value2VPValue[V];
3991 }
3992
3993 /// Return the live-in VPValue for \p V, if there is one or nullptr otherwise.
3994 VPValue *getLiveIn(Value *V) const { return Value2VPValue.lookup(V); }
3995
3996#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3997 /// Print the live-ins of this VPlan to \p O.
3998 void printLiveIns(raw_ostream &O) const;
3999
4000 /// Print this VPlan to \p O.
4001 void print(raw_ostream &O) const;
4002
4003 /// Print this VPlan in DOT format to \p O.
4004 void printDOT(raw_ostream &O) const;
4005
4006 /// Dump the plan to stderr (for debugging).
4007 LLVM_DUMP_METHOD void dump() const;
4008#endif
4009
4010 /// Returns the canonical induction recipe of the vector loop.
4013 if (EntryVPBB->empty()) {
4014 // VPlan native path.
4015 EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
4016 }
4017 return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
4018 }
4019
/// Return the VPValue recorded in \c SCEVToExpansion for \p S.
/// NOTE(review): the result comes from \c lookup(); presumably that yields
/// nullptr when \p S has no entry -- confirm against the map type.
4020 VPValue *getSCEVExpansion(const SCEV *S) const {
4021 return SCEVToExpansion.lookup(S);
4022 }
4023
/// Record \p V as the expansion of \p S in \c SCEVToExpansion. Asserts that
/// \p S does not have an entry yet.
4024 void addSCEVExpansion(const SCEV *S, VPValue *V) {
4025 assert(!SCEVToExpansion.contains(S) && "SCEV already expanded");
4026 SCEVToExpansion[S] = V;
4027 }
4028
4029 /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
4030 /// recipes to refer to the clones, and return it.
4031 VPlan *duplicate();
4032};
4033
4034#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4035/// VPlanPrinter prints a given VPlan to a given output stream. The printing is
4036/// indented and follows the dot format.
4038 raw_ostream &OS;
4039 const VPlan &Plan;
4040 unsigned Depth = 0;
4041 unsigned TabWidth = 2;
4042 std::string Indent;
4043 unsigned BID = 0;
4045
4047
4048 /// Handle indentation: add \p b to \c Depth and rebuild \c Indent as a string
/// of (\c Depth * \c TabWidth) space characters.
4049 void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); }
4050
4051 /// Print a given \p Block of the Plan.
4052 void dumpBlock(const VPBlockBase *Block);
4053
4054 /// Print the information related to the CFG edges going out of a given
4055 /// \p Block, followed by printing the successor blocks themselves.
4056 void dumpEdges(const VPBlockBase *Block);
4057
4058 /// Print a given \p BasicBlock, including its VPRecipes, followed by printing
4059 /// its successor blocks.
4060 void dumpBasicBlock(const VPBasicBlock *BasicBlock);
4061
4062 /// Print a given \p Region of the Plan.
4063 void dumpRegion(const VPRegionBlock *Region);
4064
/// Return the ID stored in \c BlockID for \p Block. If \p Block has no entry
/// yet, store the current value of \c BID for it, post-increment \c BID and
/// return the newly stored ID.
4065 unsigned getOrCreateBID(const VPBlockBase *Block) {
4066 return BlockID.count(Block) ? BlockID[Block] : BlockID[Block] = BID++;
4067 }
4068
4069 Twine getOrCreateName(const VPBlockBase *Block);
4070
4071 Twine getUID(const VPBlockBase *Block);
4072
4073 /// Print the information related to a CFG edge between two VPBlockBases.
4074 void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden,
4075 const Twine &Label);
4076
4077public:
4079 : OS(O), Plan(P), SlotTracker(&P) {}
4080
4081 LLVM_DUMP_METHOD void dump();
4082};
4083
4085 const Value *V;
4086
4087 VPlanIngredient(const Value *V) : V(V) {}
4088
4089 void print(raw_ostream &O) const;
4090};
4091
4093 I.print(OS);
4094 return OS;
4095}
4096
4098 Plan.print(OS);
4099 return OS;
4100}
4101#endif
4102
4103//===----------------------------------------------------------------------===//
4104// VPlan Utilities
4105//===----------------------------------------------------------------------===//
4106
4107/// Class that provides utilities for VPBlockBases in VPlan.
4109public:
4110 VPBlockUtils() = delete;
4111
4112 /// Insert disconnected VPBlockBase \p NewBlock after \p BlockPtr. Add \p
4113 /// NewBlock as successor of \p BlockPtr and \p BlockPtr as predecessor of \p
4114 /// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. \p BlockPtr's
4115 /// successors are moved from \p BlockPtr to \p NewBlock. \p NewBlock must
4116 /// have neither successors nor predecessors.
4117 static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) {
4118 assert(NewBlock->getSuccessors().empty() &&
4119 NewBlock->getPredecessors().empty() &&
4120 "Can't insert new block with predecessors or successors.");
4121 NewBlock->setParent(BlockPtr->getParent());
// Copy the successors first: the loop below disconnects them from BlockPtr
// while iterating.
4122 SmallVector<VPBlockBase *> Succs(BlockPtr->successors());
4123 for (VPBlockBase *Succ : Succs) {
// Move the edge BlockPtr -> Succ over to NewBlock -> Succ.
4124 disconnectBlocks(BlockPtr, Succ);
4125 connectBlocks(NewBlock, Succ);
4126 }
// Finally link BlockPtr -> NewBlock.
4127 connectBlocks(BlockPtr, NewBlock);
4128 }
4129
4130 /// Insert disconnected block \p NewBlock before \p BlockPtr. First
4131 /// disconnects all predecessors of \p BlockPtr and connects them to \p
4132 /// NewBlock. Add \p NewBlock as predecessor of \p BlockPtr and \p BlockPtr as
4133 /// successor of \p NewBlock.
4134 static void insertBlockBefore(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) {
4135 assert(NewBlock->getSuccessors().empty() &&
4136 NewBlock->getPredecessors().empty() &&
4137 "Can't insert new block with predecessors or successors.");
4138 NewBlock->setParent(BlockPtr->getParent());
// Iterate over a copy (to_vector) of the predecessors: the loop body
// disconnects them from BlockPtr.
4139 for (VPBlockBase *Pred : to_vector(BlockPtr->predecessors())) {
// Move the edge Pred -> BlockPtr over to Pred -> NewBlock.
4140 disconnectBlocks(Pred, BlockPtr);
4141 connectBlocks(Pred, NewBlock);
4142 }
// Finally link NewBlock -> BlockPtr.
4143 connectBlocks(NewBlock, BlockPtr);
4144 }
4145
4146 /// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p
4147 /// BlockPtr. Add \p IfTrue and \p IfFalse as successors of \p BlockPtr and \p
4148 /// BlockPtr as predecessor of \p IfTrue and \p IfFalse. Propagate \p BlockPtr
4149 /// parent to \p IfTrue and \p IfFalse. \p BlockPtr must have no successors
4150 /// and \p IfTrue and \p IfFalse must have neither successors nor
4151 /// predecessors.
4152 static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse,
4153 VPBlockBase *BlockPtr) {
// Only the "no successors" requirement on IfTrue/IfFalse is asserted here.
// NOTE(review): the "no predecessors" requirement and the "BlockPtr has no
// successors" requirement are not checked in this function; presumably
// setPredecessors()/setTwoSuccessors() enforce them -- confirm.
4154 assert(IfTrue->getSuccessors().empty() &&
4155 "Can't insert IfTrue with successors.");
4156 assert(IfFalse->getSuccessors().empty() &&
4157 "Can't insert IfFalse with successors.");
// Wire BlockPtr -> {IfTrue, IfFalse} in both directions.
4158 BlockPtr->setTwoSuccessors(IfTrue, IfFalse);
4159 IfTrue->setPredecessors({BlockPtr});
4160 IfFalse->setPredecessors({BlockPtr});
// Both new blocks take over BlockPtr's parent.
4161 IfTrue->setParent(BlockPtr->getParent());
4162 IfFalse->setParent(BlockPtr->getParent());
4163 }
4164
4165 /// Connect VPBlockBases \p From and \p To bi-directionally. If \p PredIdx is
4166 /// -1, append \p From to the predecessors of \p To, otherwise set \p To's
4167 /// predecessor at \p PredIdx to \p From. If \p SuccIdx is -1, append \p To to
4168 /// the successors of \p From, otherwise set \p From's successor at \p SuccIdx
4169 /// to \p To. Both VPBlockBases must have the same parent, which can be null.
4170 /// Both VPBlockBases can be already connected to other VPBlockBases.
4172 unsigned PredIdx = -1u, unsigned SuccIdx = -1u) {
4173 assert((From->getParent() == To->getParent()) &&
4174 "Can't connect two block with different parents");
4175 assert((SuccIdx != -1u || From->getNumSuccessors() < 2) &&
4176 "Blocks can't have more than two successors.");
4177 if (SuccIdx == -1u)
4178 From->appendSuccessor(To);
4179 else
4180 From->getSuccessors()[SuccIdx] = To;
4181
4182 if (PredIdx == -1u)
4183 To->appendPredecessor(From);
4184 else
4185 To->getPredecessors()[PredIdx] = From;
4186 }
4187
4188 /// Disconnect VPBlockBases \p From and \p To bi-directionally. Remove \p To
4189 /// from the successors of \p From and \p From from the predecessors of \p To.
4191 assert(To && "Successor to disconnect is null.");
4192 From->removeSuccessor(To);
4193 To->removePredecessor(From);
4194 }
4195
4196 /// Reassociate all the blocks connected to \p Old so that they now point to
4197 /// \p New.
4199 for (auto *Pred : to_vector(Old->getPredecessors()))
4200 Pred->replaceSuccessor(Old, New);
4201 for (auto *Succ : to_vector(Old->getSuccessors()))
4202 Succ->replacePredecessor(Old, New);
4203 New->setPredecessors(Old->getPredecessors());
4204 New->setSuccessors(Old->getSuccessors());
4205 Old->clearPredecessors();
4206 Old->clearSuccessors();
4207 }
4208
4209 /// Return an iterator range over \p Range which only includes \p BlockTy
4210 /// blocks. The accesses are casted to \p BlockTy.
4211 template <typename BlockTy, typename T>
4212 static auto blocksOnly(const T &Range) {
4213 // Create BaseTy with correct const-ness based on BlockTy.
4214 using BaseTy = std::conditional_t<std::is_const<BlockTy>::value,
4215 const VPBlockBase, VPBlockBase>;
4216
4217 // We need to first create an iterator range over (const) BlockTy & instead
4218 // of (const) BlockTy * for filter_range to work properly.
4219 auto Mapped =
4220 map_range(Range, [](BaseTy *Block) -> BaseTy & { return *Block; });
// NOTE(review): the line that opens the declaration of `Filter` is missing
// from this listing; the line below is its continuation, which keeps only
// the blocks for which isa<BlockTy> holds.
4222 Mapped, [](BaseTy &Block) { return isa<BlockTy>(&Block); });
// Map the surviving references back to BlockTy pointers.
4223 return map_range(Filter, [](BaseTy &Block) -> BlockTy * {
4224 return cast<BlockTy>(&Block);
4225 });
4226 }
4227
4228 /// Inserts \p BlockPtr on the edge between \p From and \p To. That is, update
4229 /// \p From's successor to \p To to point to \p BlockPtr and \p To's
4230 /// predecessor from \p From to \p BlockPtr. \p From and \p To are added to \p
4231 /// BlockPtr's predecessors and successors respectively. There must be a
4232 /// single edge between \p From and \p To.
4234 VPBlockBase *BlockPtr) {
4235 auto &Successors = From->getSuccessors();
4236 auto &Predecessors = To->getPredecessors();
4237 assert(count(Successors, To) == 1 && count(Predecessors, From) == 1 &&
4238 "must have single between From and To");
4239 unsigned SuccIdx = std::distance(Successors.begin(), find(Successors, To));
4240 unsigned PredIx =
4241 std::distance(Predecessors.begin(), find(Predecessors, From));
4242 VPBlockUtils::connectBlocks(From, BlockPtr, -1, SuccIdx);
4243 VPBlockUtils::connectBlocks(BlockPtr, To, PredIx, -1);
4244 }
4245};
4246
4249 InterleaveGroupMap;
4250
4251 /// Type for mapping of instruction based interleave groups to VPInstruction
4252 /// interleave groups
4255
4256 /// Recursively traverse \p Region and populate VPlan based interleave groups based on
4257 /// \p IAI.
4258 void visitRegion(VPRegionBlock *Region, Old2NewTy &Old2New,
4260 /// Recursively traverse \p Block and populate VPlan based interleave groups
4261 /// based on \p IAI.
4262 void visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
4264
4265public:
4267
4270 // Avoid releasing a pointer twice.
4271 for (auto &I : InterleaveGroupMap)
4272 DelSet.insert(I.second);
4273 for (auto *Ptr : DelSet)
4274 delete Ptr;
4275 }
4276
4277 /// Get the interleave group that \p Instr belongs to.
4278 ///
4279 /// \returns nullptr if doesn't have such group.
4282 return InterleaveGroupMap.lookup(Instr);
4283 }
4284};
4285
4286/// Class that maps (parts of) an existing VPlan to trees of combined
4287/// VPInstructions.
4289 enum class OpMode { Failed, Load, Opcode };
4290
4291 /// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as
4292 /// DenseMap keys.
4293 struct BundleDenseMapInfo {
// Empty key: a one-element vector holding the pointer value -1.
4294 static SmallVector<VPValue *, 4> getEmptyKey() {
4295 return {reinterpret_cast<VPValue *>(-1)};
4296 }
4297
// Tombstone key: a one-element vector holding the pointer value -2, so it
// differs from the empty key.
4298 static SmallVector<VPValue *, 4> getTombstoneKey() {
4299 return {reinterpret_cast<VPValue *>(-2)};
4300 }
4301
// Hash over the pointer values of all elements of \p V, in order.
4302 static unsigned getHashValue(const SmallVector<VPValue *, 4> &V) {
4303 return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
4304 }
4305
// Two keys are equal if the vectors compare equal with operator==.
// NOTE(review): the second line of this signature (the RHS parameter) is
// missing from this listing.
4306 static bool isEqual(const SmallVector<VPValue *, 4> &LHS,
4308 return LHS == RHS;
4309 }
4310 };
4311
4312 /// Mapping of values in the original VPlan to a combined VPInstruction.
4314 BundleToCombined;
4315
4317
4318 /// Basic block to operate on. For now, only instructions in a single BB are
4319 /// considered.
4320 const VPBasicBlock &BB;
4321
4322 /// Indicates whether we managed to combine all visited instructions or not.
4323 bool CompletelySLP = true;
4324
4325 /// Width of the widest combined bundle in bits.
4326 unsigned WidestBundleBits = 0;
4327
4328 using MultiNodeOpTy =
4329 typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;
4330
4331 // Input operand bundles for the current multi node. Each multi node operand
4332 // bundle contains values not matching the multi node's opcode. They will
4333 // be reordered in reorderMultiNodeOps, once we completed building a
4334 // multi node.
4335 SmallVector<MultiNodeOpTy, 4> MultiNodeOps;
4336
4337 /// Indicates whether we are building a multi node currently.
4338 bool MultiNodeActive = false;
4339
4340 /// Check if we can vectorize Operands together.
4341 bool areVectorizable(ArrayRef<VPValue *> Operands) const;
4342
4343 /// Add combined instruction \p New for the bundle \p Operands.
4344 void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New);
4345
4346 /// Indicate we hit a bundle we failed to combine. Returns nullptr for now.
4347 VPInstruction *markFailed();
4348
4349 /// Reorder operands in the multi node to maximize sequential memory access
4350 /// and commutative operations.
4351 SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps();
4352
4353 /// Choose the best candidate to use for the lane after \p Last. The set of
4354 /// candidates to choose from are values with an opcode matching \p Last's
4355 /// or loads consecutive to \p Last.
4356 std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last,
4357 SmallPtrSetImpl<VPValue *> &Candidates,
4359
4360#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4361 /// Print bundle \p Values to dbgs().
4362 void dumpBundle(ArrayRef<VPValue *> Values);
4363#endif
4364
4365public:
/// Construct a VPlanSlp for basic block \p BB, initializing the \c IAI and
/// \c BB members from the arguments.
4366 VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {}
4367
4368 ~VPlanSlp() = default;
4369
4370 /// Tries to build an SLP tree rooted at \p Operands and returns a
4371 /// VPInstruction combining \p Operands, if they can be combined.
4373
4374 /// Return the width of the widest combined bundle in bits.
4375 unsigned getWidestBundleBits() const { return WidestBundleBits; }
4376
4377 /// Return true if all visited instructions can be combined.
4378 bool isCompletelySLP() const { return CompletelySLP; }
4379};
4380} // end namespace llvm
4381
4382#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
aarch64 promote const
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
always inline
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
RelocType Type
Definition: COFFYAML.cpp:410
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:622
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
uint64_t Addr
std::string Name
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1315
Flatten the CFG
Hexagon Common GEP
std::pair< BasicBlock *, unsigned > BlockTy
A pair of (basic block, score).
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
#define P(N)
static cl::opt< RegAllocEvictionAdvisorAnalysis::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Development, "development", "for training")))
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This pass exposes codegen information to IR-level passes.
This file contains the declarations of the entities induced by Vectorization Plans,...
#define VP_CLASSOF_IMPL(VPDefID)
Definition: VPlan.h:825
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition: blake3_impl.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:198
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:444
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:608
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:673
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Definition: CmpPredicate.h:22
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:194
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:152
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:147
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:322
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition: Operator.h:205
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:933
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:91
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
bool mayWriteToMemory() const LLVM_READONLY
Return true if this instruction may modify memory.
bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
bool mayReadFromMemory() const LLVM_READONLY
Return true if this instruction may read memory.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:274
The group of interleaved loads/stores sharing the same stride and close to each other.
Definition: VectorUtils.h:480
uint32_t getFactor() const
Definition: VectorUtils.h:496
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
Definition: VectorUtils.h:550
InstTy * getInsertPos() const
Definition: VectorUtils.h:566
Drive the analysis of interleaved memory accesses in the loop.
Definition: VectorUtils.h:622
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:176
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
bool onlyWritesMemory() const
Whether this function only (at most) writes memory.
Definition: ModRef.h:198
bool onlyReadsMemory() const
Whether this function only (at most) reads memory.
Definition: ModRef.h:195
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:77
This class represents an analyzed expression in the program.
The main scalar evolution driver.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
iterator end()
Get an iterator to the end of the SetVector.
Definition: SetVector.h:113
void clear()
Completely clear the SetVector.
Definition: SetVector.h:273
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:264
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:103
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
Definition: SetVector.h:254
This class provides computation of slot numbers for LLVM Assembly writing.
Definition: AsmWriter.cpp:698
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:363
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
iterator erase(const_iterator CI)
Definition: SmallVector.h:737
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
An instruction for storing to memory.
Definition: Instructions.h:292
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
Iterator to iterate over vectorization factors in a VFRange.
Definition: VPlan.h:121
ElementCount operator*() const
Definition: VPlan.h:129
iterator & operator++()
Definition: VPlan.h:131
iterator(ElementCount VF)
Definition: VPlan.h:125
bool operator==(const iterator &Other) const
Definition: VPlan.h:127
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
Definition: VPlan.h:3228
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
VPActiveLaneMaskPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3236
static bool classof(const VPHeaderPHIRecipe *D)
Definition: VPlan.h:3245
VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
Definition: VPlan.h:3230
~VPActiveLaneMaskPHIRecipe() override=default
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:3470
RecipeListTy::const_iterator const_iterator
Definition: VPlan.h:3495
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition: VPlan.h:3542
VPBasicBlock * clone() override
Clone the current block and its recipes, without updating the operands of the cloned recipes.
Definition: VPlan.h:3590
RecipeListTy::const_reverse_iterator const_reverse_iterator
Definition: VPlan.h:3497
RecipeListTy::iterator iterator
Instruction iterators...
Definition: VPlan.h:3494
void connectToPredecessors(VPTransformState::CFGState &CFG)
Connect the VPBBs predecessors' in the VPlan CFG to the IR basic block generated for this VPBB.
Definition: VPlan.cpp:419
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition: VPlan.cpp:477
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition: VPlan.h:3520
VPBasicBlock(const unsigned char BlockSC, const Twine &Name="")
Definition: VPlan.h:3478
iterator end()
Definition: VPlan.h:3504
VPBasicBlock(const Twine &Name="", VPRecipeBase *Recipe=nullptr)
Definition: VPlan.h:3482
iterator begin()
Recipe iterator methods.
Definition: VPlan.h:3502
RecipeListTy::reverse_iterator reverse_iterator
Definition: VPlan.h:3496
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition: VPlan.h:3555
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override
Return the cost of this VPBasicBlock.
Definition: VPlan.cpp:765
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition: VPlan.cpp:213
~VPBasicBlock() override
Definition: VPlan.h:3488
VPRegionBlock * getEnclosingLoopRegion()
Definition: VPlan.cpp:566
void dropAllReferences(VPValue *NewValue) override
Replace all operands of VPUsers in the block with NewValue and also replaces all uses of VPValues def...
Definition: VPlan.cpp:516
const_reverse_iterator rbegin() const
Definition: VPlan.h:3508
reverse_iterator rend()
Definition: VPlan.h:3509
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
Definition: VPlan.cpp:538
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
Definition: VPlan.h:3476
void executeRecipes(VPTransformState *State, BasicBlock *BB)
Execute the recipes in the IR basic block BB.
Definition: VPlan.cpp:526
VPRecipeBase & back()
Definition: VPlan.h:3517
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPBasicBlock to O, prefixing all lines with Indent.
Definition: VPlan.cpp:631
const VPRecipeBase & front() const
Definition: VPlan.h:3514
const_iterator begin() const
Definition: VPlan.h:3503
VPRecipeBase & front()
Definition: VPlan.h:3515
bool isExiting() const
Returns true if the block is exiting its parent region.
Definition: VPlan.cpp:614
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
Definition: VPlan.cpp:602
const VPRecipeBase & back() const
Definition: VPlan.h:3516
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition: VPlan.h:3533
bool empty() const
Definition: VPlan.h:3513
const_iterator end() const
Definition: VPlan.h:3505
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:3528
static RecipeListTy VPBasicBlock::* getSublistAccess(VPRecipeBase *)
Returns a pointer to a member of the recipe list.
Definition: VPlan.h:3523
reverse_iterator rbegin()
Definition: VPlan.h:3507
size_t size() const
Definition: VPlan.h:3512
const_reverse_iterator rend() const
Definition: VPlan.h:3510
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
Definition: VPlan.h:2428
VPBlendRecipe(PHINode *Phi, ArrayRef< VPValue * > Operands)
The blend operation is a User of the incoming values and of their respective masks,...
Definition: VPlan.h:2434
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenMemoryRecipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2481
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition: VPlan.h:2457
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition: VPlan.h:2462
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
Definition: VPlan.h:2452
VPBlendRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2439
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
Definition: VPlan.h:2448
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:396
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as successor of this VPBlockBase.
Definition: VPlan.h:612
VPRegionBlock * getParent()
Definition: VPlan.h:488
VPBlocksTy & getPredecessors()
Definition: VPlan.h:520
iterator_range< VPBlockBase ** > predecessors()
Definition: VPlan.h:517
const VPBasicBlock * getExitingBasicBlock() const
Definition: VPlan.cpp:178
LLVM_DUMP_METHOD void dump() const
Dump this VPBlockBase to dbgs().
Definition: VPlan.h:678
void setName(const Twine &newName)
Definition: VPlan.h:481
size_t getNumSuccessors() const
Definition: VPlan.h:534
iterator_range< VPBlockBase ** > successors()
Definition: VPlan.h:516
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
Definition: VPlan.h:627
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition: VPlan.cpp:619
bool isLegalToHoistInto()
Return true if it is legal to hoist instructions into this block.
Definition: VPlan.h:643
virtual ~VPBlockBase()=default
void print(raw_ostream &O) const
Print plain-text dump of this VPBlockBase to O.
Definition: VPlan.h:668
const VPBlocksTy & getHierarchicalPredecessors()
Definition: VPlan.h:570
size_t getNumPredecessors() const
Definition: VPlan.h:535
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition: VPlan.h:603
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition: VPlan.cpp:200
const VPBlocksTy & getPredecessors() const
Definition: VPlan.h:519
virtual VPBlockBase * clone()=0
Clone the current block and its recipes without updating the operands of the cloned recipes,...
static void deleteCFG(VPBlockBase *Entry)
Delete all blocks reachable from a given VPBlockBase, inclusive.
Definition: VPlan.cpp:208
enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC } VPBlockTy
An enumeration for keeping track of the concrete subclasses of VPBlockBase that are actually instantiat...
Definition: VPlan.h:473
virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx)=0
Return the cost of the block.
VPlan * getPlan()
Definition: VPlan.cpp:153
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition: VPlan.cpp:172
const VPRegionBlock * getParent() const
Definition: VPlan.h:489
const std::string & getName() const
Definition: VPlan.h:479
void clearSuccessors()
Remove all the successors of this block.
Definition: VPlan.h:622
VPBlockBase * getSingleHierarchicalSuccessor()
Definition: VPlan.h:560
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition: VPlan.h:594
VPBlockBase * getSinglePredecessor() const
Definition: VPlan.h:530
virtual void execute(VPTransformState *State)=0
The method which generates the output IR that corresponds to this VPBlockBase, thereby "executing" the...
const VPBlocksTy & getHierarchicalSuccessors()
Definition: VPlan.h:554
void clearPredecessors()
Remove all the predecessors of this block.
Definition: VPlan.h:619
unsigned getVPBlockID() const
Definition: VPlan.h:486
void printAsOperand(raw_ostream &OS, bool PrintType=false) const
Definition: VPlan.h:654
VPBlockBase(const unsigned char SC, const std::string &N)
Definition: VPlan.h:465
VPBlocksTy & getSuccessors()
Definition: VPlan.h:514
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition: VPlan.cpp:192
const VPBasicBlock * getEntryBasicBlock() const
Definition: VPlan.cpp:158
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition: VPlan.h:583
void setParent(VPRegionBlock *P)
Definition: VPlan.h:499
virtual void dropAllReferences(VPValue *NewValue)=0
Replace all operands of VPUsers in the block with NewValue and also replace all uses of VPValues def...
VPBlockBase * getSingleHierarchicalPredecessor()
Definition: VPlan.h:576
VPBlockBase * getSingleSuccessor() const
Definition: VPlan.h:524
const VPBlocksTy & getSuccessors() const
Definition: VPlan.h:513
Class that provides utilities for VPBlockBases in VPlan.
Definition: VPlan.h:4108
static auto blocksOnly(const T &Range)
Return an iterator range over Range which only includes BlockTy blocks.
Definition: VPlan.h:4212
static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBase NewBlock after BlockPtr.
Definition: VPlan.h:4117
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
Definition: VPlan.h:4233
static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBases IfTrue and IfFalse after BlockPtr.
Definition: VPlan.h:4152
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
Definition: VPlan.h:4171
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
Definition: VPlan.h:4190
static void reassociateBlocks(VPBlockBase *Old, VPBlockBase *New)
Reassociate all the blocks connected to Old so that they now point to New.
Definition: VPlan.h:4198
static void insertBlockBefore(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)
Insert disconnected block NewBlock before BlockPtr.
Definition: VPlan.h:4134
A recipe for generating conditional branches on the bits of a mask.
Definition: VPlan.h:2788
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2824
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPBranchOnMaskRecipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition: VPlan.h:2812
VPBranchOnMaskRecipe(VPValue *BlockInMask)
Definition: VPlan.h:2790
VPBranchOnMaskRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2796
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition: VPlan.h:2831
void execute(VPTransformState &State) override
Generate the extraction of the appropriate bit from the block mask and the conditional branch.
Canonical scalar induction phi of the vector loop.
Definition: VPlan.h:3167
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition: VPlan.h:3210
~VPCanonicalIVPHIRecipe() override=default
static bool classof(const VPHeaderPHIRecipe *D)
Definition: VPlan.h:3182
VPCanonicalIVPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3174
VPCanonicalIVPHIRecipe(VPValue *StartV, DebugLoc DL)
Definition: VPlan.h:3169
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3203
Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:3198
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition: VPlan.h:3186
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPCanonicalIVPHIRecipe.
Definition: VPlan.h:3217
This class augments a recipe with a set of VPValues defined by the recipe.
Definition: VPlanValue.h:292
unsigned getVPDefID() const
Definition: VPlanValue.h:419
A recipe for converting the input IV value to the corresponding value of an IV with different s...
Definition: VPlan.h:3344
void execute(VPTransformState &State) override
Generate the transformed value of the induction at offset StartValue (1st operand) + IV (2nd operand) * StepValue (3rd operand).
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPDerivedIVRecipe.
Definition: VPlan.h:3383
VPValue * getStepValue() const
Definition: VPlan.h:3400
Type * getScalarType() const
Definition: VPlan.h:3395
VPDerivedIVRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3371
VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV, VPValue *Step, const Twine &Name="")
Definition: VPlan.h:3363
~VPDerivedIVRecipe() override=default
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3403
VPValue * getStartValue() const
Definition: VPlan.h:3399
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start, VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step, const Twine &Name="")
Definition: VPlan.h:3355
A recipe for generating the phi node for the current index of elements, adjusted in accordance with E...
Definition: VPlan.h:3263
static bool classof(const VPHeaderPHIRecipe *D)
Definition: VPlan.h:3276
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPEVLBasedIVPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3270
~VPEVLBasedIVPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition: VPlan.h:3280
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPEVLBasedIVPHIRecipe.
Definition: VPlan.h:3286
VPEVLBasedIVPHIRecipe(VPValue *StartIV, DebugLoc DL)
Definition: VPlan.h:3265
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3293
Recipe to expand a SCEV expression.
Definition: VPlan.h:3128
VPExpandSCEVRecipe(const SCEV *Expr, ScalarEvolution &SE)
Definition: VPlan.h:3133
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPExpandSCEVRecipe.
Definition: VPlan.h:3148
const SCEV * getSCEV() const
Definition: VPlan.h:3160
void execute(VPTransformState &State) override
Generate the code that expands the SCEV expression.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPExpandSCEVRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3138
~VPExpandSCEVRecipe() override=default
A pure virtual base class for all recipes modeling header phis, including phis for first order recurr...
Definition: VPlan.h:2026
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this header phi recipe.
static bool classof(const VPValue *V)
Definition: VPlan.h:2043
VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr, VPValue *Start=nullptr, DebugLoc DL={})
Definition: VPlan.h:2028
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override=0
Print the recipe.
virtual VPValue * getBackedgeValue()
Returns the incoming value from the loop backedge.
Definition: VPlan.h:2074
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition: VPlan.h:2063
void setStartValue(VPValue *V)
Update the start value of the recipe.
Definition: VPlan.h:2071
VPValue * getStartValue() const
Definition: VPlan.h:2066
static bool classof(const VPRecipeBase *B)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:2039
void execute(VPTransformState &State) override=0
Generate the phi nodes.
virtual VPRecipeBase & getBackedgeRecipe()
Returns the backedge value as a recipe.
Definition: VPlan.h:2080
~VPHeaderPHIRecipe() override=default
A recipe representing a sequence of load -> update -> store as part of a histogram operation.
Definition: VPlan.h:1775
void execute(VPTransformState &State) override
Produce a vectorized histogram operation.
VP_CLASSOF_IMPL(VPDef::VPHistogramSC)
VPHistogramRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1787
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHistogramRecipe.
VPHistogramRecipe(unsigned Opcode, iterator_range< IterT > Operands, DebugLoc DL={})
Definition: VPlan.h:1781
VPValue * getMask() const
Return the mask operand if one was provided, or a null pointer if all lanes should be executed uncond...
Definition: VPlan.h:1804
unsigned getOpcode() const
Definition: VPlan.h:1800
~VPHistogramRecipe() override=default
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition: VPlan.h:3616
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition: VPlan.cpp:455
VPIRBasicBlock(BasicBlock *IRBB)
Definition: VPlan.h:3620
static VPIRBasicBlock * fromBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition: VPlan.cpp:843
BasicBlock * getIRBasicBlock() const
Definition: VPlan.h:3646
~VPIRBasicBlock() override
Definition: VPlan.h:3625
static bool classof(const VPBlockBase *V)
Definition: VPlan.h:3627
VPIRBasicBlock * clone() override
Clone the current block and its recipes, without updating the operands of the cloned recipes.
Definition: VPlan.h:3639
A recipe to wrap an original IR instruction not to be modified during execution, except for PHIs.
Definition: VPlan.h:1382
Instruction & getInstruction() const
Definition: VPlan.h:1406
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first part of operand Op.
Definition: VPlan.h:1420
~VPIRInstruction() override=default
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPIRInstruction * clone() override
Clone the current recipe.
Definition: VPlan.h:1393
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPIRInstruction.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool usesScalars(const VPValue *Op) const override
Returns true if the VPUser uses scalars of operand Op.
Definition: VPlan.h:1414
VPIRInstruction(Instruction &I)
Definition: VPlan.h:1386
This is a concrete Recipe that models a single VPlan-level instruction.
Definition: VPlan.h:1197
VPInstruction(VPValue *Ptr, VPValue *Offset, GEPNoWrapFlags Flags, DebugLoc DL={}, const Twine &Name="")
Definition: VPlan.h:1297
VPInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL, const Twine &Name="")
Definition: VPlan.h:1272
VPInstruction * clone() override
Clone the current recipe.
Definition: VPlan.h:1308
@ ResumePhi
Creates a scalar phi in a leaf VPBB with a single predecessor in VPlan.
Definition: VPlan.h:1215
@ FirstOrderRecurrenceSplice
Definition: VPlan.h:1203
@ CanonicalIVIncrementForPart
Definition: VPlan.h:1218
@ CalculateTripCountMinusVF
Definition: VPlan.h:1216
bool hasResult() const
Definition: VPlan.h:1338
LLVM_DUMP_METHOD void dump() const
Print the VPInstruction to dbgs() (for debugging).
StringRef getName() const
Returns the symbolic name assigned to the VPInstruction.
Definition: VPlan.h:1375
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInstruction.
Definition: VPlan.h:1323
unsigned getOpcode() const
Definition: VPlan.h:1315
VPInstruction(unsigned Opcode, std::initializer_list< VPValue * > Operands, WrapFlagsTy WrapFlags, DebugLoc DL={}, const Twine &Name="")
Definition: VPlan.h:1284
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
VPInstruction(unsigned Opcode, std::initializer_list< VPValue * > Operands, DebugLoc DL={}, const Twine &Name="")
Definition: VPlan.h:1277
VPInstruction(unsigned Opcode, std::initializer_list< VPValue * > Operands, DisjointFlagsTy DisjointFlag, DebugLoc DL={}, const Twine &Name="")
Definition: VPlan.h:1289
bool isVectorToScalar() const
Returns true if this VPInstruction produces a scalar value from a vector, e.g. by performing a reduction or extracting a lane.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the VPInstruction to O.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
bool isSingleScalar() const
Returns true if this VPInstruction's operands are single scalars and the result is also a single scal...
void execute(VPTransformState &State) override
Generate the instruction.
VPInterleaveRecipe is a recipe for transforming an interleave group of loads or stores into one wide l...
Definition: VPlan.h:2495
bool onlyFirstLaneUsed(const VPValue *Op) const override
The recipe only uses the first lane of the address.
Definition: VPlan.h:2578
~VPInterleaveRecipe() override=default
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition: VPlan.h:2536
VPInterleaveRecipe(const InterleaveGroup< Instruction > *IG, VPValue *Addr, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps)
Definition: VPlan.h:2507
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2542
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPInterleaveRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2528
void execute(VPTransformState &State) override
Generate the wide load or store, and shuffles.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition: VPlan.h:2549
Instruction * getInsertPos() const
Definition: VPlan.h:2584
const InterleaveGroup< Instruction > * getInterleaveGroup()
Definition: VPlan.h:2569
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInterleaveRecipe.
unsigned getNumStoreOperands() const
Returns the number of stored operands of this interleave group.
Definition: VPlan.h:2573
InterleaveGroup< VPInstruction > * getInterleaveGroup(VPInstruction *Instr) const
Get the interleave group that Instr belongs to.
Definition: VPlan.h:4281
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Definition: VPlan.h:153
static VPLane getLastLaneForVF(const ElementCount &VF)
Definition: VPlan.h:194
static unsigned getNumCachedLanes(const ElementCount &VF)
Returns the maximum number of lanes that we are able to consider caching for VF.
Definition: VPlan.h:229
Value * getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const
Returns an expression describing the lane index that can be used at runtime.
Definition: VPlan.cpp:73
VPLane(unsigned Lane, Kind LaneKind)
Definition: VPlan.h:176
Kind getKind() const
Returns the Kind of lane offset.
Definition: VPlan.h:210
static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset)
Definition: VPlan.h:180
bool isFirstLane() const
Returns true if this is the first lane of the whole vector.
Definition: VPlan.h:213
VPLane(unsigned Lane)
Definition: VPlan.h:175
unsigned getKnownLane() const
Returns a compile-time known value for the lane index and asserts if the lane can only be calculated ...
Definition: VPlan.h:200
static VPLane getFirstLane()
Definition: VPlan.h:178
Kind
Kind describes how to interpret Lane.
Definition: VPlan.h:156
@ ScalableLast
For ScalableLast, Lane is the offset from the start of the last N-element subvector in a scalable vec...
@ First
For First, Lane is the index into the first N elements of a fixed-vector <N x <ElTy>> or a scalable v...
unsigned mapToCacheIndex(const ElementCount &VF) const
Maps the lane to a cache index based on VF.
Definition: VPlan.h:216
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...
Definition: VPlan.h:2843
~VPPredInstPHIRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition: VPlan.h:2875
void execute(VPTransformState &State) override
Generates phi nodes for live-outs (from a replicate region) as needed to retain SSA form.
VPPredInstPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2851
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPredInstPHIRecipe.
Definition: VPlan.h:2862
VPPredInstPHIRecipe(VPValue *PredV, DebugLoc DL)
Construct a VPPredInstPHIRecipe given PredInst whose value needs a phi node after merging back from ...
Definition: VPlan.h:2847
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition: VPlan.h:720
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayReadOrWriteMemory() const
Returns true if the recipe may read from or write to memory.
Definition: VPlan.h:809
bool mayHaveSideEffects() const
Returns true if the recipe may have side-effects.
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
virtual InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
virtual ~VPRecipeBase()=default
VPBasicBlock * getParent()
Definition: VPlan.h:745
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition: VPlan.h:814
virtual void execute(VPTransformState &State)=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
static bool classof(const VPDef *D)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:787
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
VPRecipeBase(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL={})
Definition: VPlan.h:731
virtual VPRecipeBase * clone()=0
Clone the current recipe.
const VPBasicBlock * getParent() const
Definition: VPlan.h:746
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
static bool classof(const VPUser *U)
Definition: VPlan.h:792
VPRecipeBase(const unsigned char SC, iterator_range< IterT > Operands, DebugLoc DL={})
Definition: VPlan.h:736
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
bool isPhi() const
Returns true for PHI-like recipes.
Definition: VPlan.h:798
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
Class to record LLVM IR flag for a recipe along with it.
Definition: VPlan.h:930
ExactFlagsTy ExactFlags
Definition: VPlan.h:980
FastMathFlagsTy FMFs
Definition: VPlan.h:983
NonNegFlagsTy NonNegFlags
Definition: VPlan.h:982
CmpInst::Predicate CmpPredicate
Definition: VPlan.h:977
GEPNoWrapFlags getGEPNoWrapFlags() const
Definition: VPlan.h:1150
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, CmpInst::Predicate Pred, DebugLoc DL={})
Definition: VPlan.h:1032
void setFlags(Instruction *I) const
Set the IR flags for I.
Definition: VPlan.h:1111
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, GEPNoWrapFlags GEPFlags, DebugLoc DL={})
Definition: VPlan.h:1057
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:1063
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, FastMathFlags FMFs, DebugLoc DL={})
Definition: VPlan.h:1044
void dropPoisonGeneratingFlags()
Drop all poison-generating flags.
Definition: VPlan.h:1080
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition: VPlan.h:1153
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, Instruction &I)
Definition: VPlan.h:1002
DisjointFlagsTy DisjointFlags
Definition: VPlan.h:979
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, WrapFlagsTy WrapFlags, DebugLoc DL={})
Definition: VPlan.h:1038
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DisjointFlagsTy DisjointFlags, DebugLoc DL={})
Definition: VPlan.h:1050
GEPNoWrapFlags GEPFlags
Definition: VPlan.h:981
void transferFlags(VPRecipeWithIRFlags &Other)
Definition: VPlan.h:988
WrapFlagsTy WrapFlags
Definition: VPlan.h:978
bool hasNoUnsignedWrap() const
Definition: VPlan.h:1157
bool isDisjoint() const
Definition: VPlan.h:1169
void printFlags(raw_ostream &O) const
CmpInst::Predicate getPredicate() const
Definition: VPlan.h:1144
bool hasNoSignedWrap() const
Definition: VPlan.h:1163
static bool classof(const VPUser *U)
Definition: VPlan.h:1074
FastMathFlags getFastMathFlags() const
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DebugLoc DL={})
Definition: VPlan.h:995
A recipe to represent inloop reduction operations with vector-predication intrinsics,...
Definition: VPlan.h:2670
void execute(VPTransformState &State) override
Generate the reduction in the loop.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2700
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition: VPlan.h:2697
VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp)
Definition: VPlan.h:2672
VPReductionEVLRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2681
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPReductionEVLRecipe() override=default
A recipe for handling reduction phis.
Definition: VPlan.h:2369
VPReductionPHIRecipe(PHINode *Phi, const RecurrenceDescriptor &RdxDesc, VPValue &Start, bool IsInLoop=false, bool IsOrdered=false)
Create a new VPReductionPHIRecipe for the reduction Phi described by RdxDesc.
Definition: VPlan.h:2382
bool isOrdered() const
Returns true, if the phi is part of an ordered reduction.
Definition: VPlan.h:2420
VPReductionPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2392
~VPReductionPHIRecipe() override=default
bool isInLoop() const
Returns true, if the phi is part of an in-loop reduction.
Definition: VPlan.h:2423
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
static bool classof(const VPHeaderPHIRecipe *R)
Definition: VPlan.h:2402
const RecurrenceDescriptor & getRecurrenceDescriptor() const
Definition: VPlan.h:2415
A recipe to represent inloop reduction operations, performing a reduction on a vector operand into a ...
Definition: VPlan.h:2590
bool isConditional() const
Return true if the in-loop reduction is conditional.
Definition: VPlan.h:2655
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:2625
VPReductionRecipe(const RecurrenceDescriptor &R, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, bool IsOrdered, DebugLoc DL={})
Definition: VPlan.h:2610
VPReductionRecipe(const unsigned char SC, const RecurrenceDescriptor &R, Instruction *I, ArrayRef< VPValue * > Operands, VPValue *CondOp, bool IsOrdered, DebugLoc DL)
Definition: VPlan.h:2598
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of VPReductionRecipe.
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition: VPlan.h:2659
const RecurrenceDescriptor & getRecurrenceDescriptor() const
Return the recurrence descriptor for the in-loop reduction.
Definition: VPlan.h:2649
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition: VPlan.h:2661
bool isOrdered() const
Return true if the in-loop reduction is ordered.
Definition: VPlan.h:2653
~VPReductionRecipe() override=default
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition: VPlan.h:2657
VPReductionRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2619
void execute(VPTransformState &State) override
Generate the reduction in the loop.
static bool classof(const VPUser *U)
Definition: VPlan.h:2630
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition: VPlan.h:3657
VPRegionBlock * clone() override
Clone all blocks in the single-entry single-exit region of the block and their recipes without updati...
Definition: VPlan.cpp:702
const VPBlockBase * getEntry() const
Definition: VPlan.h:3696
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition: VPlan.h:3728
void dropAllReferences(VPValue *NewValue) override
Replace all operands of VPUsers in the block with NewValue and also replace all uses of VPValues def...
Definition: VPlan.cpp:711
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Definition: VPlan.h:3713
VPBlockBase * getExiting()
Definition: VPlan.h:3709
void setEntry(VPBlockBase *EntryBlock)
Set EntryBlock as the entry VPBlockBase of this VPRegionBlock.
Definition: VPlan.h:3701
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override
Return the cost of the block.
Definition: VPlan.cpp:772
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPRegionBlock to O (recursively), prefixing all lines with Indent.
Definition: VPlan.cpp:810
VPRegionBlock(const std::string &Name="", bool IsReplicator=false)
Definition: VPlan.h:3679
VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="", bool IsReplicator=false)
Definition: VPlan.h:3670
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPRegionBlock,...
Definition: VPlan.cpp:718
const VPBlockBase * getExiting() const
Definition: VPlan.h:3708
VPBlockBase * getEntry()
Definition: VPlan.h:3697
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
Definition: VPlan.h:3721
~VPRegionBlock() override
Definition: VPlan.h:3683
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:3692
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition: VPlan.h:2711
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate replicas of the desired Ingredient.
~VPReplicateRecipe() override=default
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPReplicateRecipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2760
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition: VPlan.h:2767
bool isUniform() const
Definition: VPlan.h:2755
bool isPredicated() const
Definition: VPlan.h:2757
VPReplicateRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2730
VPReplicateRecipe(Instruction *I, iterator_range< IterT > Operands, bool IsUniform, VPValue *Mask=nullptr)
Definition: VPlan.h:2720
unsigned getOpcode() const
Definition: VPlan.h:2784
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
Definition: VPlan.h:2779
bool shouldPack() const
Returns true if the recipe is used by a widened recipe via an intervening VPPredInstPHIRecipe.
A recipe to compute the pointers for widened memory accesses of IndexedTy in reverse order.
Definition: VPlan.h:1903
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPReverseVectorPointerRecipe.
Definition: VPlan.h:1927
VPReverseVectorPointerRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1941
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition: VPlan.h:1934
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition: VPlan.h:1920
VPReverseVectorPointerRecipe(VPValue *Ptr, VPValue *VF, Type *IndexedTy, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition: VPlan.h:1907
const VPValue * getVFValue() const
Definition: VPlan.h:1916
VPScalarCastRecipe is a recipe to create scalar cast instructions.
Definition: VPlan.h:1581
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPScalarCastRecipe.
Definition: VPlan.h:1604
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Each concrete VPDef prints itself.
~VPScalarCastRecipe() override=default
VPScalarCastRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1595
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition: VPlan.h:1618
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Type * getResultType() const
Returns the result type of the cast.
Definition: VPlan.h:1616
VPScalarCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
Definition: VPlan.h:1589
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
Definition: VPlan.h:3413
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3460
VPValue * getStepValue() const
Definition: VPlan.h:3457
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPScalarIVStepsRecipe.
Definition: VPlan.h:3445
VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV, VPValue *Step)
Definition: VPlan.h:3423
VPScalarIVStepsRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3433
VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step, Instruction::BinaryOps Opcode, FastMathFlags FMFs)
Definition: VPlan.h:3417
~VPScalarIVStepsRecipe() override=default
void execute(VPTransformState &State) override
Generate the scalarized versions of the phi node as needed by their users.
Recipe to generate a scalar PHI.
Definition: VPlan.h:2253
VPScalarPHIRecipe(VPValue *Start, VPValue *BackedgeValue, DebugLoc DL, StringRef Name)
Definition: VPlan.h:2257
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2276
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPScalarPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPScalarPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2266
VPSingleDefRecipe is a base class for recipes modeling a sequence of one or more output IR that define ...
Definition: VPlan.h:847
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL={})
Definition: VPlan.h:853
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition: VPlan.h:916
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:862
const Instruction * getUnderlyingInstr() const
Definition: VPlan.h:919
VPSingleDefRecipe(const unsigned char SC, IterT Operands, DebugLoc DL={})
Definition: VPlan.h:850
static bool classof(const VPUser *U)
Definition: VPlan.h:908
LLVM_DUMP_METHOD void dump() const
Print this VPSingleDefRecipe to dbgs() (for debugging).
VPSingleDefRecipe(const unsigned char SC, IterT Operands, Value *UV, DebugLoc DL={})
Definition: VPlan.h:858
virtual VPSingleDefRecipe * clone() override=0
Clone the current recipe.
This class can be used to assign names to VPValues.
Definition: VPlanValue.h:440
An analysis for type-inference for VPValues.
Definition: VPlanAnalysis.h:40
Helper to access the operand that contains the unroll part for this recipe after unrolling.
Definition: VPlan.h:1182
VPValue * getUnrollPartOperand(VPUser &U) const
Return the VPValue operand containing the unroll part or null if there is no such operand.
unsigned getUnrollPart(VPUser &U) const
Return the unroll part.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition: VPlanValue.h:200
operand_range operands()
Definition: VPlanValue.h:257
void setOperand(unsigned I, VPValue *New)
Definition: VPlanValue.h:242
unsigned getNumOperands() const
Definition: VPlanValue.h:236
operand_iterator op_end()
Definition: VPlanValue.h:255
operand_iterator op_begin()
Definition: VPlanValue.h:253
VPValue * getOperand(unsigned N) const
Definition: VPlanValue.h:237
VPUser()=delete
void addOperand(VPValue *Operand)
Definition: VPlanValue.h:231
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop region.
Definition: VPlan.cpp:1412
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition: VPlan.cpp:123
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition: VPlanValue.h:77
unsigned getNumUsers() const
Definition: VPlanValue.h:111
Value * getLiveInIRValue()
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Definition: VPlanValue.h:172
bool isLiveIn() const
Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.
Definition: VPlanValue.h:167
friend class VPRecipeBase
Definition: VPlanValue.h:52
user_range users()
Definition: VPlanValue.h:132
A recipe to compute the pointers for widened memory accesses of IndexedTy.
Definition: VPlan.h:1956
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition: VPlan.h:1960
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition: VPlan.h:1977
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition: VPlan.h:1970
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorPointerRecipe.
Definition: VPlan.h:1990
VPVectorPointerRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1984
A recipe for widening Call instructions using library calls.
Definition: VPlan.h:1719
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
const_operand_range arg_operands() const
Definition: VPlan.h:1759
VPWidenCallRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1738
Function * getCalledScalarFunction() const
Definition: VPlan.h:1752
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCallRecipe.
void execute(VPTransformState &State) override
Produce a widened version of the call instruction.
operand_range arg_operands()
Definition: VPlan.h:1756
~VPWidenCallRecipe() override=default
VPWidenCallRecipe(Value *UV, Function *Variant, ArrayRef< VPValue * > CallArguments, DebugLoc DL={})
Definition: VPlan.h:1726
A Recipe for widening the canonical induction variable of the vector loop.
Definition: VPlan.h:3308
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenCanonicalIVRecipe() override=default
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCanonicalIVRecipe.
Definition: VPlan.h:3328
VPWidenCanonicalIVRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3315
VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
Definition: VPlan.h:3310
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition: VPlan.h:1529
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, CastInst &UI)
Definition: VPlan.h:1537
Instruction::CastOps getOpcode() const
Definition: VPlan.h:1574
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Returns the result type of the cast.
Definition: VPlan.h:1577
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
Definition: VPlan.h:1545
void execute(VPTransformState &State) override
Produce widened copies of the cast.
~VPWidenCastRecipe() override=default
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCastRecipe.
VPWidenCastRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1551
A recipe for widening operations with vector-predication intrinsics with explicit vector length (EVL)...
Definition: VPlan.h:1482
const VPValue * getEVL() const
Definition: VPlan.h:1506
~VPWidenEVLRecipe() override=default
VPWidenEVLRecipe(Instruction &I, iterator_range< IterT > Operands, VPValue &EVL)
Definition: VPlan.h:1487
VPWidenRecipe * clone() override final
Clone the current recipe.
Definition: VPlan.h:1498
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override final
Print the recipe.
VP_CLASSOF_IMPL(VPDef::VPWidenEVLSC)
VPWidenEVLRecipe(VPWidenRecipe &W, VPValue &EVL)
Definition: VPlan.h:1491
void execute(VPTransformState &State) override final
Produce a vp-intrinsic using the opcode and operands of the recipe, processing EVL elements.
VPValue * getEVL()
Definition: VPlan.h:1505
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:1513
A recipe for handling GEP instructions.
Definition: VPlan.h:1854
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the gep nodes.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenGEPRecipe.
Definition: VPlan.h:1887
VPWidenGEPRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1876
~VPWidenGEPRecipe() override=default
VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range< IterT > Operands)
Definition: VPlan.h:1871
Base class for widened induction (VPWidenIntOrFpInductionRecipe and VPWidenPointerInductionRecipe),...
Definition: VPlan.h:2088
VPValue * getBackedgeValue() override
Returns the incoming value from the loop backedge.
Definition: VPlan.h:2115
PHINode * getPHINode() const
Definition: VPlan.h:2110
VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, DebugLoc DL)
Definition: VPlan.h:2092
VPValue * getStepValue()
Returns the step value of the induction.
Definition: VPlan.h:2107
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition: VPlan.h:2113
VPRecipeBase & getBackedgeRecipe() override
Returns the backedge value as a recipe.
Definition: VPlan.h:2122
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:2099
const VPValue * getStepValue() const
Definition: VPlan.h:2108
virtual void execute(VPTransformState &State) override=0
Generate the phi nodes.
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
Definition: VPlan.h:2132
const TruncInst * getTruncInst() const
Definition: VPlan.h:2186
const VPValue * getVFValue() const
Definition: VPlan.h:2175
~VPWidenIntOrFpInductionRecipe() override=default
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, TruncInst *Trunc, DebugLoc DL)
Definition: VPlan.h:2145
VPWidenIntOrFpInductionRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2156
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition: VPlan.h:2185
void execute(VPTransformState &State) override
Generate the vectorized and scalarized versions of the phi node as needed by their users.
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, DebugLoc DL)
Definition: VPlan.h:2136
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
Definition: VPlan.h:2201
Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:2194
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A recipe for widening vector intrinsics.
Definition: VPlan.h:1627
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, std::initializer_list< VPValue * > CallArguments, Type *Ty, DebugLoc DL={})
Definition: VPlan.h:1668
Intrinsic::ID getVectorIntrinsicID() const
Return the ID of the intrinsic.
Definition: VPlan.h:1692
bool mayReadFromMemory() const
Returns true if the intrinsic may read from memory.
Definition: VPlan.h:1701
StringRef getIntrinsicName() const
Return the name of the intrinsic as a string.
VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, DebugLoc DL={})
Definition: VPlan.h:1653
bool mayHaveSideEffects() const
Returns true if the intrinsic may have side-effects.
Definition: VPlan.h:1707
VPWidenIntrinsicRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1676
bool mayWriteToMemory() const
Returns true if the intrinsic may write to memory.
Definition: VPlan.h:1704
~VPWidenIntrinsicRecipe() override=default
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Return the scalar return type of the intrinsic.
Definition: VPlan.h:1695
void execute(VPTransformState &State) override
Produce a widened version of the vector intrinsic.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector intrinsic.
VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, DebugLoc DL={})
Definition: VPlan.h:1644
A common base class for widening memory operations.
Definition: VPlan.h:2884
bool IsMasked
Whether the memory access is masked.
Definition: VPlan.h:2895
bool Reverse
Whether the consecutive accessed addresses are in reverse order.
Definition: VPlan.h:2892
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition: VPlan.h:2931
static bool classof(const VPUser *U)
Definition: VPlan.h:2925
void execute(VPTransformState &State) override
Generate the wide load/store.
Definition: VPlan.h:2951
Instruction & Ingredient
Definition: VPlan.h:2886
VPWidenMemoryRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2914
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenMemoryRecipe.
Instruction & getIngredient() const
Definition: VPlan.h:2959
bool Consecutive
Whether the accessed addresses are consecutive.
Definition: VPlan.h:2889
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:2918
VPWidenMemoryRecipe(const char unsigned SC, Instruction &I, std::initializer_list< VPValue * > Operands, bool Consecutive, bool Reverse, DebugLoc DL)
Definition: VPlan.h:2905
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2945
bool isMasked() const
Returns true if the recipe is masked.
Definition: VPlan.h:2941
void setMask(VPValue *Mask)
Definition: VPlan.h:2897
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition: VPlan.h:2938
bool isReverse() const
Return whether the consecutive loaded/stored addresses are in reverse order.
Definition: VPlan.h:2935
A recipe for handling phis that are widened in the vector loop.
Definition: VPlan.h:2292
void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock)
Adds a pair (IncomingV, IncomingBlock) to the phi.
Definition: VPlan.h:2322
VPValue * getIncomingValue(unsigned I)
Returns the I th incoming VPValue.
Definition: VPlan.h:2331
VPWidenPHIRecipe(PHINode *Phi, VPValue *Start=nullptr)
Create a new VPWidenPHIRecipe for Phi with start value Start.
Definition: VPlan.h:2298
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2304
~VPWidenPHIRecipe() override=default
VPBasicBlock * getIncomingBlock(unsigned I)
Returns the I th incoming VPBasicBlock.
Definition: VPlan.h:2328
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPWidenPointerInductionRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2222
~VPWidenPointerInductionRecipe() override=default
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, bool IsScalarAfterVectorization, DebugLoc DL)
Create a new VPWidenPointerInductionRecipe for Phi with start value Start.
Definition: VPlan.h:2213
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
VPValue * getFirstUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the first unrolled part,...
Definition: VPlan.h:2239
void execute(VPTransformState &State) override
Generate vector values for the pointer induction.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
Definition: VPlan.h:1431
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenRecipe.
void execute(VPTransformState &State) override
Produce a widened instruction using the opcode and operands of the recipe, processing State....
VPWidenRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1447
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenRecipe() override=default
VPWidenRecipe(Instruction &I, iterator_range< IterT > Operands)
Definition: VPlan.h:1442
unsigned getOpcode() const
Definition: VPlan.h:1471
VPWidenRecipe(unsigned VPDefOpcode, Instruction &I, iterator_range< IterT > Operands)
Definition: VPlan.h:1436
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:1453
static bool classof(const VPUser *U)
Definition: VPlan.h:1458
VPlanPrinter prints a given VPlan to a given output stream.
Definition: VPlan.h:4037
VPlanPrinter(raw_ostream &O, const VPlan &P)
Definition: VPlan.h:4078
LLVM_DUMP_METHOD void dump()
Definition: VPlan.cpp:1277
Class that maps (parts of) an existing VPlan to trees of combined VPInstructions.
Definition: VPlan.h:4288
VPInstruction * buildGraph(ArrayRef< VPValue * > Operands)
Tries to build an SLP tree rooted at Operands and returns a VPInstruction combining Operands,...
Definition: VPlanSLP.cpp:359
bool isCompletelySLP() const
Return true if all visited instructions can be combined.
Definition: VPlan.h:4378
~VPlanSlp()=default
VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB)
Definition: VPlan.h:4366
unsigned getWidestBundleBits() const
Return the width of the widest combined bundle in bits.
Definition: VPlan.h:4375
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition: VPlan.h:3761
void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition: VPlan.cpp:1172
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition: VPlan.cpp:1148
void prepareToExecute(Value *TripCount, Value *VectorTripCount, VPTransformState &State)
Prepare the plan for execution, setting up the required live-in values.
Definition: VPlan.cpp:930
bool hasScalableVF()
Definition: VPlan.h:3945
VPBasicBlock * getEntry()
Definition: VPlan.h:3870
VPValue & getVectorTripCount()
The vector trip count.
Definition: VPlan.h:3928
void setName(const Twine &newName)
Definition: VPlan.h:3973
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition: VPlan.h:3934
VPValue & getVF()
Returns the VF of the vector loop region.
Definition: VPlan.h:3931
VPValue * getTripCount() const
The trip count of the original loop.
Definition: VPlan.h:3907
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
Definition: VPlan.h:3921
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
Definition: VPlan.h:3951
VPlan(BasicBlock *ScalarHeaderBB, VPValue *TC)
Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock wrapping ScalarHeaderBB and a tr...
Definition: VPlan.h:3832
const VPBasicBlock * getEntry() const
Definition: VPlan.h:3871
unsigned getUF() const
Definition: VPlan.h:3959
static VPlanPtr createInitialVPlan(Type *InductionTy, PredicatedScalarEvolution &PSE, bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop)
Create initial VPlan, having an "entry" VPBasicBlock (wrapping original scalar pre-header) which cont...
Definition: VPlan.cpp:851
bool hasVF(ElementCount VF)
Definition: VPlan.h:3944
void addSCEVExpansion(const SCEV *S, VPValue *V)
Definition: VPlan.h:4024
bool hasUF(unsigned UF) const
Definition: VPlan.h:3957
void setVF(ElementCount VF)
Definition: VPlan.h:3938
auto getExitBlocks()
Return an iterator range over the VPIRBasicBlock wrapping the exit blocks of the VPlan,...
Definition: VPlanCFG.h:309
VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition: VPlan.cpp:1079
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
Definition: VPlan.cpp:1073
const VPBasicBlock * getMiddleBlock() const
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
Definition: VPlan.h:3885
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
Definition: VPlan.h:3914
VPBasicBlock * getMiddleBlock()
Definition: VPlan.h:3888
void setEntry(VPBasicBlock *VPBB)
Definition: VPlan.h:3840
VPValue * getOrAddLiveIn(Value *V)
Gets the live-in VPValue for V or adds a new live-in (if none exists yet) for V.
Definition: VPlan.h:3977
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition: VPlan.cpp:1178
bool hasScalarVFOnly() const
Definition: VPlan.h:3955
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition: VPlan.h:3893
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition: VPlan.cpp:977
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the vector loop.
Definition: VPlan.h:4011
void print(raw_ostream &O) const
Print this VPlan to O.
Definition: VPlan.cpp:1131
void addVF(ElementCount VF)
Definition: VPlan.h:3936
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition: VPlan.h:3898
VPValue * getLiveIn(Value *V) const
Return the live-in VPValue for V, if there is one or nullptr otherwise.
Definition: VPlan.h:3994
VPValue * getSCEVExpansion(const SCEV *S) const
Definition: VPlan.h:4020
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
Definition: VPlan.cpp:1095
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region.
Definition: VPlan.h:3874
void setUF(unsigned UF)
Definition: VPlan.h:3964
VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
Definition: VPlan.cpp:1219
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
An ilist node that can access its parent list.
Definition: ilist_node.h:321
base_list_type::const_reverse_iterator const_reverse_iterator
Definition: ilist.h:125
void pop_back()
Definition: ilist.h:255
base_list_type::reverse_iterator reverse_iterator
Definition: ilist.h:123
base_list_type::const_iterator const_iterator
Definition: ilist.h:122
iterator insert(iterator where, pointer New)
Definition: ilist.h:165
CRTP base class which implements the entire standard iterator facade in terms of a minimal subset of ...
Definition: iterator.h:80
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
This file defines classes to implement an intrusive doubly linked list class (i.e.
This file defines the ilist_node class template, which is a convenient base class for creating classe...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ BasicBlock
Various leaf nodes.
Definition: ISDOpcodes.h:71
AttributeList getAttributes(LLVMContext &C, ID id)
Return the attributes for an intrinsic.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1759
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
bool isEqual(const GCNRPTracker::LiveRegSet &S1, const GCNRPTracker::LiveRegSet &S2)
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:198
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto cast_or_null(const Y &Val)
Definition: Casting.h:720
auto map_range(ContainerTy &&C, FuncTy F)
Definition: STLExtras.h:377
auto dyn_cast_or_null(const Y &Val)
Definition: Casting.h:759
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
std::unique_ptr< VPlan > VPlanPtr
Definition: VPlan.h:144
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
Definition: SmallVector.h:1299
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and returns a new filter_iterator...
Definition: STLExtras.h:573
@ Other
Any other memory.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1938
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:303
Value * createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, int64_t Step)
Return a value for Step multiplied by VF.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
unsigned getReciprocalPredBlockProb()
A helper function that returns the reciprocal of the block probability of predicated blocks.
Definition: VPlan.h:92
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition: Hashing.h:468
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Definition: VPlan.h:97
iterator end()
Definition: VPlan.h:138
const ElementCount Start
Definition: VPlan.h:99
ElementCount End
Definition: VPlan.h:102
iterator begin()
Definition: VPlan.h:137
bool isEmpty() const
Definition: VPlan.h:104
VFRange(const ElementCount &Start, const ElementCount &End)
Definition: VPlan.h:108
Struct to hold various analysis needed for cost computations.
Definition: VPlan.h:688
LLVMContext & LLVMCtx
Definition: VPlan.h:692
LoopVectorizationCostModel & CM
Definition: VPlan.h:693
TargetTransformInfo::OperandValueInfo getOperandInfo(VPValue *V) const
Returns the OperandInfo for V, if it is a live-in.
Definition: VPlan.cpp:1662
bool skipCostComputation(Instruction *UI, bool IsVector) const
Return true if the cost for UI shouldn't be computed, e.g.
InstructionCost getLegacyCost(Instruction *UI, ElementCount VF) const
Return the cost for UI with VF using the legacy cost model as fallback until computing the cost of al...
VPTypeAnalysis Types
Definition: VPlan.h:691
const TargetLibraryInfo & TLI
Definition: VPlan.h:690
VPCostContext(const TargetTransformInfo &TTI, const TargetLibraryInfo &TLI, Type *CanIVTy, LoopVectorizationCostModel &CM)
Definition: VPlan.h:696
const TargetTransformInfo & TTI
Definition: VPlan.h:689
SmallPtrSet< Instruction *, 8 > SkipCostComputation
Definition: VPlan.h:694
A recipe for handling first-order recurrence phis.
Definition: VPlan.h:2337
void execute(VPTransformState &State) override
Generate the phi nodes.
VPFirstOrderRecurrencePHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2347
VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start)
Definition: VPlan.h:2338
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this first-order recurrence phi recipe.
static bool classof(const VPHeaderPHIRecipe *R)
Definition: VPlan.h:2343
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
WrapFlagsTy(bool HasNUW, bool HasNSW)
Definition: VPlan.h:947
Hold state information used when constructing the CFG of the output IR, traversing the VPBasicBlocks ...
Definition: VPlan.h:337
BasicBlock * PrevBB
The previous IR BasicBlock created or used.
Definition: VPlan.h:343
SmallDenseMap< VPBasicBlock *, BasicBlock * > VPBB2IRBB
A mapping of each VPBasicBlock to the corresponding BasicBlock.
Definition: VPlan.h:351
VPBasicBlock * PrevVPBB
The previous VPBasicBlock visited. Initially set to null.
Definition: VPlan.h:339
BasicBlock * ExitBB
The last IR BasicBlock in the output IR.
Definition: VPlan.h:347
CFGState(DominatorTree *DT)
Definition: VPlan.h:356
BasicBlock * getPreheaderBBFor(VPRecipeBase *R)
Returns the BasicBlock* mapped to the pre-header of the loop region containing R.
Definition: VPlan.cpp:352
DomTreeUpdater DTU
Updater for the DominatorTree.
Definition: VPlan.h:354
DenseMap< VPValue *, Value * > VPV2Vector
Definition: VPlan.h:254
DenseMap< VPValue *, SmallVector< Value *, 4 > > VPV2Scalars
Definition: VPlan.h:256
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
Definition: VPlan.h:236
bool hasScalarValue(VPValue *Def, VPLane Lane)
Definition: VPlan.h:268
bool hasVectorValue(VPValue *Def)
Definition: VPlan.h:266
LoopInfo * LI
Hold a pointer to LoopInfo to register new basic blocks in the loop.
Definition: VPlan.h:365
DenseMap< const SCEV *, Value * > ExpandedSCEVs
Map SCEVs to their expanded values.
Definition: VPlan.h:388
VPTypeAnalysis TypeAnalysis
VPlan-based type analysis.
Definition: VPlan.h:391
struct llvm::VPTransformState::DataState Data
void addMetadata(Value *To, Instruction *From)
Add metadata from one instruction to another.
Definition: VPlan.cpp:365
void packScalarIntoVectorValue(VPValue *Def, const VPLane &Lane)
Construct the vector value of a scalarized value V one lane at a time.
Definition: VPlan.cpp:398
Value * get(VPValue *Def, bool IsScalar=false)
Get the generated vector Value for a given VPValue Def if IsScalar is false, otherwise return the gen...
Definition: VPlan.cpp:253
struct llvm::VPTransformState::CFGState CFG
LoopVersioning * LVer
LoopVersioning.
Definition: VPlan.h:384
void addNewMetadata(Instruction *To, const Instruction *Orig)
Add additional metadata to To that was not present on Orig.
Definition: VPlan.cpp:357
void reset(VPValue *Def, Value *V, const VPLane &Lane)
Reset an existing scalar value for Def and a given Lane.
Definition: VPlan.h:305
std::optional< VPLane > Lane
Hold the index to generate specific scalar instructions.
Definition: VPlan.h:249
void set(VPValue *Def, Value *V, const VPLane &Lane)
Set the generated scalar V for Def and the given Lane.
Definition: VPlan.h:295
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
Definition: VPlan.h:368
const TargetTransformInfo * TTI
Target Transform Info.
Definition: VPlan.h:241
VPlan * Plan
Pointer to the VPlan for which code is generated.
Definition: VPlan.h:374
InnerLoopVectorizer * ILV
Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
Definition: VPlan.h:371
void reset(VPValue *Def, Value *V)
Reset an existing vector value for Def.
Definition: VPlan.h:289
ElementCount VF
The chosen Vectorization Factor of the loop being vectorized.
Definition: VPlan.h:244
Loop * CurrentVectorLoop
The loop object for the current parent region, or nullptr.
Definition: VPlan.h:377
void setDebugLocFrom(DebugLoc DL)
Set the debug location in the builder using the debug location DL.
Definition: VPlan.cpp:376
void set(VPValue *Def, Value *V, bool IsScalar=false)
Set the generated vector Value for a given VPValue, if IsScalar is false.
Definition: VPlan.h:278
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
Definition: VPlan.h:3003
void execute(VPTransformState &State) override
Generate the wide load or gather.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition: VPlan.h:3015
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3031
VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue &EVL, VPValue *Mask)
Definition: VPlan.h:3004
A recipe for widening load operations, using the address to load from and an optional mask.
Definition: VPlan.h:2964
VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC)
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
Definition: VPlan.h:2965
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2991
void execute(VPTransformState &State) override
Generate a wide load or gather.
VPWidenLoadRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2973
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A recipe for widening select instructions.
Definition: VPlan.h:1816
bool isInvariantCond() const
Definition: VPlan.h:1848
VPWidenSelectRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1824
VPWidenSelectRecipe(SelectInst &I, iterator_range< IterT > Operands)
Definition: VPlan.h:1818
VPValue * getCond() const
Definition: VPlan.h:1844
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenSelectRecipe.
void execute(VPTransformState &State) override
Produce a widened version of the select instruction.
~VPWidenSelectRecipe() override=default
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
Definition: VPlan.h:3083
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition: VPlan.h:3094
void execute(VPTransformState &State) override
Generate the wide store or scatter.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3113
VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue &EVL, VPValue *Mask)
Definition: VPlan.h:3084
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition: VPlan.h:3097
A recipe for widening store operations, using the stored value, the address to store to and an option...
Definition: VPlan.h:3042
void execute(VPTransformState &State) override
Generate a wide store or scatter.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3071
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal, VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
Definition: VPlan.h:3043
VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC)
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition: VPlan.h:3059
VPWidenStoreRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3050
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPlanIngredient(const Value *V)
Definition: VPlan.h:4087
const Value * V
Definition: VPlan.h:4085
void print(raw_ostream &O) const
Definition: VPlan.cpp:1393