LLVM 17.0.0git
VPlan.h
Go to the documentation of this file.
1//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains the declarations of the Vectorization Plan base classes:
11/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12/// VPBlockBase, together implementing a Hierarchical CFG;
13/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
14/// within VPBasicBlocks;
15/// 3. VPInstruction, a concrete Recipe and VPUser modeling a single planned
16/// instruction;
17/// 4. The VPlan class holding a candidate for vectorization;
18/// 5. The VPlanPrinter class providing a way to print a plan in dot format;
19/// These are documented in docs/VectorizationPlan.rst.
20//
21//===----------------------------------------------------------------------===//
22
23#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
24#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
25
26#include "VPlanValue.h"
27#include "llvm/ADT/DenseMap.h"
29#include "llvm/ADT/MapVector.h"
33#include "llvm/ADT/Twine.h"
34#include "llvm/ADT/ilist.h"
35#include "llvm/ADT/ilist_node.h"
38#include "llvm/IR/DebugLoc.h"
39#include "llvm/IR/FMF.h"
41#include <algorithm>
42#include <cassert>
43#include <cstddef>
44#include <string>
45
46namespace llvm {
47
48class BasicBlock;
49class DominatorTree;
50class InductionDescriptor;
51class InnerLoopVectorizer;
52class IRBuilderBase;
53class LoopInfo;
54class PredicateScalarEvolution;
55class raw_ostream;
56class RecurrenceDescriptor;
57class SCEV;
58class Type;
59class VPBasicBlock;
60class VPRegionBlock;
61class VPlan;
62class VPReplicateRecipe;
63class VPlanSlp;
64class Value;
65
66namespace Intrinsic {
67typedef unsigned ID;
68}
69
70/// Returns a calculation for the total number of elements for a given \p VF.
71/// For fixed width vectors this value is a constant, whereas for scalable
72/// vectors it is an expression determined at runtime.
73Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF);
74
75/// Return a value for Step multiplied by VF.
76Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
77 int64_t Step);
78
79const SCEV *createTripCountSCEV(Type *IdxTy, PredicatedScalarEvolution &PSE);
80
81/// A range of powers-of-2 vectorization factors with fixed start and
82/// adjustable end. The range includes start and excludes end, e.g.,:
83/// [1, 9) = {1, 2, 4, 8}
84struct VFRange {
85 // A power of 2.
87
88 // Need not be a power of 2. If End <= Start range is empty.
90
91 bool isEmpty() const {
93 }
94
96 : Start(Start), End(End) {
98 "Both Start and End should have the same scalable flag");
100 "Expected Start to be a power of 2");
101 }
102};
103
104using VPlanPtr = std::unique_ptr<VPlan>;
105
106/// In what follows, the term "input IR" refers to code that is fed into the
107/// vectorizer whereas the term "output IR" refers to code that is generated by
108/// the vectorizer.
109
110/// VPLane provides a way to access lanes in both fixed width and scalable
111/// vectors, where for the latter the lane index sometimes needs calculating
112/// as a runtime expression.
113class VPLane {
114public:
115 /// Kind describes how to interpret Lane.
116 enum class Kind : uint8_t {
117 /// For First, Lane is the index into the first N elements of a
118 /// fixed-vector <N x <ElTy>> or a scalable vector <vscale x N x <ElTy>>.
119 First,
120 /// For ScalableLast, Lane is the offset from the start of the last
121 /// N-element subvector in a scalable vector <vscale x N x <ElTy>>. For
122 /// example, a Lane of 0 corresponds to lane `(vscale - 1) * N`, a Lane of
123 /// 1 corresponds to `((vscale - 1) * N) + 1`, etc.
125 };
126
127private:
128 /// in [0..VF)
129 unsigned Lane;
130
131 /// Indicates how the Lane should be interpreted, as described above.
132 Kind LaneKind;
133
134public:
135 VPLane(unsigned Lane, Kind LaneKind) : Lane(Lane), LaneKind(LaneKind) {}
136
138
140 unsigned LaneOffset = VF.getKnownMinValue() - 1;
141 Kind LaneKind;
142 if (VF.isScalable())
143 // In this case 'LaneOffset' refers to the offset from the start of the
144 // last subvector with VF.getKnownMinValue() elements.
146 else
147 LaneKind = VPLane::Kind::First;
148 return VPLane(LaneOffset, LaneKind);
149 }
150
151 /// Returns a compile-time known value for the lane index and asserts if the
152 /// lane can only be calculated at runtime.
153 unsigned getKnownLane() const {
154 assert(LaneKind == Kind::First);
155 return Lane;
156 }
157
158 /// Returns an expression describing the lane index that can be used at
159 /// runtime.
160 Value *getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const;
161
162 /// Returns the Kind of lane offset.
163 Kind getKind() const { return LaneKind; }
164
165 /// Returns true if this is the first lane of the whole vector.
166 bool isFirstLane() const { return Lane == 0 && LaneKind == Kind::First; }
167
168 /// Maps the lane to a cache index based on \p VF.
169 unsigned mapToCacheIndex(const ElementCount &VF) const {
170 switch (LaneKind) {
172 assert(VF.isScalable() && Lane < VF.getKnownMinValue());
173 return VF.getKnownMinValue() + Lane;
174 default:
175 assert(Lane < VF.getKnownMinValue());
176 return Lane;
177 }
178 }
179
180 /// Returns the maxmimum number of lanes that we are able to consider
181 /// caching for \p VF.
182 static unsigned getNumCachedLanes(const ElementCount &VF) {
183 return VF.getKnownMinValue() * (VF.isScalable() ? 2 : 1);
184 }
185};
186
187/// VPIteration represents a single point in the iteration space of the output
188/// (vectorized and/or unrolled) IR loop.
190 /// in [0..UF)
191 unsigned Part;
192
194
195 VPIteration(unsigned Part, unsigned Lane,
197 : Part(Part), Lane(Lane, Kind) {}
198
199 VPIteration(unsigned Part, const VPLane &Lane) : Part(Part), Lane(Lane) {}
200
201 bool isFirstIteration() const { return Part == 0 && Lane.isFirstLane(); }
202};
203
204/// VPTransformState holds information passed down when "executing" a VPlan,
205/// needed for generating the output IR.
210 : VF(VF), UF(UF), LI(LI), DT(DT), Builder(Builder), ILV(ILV), Plan(Plan),
211 LVer(nullptr) {}
212
213 /// The chosen Vectorization and Unroll Factors of the loop being vectorized.
215 unsigned UF;
216
217 /// Hold the indices to generate specific scalar instructions. Null indicates
218 /// that all instances are to be generated, using either scalar or vector
219 /// instructions.
220 std::optional<VPIteration> Instance;
221
222 struct DataState {
223 /// A type for vectorized values in the new loop. Each value from the
224 /// original loop, when vectorized, is represented by UF vector values in
225 /// the new unrolled loop, where UF is the unroll factor.
227
229
233
234 /// Get the generated Value for a given VPValue and a given Part. Note that
235 /// as some Defs are still created by ILV and managed in its ValueMap, this
236 /// method will delegate the call to ILV in such cases in order to provide
237 /// callers a consistent API.
238 /// \see set.
239 Value *get(VPValue *Def, unsigned Part);
240
241 /// Get the generated Value for a given VPValue and given Part and Lane.
242 Value *get(VPValue *Def, const VPIteration &Instance);
243
244 bool hasVectorValue(VPValue *Def, unsigned Part) {
245 auto I = Data.PerPartOutput.find(Def);
246 return I != Data.PerPartOutput.end() && Part < I->second.size() &&
247 I->second[Part];
248 }
249
250 bool hasAnyVectorValue(VPValue *Def) const {
251 return Data.PerPartOutput.find(Def) != Data.PerPartOutput.end();
252 }
253
255 auto I = Data.PerPartScalars.find(Def);
256 if (I == Data.PerPartScalars.end())
257 return false;
258 unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
259 return Instance.Part < I->second.size() &&
260 CacheIdx < I->second[Instance.Part].size() &&
261 I->second[Instance.Part][CacheIdx];
262 }
263
264 /// Set the generated Value for a given VPValue and a given Part.
265 void set(VPValue *Def, Value *V, unsigned Part) {
266 if (!Data.PerPartOutput.count(Def)) {
268 Data.PerPartOutput[Def] = Entry;
269 }
270 Data.PerPartOutput[Def][Part] = V;
271 }
272 /// Reset an existing vector value for \p Def and a given \p Part.
273 void reset(VPValue *Def, Value *V, unsigned Part) {
274 auto Iter = Data.PerPartOutput.find(Def);
275 assert(Iter != Data.PerPartOutput.end() &&
276 "need to overwrite existing value");
277 Iter->second[Part] = V;
278 }
279
280 /// Set the generated scalar \p V for \p Def and the given \p Instance.
281 void set(VPValue *Def, Value *V, const VPIteration &Instance) {
282 auto Iter = Data.PerPartScalars.insert({Def, {}});
283 auto &PerPartVec = Iter.first->second;
284 while (PerPartVec.size() <= Instance.Part)
285 PerPartVec.emplace_back();
286 auto &Scalars = PerPartVec[Instance.Part];
287 unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
288 while (Scalars.size() <= CacheIdx)
289 Scalars.push_back(nullptr);
290 assert(!Scalars[CacheIdx] && "should overwrite existing value");
291 Scalars[CacheIdx] = V;
292 }
293
294 /// Reset an existing scalar value for \p Def and a given \p Instance.
295 void reset(VPValue *Def, Value *V, const VPIteration &Instance) {
296 auto Iter = Data.PerPartScalars.find(Def);
297 assert(Iter != Data.PerPartScalars.end() &&
298 "need to overwrite existing value");
299 assert(Instance.Part < Iter->second.size() &&
300 "need to overwrite existing value");
301 unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
302 assert(CacheIdx < Iter->second[Instance.Part].size() &&
303 "need to overwrite existing value");
304 Iter->second[Instance.Part][CacheIdx] = V;
305 }
306
307 /// Add additional metadata to \p To that was not present on \p Orig.
308 ///
309 /// Currently this is used to add the noalias annotations based on the
310 /// inserted memchecks. Use this for instructions that are *cloned* into the
311 /// vector loop.
312 void addNewMetadata(Instruction *To, const Instruction *Orig);
313
314 /// Add metadata from one instruction to another.
315 ///
316 /// This includes both the original MDs from \p From and additional ones (\see
317 /// addNewMetadata). Use this for *newly created* instructions in the vector
318 /// loop.
320
321 /// Similar to the previous function but it adds the metadata to a
322 /// vector of instructions.
324
325 /// Set the debug location in the builder using the debug location in \p V.
326 void setDebugLocFromInst(const Value *V);
327
328 /// Hold state information used when constructing the CFG of the output IR,
329 /// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
330 struct CFGState {
331 /// The previous VPBasicBlock visited. Initially set to null.
333
334 /// The previous IR BasicBlock created or used. Initially set to the new
335 /// header BasicBlock.
336 BasicBlock *PrevBB = nullptr;
337
338 /// The last IR BasicBlock in the output IR. Set to the exit block of the
339 /// vector loop.
340 BasicBlock *ExitBB = nullptr;
341
342 /// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case
343 /// of replication, maps the BasicBlock of the last replica created.
345
346 CFGState() = default;
347
348 /// Returns the BasicBlock* mapped to the pre-header of the loop region
349 /// containing \p R.
352
353 /// Hold a pointer to LoopInfo to register new basic blocks in the loop.
355
356 /// Hold a pointer to Dominator Tree to register new basic blocks in the loop.
358
359 /// Hold a reference to the IRBuilder used to generate output IR code.
361
363
364 /// Hold the canonical scalar IV of the vector loop (start=0, step=VF*UF).
365 Value *CanonicalIV = nullptr;
366
367 /// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
369
370 /// Pointer to the VPlan code is generated for.
372
373 /// Holds recipes that may generate a poison value that is used after
374 /// vectorization, even when their operands are not poison.
376
377 /// The loop object for the current parent region, or nullptr.
379
380 /// LoopVersioning. It's only set up (non-null) if memchecks were
381 /// used.
382 ///
383 /// This is currently only used to add no-alias metadata based on the
384 /// memchecks. The actually versioning is performed manually.
385 std::unique_ptr<LoopVersioning> LVer;
386};
387
388/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
389/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
391 friend class VPBlockUtils;
392
393 const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
394
395 /// An optional name for the block.
396 std::string Name;
397
398 /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
399 /// it is a topmost VPBlockBase.
400 VPRegionBlock *Parent = nullptr;
401
402 /// List of predecessor blocks.
404
405 /// List of successor blocks.
407
408 /// VPlan containing the block. Can only be set on the entry block of the
409 /// plan.
410 VPlan *Plan = nullptr;
411
412 /// Add \p Successor as the last successor to this block.
413 void appendSuccessor(VPBlockBase *Successor) {
414 assert(Successor && "Cannot add nullptr successor!");
415 Successors.push_back(Successor);
416 }
417
418 /// Add \p Predecessor as the last predecessor to this block.
419 void appendPredecessor(VPBlockBase *Predecessor) {
420 assert(Predecessor && "Cannot add nullptr predecessor!");
421 Predecessors.push_back(Predecessor);
422 }
423
424 /// Remove \p Predecessor from the predecessors of this block.
425 void removePredecessor(VPBlockBase *Predecessor) {
426 auto Pos = find(Predecessors, Predecessor);
427 assert(Pos && "Predecessor does not exist");
428 Predecessors.erase(Pos);
429 }
430
431 /// Remove \p Successor from the successors of this block.
432 void removeSuccessor(VPBlockBase *Successor) {
433 auto Pos = find(Successors, Successor);
434 assert(Pos && "Successor does not exist");
435 Successors.erase(Pos);
436 }
437
438protected:
439 VPBlockBase(const unsigned char SC, const std::string &N)
440 : SubclassID(SC), Name(N) {}
441
442public:
443 /// An enumeration for keeping track of the concrete subclass of VPBlockBase
444 /// that are actually instantiated. Values of this enumeration are kept in the
445 /// SubclassID field of the VPBlockBase objects. They are used for concrete
446 /// type identification.
447 using VPBlockTy = enum { VPBasicBlockSC, VPRegionBlockSC };
448
450
451 virtual ~VPBlockBase() = default;
452
453 const std::string &getName() const { return Name; }
454
455 void setName(const Twine &newName) { Name = newName.str(); }
456
457 /// \return an ID for the concrete type of this object.
458 /// This is used to implement the classof checks. This should not be used
459 /// for any other purpose, as the values may change as LLVM evolves.
460 unsigned getVPBlockID() const { return SubclassID; }
461
462 VPRegionBlock *getParent() { return Parent; }
463 const VPRegionBlock *getParent() const { return Parent; }
464
465 /// \return A pointer to the plan containing the current block.
466 VPlan *getPlan();
467 const VPlan *getPlan() const;
468
469 /// Sets the pointer of the plan containing the block. The block must be the
470 /// entry block into the VPlan.
471 void setPlan(VPlan *ParentPlan);
472
473 void setParent(VPRegionBlock *P) { Parent = P; }
474
475 /// \return the VPBasicBlock that is the entry of this VPBlockBase,
476 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
477 /// VPBlockBase is a VPBasicBlock, it is returned.
478 const VPBasicBlock *getEntryBasicBlock() const;
480
481 /// \return the VPBasicBlock that is the exiting this VPBlockBase,
482 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
483 /// VPBlockBase is a VPBasicBlock, it is returned.
484 const VPBasicBlock *getExitingBasicBlock() const;
486
487 const VPBlocksTy &getSuccessors() const { return Successors; }
488 VPBlocksTy &getSuccessors() { return Successors; }
489
491
492 const VPBlocksTy &getPredecessors() const { return Predecessors; }
493 VPBlocksTy &getPredecessors() { return Predecessors; }
494
495 /// \return the successor of this VPBlockBase if it has a single successor.
496 /// Otherwise return a null pointer.
498 return (Successors.size() == 1 ? *Successors.begin() : nullptr);
499 }
500
501 /// \return the predecessor of this VPBlockBase if it has a single
502 /// predecessor. Otherwise return a null pointer.
504 return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
505 }
506
507 size_t getNumSuccessors() const { return Successors.size(); }
508 size_t getNumPredecessors() const { return Predecessors.size(); }
509
510 /// An Enclosing Block of a block B is any block containing B, including B
511 /// itself. \return the closest enclosing block starting from "this", which
512 /// has successors. \return the root enclosing block if all enclosing blocks
513 /// have no successors.
515
516 /// \return the closest enclosing block starting from "this", which has
517 /// predecessors. \return the root enclosing block if all enclosing blocks
518 /// have no predecessors.
520
521 /// \return the successors either attached directly to this VPBlockBase or, if
522 /// this VPBlockBase is the exit block of a VPRegionBlock and has no
523 /// successors of its own, search recursively for the first enclosing
524 /// VPRegionBlock that has successors and return them. If no such
525 /// VPRegionBlock exists, return the (empty) successors of the topmost
526 /// VPBlockBase reached.
529 }
530
531 /// \return the hierarchical successor of this VPBlockBase if it has a single
532 /// hierarchical successor. Otherwise return a null pointer.
535 }
536
537 /// \return the predecessors either attached directly to this VPBlockBase or,
538 /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
539 /// predecessors of its own, search recursively for the first enclosing
540 /// VPRegionBlock that has predecessors and return them. If no such
541 /// VPRegionBlock exists, return the (empty) predecessors of the topmost
542 /// VPBlockBase reached.
545 }
546
547 /// \return the hierarchical predecessor of this VPBlockBase if it has a
548 /// single hierarchical predecessor. Otherwise return a null pointer.
551 }
552
553 /// Set a given VPBlockBase \p Successor as the single successor of this
554 /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
555 /// This VPBlockBase must have no successors.
557 assert(Successors.empty() && "Setting one successor when others exist.");
558 appendSuccessor(Successor);
559 }
560
561 /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
562 /// successors of this VPBlockBase. This VPBlockBase is not added as
563 /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
564 /// successors.
565 void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
566 assert(Successors.empty() && "Setting two successors when others exist.");
567 appendSuccessor(IfTrue);
568 appendSuccessor(IfFalse);
569 }
570
571 /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
572 /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
573 /// as successor of any VPBasicBlock in \p NewPreds.
575 assert(Predecessors.empty() && "Block predecessors already set.");
576 for (auto *Pred : NewPreds)
577 appendPredecessor(Pred);
578 }
579
580 /// Remove all the predecessor of this block.
581 void clearPredecessors() { Predecessors.clear(); }
582
583 /// Remove all the successors of this block.
584 void clearSuccessors() { Successors.clear(); }
585
586 /// The method which generates the output IR that correspond to this
587 /// VPBlockBase, thereby "executing" the VPlan.
588 virtual void execute(VPTransformState *State) = 0;
589
590 /// Delete all blocks reachable from a given VPBlockBase, inclusive.
591 static void deleteCFG(VPBlockBase *Entry);
592
593 /// Return true if it is legal to hoist instructions into this block.
595 // There are currently no constraints that prevent an instruction to be
596 // hoisted into a VPBlockBase.
597 return true;
598 }
599
600 /// Replace all operands of VPUsers in the block with \p NewValue and also
601 /// replaces all uses of VPValues defined in the block with NewValue.
602 virtual void dropAllReferences(VPValue *NewValue) = 0;
603
604#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
605 void printAsOperand(raw_ostream &OS, bool PrintType) const {
606 OS << getName();
607 }
608
609 /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
610 /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
611 /// consequtive numbers.
612 ///
613 /// Note that the numbering is applied to the whole VPlan, so printing
614 /// individual blocks is consistent with the whole VPlan printing.
615 virtual void print(raw_ostream &O, const Twine &Indent,
616 VPSlotTracker &SlotTracker) const = 0;
617
618 /// Print plain-text dump of this VPlan to \p O.
619 void print(raw_ostream &O) const {
621 print(O, "", SlotTracker);
622 }
623
624 /// Print the successors of this block to \p O, prefixing all lines with \p
625 /// Indent.
626 void printSuccessors(raw_ostream &O, const Twine &Indent) const;
627
628 /// Dump this VPBlockBase to dbgs().
629 LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
630#endif
631};
632
633/// A value that is used outside the VPlan. The operand of the user needs to be
634/// added to the associated LCSSA phi node.
635class VPLiveOut : public VPUser {
636 PHINode *Phi;
637
638public:
640 : VPUser({Op}, VPUser::VPUserID::LiveOut), Phi(Phi) {}
641
642 /// Fixup the wrapped LCSSA phi node in the unique exit block. This simply
643 /// means we need to add the appropriate incoming value from the middle
644 /// block as exiting edges from the scalar epilogue loop (if present) are
645 /// already in place, and we exit the vector loop exclusively to the middle
646 /// block.
647 void fixPhi(VPlan &Plan, VPTransformState &State);
648
649 /// Returns true if the VPLiveOut uses scalars of operand \p Op.
650 bool usesScalars(const VPValue *Op) const override {
652 "Op must be an operand of the recipe");
653 return true;
654 }
655
656 PHINode *getPhi() const { return Phi; }
657};
658
659/// VPRecipeBase is a base class modeling a sequence of one or more output IR
660/// instructions. VPRecipeBase owns the the VPValues it defines through VPDef
661/// and is responsible for deleting its defined values. Single-value
662/// VPRecipeBases that also inherit from VPValue must make sure to inherit from
663/// VPRecipeBase before VPValue.
664class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
665 public VPDef,
666 public VPUser {
667 friend VPBasicBlock;
668 friend class VPBlockUtils;
669
670 /// Each VPRecipe belongs to a single VPBasicBlock.
671 VPBasicBlock *Parent = nullptr;
672
673public:
676
677 template <typename IterT>
680 virtual ~VPRecipeBase() = default;
681
682 /// \return the VPBasicBlock which this VPRecipe belongs to.
683 VPBasicBlock *getParent() { return Parent; }
684 const VPBasicBlock *getParent() const { return Parent; }
685
686 /// The method which generates the output IR instructions that correspond to
687 /// this VPRecipe, thereby "executing" the VPlan.
688 virtual void execute(VPTransformState &State) = 0;
689
690 /// Insert an unlinked recipe into a basic block immediately before
691 /// the specified recipe.
692 void insertBefore(VPRecipeBase *InsertPos);
693 /// Insert an unlinked recipe into \p BB immediately before the insertion
694 /// point \p IP;
696
697 /// Insert an unlinked Recipe into a basic block immediately after
698 /// the specified Recipe.
699 void insertAfter(VPRecipeBase *InsertPos);
700
701 /// Unlink this recipe from its current VPBasicBlock and insert it into
702 /// the VPBasicBlock that MovePos lives in, right after MovePos.
703 void moveAfter(VPRecipeBase *MovePos);
704
705 /// Unlink this recipe and insert into BB before I.
706 ///
707 /// \pre I is a valid iterator into BB.
709
710 /// This method unlinks 'this' from the containing basic block, but does not
711 /// delete it.
712 void removeFromParent();
713
714 /// This method unlinks 'this' from the containing basic block and deletes it.
715 ///
716 /// \returns an iterator pointing to the element after the erased one
718
719 /// Returns the underlying instruction, if the recipe is a VPValue or nullptr
720 /// otherwise.
722 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue());
723 }
725 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue());
726 }
727
728 /// Method to support type inquiry through isa, cast, and dyn_cast.
729 static inline bool classof(const VPDef *D) {
730 // All VPDefs are also VPRecipeBases.
731 return true;
732 }
733
734 static inline bool classof(const VPUser *U) {
736 }
737
738 /// Returns true if the recipe may have side-effects.
739 bool mayHaveSideEffects() const;
740
741 /// Returns true for PHI-like recipes.
742 bool isPhi() const {
743 return getVPDefID() >= VPFirstPHISC && getVPDefID() <= VPLastPHISC;
744 }
745
746 /// Returns true if the recipe may read from memory.
747 bool mayReadFromMemory() const;
748
749 /// Returns true if the recipe may write to memory.
750 bool mayWriteToMemory() const;
751
752 /// Returns true if the recipe may read from or write to memory.
753 bool mayReadOrWriteMemory() const {
755 }
756};
757
758// Helper macro to define common classof implementations for recipes.
759#define VP_CLASSOF_IMPL(VPDefID) \
760 static inline bool classof(const VPDef *D) { \
761 return D->getVPDefID() == VPDefID; \
762 } \
763 static inline bool classof(const VPValue *V) { \
764 auto *R = V->getDefiningRecipe(); \
765 return R && R->getVPDefID() == VPDefID; \
766 } \
767 static inline bool classof(const VPUser *U) { \
768 auto *R = dyn_cast<VPRecipeBase>(U); \
769 return R && R->getVPDefID() == VPDefID; \
770 } \
771 static inline bool classof(const VPRecipeBase *R) { \
772 return R->getVPDefID() == VPDefID; \
773 }
774
775/// This is a concrete Recipe that models a single VPlan-level instruction.
776/// While as any Recipe it may generate a sequence of IR instructions when
777/// executed, these instructions would always form a single-def expression as
778/// the VPInstruction is also a single def-use vertex.
779class VPInstruction : public VPRecipeBase, public VPValue {
780 friend class VPlanSlp;
781
782public:
783 /// VPlan opcodes, extending LLVM IR with idiomatics instructions.
784 enum {
786 Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
787 // values of a first-order recurrence.
795 // The next two are similar to the above, but instead increment the
796 // canonical IV separately for each unrolled part.
801 };
802
803private:
804 typedef unsigned char OpcodeTy;
805 OpcodeTy Opcode;
806 FastMathFlags FMF;
807 DebugLoc DL;
808
809 /// An optional name that can be used for the generated IR instruction.
810 const std::string Name;
811
812 /// Utility method serving execute(): generates a single instance of the
813 /// modeled instruction.
814 void generateInstruction(VPTransformState &State, unsigned Part);
815
816protected:
818
819public:
821 const Twine &Name = "")
822 : VPRecipeBase(VPDef::VPInstructionSC, Operands), VPValue(this),
823 Opcode(Opcode), DL(DL), Name(Name.str()) {}
824
825 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
826 DebugLoc DL = {}, const Twine &Name = "")
828
829 VP_CLASSOF_IMPL(VPDef::VPInstructionSC)
830
833 return new VPInstruction(Opcode, Operands, DL, Name);
834 }
835
836 unsigned getOpcode() const { return Opcode; }
837
838 /// Generate the instruction.
839 /// TODO: We currently execute only per-part unless a specific instance is
840 /// provided.
841 void execute(VPTransformState &State) override;
842
843#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
844 /// Print the VPInstruction to \p O.
845 void print(raw_ostream &O, const Twine &Indent,
846 VPSlotTracker &SlotTracker) const override;
847
848 /// Print the VPInstruction to dbgs() (for debugging).
849 LLVM_DUMP_METHOD void dump() const;
850#endif
851
852 /// Return true if this instruction may modify memory.
853 bool mayWriteToMemory() const {
854 // TODO: we can use attributes of the called function to rule out memory
855 // modifications.
856 return Opcode == Instruction::Store || Opcode == Instruction::Call ||
857 Opcode == Instruction::Invoke || Opcode == SLPStore;
858 }
859
860 bool hasResult() const {
861 // CallInst may or may not have a result, depending on the called function.
862 // Conservatively return calls have results for now.
863 switch (getOpcode()) {
864 case Instruction::Ret:
865 case Instruction::Br:
866 case Instruction::Store:
867 case Instruction::Switch:
868 case Instruction::IndirectBr:
869 case Instruction::Resume:
870 case Instruction::CatchRet:
871 case Instruction::Unreachable:
872 case Instruction::Fence:
873 case Instruction::AtomicRMW:
876 return false;
877 default:
878 return true;
879 }
880 }
881
882 /// Set the fast-math flags.
883 void setFastMathFlags(FastMathFlags FMFNew);
884
885 /// Returns true if the recipe only uses the first lane of operand \p Op.
886 bool onlyFirstLaneUsed(const VPValue *Op) const override {
888 "Op must be an operand of the recipe");
889 if (getOperand(0) != Op)
890 return false;
891 switch (getOpcode()) {
892 default:
893 return false;
900 return true;
901 };
902 llvm_unreachable("switch should return");
903 }
904};
905
906/// VPWidenRecipe is a recipe for producing a copy of vector type its
907/// ingredient. This recipe covers most of the traditional vectorization cases
908/// where each ingredient transforms into a vectorized version of itself.
909class VPWidenRecipe : public VPRecipeBase, public VPValue {
910public:
911 template <typename IterT>
913 : VPRecipeBase(VPDef::VPWidenSC, Operands), VPValue(this, &I) {}
914
915 ~VPWidenRecipe() override = default;
916
917 VP_CLASSOF_IMPL(VPDef::VPWidenSC)
918
919 /// Produce widened copies of all Ingredients.
920 void execute(VPTransformState &State) override;
921
922#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
923 /// Print the recipe.
924 void print(raw_ostream &O, const Twine &Indent,
925 VPSlotTracker &SlotTracker) const override;
926#endif
927};
928
929/// A recipe for widening Call instructions.
930class VPWidenCallRecipe : public VPRecipeBase, public VPValue {
931 /// ID of the vector intrinsic to call when widening the call. If set the
932 /// Intrinsic::not_intrinsic, a library call will be used instead.
933 Intrinsic::ID VectorIntrinsicID;
934
935public:
936 template <typename IterT>
938 Intrinsic::ID VectorIntrinsicID)
939 : VPRecipeBase(VPDef::VPWidenCallSC, CallArguments), VPValue(this, &I),
940 VectorIntrinsicID(VectorIntrinsicID) {}
941
942 ~VPWidenCallRecipe() override = default;
943
944 VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)
945
946 /// Produce a widened version of the call instruction.
947 void execute(VPTransformState &State) override;
948
949#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
950 /// Print the recipe.
951 void print(raw_ostream &O, const Twine &Indent,
952 VPSlotTracker &SlotTracker) const override;
953#endif
954};
955
956/// A recipe for widening select instructions.
958
959 /// Is the condition of the select loop invariant?
960 bool InvariantCond;
961
962public:
963 template <typename IterT>
965 bool InvariantCond)
966 : VPRecipeBase(VPDef::VPWidenSelectSC, Operands), VPValue(this, &I),
967 InvariantCond(InvariantCond) {}
968
969 ~VPWidenSelectRecipe() override = default;
970
971 VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC)
972
973 /// Produce a widened version of the select instruction.
974 void execute(VPTransformState &State) override;
975
976#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
977 /// Print the recipe.
978 void print(raw_ostream &O, const Twine &Indent,
979 VPSlotTracker &SlotTracker) const override;
980#endif
981};
982
983/// A recipe for handling GEP instructions.
984class VPWidenGEPRecipe : public VPRecipeBase, public VPValue {
985 bool IsPtrLoopInvariant;
986 SmallBitVector IsIndexLoopInvariant;
987
988public:
989 template <typename IterT>
991 : VPRecipeBase(VPDef::VPWidenGEPSC, Operands), VPValue(this, GEP),
992 IsIndexLoopInvariant(GEP->getNumIndices(), false) {}
993
994 template <typename IterT>
996 Loop *OrigLoop)
997 : VPRecipeBase(VPDef::VPWidenGEPSC, Operands), VPValue(this, GEP),
998 IsIndexLoopInvariant(GEP->getNumIndices(), false) {
999 IsPtrLoopInvariant = OrigLoop->isLoopInvariant(GEP->getPointerOperand());
1000 for (auto Index : enumerate(GEP->indices()))
1001 IsIndexLoopInvariant[Index.index()] =
1002 OrigLoop->isLoopInvariant(Index.value().get());
1003 }
1004 ~VPWidenGEPRecipe() override = default;
1005
1006 VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC)
1007
1008 /// Generate the gep nodes.
1009 void execute(VPTransformState &State) override;
1010
1011#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1012 /// Print the recipe.
1013 void print(raw_ostream &O, const Twine &Indent,
1014 VPSlotTracker &SlotTracker) const override;
1015#endif
1016};
1017
1018/// A recipe for handling phi nodes of integer and floating-point inductions,
1019/// producing their vector values.
1021 PHINode *IV;
1022 const InductionDescriptor &IndDesc;
1023 bool NeedsVectorIV;
1024
1025public:
1027 const InductionDescriptor &IndDesc,
1028 bool NeedsVectorIV)
1029 : VPRecipeBase(VPDef::VPWidenIntOrFpInductionSC, {Start, Step}),
1030 VPValue(this, IV), IV(IV), IndDesc(IndDesc),
1031 NeedsVectorIV(NeedsVectorIV) {}
1032
1034 const InductionDescriptor &IndDesc,
1035 TruncInst *Trunc, bool NeedsVectorIV)
1036 : VPRecipeBase(VPDef::VPWidenIntOrFpInductionSC, {Start, Step}),
1037 VPValue(this, Trunc), IV(IV), IndDesc(IndDesc),
1038 NeedsVectorIV(NeedsVectorIV) {}
1039
1041
1042 VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
1043
1044 /// Generate the vectorized and scalarized versions of the phi node as
1045 /// needed by their users.
1046 void execute(VPTransformState &State) override;
1047
1048#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1049 /// Print the recipe.
1050 void print(raw_ostream &O, const Twine &Indent,
1051 VPSlotTracker &SlotTracker) const override;
1052#endif
1053
1054 /// Returns the start value of the induction.
1056 const VPValue *getStartValue() const { return getOperand(0); }
1057
1058 /// Returns the step value of the induction.
1060 const VPValue *getStepValue() const { return getOperand(1); }
1061
1062 /// Returns the first defined value as TruncInst, if it is one or nullptr
1063 /// otherwise.
1065 return dyn_cast_or_null<TruncInst>(getVPValue(0)->getUnderlyingValue());
1066 }
1067 const TruncInst *getTruncInst() const {
1068 return dyn_cast_or_null<TruncInst>(getVPValue(0)->getUnderlyingValue());
1069 }
1070
1071 PHINode *getPHINode() { return IV; }
1072
1073 /// Returns the induction descriptor for the recipe.
1074 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
1075
1076 /// Returns true if the induction is canonical, i.e. starting at 0 and
1077 /// incremented by UF * VF (= the original IV is incremented by 1).
1078 bool isCanonical() const;
1079
1080 /// Returns the scalar type of the induction.
1081 const Type *getScalarType() const {
1082 const TruncInst *TruncI = getTruncInst();
1083 return TruncI ? TruncI->getType() : IV->getType();
1084 }
1085
1086 /// Returns true if a vector phi needs to be created for the induction.
1087 bool needsVectorIV() const { return NeedsVectorIV; }
1088};
1089
1090/// A pure virtual base class for all recipes modeling header phis, including
1091/// phis for first order recurrences, pointer inductions and reductions. The
1092/// start value is the first operand of the recipe and the incoming value from
1093/// the backedge is the second operand.
1094///
1095/// Inductions are modeled using the following sub-classes:
1096/// * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
1097/// starting at a specified value (zero for the main vector loop, the resume
1098/// value for the epilogue vector loop) and stepping by 1. The induction
1099/// controls exiting of the vector loop by comparing against the vector trip
1100/// count. Produces a single scalar PHI for the induction value per
1101/// iteration.
1102/// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
1103/// floating point inductions with arbitrary start and step values. Produces
1104/// a vector PHI per-part.
1105/// * VPDerivedIVRecipe: Converts the canonical IV value to the corresponding
1106/// value of an IV with different start and step values. Produces a single
1107/// scalar value per iteration
1108/// * VPScalarIVStepsRecipe: Generates scalar values per-lane based on a
1109/// canonical or derived induction.
1110/// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
1111/// pointer induction. Produces either a vector PHI per-part or scalar values
1112/// per-lane based on the canonical induction.
1114protected:
1115 VPHeaderPHIRecipe(unsigned char VPDefID, PHINode *Phi,
1116 VPValue *Start = nullptr)
1117 : VPRecipeBase(VPDefID, {}), VPValue(this, Phi) {
1118 if (Start)
1119 addOperand(Start);
1120 }
1121
1122public:
1123 ~VPHeaderPHIRecipe() override = default;
1124
1125 /// Method to support type inquiry through isa, cast, and dyn_cast.
1126 static inline bool classof(const VPRecipeBase *B) {
1127 return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
1128 B->getVPDefID() <= VPDef::VPLastPHISC;
1129 }
1130 static inline bool classof(const VPValue *V) {
1131 auto *B = V->getDefiningRecipe();
1132 return B && B->getVPDefID() >= VPRecipeBase::VPFirstHeaderPHISC &&
1133 B->getVPDefID() <= VPRecipeBase::VPLastPHISC;
1134 }
1135
1136 /// Generate the phi nodes.
1137 void execute(VPTransformState &State) override = 0;
1138
1139#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1140 /// Print the recipe.
1141 void print(raw_ostream &O, const Twine &Indent,
1142 VPSlotTracker &SlotTracker) const override = 0;
1143#endif
1144
1145 /// Returns the start value of the phi, if one is set.
1147 return getNumOperands() == 0 ? nullptr : getOperand(0);
1148 }
1150 return getNumOperands() == 0 ? nullptr : getOperand(0);
1151 }
1152
1153 /// Update the start value of the recipe.
1155
1156 /// Returns the incoming value from the loop backedge.
1158 return getOperand(1);
1159 }
1160
1161 /// Returns the backedge value as a recipe. The backedge value is guaranteed
1162 /// to be a recipe.
1165 }
1166};
1167
1169 const InductionDescriptor &IndDesc;
1170
1171 bool IsScalarAfterVectorization;
1172
1173public:
1174 /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
1175 /// Start.
1177 const InductionDescriptor &IndDesc,
1178 bool IsScalarAfterVectorization)
1179 : VPHeaderPHIRecipe(VPDef::VPWidenPointerInductionSC, Phi),
1180 IndDesc(IndDesc),
1181 IsScalarAfterVectorization(IsScalarAfterVectorization) {
1182 addOperand(Start);
1183 addOperand(Step);
1184 }
1185
1187
1188 VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)
1189
1190 /// Generate vector values for the pointer induction.
1191 void execute(VPTransformState &State) override;
1192
1193 /// Returns true if only scalar values will be generated.
1195
1196 /// Returns the induction descriptor for the recipe.
1197 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
1198
1199#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1200 /// Print the recipe.
1201 void print(raw_ostream &O, const Twine &Indent,
1202 VPSlotTracker &SlotTracker) const override;
1203#endif
1204};
1205
1206/// A recipe for handling header phis that are widened in the vector loop.
1207/// In the VPlan native path, all incoming VPValues & VPBasicBlock pairs are
1208/// managed in the recipe directly.
1210 /// List of incoming blocks. Only used in the VPlan native path.
1211 SmallVector<VPBasicBlock *, 2> IncomingBlocks;
1212
1213public:
1214 /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start.
1215 VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr)
1216 : VPHeaderPHIRecipe(VPDef::VPWidenPHISC, Phi) {
1217 if (Start)
1218 addOperand(Start);
1219 }
1220
1221 ~VPWidenPHIRecipe() override = default;
1222
1223 VP_CLASSOF_IMPL(VPDef::VPWidenPHISC)
1224
1225 /// Generate the phi/select nodes.
1226 void execute(VPTransformState &State) override;
1227
1228#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1229 /// Print the recipe.
1230 void print(raw_ostream &O, const Twine &Indent,
1231 VPSlotTracker &SlotTracker) const override;
1232#endif
1233
1234 /// Adds a pair (\p IncomingV, \p IncomingBlock) to the phi.
1235 void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock) {
1236 addOperand(IncomingV);
1237 IncomingBlocks.push_back(IncomingBlock);
1238 }
1239
1240 /// Returns the \p I th incoming VPBasicBlock.
1241 VPBasicBlock *getIncomingBlock(unsigned I) { return IncomingBlocks[I]; }
1242
1243 /// Returns the \p I th incoming VPValue.
1244 VPValue *getIncomingValue(unsigned I) { return getOperand(I); }
1245};
1246
1247/// A recipe for handling first-order recurrence phis. The start value is the
1248/// first operand of the recipe and the incoming value from the backedge is the
1249/// second operand.
1252 : VPHeaderPHIRecipe(VPDef::VPFirstOrderRecurrencePHISC, Phi, &Start) {}
1253
1254 VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC)
1255
1257 return R->getVPDefID() == VPDef::VPFirstOrderRecurrencePHISC;
1258 }
1259
1260 void execute(VPTransformState &State) override;
1261
1262#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1263 /// Print the recipe.
1264 void print(raw_ostream &O, const Twine &Indent,
1265 VPSlotTracker &SlotTracker) const override;
1266#endif
1267};
1268
1269/// A recipe for handling reduction phis. The start value is the first operand
1270/// of the recipe and the incoming value from the backedge is the second
1271/// operand.
1273 /// Descriptor for the reduction.
1274 const RecurrenceDescriptor &RdxDesc;
1275
1276 /// The phi is part of an in-loop reduction.
1277 bool IsInLoop;
1278
1279 /// The phi is part of an ordered reduction. Requires IsInLoop to be true.
1280 bool IsOrdered;
1281
1282public:
1283 /// Create a new VPReductionPHIRecipe for the reduction \p Phi described by \p
1284 /// RdxDesc.
1286 VPValue &Start, bool IsInLoop = false,
1287 bool IsOrdered = false)
1288 : VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start),
1289 RdxDesc(RdxDesc), IsInLoop(IsInLoop), IsOrdered(IsOrdered) {
1290 assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
1291 }
1292
1293 ~VPReductionPHIRecipe() override = default;
1294
1295 VP_CLASSOF_IMPL(VPDef::VPReductionPHISC)
1296
1298 return R->getVPDefID() == VPDef::VPReductionPHISC;
1299 }
1300
1301 /// Generate the phi/select nodes.
1302 void execute(VPTransformState &State) override;
1303
1304#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1305 /// Print the recipe.
1306 void print(raw_ostream &O, const Twine &Indent,
1307 VPSlotTracker &SlotTracker) const override;
1308#endif
1309
1311 return RdxDesc;
1312 }
1313
1314 /// Returns true, if the phi is part of an ordered reduction.
1315 bool isOrdered() const { return IsOrdered; }
1316
1317 /// Returns true, if the phi is part of an in-loop reduction.
1318 bool isInLoop() const { return IsInLoop; }
1319};
1320
1321/// A recipe for vectorizing a phi-node as a sequence of mask-based select
1322/// instructions.
1323class VPBlendRecipe : public VPRecipeBase, public VPValue {
1324 PHINode *Phi;
1325
1326public:
1327 /// The blend operation is a User of the incoming values and of their
1328 /// respective masks, ordered [I0, M0, I1, M1, ...]. Note that a single value
1329 /// might be incoming with a full mask for which there is no VPValue.
1331 : VPRecipeBase(VPDef::VPBlendSC, Operands), VPValue(this, Phi), Phi(Phi) {
1332 assert(Operands.size() > 0 &&
1333 ((Operands.size() == 1) || (Operands.size() % 2 == 0)) &&
1334 "Expected either a single incoming value or a positive even number "
1335 "of operands");
1336 }
1337
1338 VP_CLASSOF_IMPL(VPDef::VPBlendSC)
1339
1340 /// Return the number of incoming values, taking into account that a single
1341 /// incoming value has no mask.
1342 unsigned getNumIncomingValues() const { return (getNumOperands() + 1) / 2; }
1343
1344 /// Return incoming value number \p Idx.
1345 VPValue *getIncomingValue(unsigned Idx) const { return getOperand(Idx * 2); }
1346
1347 /// Return mask number \p Idx.
1348 VPValue *getMask(unsigned Idx) const { return getOperand(Idx * 2 + 1); }
1349
1350 /// Generate the phi/select nodes.
1351 void execute(VPTransformState &State) override;
1352
1353#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1354 /// Print the recipe.
1355 void print(raw_ostream &O, const Twine &Indent,
1356 VPSlotTracker &SlotTracker) const override;
1357#endif
1358
1359 /// Returns true if the recipe only uses the first lane of operand \p Op.
1360 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1361 assert(is_contained(operands(), Op) &&
1362 "Op must be an operand of the recipe");
1363 // Recursing through Blend recipes only, must terminate at header phi's the
1364 // latest.
1365 return all_of(users(),
1366 [this](VPUser *U) { return U->onlyFirstLaneUsed(this); });
1367 }
1368};
1369
1370/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
1371/// or stores into one wide load/store and shuffles. The first operand of a
1372/// VPInterleave recipe is the address, followed by the stored values, followed
1373/// by an optional mask.
1376
1377 bool HasMask = false;
1378
1379public:
1381 ArrayRef<VPValue *> StoredValues, VPValue *Mask)
1382 : VPRecipeBase(VPDef::VPInterleaveSC, {Addr}), IG(IG) {
1383 for (unsigned i = 0; i < IG->getFactor(); ++i)
1384 if (Instruction *I = IG->getMember(i)) {
1385 if (I->getType()->isVoidTy())
1386 continue;
1387 new VPValue(I, this);
1388 }
1389
1390 for (auto *SV : StoredValues)
1391 addOperand(SV);
1392 if (Mask) {
1393 HasMask = true;
1394 addOperand(Mask);
1395 }
1396 }
1397 ~VPInterleaveRecipe() override = default;
1398
1399 VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)
1400
1401 /// Return the address accessed by this recipe.
1402 VPValue *getAddr() const {
1403 return getOperand(0); // Address is the 1st, mandatory operand.
1404 }
1405
1406 /// Return the mask used by this recipe. Note that a full mask is represented
1407 /// by a nullptr.
1408 VPValue *getMask() const {
1409 // Mask is optional and therefore the last, currently 2nd operand.
1410 return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
1411 }
1412
1413 /// Return the VPValues stored by this interleave group. If it is a load
1414 /// interleave group, return an empty ArrayRef.
1416 // The first operand is the address, followed by the stored values, followed
1417 // by an optional mask.
1420 }
1421
1422 /// Generate the wide load or store, and shuffles.
1423 void execute(VPTransformState &State) override;
1424
1425#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1426 /// Print the recipe.
1427 void print(raw_ostream &O, const Twine &Indent,
1428 VPSlotTracker &SlotTracker) const override;
1429#endif
1430
1432
1433 /// Returns the number of stored operands of this interleave group. Returns 0
1434 /// for load interleave groups.
1435 unsigned getNumStoreOperands() const {
1436 return getNumOperands() - (HasMask ? 2 : 1);
1437 }
1438
1439 /// The recipe only uses the first lane of the address.
1440 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1441 assert(is_contained(operands(), Op) &&
1442 "Op must be an operand of the recipe");
1443 return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
1444 }
1445};
1446
1447/// A recipe to represent inloop reduction operations, performing a reduction on
1448/// a vector operand into a scalar value, and adding the result to a chain.
1449/// The Operands are {ChainOp, VecOp, [Condition]}.
1451 /// The recurrence decriptor for the reduction in question.
1452 const RecurrenceDescriptor *RdxDesc;
1453 /// Pointer to the TTI, needed to create the target reduction
1454 const TargetTransformInfo *TTI;
1455
1456public:
1458 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
1459 const TargetTransformInfo *TTI)
1460 : VPRecipeBase(VPDef::VPReductionSC, {ChainOp, VecOp}), VPValue(this, I),
1461 RdxDesc(R), TTI(TTI) {
1462 if (CondOp)
1463 addOperand(CondOp);
1464 }
1465
1466 ~VPReductionRecipe() override = default;
1467
1468 VP_CLASSOF_IMPL(VPDef::VPReductionSC)
1469
1470 /// Generate the reduction in the loop
1471 void execute(VPTransformState &State) override;
1472
1473#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1474 /// Print the recipe.
1475 void print(raw_ostream &O, const Twine &Indent,
1476 VPSlotTracker &SlotTracker) const override;
1477#endif
1478
1479 /// The VPValue of the scalar Chain being accumulated.
1480 VPValue *getChainOp() const { return getOperand(0); }
1481 /// The VPValue of the vector value to be reduced.
1482 VPValue *getVecOp() const { return getOperand(1); }
1483 /// The VPValue of the condition for the block.
1485 return getNumOperands() > 2 ? getOperand(2) : nullptr;
1486 }
1487};
1488
1489/// VPReplicateRecipe replicates a given instruction producing multiple scalar
1490/// copies of the original scalar type, one per lane, instead of producing a
1491/// single copy of widened type for all lanes. If the instruction is known to be
1492/// uniform only one copy, per lane zero, will be generated.
1494 /// Indicator if only a single replica per lane is needed.
1495 bool IsUniform;
1496
1497 /// Indicator if the replicas are also predicated.
1498 bool IsPredicated;
1499
1500 /// Indicator if the scalar values should also be packed into a vector.
1501 bool AlsoPack;
1502
1503public:
1504 template <typename IterT>
1506 bool IsUniform, bool IsPredicated = false)
1507 : VPRecipeBase(VPDef::VPReplicateSC, Operands), VPValue(this, I),
1508 IsUniform(IsUniform), IsPredicated(IsPredicated) {
1509 // Retain the previous behavior of predicateInstructions(), where an
1510 // insert-element of a predicated instruction got hoisted into the
1511 // predicated basic block iff it was its only user. This is achieved by
1512 // having predicated instructions also pack their values into a vector by
1513 // default unless they have a replicated user which uses their scalar value.
1514 AlsoPack = IsPredicated && !I->use_empty();
1515 }
1516
1517 ~VPReplicateRecipe() override = default;
1518
1519 VP_CLASSOF_IMPL(VPDef::VPReplicateSC)
1520
1521 /// Generate replicas of the desired Ingredient. Replicas will be generated
1522 /// for all parts and lanes unless a specific part and lane are specified in
1523 /// the \p State.
1524 void execute(VPTransformState &State) override;
1525
1526 void setAlsoPack(bool Pack) { AlsoPack = Pack; }
1527
1528#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1529 /// Print the recipe.
1530 void print(raw_ostream &O, const Twine &Indent,
1531 VPSlotTracker &SlotTracker) const override;
1532#endif
1533
1534 bool isUniform() const { return IsUniform; }
1535
1536 bool isPacked() const { return AlsoPack; }
1537
1538 bool isPredicated() const { return IsPredicated; }
1539
1540 /// Returns true if the recipe only uses the first lane of operand \p Op.
1541 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1542 assert(is_contained(operands(), Op) &&
1543 "Op must be an operand of the recipe");
1544 return isUniform();
1545 }
1546
1547 /// Returns true if the recipe uses scalars of operand \p Op.
1548 bool usesScalars(const VPValue *Op) const override {
1549 assert(is_contained(operands(), Op) &&
1550 "Op must be an operand of the recipe");
1551 return true;
1552 }
1553};
1554
1555/// A recipe for generating conditional branches on the bits of a mask.
1557public:
1559 : VPRecipeBase(VPDef::VPBranchOnMaskSC, {}) {
1560 if (BlockInMask) // nullptr means all-one mask.
1561 addOperand(BlockInMask);
1562 }
1563
1564 VP_CLASSOF_IMPL(VPDef::VPBranchOnMaskSC)
1565
1566 /// Generate the extraction of the appropriate bit from the block mask and the
1567 /// conditional branch.
1568 void execute(VPTransformState &State) override;
1569
1570#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1571 /// Print the recipe.
1572 void print(raw_ostream &O, const Twine &Indent,
1573 VPSlotTracker &SlotTracker) const override {
1574 O << Indent << "BRANCH-ON-MASK ";
1575 if (VPValue *Mask = getMask())
1576 Mask->printAsOperand(O, SlotTracker);
1577 else
1578 O << " All-One";
1579 }
1580#endif
1581
1582 /// Return the mask used by this recipe. Note that a full mask is represented
1583 /// by a nullptr.
1584 VPValue *getMask() const {
1585 assert(getNumOperands() <= 1 && "should have either 0 or 1 operands");
1586 // Mask is optional.
1587 return getNumOperands() == 1 ? getOperand(0) : nullptr;
1588 }
1589
1590 /// Returns true if the recipe uses scalars of operand \p Op.
1591 bool usesScalars(const VPValue *Op) const override {
1592 assert(is_contained(operands(), Op) &&
1593 "Op must be an operand of the recipe");
1594 return true;
1595 }
1596};
1597
1598/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
1599/// control converges back from a Branch-on-Mask. The phi nodes are needed in
1600/// order to merge values that are set under such a branch and feed their uses.
1601/// The phi nodes can be scalar or vector depending on the users of the value.
1602/// This recipe works in concert with VPBranchOnMaskRecipe.
1604public:
1605 /// Construct a VPPredInstPHIRecipe given \p PredInst whose value needs a phi
1606 /// nodes after merging back from a Branch-on-Mask.
1608 : VPRecipeBase(VPDef::VPPredInstPHISC, PredV), VPValue(this) {}
1609 ~VPPredInstPHIRecipe() override = default;
1610
1611 VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC)
1612
1613 /// Generates phi nodes for live-outs as needed to retain SSA form.
1614 void execute(VPTransformState &State) override;
1615
1616#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1617 /// Print the recipe.
1618 void print(raw_ostream &O, const Twine &Indent,
1619 VPSlotTracker &SlotTracker) const override;
1620#endif
1621
1622 /// Returns true if the recipe uses scalars of operand \p Op.
1623 bool usesScalars(const VPValue *Op) const override {
1624 assert(is_contained(operands(), Op) &&
1625 "Op must be an operand of the recipe");
1626 return true;
1627 }
1628};
1629
1630/// A Recipe for widening load/store operations.
1631/// The recipe uses the following VPValues:
1632/// - For load: Address, optional mask
1633/// - For store: Address, stored value, optional mask
1634/// TODO: We currently execute only per-part unless a specific instance is
1635/// provided.
1637 Instruction &Ingredient;
1638
1639 // Whether the loaded-from / stored-to addresses are consecutive.
1640 bool Consecutive;
1641
1642 // Whether the consecutive loaded/stored addresses are in reverse order.
1643 bool Reverse;
1644
1645 void setMask(VPValue *Mask) {
1646 if (!Mask)
1647 return;
1648 addOperand(Mask);
1649 }
1650
1651 bool isMasked() const {
1652 return isStore() ? getNumOperands() == 3 : getNumOperands() == 2;
1653 }
1654
1655public:
1657 bool Consecutive, bool Reverse)
1658 : VPRecipeBase(VPDef::VPWidenMemoryInstructionSC, {Addr}),
1659 Ingredient(Load), Consecutive(Consecutive), Reverse(Reverse) {
1660 assert((Consecutive || !Reverse) && "Reverse implies consecutive");
1661 new VPValue(this, &Load);
1662 setMask(Mask);
1663 }
1664
1666 VPValue *StoredValue, VPValue *Mask,
1667 bool Consecutive, bool Reverse)
1668 : VPRecipeBase(VPDef::VPWidenMemoryInstructionSC, {Addr, StoredValue}),
1669 Ingredient(Store), Consecutive(Consecutive), Reverse(Reverse) {
1670 assert((Consecutive || !Reverse) && "Reverse implies consecutive");
1671 setMask(Mask);
1672 }
1673
1674 VP_CLASSOF_IMPL(VPDef::VPWidenMemoryInstructionSC)
1675
1676 /// Return the address accessed by this recipe.
1677 VPValue *getAddr() const {
1678 return getOperand(0); // Address is the 1st, mandatory operand.
1679 }
1680
1681 /// Return the mask used by this recipe. Note that a full mask is represented
1682 /// by a nullptr.
1683 VPValue *getMask() const {
1684 // Mask is optional and therefore the last operand.
1685 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
1686 }
1687
1688 /// Returns true if this recipe is a store.
1689 bool isStore() const { return isa<StoreInst>(Ingredient); }
1690
1691 /// Return the address accessed by this recipe.
1693 assert(isStore() && "Stored value only available for store instructions");
1694 return getOperand(1); // Stored value is the 2nd, mandatory operand.
1695 }
1696
1697 // Return whether the loaded-from / stored-to addresses are consecutive.
1698 bool isConsecutive() const { return Consecutive; }
1699
1700 // Return whether the consecutive loaded/stored addresses are in reverse
1701 // order.
1702 bool isReverse() const { return Reverse; }
1703
1704 /// Generate the wide load/store.
1705 void execute(VPTransformState &State) override;
1706
1707#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1708 /// Print the recipe.
1709 void print(raw_ostream &O, const Twine &Indent,
1710 VPSlotTracker &SlotTracker) const override;
1711#endif
1712
1713 /// Returns true if the recipe only uses the first lane of operand \p Op.
1714 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1715 assert(is_contained(operands(), Op) &&
1716 "Op must be an operand of the recipe");
1717
1718 // Widened, consecutive memory operations only demand the first lane of
1719 // their address, unless the same operand is also stored. That latter can
1720 // happen with opaque pointers.
1721 return Op == getAddr() && isConsecutive() &&
1722 (!isStore() || Op != getStoredValue());
1723 }
1724
1725 Instruction &getIngredient() const { return Ingredient; }
1726};
1727
1728/// Recipe to expand a SCEV expression.
1730 const SCEV *Expr;
1731 ScalarEvolution &SE;
1732
1733public:
1735 : VPRecipeBase(VPDef::VPExpandSCEVSC, {}), VPValue(this), Expr(Expr),
1736 SE(SE) {}
1737
1738 ~VPExpandSCEVRecipe() override = default;
1739
1740 VP_CLASSOF_IMPL(VPDef::VPExpandSCEVSC)
1741
1742 /// Generate a canonical vector induction variable of the vector loop, with
1743 void execute(VPTransformState &State) override;
1744
1745#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1746 /// Print the recipe.
1747 void print(raw_ostream &O, const Twine &Indent,
1748 VPSlotTracker &SlotTracker) const override;
1749#endif
1750
1751 const SCEV *getSCEV() const { return Expr; }
1752};
1753
1754/// Canonical scalar induction phi of the vector loop. Starting at the specified
1755/// start value (either 0 or the resume value when vectorizing the epilogue
1756/// loop). VPWidenCanonicalIVRecipe represents the vector version of the
1757/// canonical induction variable.
1759 DebugLoc DL;
1760
1761public:
1763 : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV), DL(DL) {}
1764
1765 ~VPCanonicalIVPHIRecipe() override = default;
1766
1767 VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC)
1768
1770 return D->getVPDefID() == VPDef::VPCanonicalIVPHISC;
1771 }
1772
1773 /// Generate the canonical scalar induction phi of the vector loop.
1774 void execute(VPTransformState &State) override;
1775
1776#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1777 /// Print the recipe.
1778 void print(raw_ostream &O, const Twine &Indent,
1779 VPSlotTracker &SlotTracker) const override;
1780#endif
1781
1782 /// Returns the scalar type of the induction.
1783 const Type *getScalarType() const {
1784 return getOperand(0)->getLiveInIRValue()->getType();
1785 }
1786
1787 /// Returns true if the recipe only uses the first lane of operand \p Op.
1788 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1789 assert(is_contained(operands(), Op) &&
1790 "Op must be an operand of the recipe");
1791 return true;
1792 }
1793
1794 /// Check if the induction described by \p ID is canonical, i.e. has the same
1795 /// start, step (of 1), and type as the canonical IV.
1796 bool isCanonical(const InductionDescriptor &ID, Type *Ty) const;
1797};
1798
1799/// A recipe for generating the active lane mask for the vector loop that is
1800/// used to predicate the vector operations.
1801/// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
1802/// remove VPActiveLaneMaskPHIRecipe.
1804 DebugLoc DL;
1805
1806public:
1808 : VPHeaderPHIRecipe(VPDef::VPActiveLaneMaskPHISC, nullptr, StartMask),
1809 DL(DL) {}
1810
1811 ~VPActiveLaneMaskPHIRecipe() override = default;
1812
1813 VP_CLASSOF_IMPL(VPDef::VPActiveLaneMaskPHISC)
1814
1816 return D->getVPDefID() == VPDef::VPActiveLaneMaskPHISC;
1817 }
1818
1819 /// Generate the active lane mask phi of the vector loop.
1820 void execute(VPTransformState &State) override;
1821
1822#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1823 /// Print the recipe.
1824 void print(raw_ostream &O, const Twine &Indent,
1825 VPSlotTracker &SlotTracker) const override;
1826#endif
1827};
1828
1829/// A Recipe for widening the canonical induction variable of the vector loop.
1831public:
1833 : VPRecipeBase(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}),
1834 VPValue(this) {}
1835
1836 ~VPWidenCanonicalIVRecipe() override = default;
1837
1838 VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC)
1839
1840 /// Generate a canonical vector induction variable of the vector loop, with
1841 /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
1842 /// step = <VF*UF, VF*UF, ..., VF*UF>.
1843 void execute(VPTransformState &State) override;
1844
1845#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1846 /// Print the recipe.
1847 void print(raw_ostream &O, const Twine &Indent,
1848 VPSlotTracker &SlotTracker) const override;
1849#endif
1850
1851 /// Returns the scalar type of the induction.
1852 const Type *getScalarType() const {
1853 return cast<VPCanonicalIVPHIRecipe>(getOperand(0)->getDefiningRecipe())
1854 ->getScalarType();
1855 }
1856};
1857
1858/// A recipe for converting the canonical IV value to the corresponding value of
1859/// an IV with different start and step values, using Start + CanonicalIV *
1860/// Step.
1862 /// The type of the result value. It may be smaller than the type of the
1863 /// induction and in this case it will get truncated to ResultTy.
1864 Type *ResultTy;
1865
1866 /// Induction descriptor for the induction the canonical IV is transformed to.
1867 const InductionDescriptor &IndDesc;
1868
1869public:
1871 VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
1872 Type *ResultTy)
1873 : VPRecipeBase(VPDef::VPDerivedIVSC, {Start, CanonicalIV, Step}),
1874 VPValue(this), ResultTy(ResultTy), IndDesc(IndDesc) {}
1875
1876 ~VPDerivedIVRecipe() override = default;
1877
1878 VP_CLASSOF_IMPL(VPDef::VPDerivedIVSC)
1879
1880 /// Generate the transformed value of the induction at offset StartValue (1.
1881 /// operand) + IV (2. operand) * StepValue (3, operand).
1882 void execute(VPTransformState &State) override;
1883
1884#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1885 /// Print the recipe.
1886 void print(raw_ostream &O, const Twine &Indent,
1887 VPSlotTracker &SlotTracker) const override;
1888#endif
1889
1890 VPValue *getStartValue() const { return getOperand(0); }
1891 VPValue *getCanonicalIV() const { return getOperand(1); }
1892 VPValue *getStepValue() const { return getOperand(2); }
1893
1894 /// Returns true if the recipe only uses the first lane of operand \p Op.
1895 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1896 assert(is_contained(operands(), Op) &&
1897 "Op must be an operand of the recipe");
1898 return true;
1899 }
1900};
1901
1902/// A recipe for handling phi nodes of integer and floating-point inductions,
1903/// producing their scalar values.
1905 const InductionDescriptor &IndDesc;
1906
1907public:
1909 VPValue *Step)
1910 : VPRecipeBase(VPDef::VPScalarIVStepsSC, {IV, Step}), VPValue(this),
1911 IndDesc(IndDesc) {}
1912
1913 ~VPScalarIVStepsRecipe() override = default;
1914
1915 VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)
1916
1917 /// Generate the scalarized versions of the phi node as needed by their users.
1918 void execute(VPTransformState &State) override;
1919
1920#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1921 /// Print the recipe.
1922 void print(raw_ostream &O, const Twine &Indent,
1923 VPSlotTracker &SlotTracker) const override;
1924#endif
1925
1926 VPValue *getStepValue() const { return getOperand(1); }
1927
1928 /// Returns true if the recipe only uses the first lane of operand \p Op.
1929 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1930 assert(is_contained(operands(), Op) &&
1931 "Op must be an operand of the recipe");
1932 return true;
1933 }
1934};
1935
1936/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
1937/// holds a sequence of zero or more VPRecipe's each representing a sequence of
1938/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
1940public:
1942
1943private:
1944 /// The VPRecipes held in the order of output instructions to generate.
1945 RecipeListTy Recipes;
1946
1947public:
1948 VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
1949 : VPBlockBase(VPBasicBlockSC, Name.str()) {
1950 if (Recipe)
1951 appendRecipe(Recipe);
1952 }
1953
1954 ~VPBasicBlock() override {
1955 while (!Recipes.empty())
1956 Recipes.pop_back();
1957 }
1958
1959 /// Instruction iterators...
1964
1965 //===--------------------------------------------------------------------===//
1966 /// Recipe iterator methods
1967 ///
1968 inline iterator begin() { return Recipes.begin(); }
1969 inline const_iterator begin() const { return Recipes.begin(); }
1970 inline iterator end() { return Recipes.end(); }
1971 inline const_iterator end() const { return Recipes.end(); }
1972
1973 inline reverse_iterator rbegin() { return Recipes.rbegin(); }
1974 inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
1975 inline reverse_iterator rend() { return Recipes.rend(); }
1976 inline const_reverse_iterator rend() const { return Recipes.rend(); }
1977
1978 inline size_t size() const { return Recipes.size(); }
1979 inline bool empty() const { return Recipes.empty(); }
1980 inline const VPRecipeBase &front() const { return Recipes.front(); }
1981 inline VPRecipeBase &front() { return Recipes.front(); }
1982 inline const VPRecipeBase &back() const { return Recipes.back(); }
1983 inline VPRecipeBase &back() { return Recipes.back(); }
1984
1985 /// Returns a reference to the list of recipes.
1986 RecipeListTy &getRecipeList() { return Recipes; }
1987
1988 /// Returns a pointer to a member of the recipe list.
1990 return &VPBasicBlock::Recipes;
1991 }
1992
1993 /// Method to support type inquiry through isa, cast, and dyn_cast.
1994 static inline bool classof(const VPBlockBase *V) {
1995 return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC;
1996 }
1997
1998 void insert(VPRecipeBase *Recipe, iterator InsertPt) {
1999 assert(Recipe && "No recipe to append.");
2000 assert(!Recipe->Parent && "Recipe already in VPlan");
2001 Recipe->Parent = this;
2002 Recipes.insert(InsertPt, Recipe);
2003 }
2004
2005 /// Augment the existing recipes of a VPBasicBlock with an additional
2006 /// \p Recipe as the last recipe.
2007 void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
2008
2009 /// The method which generates the output IR instructions that correspond to
2010 /// this VPBasicBlock, thereby "executing" the VPlan.
2011 void execute(VPTransformState *State) override;
2012
2013 /// Return the position of the first non-phi node recipe in the block.
2015
2016 /// Returns an iterator range over the PHI-like recipes in the block.
2018 return make_range(begin(), getFirstNonPhi());
2019 }
2020
2021 void dropAllReferences(VPValue *NewValue) override;
2022
2023 /// Split current block at \p SplitAt by inserting a new block between the
2024 /// current block and its successors and moving all recipes starting at
2025 /// SplitAt to the new block. Returns the new block.
2026 VPBasicBlock *splitAt(iterator SplitAt);
2027
2029
2030#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2031 /// Print this VPBsicBlock to \p O, prefixing all lines with \p Indent. \p
2032 /// SlotTracker is used to print unnamed VPValue's using consequtive numbers.
2033 ///
2034 /// Note that the numbering is applied to the whole VPlan, so printing
2035 /// individual blocks is consistent with the whole VPlan printing.
2036 void print(raw_ostream &O, const Twine &Indent,
2037 VPSlotTracker &SlotTracker) const override;
2038 using VPBlockBase::print; // Get the print(raw_stream &O) version.
2039#endif
2040
2041 /// If the block has multiple successors, return the branch recipe terminating
2042 /// the block. If there are no or only a single successor, return nullptr;
2044 const VPRecipeBase *getTerminator() const;
2045
2046 /// Returns true if the block is exiting it's parent region.
2047 bool isExiting() const;
2048
2049private:
2050 /// Create an IR BasicBlock to hold the output instructions generated by this
2051 /// VPBasicBlock, and return it. Update the CFGState accordingly.
2052 BasicBlock *createEmptyBasicBlock(VPTransformState::CFGState &CFG);
2053};
2054
2055/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
2056/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
2057/// A VPRegionBlock may indicate that its contents are to be replicated several
2058/// times. This is designed to support predicated scalarization, in which a
2059/// scalar if-then code structure needs to be generated VF * UF times. Having
2060/// this replication indicator helps to keep a single model for multiple
2061/// candidate VF's. The actual replication takes place only once the desired VF
2062/// and UF have been determined.
2064 /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
2065 VPBlockBase *Entry;
2066
2067 /// Hold the Single Exiting block of the SESE region modelled by the
2068 /// VPRegionBlock.
2069 VPBlockBase *Exiting;
2070
2071 /// An indicator whether this region is to generate multiple replicated
2072 /// instances of output IR corresponding to its VPBlockBases.
2073 bool IsReplicator;
2074
2075public:
2077 const std::string &Name = "", bool IsReplicator = false)
2078 : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
2079 IsReplicator(IsReplicator) {
2080 assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
2081 assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
2082 Entry->setParent(this);
2083 Exiting->setParent(this);
2084 }
2085 VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
2086 : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
2087 IsReplicator(IsReplicator) {}
2088
2089 ~VPRegionBlock() override {
2090 if (Entry) {
2091 VPValue DummyValue;
2092 Entry->dropAllReferences(&DummyValue);
2093 deleteCFG(Entry);
2094 }
2095 }
2096
2097 /// Method to support type inquiry through isa, cast, and dyn_cast.
2098 static inline bool classof(const VPBlockBase *V) {
2099 return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
2100 }
2101
2102 const VPBlockBase *getEntry() const { return Entry; }
2103 VPBlockBase *getEntry() { return Entry; }
2104
2105 /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
2106 /// EntryBlock must have no predecessors.
2107 void setEntry(VPBlockBase *EntryBlock) {
2108 assert(EntryBlock->getPredecessors().empty() &&
2109 "Entry block cannot have predecessors.");
2110 Entry = EntryBlock;
2111 EntryBlock->setParent(this);
2112 }
2113
2114 const VPBlockBase *getExiting() const { return Exiting; }
2115 VPBlockBase *getExiting() { return Exiting; }
2116
2117 /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
2118 /// ExitingBlock must have no successors.
2119 void setExiting(VPBlockBase *ExitingBlock) {
2120 assert(ExitingBlock->getSuccessors().empty() &&
2121 "Exit block cannot have successors.");
2122 Exiting = ExitingBlock;
2123 ExitingBlock->setParent(this);
2124 }
2125
2126 /// Returns the pre-header VPBasicBlock of the loop region.
2128 assert(!isReplicator() && "should only get pre-header of loop regions");
2130 }
2131
2132 /// An indicator whether this region is to generate multiple replicated
2133 /// instances of output IR corresponding to its VPBlockBases.
2134 bool isReplicator() const { return IsReplicator; }
2135
2136 /// The method which generates the output IR instructions that correspond to
2137 /// this VPRegionBlock, thereby "executing" the VPlan.
2138 void execute(VPTransformState *State) override;
2139
2140 void dropAllReferences(VPValue *NewValue) override;
2141
2142#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2143 /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
2144 /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
2145 /// consequtive numbers.
2146 ///
2147 /// Note that the numbering is applied to the whole VPlan, so printing
2148 /// individual regions is consistent with the whole VPlan printing.
2149 void print(raw_ostream &O, const Twine &Indent,
2150 VPSlotTracker &SlotTracker) const override;
2151 using VPBlockBase::print; // Get the print(raw_stream &O) version.
2152#endif
2153};
2154
2155/// VPlan models a candidate for vectorization, encoding various decisions take
2156/// to produce efficient output IR, including which branches, basic-blocks and
2157/// output IR instructions to generate, and their cost. VPlan holds a
2158/// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
2159/// VPBlock.
2160class VPlan {
2161 friend class VPlanPrinter;
2162 friend class VPSlotTracker;
2163
2164 /// Hold the single entry to the Hierarchical CFG of the VPlan.
2165 VPBlockBase *Entry;
2166
2167 /// Holds the VFs applicable to this VPlan.
2169
2170 /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
2171 /// any UF.
2173
2174 /// Holds the name of the VPlan, for printing.
2175 std::string Name;
2176
2177 /// Holds all the external definitions created for this VPlan. External
2178 /// definitions must be immutable and hold a pointer to their underlying IR.
2179 DenseMap<Value *, VPValue *> VPExternalDefs;
2180
2181 /// Represents the trip count of the original loop, for folding
2182 /// the tail.
2183 VPValue *TripCount = nullptr;
2184
2185 /// Represents the backedge taken count of the original loop, for folding
2186 /// the tail. It equals TripCount - 1.
2187 VPValue *BackedgeTakenCount = nullptr;
2188
2189 /// Represents the vector trip count.
2190 VPValue VectorTripCount;
2191
2192 /// Holds a mapping between Values and their corresponding VPValue inside
2193 /// VPlan.
2194 Value2VPValueTy Value2VPValue;
2195
2196 /// Contains all VPValues that been allocated by addVPValue directly and need
2197 /// to be free when the plan's destructor is called.
2198 SmallVector<VPValue *, 16> VPValuesToFree;
2199
2200 /// Indicates whether it is safe use the Value2VPValue mapping or if the
2201 /// mapping cannot be used any longer, because it is stale.
2202 bool Value2VPValueEnabled = true;
2203
2204 /// Values used outside the plan.
2206
2207public:
2208 VPlan(VPBlockBase *Entry = nullptr) : Entry(Entry) {
2209 if (Entry)
2210 Entry->setPlan(this);
2211 }
2212
2213 ~VPlan();
2214
2215 /// Prepare the plan for execution, setting up the required live-in values.
2216 void prepareToExecute(Value *TripCount, Value *VectorTripCount,
2217 Value *CanonicalIVStartValue, VPTransformState &State,
2218 bool IsEpilogueVectorization);
2219
2220 /// Generate the IR code for this VPlan.
2221 void execute(VPTransformState *State);
2222
2223 VPBlockBase *getEntry() { return Entry; }
2224 const VPBlockBase *getEntry() const { return Entry; }
2225
2227 Entry = Block;
2228 Block->setPlan(this);
2229 return Entry;
2230 }
2231
2232 /// The trip count of the original loop.
2234 if (!TripCount)
2235 TripCount = new VPValue();
2236 return TripCount;
2237 }
2238
2239 /// The backedge taken count of the original loop.
2241 if (!BackedgeTakenCount)
2242 BackedgeTakenCount = new VPValue();
2243 return BackedgeTakenCount;
2244 }
2245
2246 /// The vector trip count.
2247 VPValue &getVectorTripCount() { return VectorTripCount; }
2248
2249 /// Mark the plan to indicate that using Value2VPValue is not safe any
2250 /// longer, because it may be stale.
2251 void disableValue2VPValue() { Value2VPValueEnabled = false; }
2252
2253 void addVF(ElementCount VF) { VFs.insert(VF); }
2254
2256 assert(hasVF(VF) && "Cannot set VF not already in plan");
2257 VFs.clear();
2258 VFs.insert(VF);
2259 }
2260
2261 bool hasVF(ElementCount VF) { return VFs.count(VF); }
2262
2263 bool hasScalarVFOnly() const { return VFs.size() == 1 && VFs[0].isScalar(); }
2264
2265 bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }
2266
2267 void setUF(unsigned UF) {
2268 assert(hasUF(UF) && "Cannot set the UF not already in plan");
2269 UFs.clear();
2270 UFs.insert(UF);
2271 }
2272
2273 /// Return a string with the name of the plan and the applicable VFs and UFs.
2274 std::string getName() const;
2275
2276 void setName(const Twine &newName) { Name = newName.str(); }
2277
2278 /// Get the existing or add a new external definition for \p V.
2280 auto I = VPExternalDefs.insert({V, nullptr});
2281 if (I.second)
2282 I.first->second = new VPValue(V);
2283 return I.first->second;
2284 }
2285
2287 assert(Value2VPValueEnabled &&
2288 "IR value to VPValue mapping may be out of date!");
2289 assert(V && "Trying to add a null Value to VPlan");
2290 assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
2291 VPValue *VPV = new VPValue(V);
2292 Value2VPValue[V] = VPV;
2293 VPValuesToFree.push_back(VPV);
2294 }
2295
2296 void addVPValue(Value *V, VPValue *VPV) {
2297 assert(Value2VPValueEnabled && "Value2VPValue mapping may be out of date!");
2298 assert(V && "Trying to add a null Value to VPlan");
2299 assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
2300 Value2VPValue[V] = VPV;
2301 }
2302
2303 /// Returns the VPValue for \p V. \p OverrideAllowed can be used to disable
2304 /// checking whether it is safe to query VPValues using IR Values.
2305 VPValue *getVPValue(Value *V, bool OverrideAllowed = false) {
2306 assert((OverrideAllowed || isa<Constant>(V) || Value2VPValueEnabled) &&
2307 "Value2VPValue mapping may be out of date!");
2308 assert(V && "Trying to get the VPValue of a null Value");
2309 assert(Value2VPValue.count(V) && "Value does not exist in VPlan");
2310 return Value2VPValue[V];
2311 }
2312
2313 /// Gets the VPValue or adds a new one (if none exists yet) for \p V. \p
2314 /// OverrideAllowed can be used to disable checking whether it is safe to
2315 /// query VPValues using IR Values.
2316 VPValue *getOrAddVPValue(Value *V, bool OverrideAllowed = false) {
2317 assert((OverrideAllowed || isa<Constant>(V) || Value2VPValueEnabled) &&
2318 "Value2VPValue mapping may be out of date!");
2319 assert(V && "Trying to get or add the VPValue of a null Value");
2320 if (!Value2VPValue.count(V))
2321 addVPValue(V);
2322 return getVPValue(V);
2323 }
2324
2326 assert(Value2VPValueEnabled &&
2327 "IR value to VPValue mapping may be out of date!");
2328 Value2VPValue.erase(V);
2329 }
2330
2331#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2332 /// Print this VPlan to \p O.
2333 void print(raw_ostream &O) const;
2334
2335 /// Print this VPlan in DOT format to \p O.
2336 void printDOT(raw_ostream &O) const;
2337
2338 /// Dump the plan to stderr (for debugging).
2339 LLVM_DUMP_METHOD void dump() const;
2340#endif
2341
2342 /// Returns a range mapping the values the range \p Operands to their
2343 /// corresponding VPValues.
2344 iterator_range<mapped_iterator<Use *, std::function<VPValue *(Value *)>>>
2346 std::function<VPValue *(Value *)> Fn = [this](Value *Op) {
2347 return getOrAddVPValue(Op);
2348 };
2349 return map_range(Operands, Fn);
2350 }
2351
2352 /// Returns the VPRegionBlock of the vector loop.
2354 return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
2355 }
2357 return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
2358 }
2359
2360 /// Returns the canonical induction recipe of the vector loop.
2363 if (EntryVPBB->empty()) {
2364 // VPlan native path.
2365 EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
2366 }
2367 return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
2368 }
2369
2370 /// Find and return the VPActiveLaneMaskPHIRecipe from the header - there
2371 /// be only one at most. If there isn't one, then return nullptr.
2373
2374 void addLiveOut(PHINode *PN, VPValue *V);
2375
2377 for (auto &KV : LiveOuts)
2378 delete KV.second;
2379 LiveOuts.clear();
2380 }
2381
2383 delete LiveOuts[PN];
2384 LiveOuts.erase(PN);
2385 }
2386
2388 return LiveOuts;
2389 }
2390
2391private:
2392 /// Add to the given dominator tree the header block and every new basic block
2393 /// that was created between it and the latch block, inclusive.
2394 static void updateDominatorTree(DominatorTree *DT, BasicBlock *LoopLatchBB,
2395 BasicBlock *LoopPreHeaderBB,
2396 BasicBlock *LoopExitBB);
2397};
2398
2399#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2400/// VPlanPrinter prints a given VPlan to a given output stream. The printing is
2401/// indented and follows the dot format.
2403 raw_ostream &OS;
2404 const VPlan &Plan;
2405 unsigned Depth = 0;
2406 unsigned TabWidth = 2;
2407 std::string Indent;
2408 unsigned BID = 0;
2410
2412
2413 /// Handle indentation.
2414 void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); }
2415
2416 /// Print a given \p Block of the Plan.
2417 void dumpBlock(const VPBlockBase *Block);
2418
2419 /// Print the information related to the CFG edges going out of a given
2420 /// \p Block, followed by printing the successor blocks themselves.
2421 void dumpEdges(const VPBlockBase *Block);
2422
2423 /// Print a given \p BasicBlock, including its VPRecipes, followed by printing
2424 /// its successor blocks.
2425 void dumpBasicBlock(const VPBasicBlock *BasicBlock);
2426
2427 /// Print a given \p Region of the Plan.
2428 void dumpRegion(const VPRegionBlock *Region);
2429
2430 unsigned getOrCreateBID(const VPBlockBase *Block) {
2431 return BlockID.count(Block) ? BlockID[Block] : BlockID[Block] = BID++;
2432 }
2433
2434 Twine getOrCreateName(const VPBlockBase *Block);
2435
2436 Twine getUID(const VPBlockBase *Block);
2437
2438 /// Print the information related to a CFG edge between two VPBlockBases.
2439 void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden,
2440 const Twine &Label);
2441
2442public:
2444 : OS(O), Plan(P), SlotTracker(&P) {}
2445
2446 LLVM_DUMP_METHOD void dump();
2447};
2448
2450 const Value *V;
2451
2452 VPlanIngredient(const Value *V) : V(V) {}
2453
2454 void print(raw_ostream &O) const;
2455};
2456
2458 I.print(OS);
2459 return OS;
2460}
2461
2462inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
2463 Plan.print(OS);
2464 return OS;
2465}
2466#endif
2467
2468//===----------------------------------------------------------------------===//
2469// VPlan Utilities
2470//===----------------------------------------------------------------------===//
2471
2472/// Class that provides utilities for VPBlockBases in VPlan.
2474public:
2475 VPBlockUtils() = delete;
2476
2477 /// Insert disconnected VPBlockBase \p NewBlock after \p BlockPtr. Add \p
2478 /// NewBlock as successor of \p BlockPtr and \p BlockPtr as predecessor of \p
2479 /// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. \p BlockPtr's
2480 /// successors are moved from \p BlockPtr to \p NewBlock. \p NewBlock must
2481 /// have neither successors nor predecessors.
2482 static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) {
2483 assert(NewBlock->getSuccessors().empty() &&
2484 NewBlock->getPredecessors().empty() &&
2485 "Can't insert new block with predecessors or successors.");
2486 NewBlock->setParent(BlockPtr->getParent());
2487 SmallVector<VPBlockBase *> Succs(BlockPtr->successors());
2488 for (VPBlockBase *Succ : Succs) {
2489 disconnectBlocks(BlockPtr, Succ);
2490 connectBlocks(NewBlock, Succ);
2491 }
2492 connectBlocks(BlockPtr, NewBlock);
2493 }
2494
2495 /// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p
2496 /// BlockPtr. Add \p IfTrue and \p IfFalse as succesors of \p BlockPtr and \p
2497 /// BlockPtr as predecessor of \p IfTrue and \p IfFalse. Propagate \p BlockPtr
2498 /// parent to \p IfTrue and \p IfFalse. \p BlockPtr must have no successors
2499 /// and \p IfTrue and \p IfFalse must have neither successors nor
2500 /// predecessors.
2501 static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse,
2502 VPBlockBase *BlockPtr) {
2503 assert(IfTrue->getSuccessors().empty() &&
2504 "Can't insert IfTrue with successors.");
2505 assert(IfFalse->getSuccessors().empty() &&
2506 "Can't insert IfFalse with successors.");
2507 BlockPtr->setTwoSuccessors(IfTrue, IfFalse);
2508 IfTrue->setPredecessors({BlockPtr});
2509 IfFalse->setPredecessors({BlockPtr});
2510 IfTrue->setParent(BlockPtr->getParent());
2511 IfFalse->setParent(BlockPtr->getParent());
2512 }
2513
2514 /// Connect VPBlockBases \p From and \p To bi-directionally. Append \p To to
2515 /// the successors of \p From and \p From to the predecessors of \p To. Both
2516 /// VPBlockBases must have the same parent, which can be null. Both
2517 /// VPBlockBases can be already connected to other VPBlockBases.
2519 assert((From->getParent() == To->getParent()) &&
2520 "Can't connect two block with different parents");
2521 assert(From->getNumSuccessors() < 2 &&
2522 "Blocks can't have more than two successors.");
2523 From->appendSuccessor(To);
2524 To->appendPredecessor(From);
2525 }
2526
2527 /// Disconnect VPBlockBases \p From and \p To bi-directionally. Remove \p To
2528 /// from the successors of \p From and \p From from the predecessors of \p To.
2530 assert(To && "Successor to disconnect is null.");
2531 From->removeSuccessor(To);
2532 To->removePredecessor(From);
2533 }
2534
2535 /// Return an iterator range over \p Range which only includes \p BlockTy
2536 /// blocks. The accesses are casted to \p BlockTy.
2537 template <typename BlockTy, typename T>
2538 static auto blocksOnly(const T &Range) {
2539 // Create BaseTy with correct const-ness based on BlockTy.
2540 using BaseTy = std::conditional_t<std::is_const<BlockTy>::value,
2541 const VPBlockBase, VPBlockBase>;
2542
2543 // We need to first create an iterator range over (const) BlocktTy & instead
2544 // of (const) BlockTy * for filter_range to work properly.
2545 auto Mapped =
2546 map_range(Range, [](BaseTy *Block) -> BaseTy & { return *Block; });
2548 Mapped, [](BaseTy &Block) { return isa<BlockTy>(&Block); });
2549 return map_range(Filter, [](BaseTy &Block) -> BlockTy * {
2550 return cast<BlockTy>(&Block);
2551 });
2552 }
2553};
2554
2557 InterleaveGroupMap;
2558
2559 /// Type for mapping of instruction based interleave groups to VPInstruction
2560 /// interleave groups
2563
2564 /// Recursively \p Region and populate VPlan based interleave groups based on
2565 /// \p IAI.
2566 void visitRegion(VPRegionBlock *Region, Old2NewTy &Old2New,
2568 /// Recursively traverse \p Block and populate VPlan based interleave groups
2569 /// based on \p IAI.
2570 void visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
2572
2573public:
2575
2578 // Avoid releasing a pointer twice.
2579 for (auto &I : InterleaveGroupMap)
2580 DelSet.insert(I.second);
2581 for (auto *Ptr : DelSet)
2582 delete Ptr;
2583 }
2584
2585 /// Get the interleave group that \p Instr belongs to.
2586 ///
2587 /// \returns nullptr if doesn't have such group.
2590 return InterleaveGroupMap.lookup(Instr);
2591 }
2592};
2593
2594/// Class that maps (parts of) an existing VPlan to trees of combined
2595/// VPInstructions.
2597 enum class OpMode { Failed, Load, Opcode };
2598
2599 /// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as
2600 /// DenseMap keys.
2601 struct BundleDenseMapInfo {
2602 static SmallVector<VPValue *, 4> getEmptyKey() {
2603 return {reinterpret_cast<VPValue *>(-1)};
2604 }
2605
2606 static SmallVector<VPValue *, 4> getTombstoneKey() {
2607 return {reinterpret_cast<VPValue *>(-2)};
2608 }
2609
2610 static unsigned getHashValue(const SmallVector<VPValue *, 4> &V) {
2611 return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
2612 }
2613
2614 static bool isEqual(const SmallVector<VPValue *, 4> &LHS,
2616 return LHS == RHS;
2617 }
2618 };
2619
2620 /// Mapping of values in the original VPlan to a combined VPInstruction.
2622 BundleToCombined;
2623
2625
2626 /// Basic block to operate on. For now, only instructions in a single BB are
2627 /// considered.
2628 const VPBasicBlock &BB;
2629
2630 /// Indicates whether we managed to combine all visited instructions or not.
2631 bool CompletelySLP = true;
2632
2633 /// Width of the widest combined bundle in bits.
2634 unsigned WidestBundleBits = 0;
2635
2636 using MultiNodeOpTy =
2637 typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;
2638
2639 // Input operand bundles for the current multi node. Each multi node operand
2640 // bundle contains values not matching the multi node's opcode. They will
2641 // be reordered in reorderMultiNodeOps, once we completed building a
2642 // multi node.
2643 SmallVector<MultiNodeOpTy, 4> MultiNodeOps;
2644
2645 /// Indicates whether we are building a multi node currently.
2646 bool MultiNodeActive = false;
2647
2648 /// Check if we can vectorize Operands together.
2649 bool areVectorizable(ArrayRef<VPValue *> Operands) const;
2650
2651 /// Add combined instruction \p New for the bundle \p Operands.
2652 void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New);
2653
2654 /// Indicate we hit a bundle we failed to combine. Returns nullptr for now.
2655 VPInstruction *markFailed();
2656
2657 /// Reorder operands in the multi node to maximize sequential memory access
2658 /// and commutative operations.
2659 SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps();
2660
2661 /// Choose the best candidate to use for the lane after \p Last. The set of
2662 /// candidates to choose from are values with an opcode matching \p Last's
2663 /// or loads consecutive to \p Last.
2664 std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last,
2665 SmallPtrSetImpl<VPValue *> &Candidates,
2667
2668#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2669 /// Print bundle \p Values to dbgs().
2670 void dumpBundle(ArrayRef<VPValue *> Values);
2671#endif
2672
2673public:
2674 VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {}
2675
2676 ~VPlanSlp() = default;
2677
2678 /// Tries to build an SLP tree rooted at \p Operands and returns a
2679 /// VPInstruction combining \p Operands, if they can be combined.
2681
2682 /// Return the width of the widest combined bundle in bits.
2683 unsigned getWidestBundleBits() const { return WidestBundleBits; }
2684
2685 /// Return true if all visited instruction can be combined.
2686 bool isCompletelySLP() const { return CompletelySLP; }
2687};
2688
2689namespace vputils {
2690
2691/// Returns true if only the first lane of \p Def is used.
2692bool onlyFirstLaneUsed(VPValue *Def);
2693
2694/// Get or create a VPValue that corresponds to the expansion of \p Expr. If \p
2695/// Expr is a SCEVConstant or SCEVUnknown, return a VPValue wrapping the live-in
2696/// value. Otherwise return a VPExpandSCEVRecipe to expand \p Expr. If \p Plan's
2697/// pre-header already contains a recipe expanding \p Expr, return it. If not,
2698/// create a new one.
2700 ScalarEvolution &SE);
2701
2702/// Returns true if \p VPV is uniform after vectorization.
2704 // A value defined outside the vector region must be uniform after
2705 // vectorization inside a vector region.
2707 return true;
2708 VPRecipeBase *Def = VPV->getDefiningRecipe();
2709 assert(Def && "Must have definition for value defined inside vector region");
2710 if (auto Rep = dyn_cast<VPReplicateRecipe>(Def))
2711 return Rep->isUniform();
2712 return false;
2713}
2714} // end namespace vputils
2715
2716} // end namespace llvm
2717
2718#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
aarch64 promote const
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
always inline
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
RelocType Type
Definition: COFFYAML.cpp:390
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:492
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
uint64_t Addr
std::string Name
Flatten the CFG
Hexagon Common GEP
std::pair< BasicBlock *, unsigned > BlockTy
A pair of (basic block, score).
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
This file implements a map that provides insertion order iteration.
#define P(N)
static cl::opt< RegAllocEvictionAdvisorAnalysis::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Development, "development", "for training")))
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file contains the declarations of the entities induced by Vectorization Plans,...
#define VP_CLASSOF_IMPL(VPDefID)
Definition: VPlan.h:759
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition: blake3_impl.h:85
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:193
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
This class represents a function call, abstracting a target machine's calling convention.
A debug info location.
Definition: DebugLoc.h:33
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:197
bool erase(const KeyT &Val)
Definition: DenseMap.h:302
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:145
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:207
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:21
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:940
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
A struct for saving information about induction variables.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
The group of interleaved loads/stores sharing the same stride and close to each other.
Definition: VectorUtils.h:623
uint32_t getFactor() const
Definition: VectorUtils.h:639
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
Definition: VectorUtils.h:693
Drive the analysis of interleaved memory accesses in the loop.
Definition: VectorUtils.h:765
An instruction for reading from memory.
Definition: Instructions.h:177
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:547
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:60
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:37
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
Definition: MapVector.h:174
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:69
This class represents an analyzed expression in the program.
The main scalar evolution driver.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:208
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
Definition: SetVector.h:202
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
void clear()
Completely clear the SetVector.
Definition: SetVector.h:213
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:72
This class provides computation of slot numbers for LLVM Assembly writing.
Definition: AsmWriter.cpp:674
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:344
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:301
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
iterator erase(const_iterator CI)
Definition: SmallVector.h:741
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
An instruction for storing to memory.
Definition: Instructions.h:301
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:341
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
Definition: VPlan.h:1803
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
static bool classof(const VPHeaderPHIRecipe *D)
Definition: VPlan.h:1815
VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
Definition: VPlan.h:1807
~VPActiveLaneMaskPHIRecipe() override=default
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:1939
RecipeListTy::const_iterator const_iterator
Definition: VPlan.h:1961
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition: VPlan.h:2007
RecipeListTy::const_reverse_iterator const_reverse_iterator
Definition: VPlan.h:1963
RecipeListTy::iterator iterator
Instruction iterators...
Definition: VPlan.h:1960
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition: VPlan.cpp:322
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition: VPlan.h:1986
iterator end()
Definition: VPlan.h:1970
VPBasicBlock(const Twine &Name="", VPRecipeBase *Recipe=nullptr)
Definition: VPlan.h:1948
iterator begin()
Recipe iterator methods.
Definition: VPlan.h:1968
RecipeListTy::reverse_iterator reverse_iterator
Definition: VPlan.h:1962
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition: VPlan.h:2017
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition: VPlan.cpp:204
~VPBasicBlock() override
Definition: VPlan.h:1954
VPRegionBlock * getEnclosingLoopRegion()
Definition: VPlan.cpp:424
void dropAllReferences(VPValue *NewValue) override
Replace all operands of VPUsers in the block with NewValue and also replaces all uses of VPValues def...
Definition: VPlan.cpp:389
const_reverse_iterator rbegin() const
Definition: VPlan.h:1974
reverse_iterator rend()
Definition: VPlan.h:1975
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
Definition: VPlan.cpp:399
VPRecipeBase & back()
Definition: VPlan.h:1983
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPBsicBlock to O, prefixing all lines with Indent.
Definition: VPlan.cpp:492
const VPRecipeBase & front() const
Definition: VPlan.h:1980
const_iterator begin() const
Definition: VPlan.h:1969
VPRecipeBase & front()
Definition: VPlan.h:1981
bool isExiting() const
Returns true if the block is exiting it's parent region.
Definition: VPlan.cpp:475
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
Definition: VPlan.cpp:463
const VPRecipeBase & back() const
Definition: VPlan.h:1982
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition: VPlan.h:1998
bool empty() const
Definition: VPlan.h:1979
const_iterator end() const
Definition: VPlan.h:1971
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:1994
static RecipeListTy VPBasicBlock::* getSublistAccess(VPRecipeBase *)
Returns a pointer to a member of the recipe list.
Definition: VPlan.h:1989
reverse_iterator rbegin()
Definition: VPlan.h:1973
size_t size() const
Definition: VPlan.h:1978
const_reverse_iterator rend() const
Definition: VPlan.h:1976
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
Definition: VPlan.h:1323
VPBlendRecipe(PHINode *Phi, ArrayRef< VPValue * > Operands)
The blend operation is a User of the incoming values and of their respective masks,...
Definition: VPlan.h:1330
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:1360
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition: VPlan.h:1345
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition: VPlan.h:1348
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account that a single incoming value has no mask.
Definition: VPlan.h:1342
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:390
VPRegionBlock * getParent()
Definition: VPlan.h:462
VPBlocksTy & getPredecessors()
Definition: VPlan.h:493
const VPBasicBlock * getExitingBasicBlock() const
Definition: VPlan.cpp:169
LLVM_DUMP_METHOD void dump() const
Dump this VPBlockBase to dbgs().
Definition: VPlan.h:629
void setName(const Twine &newName)
Definition: VPlan.h:455
size_t getNumSuccessors() const
Definition: VPlan.h:507
iterator_range< VPBlockBase ** > successors()
Definition: VPlan.h:490
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition: VPlan.cpp:480
bool isLegalToHoistInto()
Return true if it is legal to hoist instructions into this block.
Definition: VPlan.h:594
virtual ~VPBlockBase()=default
void print(raw_ostream &O) const
Print plain-text dump of this VPlan to O.
Definition: VPlan.h:619
const VPBlocksTy & getHierarchicalPredecessors()
Definition: VPlan.h:543
size_t getNumPredecessors() const
Definition: VPlan.h:508
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition: VPlan.h:574
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition: VPlan.cpp:191
const VPBlocksTy & getPredecessors() const
Definition: VPlan.h:492
static void deleteCFG(VPBlockBase *Entry)
Delete all blocks reachable from a given VPBlockBase, inclusive.
Definition: VPlan.cpp:199
VPlan * getPlan()
Definition: VPlan.cpp:143
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition: VPlan.cpp:162
const VPRegionBlock * getParent() const
Definition: VPlan.h:463
void printAsOperand(raw_ostream &OS, bool PrintType) const
Definition: VPlan.h:605
const std::string & getName() const
Definition: VPlan.h:453
void clearSuccessors()
Remove all the successors of this block.
Definition: VPlan.h:584
VPBlockBase * getSingleHierarchicalSuccessor()
Definition: VPlan.h:533
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition: VPlan.h:565
VPBlockBase * getSinglePredecessor() const
Definition: VPlan.h:503
virtual void execute(VPTransformState *State)=0
The method which generates the output IR that correspond to this VPBlockBase, thereby "executing" the...
const VPBlocksTy & getHierarchicalSuccessors()
Definition: VPlan.h:527
void clearPredecessors()
Remove all the predecessor of this block.
Definition: VPlan.h:581
enum { VPBasicBlockSC, VPRegionBlockSC } VPBlockTy
An enumeration for keeping track of the concrete subclass of VPBlockBase that are actually instantiat...
Definition: VPlan.h:447
unsigned getVPBlockID() const
Definition: VPlan.h:460
VPBlockBase(const unsigned char SC, const std::string &N)
Definition: VPlan.h:439
VPBlocksTy & getSuccessors()
Definition: VPlan.h:488
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition: VPlan.cpp:183
const VPBasicBlock * getEntryBasicBlock() const
Definition: VPlan.cpp:148
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition: VPlan.h:556
void setParent(VPRegionBlock *P)
Definition: VPlan.h:473
virtual void dropAllReferences(VPValue *NewValue)=0
Replace all operands of VPUsers in the block with NewValue and also replaces all uses of VPValues def...
VPBlockBase * getSingleHierarchicalPredecessor()
Definition: VPlan.h:549
VPBlockBase * getSingleSuccessor() const
Definition: VPlan.h:497
const VPBlocksTy & getSuccessors() const
Definition: VPlan.h:487
Class that provides utilities for VPBlockBases in VPlan.
Definition: VPlan.h:2473
static auto blocksOnly(const T &Range)
Return an iterator range over Range which only includes BlockTy blocks.
Definition: VPlan.h:2538
static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBase NewBlock after BlockPtr.
Definition: VPlan.h:2482
static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBases IfTrue and IfFalse after BlockPtr.
Definition: VPlan.h:2501
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
Definition: VPlan.h:2529
static void connectBlocks(VPBlockBase *From, VPBlockBase *To)
Connect VPBlockBases From and To bi-directionally.
Definition: VPlan.h:2518
A recipe for generating conditional branches on the bits of a mask.
Definition: VPlan.h:1556
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:1584
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition: VPlan.h:1572
VPBranchOnMaskRecipe(VPValue *BlockInMask)
Definition: VPlan.h:1558
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition: VPlan.h:1591
void execute(VPTransformState &State) override
Generate the extraction of the appropriate bit from the block mask and the conditional branch.
Canonical scalar induction phi of the vector loop.
Definition: VPlan.h:1758
~VPCanonicalIVPHIRecipe() override=default
static bool classof(const VPHeaderPHIRecipe *D)
Definition: VPlan.h:1769
VPCanonicalIVPHIRecipe(VPValue *StartV, DebugLoc DL)
Definition: VPlan.h:1762
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:1788
void execute(VPTransformState &State) override
Generate the canonical scalar induction phi of the vector loop.
bool isCanonical(const InductionDescriptor &ID, Type *Ty) const
Check if the induction described by ID is canonical, i.e.
const Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:1783
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
This class augments a recipe with a set of VPValues defined by the recipe.
Definition: VPlanValue.h:303
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
Definition: VPlanValue.h:381
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
Definition: VPlanValue.h:393
unsigned getVPDefID() const
Definition: VPlanValue.h:413
A recipe for converting the canonical IV value to the corresponding value of an IV with different sta...
Definition: VPlan.h:1861
void execute(VPTransformState &State) override
Generate the transformed value of the induction at offset StartValue (1.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start, VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step, Type *ResultTy)
Definition: VPlan.h:1870
VPValue * getStepValue() const
Definition: VPlan.h:1892
VPValue * getCanonicalIV() const
Definition: VPlan.h:1891
~VPDerivedIVRecipe() override=default
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:1895
VPValue * getStartValue() const
Definition: VPlan.h:1890
Recipe to expand a SCEV expression.
Definition: VPlan.h:1729
VPExpandSCEVRecipe(const SCEV *Expr, ScalarEvolution &SE)
Definition: VPlan.h:1734
const SCEV * getSCEV() const
Definition: VPlan.h:1751
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPExpandSCEVRecipe() override=default
A pure virtual base class for all recipes modeling header phis, including phis for first order recurr...
Definition: VPlan.h:1113
VPRecipeBase & getBackedgeRecipe()
Returns the backedge value as a recipe.
Definition: VPlan.h:1163
VPHeaderPHIRecipe(unsigned char VPDefID, PHINode *Phi, VPValue *Start=nullptr)
Definition: VPlan.h:1115
static bool classof(const VPValue *V)
Definition: VPlan.h:1130
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override=0
Print the recipe.
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition: VPlan.h:1146
void setStartValue(VPValue *V)
Update the start value of the recipe.
Definition: VPlan.h:1154
VPValue * getStartValue() const
Definition: VPlan.h:1149
static bool classof(const VPRecipeBase *B)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:1126
void execute(VPTransformState &State) override=0
Generate the phi nodes.
VPValue * getBackedgeValue()
Returns the incoming value from the loop backedge.
Definition: VPlan.h:1157
~VPHeaderPHIRecipe() override=default
This is a concrete Recipe that models a single VPlan-level instruction.
Definition: VPlan.h:779
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:886
VPInstruction * clone() const
Definition: VPlan.h:831
VPInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL, const Twine &Name="")
Definition: VPlan.h:820
bool hasResult() const
Definition: VPlan.h:860
void setUnderlyingInstr(Instruction *I)
Definition: VPlan.h:817
LLVM_DUMP_METHOD void dump() const
Print the VPInstruction to dbgs() (for debugging).
unsigned getOpcode() const
Definition: VPlan.h:836
VPInstruction(unsigned Opcode, std::initializer_list< VPValue * > Operands, DebugLoc DL={}, const Twine &Name="")
Definition: VPlan.h:825
void setFastMathFlags(FastMathFlags FMFNew)
Set the fast-math flags.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the VPInstruction to O.
bool mayWriteToMemory() const
Return true if this instruction may modify memory.
Definition: VPlan.h:853
void execute(VPTransformState &State) override
Generate the instruction.
@ CanonicalIVIncrementForPartNUW
Definition: VPlan.h:798
@ FirstOrderRecurrenceSplice
Definition: VPlan.h:785
@ CanonicalIVIncrementNUW
Definition: VPlan.h:794
@ CanonicalIVIncrementForPart
Definition: VPlan.h:797
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
Definition: VPlan.h:1374
bool onlyFirstLaneUsed(const VPValue *Op) const override
The recipe only uses the first lane of the address.
Definition: VPlan.h:1440
~VPInterleaveRecipe() override=default
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition: VPlan.h:1402
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:1408
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the wide load or store, and shuffles.
VPInterleaveRecipe(const InterleaveGroup< Instruction > *IG, VPValue *Addr, ArrayRef< VPValue * > StoredValues, VPValue *Mask)
Definition: VPlan.h:1380
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition: VPlan.h:1415
const InterleaveGroup< Instruction > * getInterleaveGroup()
Definition: VPlan.h:1431
unsigned getNumStoreOperands() const
Returns the number of stored operands of this interleave group.
Definition: VPlan.h:1435
InterleaveGroup< VPInstruction > * getInterleaveGroup(VPInstruction *Instr) const
Get the interleave group that Instr belongs to.
Definition: VPlan.h:2589
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Definition: VPlan.h:113
static VPLane getLastLaneForVF(const ElementCount &VF)
Definition: VPlan.h:139
static unsigned getNumCachedLanes(const ElementCount &VF)
Returns the maxmimum number of lanes that we are able to consider caching for VF.
Definition: VPlan.h:182
Value * getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const
Returns an expression describing the lane index that can be used at runtime.
Definition: VPlan.cpp:63
VPLane(unsigned Lane, Kind LaneKind)
Definition: VPlan.h:135
Kind getKind() const
Returns the Kind of lane offset.
Definition: VPlan.h:163
bool isFirstLane() const
Returns true if this is the first lane of the whole vector.
Definition: VPlan.h:166
unsigned getKnownLane() const
Returns a compile-time known value for the lane index and asserts if the lane can only be calculated ...
Definition: VPlan.h:153
static VPLane getFirstLane()
Definition: VPlan.h:137
Kind
Kind describes how to interpret Lane.
Definition: VPlan.h:116
@ ScalableLast
For ScalableLast, Lane is the offset from the start of the last N-element subvector in a scalable vec...
@ First
For First, Lane is the index into the first N elements of a fixed-vector <N x <ElTy>> or a scalable v...
unsigned mapToCacheIndex(const ElementCount &VF) const
Maps the lane to a cache index based on VF.
Definition: VPlan.h:169
A value that is used outside the VPlan.
Definition: VPlan.h:635
VPLiveOut(PHINode *Phi, VPValue *Op)
Definition: VPlan.h:639
bool usesScalars(const VPValue *Op) const override
Returns true if the VPLiveOut uses scalars of operand Op.
Definition: VPlan.h:650
PHINode * getPhi() const
Definition: VPlan.h:656
void fixPhi(VPlan &Plan, VPTransformState &State)
Fixup the wrapped LCSSA phi node in the unique exit block.
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...
Definition: VPlan.h:1603
~VPPredInstPHIRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition: VPlan.h:1623
VPPredInstPHIRecipe(VPValue *PredV)
Construct a VPPredInstPHIRecipe given PredInst whose value needs a phi nodes after merging back from ...
Definition: VPlan.h:1607
void execute(VPTransformState &State) override
Generates phi nodes for live-outs as needed to retain SSA form.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition: VPlan.h:666
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
VPRecipeBase(const unsigned char SC, ArrayRef< VPValue * > Operands)
Definition: VPlan.h:674
bool mayReadOrWriteMemory() const
Returns true if the recipe may read from or write to memory.
Definition: VPlan.h:753
bool mayHaveSideEffects() const
Returns true if the recipe may have side-effects.
Instruction * getUnderlyingInstr()
Returns the underlying instruction, if the recipe is a VPValue or nullptr otherwise.
Definition: VPlan.h:721
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
virtual ~VPRecipeBase()=default
VPBasicBlock * getParent()
Definition: VPlan.h:683
virtual void execute(VPTransformState &State)=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
static bool classof(const VPDef *D)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:729
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
VPRecipeBase(const unsigned char SC, iterator_range< IterT > Operands)
Definition: VPlan.h:678
const VPBasicBlock * getParent() const
Definition: VPlan.h:684
const Instruction * getUnderlyingInstr() const
Definition: VPlan.h:724
static bool classof(const VPUser *U)
Definition: VPlan.h:734
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
bool isPhi() const
Returns true for PHI-like recipes.
Definition: VPlan.h:742
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
A recipe for handling reduction phis.
Definition: VPlan.h:1272
VPReductionPHIRecipe(PHINode *Phi, const RecurrenceDescriptor &RdxDesc, VPValue &Start, bool IsInLoop=false, bool IsOrdered=false)
Create a new VPReductionPHIRecipe for the reduction Phi described by RdxDesc.
Definition: VPlan.h:1285
bool isOrdered() const
Returns true, if the phi is part of an ordered reduction.
Definition: VPlan.h:1315
~VPReductionPHIRecipe() override=default
bool isInLoop() const
Returns true, if the phi is part of an in-loop reduction.
Definition: VPlan.h:1318
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
static bool classof(const VPHeaderPHIRecipe *R)
Definition: VPlan.h:1297
const RecurrenceDescriptor & getRecurrenceDescriptor() const
Definition: VPlan.h:1310
A recipe to represent inloop reduction operations, performing a reduction on a vector operand into a ...
Definition: VPlan.h:1450
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition: VPlan.h:1482
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition: VPlan.h:1484
~VPReductionRecipe() override=default
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition: VPlan.h:1480
VPReductionRecipe(const RecurrenceDescriptor *R, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, const TargetTransformInfo *TTI)
Definition: VPlan.h:1457
void execute(VPTransformState &State) override
Generate the reduction in the loop.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition: VPlan.h:2063
const VPBlockBase * getEntry() const
Definition: VPlan.h:2102
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition: VPlan.h:2134
void dropAllReferences(VPValue *NewValue) override
Replace all operands of VPUsers in the block with NewValue and also replaces all uses of VPValues def...
Definition: VPlan.cpp:506
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Definition: VPlan.h:2119
VPBlockBase * getExiting()
Definition: VPlan.h:2115
void setEntry(VPBlockBase *EntryBlock)
Set EntryBlock as the entry VPBlockBase of this VPRegionBlock.
Definition: VPlan.h:2107
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPRegionBlock to O (recursively), prefixing all lines with Indent.
Definition: VPlan.cpp:565
VPRegionBlock(const std::string &Name="", bool IsReplicator=false)
Definition: VPlan.h:2085
VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="", bool IsReplicator=false)
Definition: VPlan.h:2076
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPRegionBlock,...
Definition: VPlan.cpp:513
const VPBlockBase * getExiting() const
Definition: VPlan.h:2114
VPBlockBase * getEntry()
Definition: VPlan.h:2103
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
Definition: VPlan.h:2127
~VPRegionBlock() override
Definition: VPlan.h:2089
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:2098
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition: VPlan.h:1493
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate replicas of the desired Ingredient.
~VPReplicateRecipe() override=default
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:1541
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition: VPlan.h:1548
bool isUniform() const
Definition: VPlan.h:1534
bool isPredicated() const
Definition: VPlan.h:1538
VPReplicateRecipe(Instruction *I, iterator_range< IterT > Operands, bool IsUniform, bool IsPredicated=false)
Definition: VPlan.h:1505
void setAlsoPack(bool Pack)
Definition: VPlan.h:1526
bool isPacked() const
Definition: VPlan.h:1536
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
Definition: VPlan.h:1904
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:1929
VPValue * getStepValue() const
Definition: VPlan.h:1926
VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV, VPValue *Step)
Definition: VPlan.h:1908
~VPScalarIVStepsRecipe() override=default
void execute(VPTransformState &State) override
Generate the scalarized versions of the phi node as needed by their users.
This class can be used to assign consecutive numbers to all VPValues in a VPlan and allows querying t...
Definition: VPlanValue.h:431
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition: VPlanValue.h:201
operand_range operands()
Definition: VPlanValue.h:276
void setOperand(unsigned I, VPValue *New)
Definition: VPlanValue.h:256
unsigned getNumOperands() const
Definition: VPlanValue.h:250
operand_iterator op_begin()
Definition: VPlanValue.h:272
VPValue * getOperand(unsigned N) const
Definition: VPlanValue.h:251
virtual bool onlyFirstLaneUsed(const VPValue *Op) const
Returns true if the VPUser only uses the first lane of operand Op.
Definition: VPlanValue.h:291
VPUserID
Subclass identifier (for isa/dyn_cast).
Definition: VPlanValue.h:204
void addOperand(VPValue *Operand)
Definition: VPlanValue.h:245
VPUserID getVPUserID() const
Definition: VPlanValue.h:243
Value * getUnderlyingValue()
Return the underlying Value attached to this VPValue.
Definition: VPlanValue.h:84
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition: VPlan.cpp:113
friend class VPInstruction
Definition: VPlanValue.h:47
void setUnderlyingValue(Value *Val)
Definition: VPlanValue.h:77
Value * getLiveInIRValue()
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Definition: VPlanValue.h:177
user_range users()
Definition: VPlanValue.h:147
bool isDefinedOutsideVectorRegions() const
Returns true if the VPValue is defined outside any vector regions, i.e.
Definition: VPlanValue.h:191
A recipe for widening Call instructions.
Definition: VPlan.h:930
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenCallRecipe(CallInst &I, iterator_range< IterT > CallArguments, Intrinsic::ID VectorIntrinsicID)
Definition: VPlan.h:937
void execute(VPTransformState &State) override
Produce a widened version of the call instruction.
~VPWidenCallRecipe() override=default
A Recipe for widening the canonical induction variable of the vector loop.
Definition: VPlan.h:1830
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenCanonicalIVRecipe() override=default
VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
Definition: VPlan.h:1832
const Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:1852
A recipe for handling GEP instructions.
Definition: VPlan.h:984
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the gep nodes.
~VPWidenGEPRecipe() override=default
VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range< IterT > Operands, Loop *OrigLoop)
Definition: VPlan.h:995
VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range< IterT > Operands)
Definition: VPlan.h:990
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
Definition: VPlan.h:1020
const TruncInst * getTruncInst() const
Definition: VPlan.h:1067
~VPWidenIntOrFpInductionRecipe() override=default
VPValue * getStartValue()
Returns the start value of the induction.
Definition: VPlan.h:1055
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition: VPlan.h:1064
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, bool NeedsVectorIV)
Definition: VPlan.h:1026
void execute(VPTransformState &State) override
Generate the vectorized and scalarized versions of the phi node as needed by their users.
VPValue * getStepValue()
Returns the step value of the induction.
Definition: VPlan.h:1059
const Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:1081
const VPValue * getStartValue() const
Definition: VPlan.h:1056
bool needsVectorIV() const
Returns true if a vector phi needs to be created for the induction.
Definition: VPlan.h:1087
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, TruncInst *Trunc, bool NeedsVectorIV)
Definition: VPlan.h:1033
const VPValue * getStepValue() const
Definition: VPlan.h:1060
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition: VPlan.h:1074
A Recipe for widening load/store operations.
Definition: VPlan.h:1636
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:1683
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition: VPlan.h:1677
Instruction & getIngredient() const
Definition: VPlan.h:1725
VPWidenMemoryInstructionRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredValue, VPValue *Mask, bool Consecutive, bool Reverse)
Definition: VPlan.h:1665
void execute(VPTransformState &State) override
Generate the wide load/store.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, bool Consecutive, bool Reverse)
Definition: VPlan.h:1656
VPValue * getStoredValue() const
Return the address accessed by this recipe.
Definition: VPlan.h:1692
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:1714
bool isStore() const
Returns true if this recipe is a store.
Definition: VPlan.h:1689
A recipe for handling header phis that are widened in the vector loop.
Definition: VPlan.h:1209
void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock)
Adds a pair (IncomingV, IncomingBlock) to the phi.
Definition: VPlan.h:1235
VPValue * getIncomingValue(unsigned I)
Returns the I th incoming VPValue.
Definition: VPlan.h:1244
VPWidenPHIRecipe(PHINode *Phi, VPValue *Start=nullptr)
Create a new VPWidenPHIRecipe for Phi with start value Start.
Definition: VPlan.h:1215
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenPHIRecipe() override=default
VPBasicBlock * getIncomingBlock(unsigned I)
Returns the I th incoming VPBasicBlock.
Definition: VPlan.h:1241
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool onlyScalarsGenerated(ElementCount VF)
Returns true if only scalar values will be generated.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition: VPlan.h:1197
~VPWidenPointerInductionRecipe() override=default
void execute(VPTransformState &State) override
Generate vector values for the pointer induction.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, bool IsScalarAfterVectorization)
Create a new VPWidenPointerInductionRecipe for Phi with start value Start.
Definition: VPlan.h:1176
VPWidenRecipe is a recipe for producing a copy of vector type its ingredient.
Definition: VPlan.h:909
void execute(VPTransformState &State) override
Produce widened copies of all Ingredients.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenRecipe() override=default
VPWidenRecipe(Instruction &I, iterator_range< IterT > Operands)
Definition: VPlan.h:912
A recipe for widening select instructions.
Definition: VPlan.h:957
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Produce a widened version of the select instruction.
VPWidenSelectRecipe(SelectInst &I, iterator_range< IterT > Operands, bool InvariantCond)
Definition: VPlan.h:964
~VPWidenSelectRecipe() override=default
VPlanPrinter prints a given VPlan to a given output stream.
Definition: VPlan.h:2402
VPlanPrinter(raw_ostream &O, const VPlan &P)
Definition: VPlan.h:2443
LLVM_DUMP_METHOD void dump()
Definition: VPlan.cpp:868
Class that maps (parts of) an existing VPlan to trees of combined VPInstructions.
Definition: VPlan.h:2596
VPInstruction * buildGraph(ArrayRef< VPValue * > Operands)
Tries to build an SLP tree rooted at Operands and returns a VPInstruction combining Operands,...
Definition: VPlanSLP.cpp:359
bool isCompletelySLP() const
Return true if all visited instruction can be combined.
Definition: VPlan.h:2686
~VPlanSlp()=default
VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB)
Definition: VPlan.h:2674
unsigned getWidestBundleBits() const
Return the width of the widest combined bundle in bits.
Definition: VPlan.h:2683
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition: VPlan.h:2160
void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition: VPlan.cpp:802
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition: VPlan.cpp:778
void addVPValue(Value *V, VPValue *VPV)
Definition: VPlan.h:2296
VPValue & getVectorTripCount()
The vector trip count.
Definition: VPlan.h:2247
void setName(const Twine &newName)
Definition: VPlan.h:2276
void removeVPValueFor(Value *V)
Definition: VPlan.h:2325
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
Definition: VPlan.h:2240
void removeLiveOut(PHINode *PN)
Definition: VPlan.h:2382
VPBlockBase * getEntry()
Definition: VPlan.h:2223
void clearLiveOuts()
Definition: VPlan.h:2376
VPValue * getOrCreateTripCount()
The trip count of the original loop.
Definition: VPlan.h:2233
void addLiveOut(PHINode *PN, VPValue *V)
Definition: VPlan.cpp:811
void prepareToExecute(Value *TripCount, Value *VectorTripCount, Value *CanonicalIVStartValue, VPTransformState &State, bool IsEpilogueVectorization)
Prepare the plan for execution, setting up the required live-in values.
Definition: VPlan.cpp:608
VPValue * getOrAddExternalDef(Value *V)
Get the existing or add a new external definition for V.
Definition: VPlan.h:2279
VPValue * getOrAddVPValue(Value *V, bool OverrideAllowed=false)
Gets the VPValue or adds a new one (if none exists yet) for V.
Definition: VPlan.h:2316
VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition: VPlan.h:2353
const VPRegionBlock * getVectorLoopRegion() const
Definition: VPlan.h:2356
bool hasVF(ElementCount VF)
Definition: VPlan.h:2261
bool hasUF(unsigned UF) const
Definition: VPlan.h:2265
VPlan(VPBlockBase *Entry=nullptr)
Definition: VPlan.h:2208
void setVF(ElementCount VF)
Definition: VPlan.h:2255
void addVPValue(Value *V)
Definition: VPlan.h:2286
VPValue * getVPValue(Value *V, bool OverrideAllowed=false)
Returns the VPValue for V.
Definition: VPlan.h:2305
void disableValue2VPValue()
Mark the plan to indicate that using Value2VPValue is not safe any longer, because it may be stale.
Definition: VPlan.h:2251
const VPBlockBase * getEntry() const
Definition: VPlan.h:2224
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition: VPlan.cpp:808
bool hasScalarVFOnly() const
Definition: VPlan.h:2263
iterator_range< mapped_iterator< Use *, std::function< VPValue *(Value *)> > > mapToVPValues(User::op_range Operands)
Returns a range mapping the values the range Operands to their corresponding VPValues.
Definition: VPlan.h:2345
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition: VPlan.cpp:662
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the vector loop.
Definition: VPlan.h:2361
const MapVector< PHINode *, VPLiveOut * > & getLiveOuts() const
Definition: VPlan.h:2387
VPActiveLaneMaskPHIRecipe * getActiveLaneMaskPhi()
Find and return the VPActiveLaneMaskPHIRecipe from the header - there be only one at most.
Definition: VPlan.cpp:599
void print(raw_ostream &O) const
Print this VPlan to O.
Definition: VPlan.cpp:743
void addVF(ElementCount VF)
Definition: VPlan.h:2253
VPBlockBase * setEntry(VPBlockBase *Block)
Definition: VPlan.h:2226
void setUF(unsigned UF)
Definition: VPlan.h:2267
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:166
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:163
Iterator for intrusive lists based on ilist_node.
An ilist node that can access its parent list.
Definition: ilist_node.h:257
base_list_type::const_reverse_iterator const_reverse_iterator
Definition: ilist.h:182
void pop_back()
Definition: ilist.h:319
base_list_type::reverse_iterator reverse_iterator
Definition: ilist.h:180
base_list_type::const_iterator const_iterator
Definition: ilist.h:179
base_list_type::iterator iterator
Definition: ilist.h:178
iterator insert(iterator where, pointer New)
Definition: ilist.h:229
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
This file defines classes to implement an intrusive doubly linked list class (i.e.
This file defines the ilist_node class template, which is a convenient base class for creating classe...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ BasicBlock
Various leaf nodes.
Definition: ISDOpcodes.h:71
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr, ScalarEvolution &SE)
Get or create a VPValue that corresponds to the expansion of Expr.
Definition: VPlan.cpp:1116
bool isUniformAfterVectorization(VPValue *VPV)
Returns true if VPV is uniform after vectorization.
Definition: VPlan.h:2703
bool onlyFirstLaneUsed(VPValue *Def)
Returns true if only the first lane of Def is used.
Definition: VPlan.cpp:1111
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1755
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1735
bool isEqual(const GCNRPTracker::LiveRegSet &S1, const GCNRPTracker::LiveRegSet &S2)
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:198
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto map_range(ContainerTy &&C, FuncTy F)
Definition: STLExtras.h:434
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:288
std::unique_ptr< VPlan > VPlanPtr
Definition: VPlan.h:104
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
Definition: STLExtras.h:637
detail::enumerator< R > enumerate(R &&TheRange)
Given an input range, returns a new range whose values are are pair (A,B) such that A is the 0-based ...
Definition: STLExtras.h:2264
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:292
Value * createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, int64_t Step)
Return a value for Step multiplied by VF.
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1869
const SCEV * createTripCountSCEV(Type *IdxTy, PredicatedScalarEvolution &PSE)
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition: Hashing.h:486
#define N
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Definition: VPlan.h:84
const ElementCount Start
Definition: VPlan.h:86
ElementCount End
Definition: VPlan.h:89
bool isEmpty() const
Definition: VPlan.h:91
VFRange(const ElementCount &Start, const ElementCount &End)
Definition: VPlan.h:95
A recipe for handling first-order recurrence phis.
Definition: VPlan.h:1250
void execute(VPTransformState &State) override
Generate the phi nodes.
VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start)
Definition: VPlan.h:1251
static bool classof(const VPHeaderPHIRecipe *R)
Definition: VPlan.h:1256
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPIteration represents a single point in the iteration space of the output (vectorized and/or unrolle...
Definition: VPlan.h:189
VPIteration(unsigned Part, const VPLane &Lane)
Definition: VPlan.h:199
unsigned Part
in [0..UF)
Definition: VPlan.h:191
VPLane Lane
Definition: VPlan.h:193
VPIteration(unsigned Part, unsigned Lane, VPLane::Kind Kind=VPLane::Kind::First)
Definition: VPlan.h:195
bool isFirstIteration() const
Definition: VPlan.h:201
Hold state information used when constructing the CFG of the output IR, traversing the VPBasicBlocks ...
Definition: VPlan.h:330
BasicBlock * PrevBB
The previous IR BasicBlock created or used.
Definition: VPlan.h:336
SmallDenseMap< VPBasicBlock *, BasicBlock * > VPBB2IRBB
A mapping of each VPBasicBlock to the corresponding BasicBlock.
Definition: VPlan.h:344
VPBasicBlock * PrevVPBB
The previous VPBasicBlock visited. Initially set to null.
Definition: VPlan.h:332
BasicBlock * ExitBB
The last IR BasicBlock in the output IR.
Definition: VPlan.h:340
BasicBlock * getPreheaderBBFor(VPRecipeBase *R)
Returns the BasicBlock* mapped to the pre-header of the loop region containing R.
Definition: VPlan.cpp:232
SmallVector< Value *, 2 > PerPartValuesTy
A type for vectorized values in the new loop.
Definition: VPlan.h:226
DenseMap< VPValue *, ScalarsPerPartValuesTy > PerPartScalars
Definition: VPlan.h:231
DenseMap< VPValue *, PerPartValuesTy > PerPartOutput
Definition: VPlan.h:228
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
Definition: VPlan.h:206
VPValue2ValueTy VPValue2Value
Definition: VPlan.h:362
LoopInfo * LI
Hold a pointer to LoopInfo to register new basic blocks in the loop.
Definition: VPlan.h:354
VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI, DominatorTree *DT, IRBuilderBase &Builder, InnerLoopVectorizer *ILV, VPlan *Plan)
Definition: VPlan.h:207
void addMetadata(Instruction *To, Instruction *From)
Add metadata from one instruction to another.
Definition: VPlan.cpp:245
Value * get(VPValue *Def, unsigned Part)
Get the generated Value for a given VPValue and a given Part.
struct llvm::VPTransformState::DataState Data
void setDebugLocFromInst(const Value *V)
Set the debug location in the builder using the debug location in V.
Definition: VPlan.cpp:257
void reset(VPValue *Def, Value *V, unsigned Part)
Reset an existing vector value for Def and a given Part.
Definition: VPlan.h:273
struct llvm::VPTransformState::CFGState CFG
void reset(VPValue *Def, Value *V, const VPIteration &Instance)
Reset an existing scalar value for Def and a given Instance.
Definition: VPlan.h:295
void addNewMetadata(Instruction *To, const Instruction *Orig)
Add additional metadata to To that was not present on Orig.
Definition: VPlan.cpp:237
bool hasAnyVectorValue(VPValue *Def) const
Definition: VPlan.h:250
void set(VPValue *Def, Value *V, const VPIteration &Instance)
Set the generated scalar V for Def and the given Instance.
Definition: VPlan.h:281
std::optional< VPIteration > Instance
Hold the indices to generate specific scalar instructions.
Definition: VPlan.h:220
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
Definition: VPlan.h:360
SmallPtrSet< VPRecipeBase *, 16 > MayGeneratePoisonRecipes
Holds recipes that may generate a poison value that is used after vectorization, even when their oper...
Definition: VPlan.h:375
DominatorTree * DT
Hold a pointer to Dominator Tree to register new basic blocks in the loop.
Definition: VPlan.h:357
bool hasScalarValue(VPValue *Def, VPIteration Instance)
Definition: VPlan.h:254
VPlan * Plan
Pointer to the VPlan code is generated for.
Definition: VPlan.h:371
InnerLoopVectorizer * ILV
Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
Definition: VPlan.h:368
std::unique_ptr< LoopVersioning > LVer
LoopVersioning.
Definition: VPlan.h:385
Value * CanonicalIV
Hold the canonical scalar IV of the vector loop (start=0, step=VF*UF).
Definition: VPlan.h:365
bool hasVectorValue(VPValue *Def, unsigned Part)
Definition: VPlan.h:244
ElementCount VF
The chosen Vectorization and Unroll Factors of the loop being vectorized.
Definition: VPlan.h:214
Loop * CurrentVectorLoop
The loop object for the current parent region, or nullptr.
Definition: VPlan.h:378
void set(VPValue *Def, Value *V, unsigned Part)
Set the generated Value for a given VPValue and a given Part.
Definition: VPlan.h:265
VPlanIngredient(const Value *V)
Definition: VPlan.h:2452
const Value * V
Definition: VPlan.h:2450
void print(raw_ostream &O) const
Definition: VPlan.cpp:977