LLVM 19.0.0git
Go to the documentation of this file.
1//===- SLPVectorizer.h ------------------------------------------*- C++ -*-===//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8// This pass implements the Bottom Up SLP vectorizer. It detects consecutive
9// stores that can be put together into vector-stores. Next, it attempts to
10// construct vectorizable tree using the use-def chains. If a profitable tree
11// was found, the SLP vectorizer performs vectorization on the tree.
13// The pass is inspired by the work described in the paper:
14// "Loop-Aware SLP in GCC" by Ira Rosen, Dorit Nuzman, Ayal Zaks.
21#include "llvm/ADT/ArrayRef.h"
22#include "llvm/ADT/MapVector.h"
23#include "llvm/ADT/SetVector.h"
25#include "llvm/IR/PassManager.h"
27namespace llvm {
29class AAResults;
30class AssumptionCache;
31class BasicBlock;
32class DemandedBits;
33class DominatorTree;
34class Function;
35class GetElementPtrInst;
36class InsertElementInst;
37class InsertValueInst;
38class Instruction;
39class LoopInfo;
40class OptimizationRemarkEmitter;
41class PHINode;
42class ScalarEvolution;
43class StoreInst;
44class TargetLibraryInfo;
45class TargetTransformInfo;
46class Value;
47class WeakTrackingVH;
49/// A private "module" namespace for types and utilities used by this pass.
50/// These are implementation details and should not be used by clients.
51namespace slpvectorizer {
53class BoUpSLP;
55} // end namespace slpvectorizer
57struct SLPVectorizerPass : public PassInfoMixin<SLPVectorizerPass> {
64 ScalarEvolution *SE = nullptr;
67 AAResults *AA = nullptr;
68 LoopInfo *LI = nullptr;
69 DominatorTree *DT = nullptr;
70 AssumptionCache *AC = nullptr;
71 DemandedBits *DB = nullptr;
72 const DataLayout *DL = nullptr;
77 // Glue for old PM.
79 TargetLibraryInfo *TLI_, AAResults *AA_, LoopInfo *LI_,
84 /// Collect store and getelementptr instructions and organize them
85 /// according to the underlying object of their pointer operands. We sort the
86 /// instructions by their underlying objects to reduce the cost of
87 /// consecutive access queries.
88 ///
89 /// TODO: We can further reduce this cost if we flush the chain creation
90 /// every time we run into a memory barrier.
91 void collectSeedInstructions(BasicBlock *BB);
93 /// Try to vectorize a list of operands.
94 /// \param MaxVFOnly Vectorize only using maximal allowed register size.
95 /// \returns true if a value was vectorized.
96 bool tryToVectorizeList(ArrayRef<Value *> VL, slpvectorizer::BoUpSLP &R,
97 bool MaxVFOnly = false);
99 /// Try to vectorize a chain that may start at the operands of \p I.
100 bool tryToVectorize(Instruction *I, slpvectorizer::BoUpSLP &R);
102 /// Try to vectorize chains that may start at the operands of
103 /// instructions in \p Insts.
104 bool tryToVectorize(ArrayRef<WeakTrackingVH> Insts,
107 /// Vectorize the store instructions collected in Stores.
108 bool vectorizeStoreChains(slpvectorizer::BoUpSLP &R);
110 /// Vectorize the index computations of the getelementptr instructions
111 /// collected in GEPs.
112 bool vectorizeGEPIndices(BasicBlock *BB, slpvectorizer::BoUpSLP &R);
114 /// Try to find horizontal reduction or otherwise, collect instructions
115 /// for postponed vectorization attempts.
116 /// \a P if not null designates phi node the reduction is fed into
117 /// (with reduction operators \a Root or one of its operands, in a basic block
118 /// \a BB).
119 /// \returns true if a horizontal reduction was matched and reduced.
120 /// \returns false if \a V is null or not an instruction,
121 /// or a horizontal reduction was not matched or not possible.
122 bool vectorizeHorReduction(PHINode *P, Instruction *Root, BasicBlock *BB,
125 SmallVectorImpl<WeakTrackingVH> &PostponedInsts);
127 /// Make an attempt to vectorize reduction and then try to vectorize
128 /// postponed binary operations.
129 /// \returns true on any successfull vectorization.
130 bool vectorizeRootInstruction(PHINode *P, Instruction *Root, BasicBlock *BB,
134 /// Try to vectorize trees that start at insertvalue instructions.
135 bool vectorizeInsertValueInst(InsertValueInst *IVI, BasicBlock *BB,
138 /// Try to vectorize trees that start at insertelement instructions.
139 bool vectorizeInsertElementInst(InsertElementInst *IEI, BasicBlock *BB,
142 /// Tries to vectorize \p CmpInts. \Returns true on success.
143 template <typename ItT>
144 bool vectorizeCmpInsts(iterator_range<ItT> CmpInsts, BasicBlock *BB,
147 /// Tries to vectorize constructs started from InsertValueInst or
148 /// InsertElementInst instructions.
149 bool vectorizeInserts(InstSetVector &Instructions, BasicBlock *BB,
152 /// Scan the basic block and look for patterns that are likely to start
153 /// a vectorization chain.
154 bool vectorizeChainsInBlock(BasicBlock *BB, slpvectorizer::BoUpSLP &R);
156 std::optional<bool> vectorizeStoreChain(ArrayRef<Value *> Chain,
158 unsigned Idx, unsigned MinVF,
159 unsigned &Size);
161 bool vectorizeStores(
163 DenseSet<std::tuple<Value *, Value *, Value *, Value *, unsigned>>
164 &Visited);
166 /// The store instructions in a basic block organized by base pointer.
167 StoreListMap Stores;
169 /// The getelementptr instructions in a basic block organized by base pointer.
170 GEPListMap GEPs;
173} // end namespace llvm
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
uint64_t Size
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file implements a map that provides insertion order iteration.
#define P(N)
This header defines various interfaces for pass management in LLVM.
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:321
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
This instruction inserts a single (scalar) element into a VectorType value.
This instruction inserts a struct field of array element value into an aggregate value.
The optimization diagnostic interface.
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:109
The main scalar evolution driver.
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
A range adaptor for a pair of iterators.
Bottom Up SLP Vectorizer.
@ BasicBlock
Various leaf nodes.
Definition: ISDOpcodes.h:71
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition: PassManager.h:74
ScalarEvolution * SE
Definition: SLPVectorizer.h:64
AssumptionCache * AC
Definition: SLPVectorizer.h:70
DominatorTree * DT
Definition: SLPVectorizer.h:69
TargetLibraryInfo * TLI
Definition: SLPVectorizer.h:66
const DataLayout * DL
Definition: SLPVectorizer.h:72
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
bool runImpl(Function &F, ScalarEvolution *SE_, TargetTransformInfo *TTI_, TargetLibraryInfo *TLI_, AAResults *AA_, LoopInfo *LI_, DominatorTree *DT_, AssumptionCache *AC_, DemandedBits *DB_, OptimizationRemarkEmitter *ORE_)