LLVM
15.0.0git
|
#include "llvm/Transforms/Vectorize/SLPVectorizer.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/DOTGraphTraits.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/InstructionCost.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/InjectTLIMappings.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Vectorize.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <memory>
#include <set>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
Go to the source code of this file.
Classes | |
class | llvm::slpvectorizer::BoUpSLP |
Bottom Up SLP Vectorizer. More... | |
struct | llvm::slpvectorizer::BoUpSLP::EdgeInfo |
This structure holds any data we need about the edges being traversed during buildTree_rec(). More... | |
class | llvm::slpvectorizer::BoUpSLP::LookAheadHeuristics |
A helper class used for scoring candidates for two consecutive lanes. More... | |
class | llvm::slpvectorizer::BoUpSLP::VLOperands |
A helper data structure to hold the operands of a vector of instructions. More... | |
struct | llvm::GraphTraits< BoUpSLP * > |
struct | llvm::GraphTraits< BoUpSLP * >::ChildIteratorType |
Add the VectorizableTree to the index iterator to be able to return TreeEntry pointers. More... | |
class | llvm::GraphTraits< BoUpSLP * >::nodes_iterator |
For the node iterator we just need to turn the TreeEntry iterator into a TreeEntry* iterator so that it dereferences to NodeRef. More... | |
struct | llvm::DOTGraphTraits< BoUpSLP * > |
Namespaces | |
llvm | |
This is an optimization pass for GlobalISel generic memory operations. | |
llvm::slpvectorizer | |
A private "module" namespace for types and utilities used by this pass. | |
Macros | |
#define | SV_NAME "slp-vectorizer" |
#define | DEBUG_TYPE "SLP" |
Functions | |
STATISTIC (NumVectorInstructions, "Number of vector instructions generated") | |
static bool | isValidElementType (Type *Ty) |
Predicate for the element types that the SLP vectorizer supports. More... | |
static bool | isConstant (Value *V) |
static bool | isVectorLikeInstWithConstOps (Value *V) |
Checks if V is one of vector-like instructions, i.e. More... | |
static bool | allSameBlock (ArrayRef< Value * > VL) |
static bool | allConstant (ArrayRef< Value * > VL) |
static bool | isSplat (ArrayRef< Value * > VL) |
static bool | isCommutative (Instruction *I) |
static bool | isUndefVector (const Value *V) |
Checks if the given value is actually an undefined constant vector. More... | |
static Optional< TargetTransformInfo::ShuffleKind > | isFixedVectorShuffle (ArrayRef< Value * > VL, SmallVectorImpl< int > &Mask) |
Checks if the vector of instructions can be represented as a shuffle, like: x0 = extractelement <4 x i8> x, i32 0 x3 = extractelement <4 x i8> x, i32 3 y1 = extractelement <4 x i8> y, i32 1 y2 = extractelement <4 x i8> y, i32 2 x0x0 = mul i8 x0, x0 x3x3 = mul i8 x3, x3 y1y1 = mul i8 y1, y1 y2y2 = mul i8 y2, y2 ins1 = insertelement <4 x i8> poison, i8 x0x0, i32 0 ins2 = insertelement <4 x i8> ins1, i8 x3x3, i32 1 ins3 = insertelement <4 x i8> ins2, i8 y1y1, i32 2 ins4 = insertelement <4 x i8> ins3, i8 y2y2, i32 3 ret <4 x i8> ins4 can be transformed into: %1 = shufflevector <4 x i8> x, <4 x i8> y, <4 x i32> <i32 0, i32 3, i32 5, i32 6> %2 = mul <4 x i8> %1, %1 ret <4 x i8> %2 We convert this initially to something like: x0 = extractelement <4 x i8> x, i32 0 x3 = extractelement <4 x i8> x, i32 3 y1 = extractelement <4 x i8> y, i32 1 y2 = extractelement <4 x i8> y, i32 2 %1 = insertelement <4 x i8> poison, i8 x0, i32 0 %2 = insertelement <4 x i8> %1, i8 x3, i32 1 %3 = insertelement <4 x i8> %2, i8 y1, i32 2 %4 = insertelement <4 x i8> %3, i8 y2, i32 3 %5 = mul <4 x i8> %4, %4 %6 = extractelement <4 x i8> %5, i32 0 ins1 = insertelement <4 x i8> poison, i8 %6, i32 0 %7 = extractelement <4 x i8> %5, i32 1 ins2 = insertelement <4 x i8> ins1, i8 %7, i32 1 %8 = extractelement <4 x i8> %5, i32 2 ins3 = insertelement <4 x i8> ins2, i8 %8, i32 2 %9 = extractelement <4 x i8> %5, i32 3 ins4 = insertelement <4 x i8> ins3, i8 %9, i32 3 ret <4 x i8> ins4 InstCombiner transforms this into a shuffle and vector mul Mask will return the Shuffle Mask equivalent to the extracted elements. More... | |
static Value * | isOneOf (const InstructionsState &S, Value *Op) |
Chooses the correct key for scheduling data. More... | |
static bool | isValidForAlternation (unsigned Opcode) |
static InstructionsState | getSameOpcode (ArrayRef< Value * > VL, unsigned BaseIndex=0) |
static bool | areCompatibleCmpOps (Value *BaseOp0, Value *BaseOp1, Value *Op0, Value *Op1) |
Checks if the provided operands of 2 cmp instructions are compatible, i.e. More... | |
static bool | allSameType (ArrayRef< Value * > VL) |
static Optional< unsigned > | getExtractIndex (Instruction *E) |
static bool | InTreeUserNeedToExtract (Value *Scalar, Instruction *UserInst, TargetLibraryInfo *TLI) |
static MemoryLocation | getLocation (Instruction *I) |
static bool | isSimple (Instruction *I) |
static void | addMask (SmallVectorImpl< int > &Mask, ArrayRef< int > SubMask) |
Shuffles Mask in accordance with the given SubMask . More... | |
static void | fixupOrderingIndices (SmallVectorImpl< unsigned > &Order) |
Order may have elements assigned special value (size) which is out of bounds. More... | |
static void | llvm::inversePermutation (ArrayRef< unsigned > Indices, SmallVectorImpl< int > &Mask) |
static Optional< unsigned > | llvm::getInsertIndex (const Value *InsertInst, unsigned Offset=0) |
static void | llvm::reorderScalars (SmallVectorImpl< Value * > &Scalars, ArrayRef< int > Mask) |
Reorders the list of scalars in accordance with the given Mask . More... | |
static bool | llvm::areAllOperandsNonInsts (Value *V) |
Checks if the provided value does not require scheduling. More... | |
static bool | llvm::isUsedOutsideBlock (Value *V) |
Checks if the provided value does not require scheduling. More... | |
static bool | llvm::doesNotNeedToBeScheduled (Value *V) |
Checks if the specified value does not require scheduling. More... | |
static bool | llvm::doesNotNeedToSchedule (ArrayRef< Value * > VL) |
Checks if the specified array of instructions does not require scheduling. More... | |
static void | reorderReuses (SmallVectorImpl< int > &Reuses, ArrayRef< int > Mask) |
Reorders the given Reuses mask according to the given Mask . More... | |
static void | reorderOrder (SmallVectorImpl< unsigned > &Order, ArrayRef< int > Mask) |
Reorders the given Order according to the given Mask . More... | |
static LoadsState | canVectorizeLoads (ArrayRef< Value * > VL, const Value *VL0, const TargetTransformInfo &TTI, const DataLayout &DL, ScalarEvolution &SE, LoopInfo &LI, SmallVectorImpl< unsigned > &Order, SmallVectorImpl< Value * > &PointerOps) |
Checks if the given array of loads can be represented as a vectorized, scatter or just simple gather. More... | |
bool | clusterSortPtrAccesses (ArrayRef< Value * > VL, Type *ElemTy, const DataLayout &DL, ScalarEvolution &SE, SmallVectorImpl< unsigned > &SortedIndices) |
static LLVM_DUMP_METHOD void | dumpOrder (const BoUpSLP::OrdersType &Order) |
static bool | needToScheduleSingleInstruction (ArrayRef< Value * > VL) |
static std::pair< size_t, size_t > | generateKeySubkey (Value *V, const TargetLibraryInfo *TLI, function_ref< hash_code(size_t, LoadInst *)> LoadsSubkeyGenerator, bool AllowAlternate) |
Generates key/subkey pair for the given value to provide effective sorting of the values and better detection of the vectorizable values sequences. More... | |
static std::pair< InstructionCost, InstructionCost > | getVectorCallCosts (CallInst *CI, FixedVectorType *VecTy, TargetTransformInfo *TTI, TargetLibraryInfo *TLI) |
static InstructionCost | computeExtractCost (ArrayRef< Value * > VL, FixedVectorType *VecTy, TargetTransformInfo::ShuffleKind ShuffleKind, ArrayRef< int > Mask, TargetTransformInfo &TTI) |
Compute the cost of creating a vector of type VecTy containing the extracted values from VL . More... | |
static void | buildShuffleEntryMask (ArrayRef< Value * > VL, ArrayRef< unsigned > ReorderIndices, ArrayRef< int > ReusesIndices, const function_ref< bool(Instruction *)> IsAltOp, SmallVectorImpl< int > &Mask, SmallVectorImpl< Value * > *OpScalars=nullptr, SmallVectorImpl< Value * > *AltScalars=nullptr) |
Build shuffle mask for shuffle graph entries and lists of main and alternate operations operands. More... | |
static bool | isAlternateInstruction (const Instruction *I, const Instruction *MainOp, const Instruction *AltOp) |
Checks if the specified instruction I is an alternate operation for the given MainOp and AltOp instructions. More... | |
static bool | isLoadCombineCandidateImpl (Value *Root, unsigned NumElts, TargetTransformInfo *TTI, bool MustMatchOrInst) |
static bool | areTwoInsertFromSameBuildVector (InsertElementInst *VU, InsertElementInst *V) |
Check if two insertelement instructions are from the same buildvector. More... | |
static bool | isFirstInsertElement (const InsertElementInst *IE1, const InsertElementInst *IE2) |
Checks if the IE1 instruction is followed by the IE2 instruction in the buildvector sequence. More... | |
template<typename T > | |
static T * | performExtractsShuffleAction (MutableArrayRef< std::pair< T *, SmallVector< int >>> ShuffleMask, Value *Base, function_ref< unsigned(T *)> GetVF, function_ref< std::pair< T *, bool >(T *, ArrayRef< int >)> ResizeAction, function_ref< T *(ArrayRef< int >, ArrayRef< T * >)> Action) |
Does the analysis of the provided shuffle masks and performs the requested actions on the vectors with the given shuffle masks. More... | |
static bool | collectValuesToDemote (Value *V, SmallPtrSetImpl< Value * > &Expr, SmallVectorImpl< Value * > &ToDemote, SmallVectorImpl< Value * > &Roots) |
static Optional< unsigned > | getAggregateSize (Instruction *InsertInst) |
static void | findBuildAggregate_rec (Instruction *LastInsertInst, TargetTransformInfo *TTI, SmallVectorImpl< Value * > &BuildVectorOpds, SmallVectorImpl< Value * > &InsertElts, unsigned OperandOffset) |
static bool | findBuildAggregate (Instruction *LastInsertInst, TargetTransformInfo *TTI, SmallVectorImpl< Value * > &BuildVectorOpds, SmallVectorImpl< Value * > &InsertElts) |
Recognize construction of vectors like ra = insertelement <4 x float> poison, float s0, i32 0 rb = insertelement <4 x float> ra, float s1, i32 1 rc = insertelement <4 x float> rb, float s2, i32 2 rd = insertelement <4 x float> rc, float s3, i32 3 starting from the last insertelement or insertvalue instruction. More... | |
static Value * | getReductionValue (const DominatorTree *DT, PHINode *P, BasicBlock *ParentBB, LoopInfo *LI) |
Try and get a reduction value from a phi node. More... | |
static bool | matchRdxBop (Instruction *I, Value *&V0, Value *&V1) |
static bool | tryToVectorizeHorReductionOrInstOperands (PHINode *P, Instruction *Root, BasicBlock *BB, BoUpSLP &R, TargetTransformInfo *TTI, ScalarEvolution &SE, const DataLayout &DL, const TargetLibraryInfo &TLI, const function_ref< bool(Instruction *, BoUpSLP &)> Vectorize) |
Attempt to reduce a horizontal reduction. More... | |
template<typename T > | |
static bool | tryToVectorizeSequence (SmallVectorImpl< T * > &Incoming, function_ref< unsigned(T *)> Limit, function_ref< bool(T *, T *)> Comparator, function_ref< bool(T *, T *)> AreCompatible, function_ref< bool(ArrayRef< T * >, bool)> TryToVectorizeHelper, bool LimitForRegisterSize) |
template<bool IsCompatibility> | |
static bool | compareCmp (Value *V, Value *V2, function_ref< bool(Instruction *)> IsDeleted) |
Compare two cmp instructions. More... | |
Variables | |
cl::opt< bool > | RunSLPVectorization ("vectorize-slp", cl::init(true), cl::Hidden, cl::desc("Run the SLP vectorization passes")) |
static cl::opt< int > | SLPCostThreshold ("slp-threshold", cl::init(0), cl::Hidden, cl::desc("Only vectorize if you gain more than this " "number ")) |
static cl::opt< bool > | ShouldVectorizeHor ("slp-vectorize-hor", cl::init(true), cl::Hidden, cl::desc("Attempt to vectorize horizontal reductions")) |
static cl::opt< bool > | ShouldStartVectorizeHorAtStore ("slp-vectorize-hor-store", cl::init(false), cl::Hidden, cl::desc("Attempt to vectorize horizontal reductions feeding into a store")) |
static cl::opt< int > | MaxVectorRegSizeOption ("slp-max-reg-size", cl::init(128), cl::Hidden, cl::desc("Attempt to vectorize for this register size in bits")) |
static cl::opt< unsigned > | MaxVFOption ("slp-max-vf", cl::init(0), cl::Hidden, cl::desc("Maximum SLP vectorization factor (0=unlimited)")) |
static cl::opt< int > | MaxStoreLookup ("slp-max-store-lookup", cl::init(32), cl::Hidden, cl::desc("Maximum depth of the lookup for consecutive stores.")) |
static cl::opt< int > | ScheduleRegionSizeBudget ("slp-schedule-budget", cl::init(100000), cl::Hidden, cl::desc("Limit the size of the SLP scheduling region per block")) |
Limits the size of scheduling regions in a block. More... | |
static cl::opt< int > | MinVectorRegSizeOption ("slp-min-reg-size", cl::init(128), cl::Hidden, cl::desc("Attempt to vectorize for this register size in bits")) |
static cl::opt< unsigned > | RecursionMaxDepth ("slp-recursion-max-depth", cl::init(12), cl::Hidden, cl::desc("Limit the recursion depth when building a vectorizable tree")) |
static cl::opt< unsigned > | MinTreeSize ("slp-min-tree-size", cl::init(3), cl::Hidden, cl::desc("Only vectorize small trees if they are fully vectorizable")) |
static cl::opt< int > | LookAheadMaxDepth ("slp-max-look-ahead-depth", cl::init(2), cl::Hidden, cl::desc("The maximum look-ahead depth for operand reordering scores")) |
static cl::opt< int > | RootLookAheadMaxDepth ("slp-max-root-look-ahead-depth", cl::init(2), cl::Hidden, cl::desc("The maximum look-ahead depth for searching best rooting option")) |
static cl::opt< bool > | ViewSLPTree ("view-slp-tree", cl::Hidden, cl::desc("Display the SLP trees with Graphviz")) |
static const unsigned | AliasedCheckLimit = 10 |
static const unsigned | MaxMemDepDistance = 160 |
static const int | MinScheduleRegionSize = 16 |
If the ScheduleRegionSizeBudget is exhausted, we allow small scheduling regions to be handled. More... | |
static const char | lv_name [] = "SLP Vectorizer" |
#define DEBUG_TYPE "SLP" |
Definition at line 108 of file SLPVectorizer.cpp.
#define SV_NAME "slp-vectorizer" |
Definition at line 107 of file SLPVectorizer.cpp.
|
static |
Shuffles Mask
in accordance with the given SubMask
.
Definition at line 675 of file SLPVectorizer.cpp.
References llvm::ArrayRef< T >::begin(), E, llvm::ArrayRef< T >::empty(), llvm::ArrayRef< T >::end(), I, llvm::BitmaskEnumDetail::Mask(), llvm::min(), llvm::ArrayRef< T >::size(), and llvm::UndefMaskElem.
Referenced by llvm::slpvectorizer::BoUpSLP::reorderTopToBottom().
VL
are constants (but not globals/constant expressions). Definition at line 252 of file SLPVectorizer.cpp.
References llvm::all_of(), and isConstant().
Referenced by llvm::slpvectorizer::BoUpSLP::isTreeTinyAndNotFullyVectorizable().
VL
are in the same block or false otherwise. Definition at line 231 of file SLPVectorizer.cpp.
References llvm::all_of(), BB, E, llvm::Instruction::getParent(), I, isVectorLikeInstWithConstOps(), and llvm::ArrayRef< T >::size().
VL
have the same type or false otherwise. Definition at line 599 of file SLPVectorizer.cpp.
References llvm::numbers::e, llvm::Intrinsic::getType(), i, and llvm::ArrayRef< T >::size().
Referenced by llvm::slpvectorizer::BoUpSLP::buildTree(), and llvm::slpvectorizer::BoUpSLP::getReorderingData().
|
static |
Checks if the provided operands of 2 cmp instructions are compatible, i.e.
compatible instructions or constants, or just some other regular values.
Definition at line 480 of file SLPVectorizer.cpp.
References getOpcode(), getSameOpcode(), and isConstant().
Referenced by getSameOpcode(), and isAlternateInstruction().
|
static |
Check if two insertelement instructions are from the same buildvector.
Definition at line 6828 of file SLPVectorizer.cpp.
References llvm::getInsertIndex(), llvm::Instruction::getParent(), llvm::InsertElementInst::getType(), and llvm::Value::hasOneUse().
Referenced by llvm::slpvectorizer::BoUpSLP::getTreeCost(), and llvm::slpvectorizer::BoUpSLP::vectorizeTree().
|
static |
Build shuffle mask for shuffle graph entries and lists of main and alternate operations operands.
Definition at line 5734 of file SLPVectorizer.cpp.
References llvm::ArrayRef< T >::empty(), I, llvm::inversePermutation(), llvm::BitmaskEnumDetail::Mask(), llvm::ArrayRef< T >::size(), llvm::transform(), and llvm::UndefMaskElem.
|
static |
Checks if the given array of loads can be represented as a vectorized, scatter or just simple gather.
Definition at line 3483 of file SLPVectorizer.cpp.
References llvm::all_of(), llvm::SmallVectorImpl< T >::clear(), llvm::count_if(), DL, llvm::doesNotNeedToBeScheduled(), llvm::TargetTransformInfo::forceScalarizeMaskedGather(), GEP, llvm::FixedVectorType::get(), llvm::getAlign(), llvm::LoopInfoBase< BlockT, LoopT >::getLoopFor(), getOpcode(), getParent(), llvm::getPointersDiff(), getSameOpcode(), llvm::Value::getType(), llvm::getUnderlyingObject(), if(), isConstant(), llvm::TargetTransformInfo::isLegalMaskedGather(), llvm::Loop::isLoopInvariant(), llvm::min(), P, llvm::SmallVectorImpl< T >::resize(), llvm::ArrayRef< T >::size(), and llvm::sortPtrAccesses().
bool clusterSortPtrAccesses | ( | ArrayRef< Value * > | VL, |
Type * | ElemTy, | ||
const DataLayout & | DL, | ||
ScalarEvolution & | SE, | ||
SmallVectorImpl< unsigned > & | SortedIndices | ||
) |
Definition at line 3572 of file SLPVectorizer.cpp.
References llvm::all_of(), llvm::any_of(), assert(), llvm::sampleprof::Base, llvm::SmallVectorImpl< T >::clear(), DL, llvm::ArrayRef< T >::drop_front(), llvm::enumerate(), llvm::getPointersDiff(), llvm::Value::getType(), int, P, llvm::MapVector< KeyT, ValueT, MapType, VectorType >::size(), llvm::ArrayRef< T >::size(), llvm::stable_sort(), X, and Y.
Referenced by llvm::slpvectorizer::BoUpSLP::findPartiallyOrderedLoads().
|
static |
Definition at line 9682 of file SLPVectorizer.cpp.
References llvm::MCID::Add, llvm::SmallPtrSetImpl< PtrType >::count(), I, llvm::PHINode::incoming_values(), Mul, llvm::MCID::Select, and SI.
Referenced by llvm::slpvectorizer::BoUpSLP::computeMinimumValueSizes().
|
static |
Compare two cmp instructions.
If IsCompatibility is true, the function returns true if the 2 cmps have same/swapped predicates and most compatible corresponding operands. If IsCompatibility is false, the function implements a strict weak ordering relation between two cmp instructions, returning true if the first instruction is "less" than the second, i.e. its predicate is less than the predicate of the second or the operand IDs are less than the operand IDs of the second cmp instruction.
Definition at line 11893 of file SLPVectorizer.cpp.
References E, llvm::User::getOperand(), getSameOpcode(), llvm::CmpInst::getSwappedPredicate(), llvm::Value::getType(), llvm::Type::getTypeID(), I, I1, isValidElementType(), llvm::min(), S, and llvm::NVPTX::PTXLdStInstCode::V2.
|
static |
Compute the cost of creating a vector of type VecTy
containing the extracted values from VL
.
Definition at line 5672 of file SLPVectorizer.cpp.
References llvm::SmallVectorImpl< T >::assign(), llvm::FixedVectorType::get(), llvm::VectorType::getElementType(), getExtractIndex(), llvm::TargetTransformInfo::getNumberOfParts(), llvm::FixedVectorType::getNumElements(), llvm::TargetTransformInfo::getShuffleCost(), llvm::BitmaskEnumDetail::Mask(), llvm::TargetTransformInfo::SK_PermuteSingleSrc, and llvm::UndefMaskElem.
|
static |
Definition at line 4390 of file SLPVectorizer.cpp.
References llvm::dbgs().
|
static |
Recognize construction of vectors like ra = insertelement <4 x float> poison, float s0, i32 0 rb = insertelement <4 x float> ra, float s1, i32 1 rc = insertelement <4 x float> rb, float s2, i32 2 rd = insertelement <4 x float> rc, float s3, i32 3 starting from the last insertelement or insertvalue instruction.
Also recognize homogeneous aggregates like {<2 x float>, <2 x float>}, {{float, float}, {float, float}}, [2 x {float, float}] and so on. See llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll for examples.
Assume LastInsertInst is of InsertElementInst or InsertValueInst type.
Definition at line 11548 of file SLPVectorizer.cpp.
References assert(), llvm::erase_value(), findBuildAggregate_rec(), getAggregateSize(), and llvm::SmallVectorImpl< T >::resize().
|
static |
Definition at line 11507 of file SLPVectorizer.cpp.
References llvm::getInsertIndex(), llvm::User::getOperand(), and llvm::Value::hasOneUse().
Referenced by findBuildAggregate().
|
static |
Order may have elements assigned special value (size) which is out of bounds.
Such indices only appear at positions which correspond to undef values (see canReuseExtract for details) and are used in order to prevent undef values from affecting operand ordering. The first loop below simply finds all unused indices, and then the next loop nest assigns these indices to the undef value positions. In the example below, Order has two undef positions, which are assigned the values 3 and 7 respectively: before: 6 9 5 4 9 2 1 0; after: 6 3 5 4 7 2 1 0.
Definition at line 703 of file SLPVectorizer.cpp.
References assert(), llvm::SmallBitVector::count(), llvm::SmallBitVector::find_first(), llvm::SmallBitVector::find_next(), I, llvm::SmallBitVector::none(), llvm::SmallBitVector::reset(), and llvm::SmallBitVector::set().
Referenced by llvm::slpvectorizer::BoUpSLP::getReorderingData(), llvm::slpvectorizer::BoUpSLP::reorderBottomToTop(), reorderOrder(), and llvm::slpvectorizer::BoUpSLP::reorderTopToBottom().
|
static |
Generates key/subkey pair for the given value to provide effective sorting of the values and better detection of the vectorizable values sequences.
The keys/subkeys can be used for better sorting of the values themselves (keys) and in values subgroups (subkeys).
Definition at line 4467 of file SLPVectorizer.cpp.
References llvm::CmpInst::getInversePredicate(), llvm::VFDatabase::getMappings(), llvm::CmpInst::getSwappedPredicate(), llvm::Value::getValueID(), llvm::getVectorIntrinsicIDForCall(), llvm::hash_combine(), llvm::hash_value(), I, llvm::Instruction::isIntDivRem(), llvm::isTriviallyVectorizable(), isUndefVector(), isValidForAlternation(), isVectorLikeInstWithConstOps(), llvm::SPII::Load, and llvm::min().
|
static |
Definition at line 11479 of file SLPVectorizer.cpp.
References llvm::ARM_PROC::IE, llvm::Type::isSingleValueType(), IV, llvm::None, and llvm::ARM_MB::ST.
Referenced by findBuildAggregate().
|
static |
Definition at line 609 of file SLPVectorizer.cpp.
References assert(), E, llvm::ExtractValueInst::getNumIndices(), llvm::ExtractValueInst::idx_begin(), and llvm::None.
Referenced by computeExtractCost().
|
static |
Definition at line 655 of file SLPVectorizer.cpp.
References llvm::MemoryLocation::get(), I, and SI.
|
static |
Try and get a reduction value from a phi node.
Given a phi node P
in a block ParentBB
, consider possible reductions if they come from either ParentBB
or a containing loop latch.
Definition at line 11582 of file SLPVectorizer.cpp.
References llvm::DominatorTree::dominates(), llvm::LoopInfoBase< BlockT, LoopT >::getLoopFor(), llvm::LoopBase< BlockT, LoopT >::getLoopLatch(), and P.
VL
described in InstructionsState, the Opcode that we suppose the whole list could be vectorized even if its structure is diverse. Definition at line 493 of file SLPVectorizer.cpp.
References llvm::any_of(), areCompatibleCmpOps(), assert(), llvm::CmpInst::BAD_ICMP_PREDICATE, E, llvm::CmpInst::getSwappedPredicate(), isValidForAlternation(), and llvm::ArrayRef< T >::size().
Referenced by areCompatibleCmpOps(), canVectorizeLoads(), compareCmp(), llvm::slpvectorizer::BoUpSLP::LookAheadHeuristics::getShallowScore(), and llvm::slpvectorizer::BoUpSLP::VLOperands::reorder().
|
static |
Definition at line 5637 of file SLPVectorizer.cpp.
References Arg, llvm::CallBase::args(), Arguments, llvm::VFShape::get(), llvm::FixedVectorType::get(), llvm::TargetTransformInfo::getCallInstrCost(), llvm::LinearPolySize< ElementCount >::getFixed(), llvm::TargetTransformInfo::getIntrinsicInstrCost(), llvm::FixedVectorType::getNumElements(), llvm::getVectorIntrinsicIDForCall(), llvm::VFDatabase::getVectorizedFunction(), llvm::CallBase::isNoBuiltin(), and llvm::TargetTransformInfo::TCK_RecipThroughput.
|
static |
Definition at line 628 of file SLPVectorizer.cpp.
References llvm::CallBase::arg_size(), llvm::MCID::Call, llvm::numbers::e, llvm::CallBase::getArgOperand(), llvm::Instruction::getOpcode(), llvm::LoadInst::getPointerOperand(), llvm::getVectorIntrinsicIDForCall(), i, llvm::isVectorIntrinsicWithScalarOpAtArg(), LLVM_FALLTHROUGH, llvm::SPII::Load, SI, and llvm::SPII::Store.
|
static |
Checks if the specified instruction I
is an alternate operation for the given MainOp
and AltOp
instructions.
Definition at line 5771 of file SLPVectorizer.cpp.
References areCompatibleCmpOps(), assert(), llvm::Instruction::getOpcode(), llvm::User::getOperand(), llvm::CmpInst::getSwappedPredicate(), and I.
|
static |
I
is commutative, handles CmpInst and BinaryOperator. Definition at line 276 of file SLPVectorizer.cpp.
References I.
Referenced by llvm::slpvectorizer::BoUpSLP::LookAheadHeuristics::getScoreAtLevelRec(), and llvm::FastISel::selectBinaryOp().
|
static |
Definition at line 207 of file SLPVectorizer.cpp.
Referenced by allConstant(), areCompatibleCmpOps(), canVectorizeLoads(), and isVectorLikeInstWithConstOps().
|
static |
Checks if the IE1
instruction is followed by the IE2
instruction in the buildvector sequence.
Definition at line 6871 of file SLPVectorizer.cpp.
References llvm::getInsertIndex(), llvm::Value::hasOneUse(), I1, and llvm_unreachable.
Referenced by llvm::slpvectorizer::BoUpSLP::getTreeCost(), and llvm::slpvectorizer::BoUpSLP::vectorizeTree().
|
static |
Checks if the vector of instructions can be represented as a shuffle, like: x0 = extractelement <4 x i8> x, i32 0 x3 = extractelement <4 x i8> x, i32 3 y1 = extractelement <4 x i8> y, i32 1 y2 = extractelement <4 x i8> y, i32 2 x0x0 = mul i8 x0, x0 x3x3 = mul i8 x3, x3 y1y1 = mul i8 y1, y1 y2y2 = mul i8 y2, y2 ins1 = insertelement <4 x i8> poison, i8 x0x0, i32 0 ins2 = insertelement <4 x i8> ins1, i8 x3x3, i32 1 ins3 = insertelement <4 x i8> ins2, i8 y1y1, i32 2 ins4 = insertelement <4 x i8> ins3, i8 y2y2, i32 3 ret <4 x i8> ins4 can be transformed into: %1 = shufflevector <4 x i8> x, <4 x i8> y, <4 x i32> <i32 0, i32 3, i32 5, i32 6> %2 = mul <4 x i8> %1, %1 ret <4 x i8> %2 We convert this initially to something like: x0 = extractelement <4 x i8> x, i32 0 x3 = extractelement <4 x i8> x, i32 3 y1 = extractelement <4 x i8> y, i32 1 y2 = extractelement <4 x i8> y, i32 2 %1 = insertelement <4 x i8> poison, i8 x0, i32 0 %2 = insertelement <4 x i8> %1, i8 x3, i32 1 %3 = insertelement <4 x i8> %2, i8 y1, i32 2 %4 = insertelement <4 x i8> %3, i8 y2, i32 3 %5 = mul <4 x i8> %4, %4 %6 = extractelement <4 x i8> %5, i32 0 ins1 = insertelement <4 x i8> poison, i8 %6, i32 0 %7 = extractelement <4 x i8> %5, i32 1 ins2 = insertelement <4 x i8> ins1, i8 %7, i32 1 %8 = extractelement <4 x i8> %5, i32 2 ins3 = insertelement <4 x i8> ins2, i8 %8, i32 2 %9 = extractelement <4 x i8> %5, i32 3 ins4 = insertelement <4 x i8> ins3, i8 %9, i32 3 ret <4 x i8> ins4 InstCombiner transforms this into a shuffle and vector mul Mask will return the Shuffle Mask equivalent to the extracted elements.
TODO: Can we split off and reuse the shuffle mask detection from TargetTransformInfo::getInstructionThroughput?
Definition at line 350 of file SLPVectorizer.cpp.
References E, llvm::ArrayRef< T >::end(), llvm::find_if(), I, isUndefVector(), llvm::BitmaskEnumDetail::Mask(), llvm::None, llvm::MCID::Select, llvm::ArrayRef< T >::size(), llvm::TargetTransformInfo::SK_PermuteSingleSrc, llvm::TargetTransformInfo::SK_PermuteTwoSrc, llvm::TargetTransformInfo::SK_Select, and llvm::UndefMaskElem.
|
static |
Definition at line 6641 of file SLPVectorizer.cpp.
References llvm::dbgs(), llvm::IntegerType::get(), llvm::Value::getContext(), llvm::Type::getIntegerBitWidth(), llvm::TargetTransformInfo::isTypeLegal(), LLVM_DEBUG, llvm::SPII::Load, llvm::PatternMatch::m_APInt(), llvm::PatternMatch::m_Or(), llvm::PatternMatch::m_Shl(), llvm::PatternMatch::m_Value(), llvm::PatternMatch::m_ZExt(), llvm::PatternMatch::match(), and llvm::APInt::urem().
Referenced by llvm::slpvectorizer::BoUpSLP::isLoadCombineCandidate(), and llvm::slpvectorizer::BoUpSLP::isLoadCombineReductionCandidate().
Chooses the correct key for scheduling data.
If Op
has the same (or alternate) opcode as OpValue
, the key is Op
. Otherwise the key is OpValue
.
Definition at line 456 of file SLPVectorizer.cpp.
|
static |
Definition at line 664 of file SLPVectorizer.cpp.
Referenced by llvm::DOTGraphTraits< DomTreeNode * >::getNodeLabel(), llvm::DOTGraphTraits< DOTFuncInfo * >::getNodeLabel(), llvm::X86TargetLowering::shouldReduceLoadWidth(), llvm::MBFIWrapper::view(), and llvm::MachineBlockFrequencyInfo::view().
VL
are identical or some of them are UndefValue. Definition at line 260 of file SLPVectorizer.cpp.
Referenced by llvm::PPC::get_VSPLTI_elt(), llvm::slpvectorizer::BoUpSLP::isTreeTinyAndNotFullyVectorizable(), and llvm::InstCombinerImpl::visitShuffleVectorInst().
Checks if the given value is actually an undefined constant vector.
Definition at line 288 of file SLPVectorizer.cpp.
References E, llvm::Constant::getAggregateElement(), and I.
Referenced by generateKeySubkey(), llvm::slpvectorizer::BoUpSLP::LookAheadHeuristics::getShallowScore(), isFixedVectorShuffle(), performExtractsShuffleAction(), and llvm::slpvectorizer::BoUpSLP::vectorizeTree().
|
static |
Predicate for the element types that the SLP vectorizer supports.
The most important types to filter here are those which are invalid in LLVM vectors. We also filter target-specific types which have absolutely no meaningful vectorization path, such as x86_fp80 and ppc_f128. This just avoids spending time checking the cost model and realizing that they will inevitably be scalarized.
Definition at line 200 of file SLPVectorizer.cpp.
References llvm::Type::isPPC_FP128Ty(), llvm::VectorType::isValidElementType(), and llvm::Type::isX86_FP80Ty().
Referenced by llvm::slpvectorizer::BoUpSLP::canMapToVector(), and compareCmp().
|
static |
Returns true if Opcode is allowed as part of the main/alternate instruction for SLP vectorization. An example of an unsupported opcode is SDIV, which can potentially cause UB if the "shuffled out" lane would result in division by zero.
Definition at line 468 of file SLPVectorizer.cpp.
References llvm::Instruction::isIntDivRem().
Referenced by generateKeySubkey(), and getSameOpcode().
|
static |
Checks if V is one of the vector-like instructions, i.e. undef, insertelement/extractelement with constant indices for a fixed vector type, or an extractvalue instruction.
Definition at line 214 of file SLPVectorizer.cpp.
References assert(), I, and isConstant().
Referenced by allSameBlock(), and generateKeySubkey().
|
static |
Definition at line 11626 of file SLPVectorizer.cpp.
References I, llvm::MIPatternMatch::m_BinOp(), llvm::PatternMatch::m_Value(), and llvm::PatternMatch::match().
Referenced by tryToVectorizeHorReductionOrInstOperands().
Definition at line 4448 of file SLPVectorizer.cpp.
References llvm::doesNotNeedToBeScheduled().
|
static |
Does the analysis of the provided shuffle masks and performs the requested actions on the vectors with the given shuffle masks.
It tries to do it in several steps.
Definition at line 6928 of file SLPVectorizer.cpp.
References assert(), llvm::sampleprof::Base, E, I, isUndefVector(), llvm::BitmaskEnumDetail::Mask(), and llvm::UndefMaskElem.
|
static |
Reorders the given Order according to the given Mask.
Order is the original order of the scalars. The procedure transforms the provided order in accordance with the given Mask. If the resulting Order is just an identity order, Order is cleared.
Definition at line 3389 of file SLPVectorizer.cpp.
References assert(), llvm::SmallVectorImpl< T >::assign(), llvm::SmallVectorImpl< T >::clear(), E, fixupOrderingIndices(), I, llvm::inversePermutation(), llvm::ShuffleVectorInst::isIdentityMask(), llvm::BitmaskEnumDetail::Mask(), reorderReuses(), llvm::SmallVectorImpl< T >::resize(), and llvm::UndefMaskElem.
Referenced by llvm::slpvectorizer::BoUpSLP::reorderBottomToTop(), and llvm::slpvectorizer::BoUpSLP::reorderTopToBottom().
|
static |
Reorders the given Reuses mask according to the given Mask.
Reuses contains the original mask for the scalars reused in the node. The procedure transforms this mask in accordance with the given Mask.
Definition at line 3375 of file SLPVectorizer.cpp.
References assert(), E, I, llvm::BitmaskEnumDetail::Mask(), llvm::SmallVectorImpl< T >::swap(), and llvm::UndefMaskElem.
Referenced by llvm::slpvectorizer::BoUpSLP::reorderBottomToTop(), reorderOrder(), and llvm::slpvectorizer::BoUpSLP::reorderTopToBottom().
STATISTIC(NumVectorInstructions, "Number of vector instructions generated")
|
static |
Attempt to reduce a horizontal reduction.
If it is legal to match a horizontal reduction feeding the phi node P with reduction operators Root (or one of its operands) in a basic block BB, then check if it can be done. If horizontal reduction is not found and root instruction is a binary operation, vectorization of the operands is attempted.
Definition at line 11655 of file SLPVectorizer.cpp.
References BB, DL, llvm::Instruction::getParent(), llvm::ARMII::HorizontalReduction, I, llvm::SmallPtrSetImpl< PtrType >::insert(), llvm::PatternMatch::m_Select(), llvm::PatternMatch::m_Value(), llvm::PatternMatch::match(), matchRdxBop(), llvm::User::operand_values(), P, RecursionMaxDepth, and ShouldVectorizeHor.
|
static |
Definition at line 11815 of file SLPVectorizer.cpp.
References llvm::SmallVectorImpl< T >::append(), llvm::dbgs(), E, LLVM_DEBUG, llvm::makeArrayRef(), and llvm::stable_sort().
|
static |
Definition at line 182 of file SLPVectorizer.cpp.
|
static |
|
static |
Definition at line 12441 of file SLPVectorizer.cpp.
|
static |
Definition at line 187 of file SLPVectorizer.cpp.
|
static |
|
static |
Referenced by llvm::slpvectorizer::BoUpSLP::BoUpSLP().
|
static |
Referenced by llvm::slpvectorizer::BoUpSLP::getMaximumVF().
If the ScheduleRegionSizeBudget is exhausted, we allow small scheduling regions to be handled.
Definition at line 191 of file SLPVectorizer.cpp.
|
static |
Referenced by llvm::slpvectorizer::BoUpSLP::BoUpSLP().
|
static |
Referenced by llvm::slpvectorizer::BoUpSLP::findBestRootPair().
cl::opt<bool> RunSLPVectorization("vectorize-slp", cl::init(true), cl::Hidden, cl::desc("Run the SLP vectorization passes")) |
Referenced by llvm::SLPVectorizerPass::runImpl().
|
static |
Limits the size of scheduling regions in a block.
It avoids long compile times for very large blocks where vector instructions are spread over a wide range. This limit is way higher than needed by real-world functions.
|
static |
|
static |
Referenced by tryToVectorizeHorReductionOrInstOperands().
|
static |
|
static |
Referenced by llvm::slpvectorizer::BoUpSLP::getTreeCost().