LLVM 19.0.0git
Classes | Namespaces | Macros | Functions | Variables
SLPVectorizer.cpp File Reference
#include "llvm/Transforms/Vectorize/SLPVectorizer.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/DOTGraphTraits.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/InstructionCost.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/InjectTLIMappings.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <memory>
#include <optional>
#include <set>
#include <string>
#include <tuple>
#include <utility>

Go to the source code of this file.

Classes

class  llvm::slpvectorizer::BoUpSLP
 Bottom Up SLP Vectorizer. More...
 
struct  llvm::slpvectorizer::BoUpSLP::EdgeInfo
 This structure holds any data we need about the edges being traversed during buildTree_rec(). More...
 
class  llvm::slpvectorizer::BoUpSLP::LookAheadHeuristics
 A helper class used for scoring candidates for two consecutive lanes. More...
 
class  llvm::slpvectorizer::BoUpSLP::VLOperands
 A helper data structure to hold the operands of a vector of instructions. More...
 
struct  llvm::GraphTraits< BoUpSLP * >
 
struct  llvm::GraphTraits< BoUpSLP * >::ChildIteratorType
 Add the VectorizableTree to the index iterator to be able to return TreeEntry pointers. More...
 
class  llvm::GraphTraits< BoUpSLP * >::nodes_iterator
 For the node iterator we just need to turn the TreeEntry iterator into a TreeEntry* iterator so that it dereferences to NodeRef. More...
 
struct  llvm::DOTGraphTraits< BoUpSLP * >
 
class  llvm::slpvectorizer::BoUpSLP::ShuffleCostEstimator
 Merges shuffle masks and emits final shuffle instruction, if required. More...
 
class  llvm::slpvectorizer::BoUpSLP::ShuffleInstructionBuilder
 Merges shuffle masks and emits final shuffle instruction, if required. More...
 

Namespaces

namespace  llvm
 This is an optimization pass for GlobalISel generic memory operations.
 
namespace  llvm::slpvectorizer
 A private "module" namespace for types and utilities used by this pass.
 

Macros

#define SV_NAME   "slp-vectorizer"
 
#define DEBUG_TYPE   "SLP"
 

Functions

 STATISTIC (NumVectorInstructions, "Number of vector instructions generated")
 
static bool isValidElementType (Type *Ty)
 Predicate for the element types that the SLP vectorizer supports.
 
static bool isConstant (Value *V)
 
static bool isVectorLikeInstWithConstOps (Value *V)
 Checks if V is one of vector-like instructions, i.e.
 
static std::string shortBundleName (ArrayRef< Value * > VL)
 Print a short descriptor of the instruction bundle suitable for debug output.
 
static bool allSameBlock (ArrayRef< Value * > VL)
 
static bool allConstant (ArrayRef< Value * > VL)
 
static bool isSplat (ArrayRef< Value * > VL)
 
static bool isCommutative (Instruction *I)
 
static std::optional< unsigned > getInsertIndex (const Value *InsertInst, unsigned Offset=0)
 
static SmallBitVector buildUseMask (int VF, ArrayRef< int > Mask, UseMask MaskArg)
 Prepares a use bitset for the given mask either for the first argument or for the second.
 
template<bool IsPoisonOnly = false>
static SmallBitVector isUndefVector (const Value *V, const SmallBitVector &UseMask={})
 Checks if the given value is actually an undefined constant vector.
 
static std::optional< TargetTransformInfo::ShuffleKind > isFixedVectorShuffle (ArrayRef< Value * > VL, SmallVectorImpl< int > &Mask)
 Checks if the vector of instructions can be represented as a shuffle, like: x0 = extractelement <4 x i8> x, i32 0 x3 = extractelement <4 x i8> x, i32 3 y1 = extractelement <4 x i8> y, i32 1 y2 = extractelement <4 x i8> y, i32 2 x0x0 = mul i8 x0, x0 x3x3 = mul i8 x3, x3 y1y1 = mul i8 y1, y1 y2y2 = mul i8 y2, y2 ins1 = insertelement <4 x i8> poison, i8 x0x0, i32 0 ins2 = insertelement <4 x i8> ins1, i8 x3x3, i32 1 ins3 = insertelement <4 x i8> ins2, i8 y1y1, i32 2 ins4 = insertelement <4 x i8> ins3, i8 y2y2, i32 3 ret <4 x i8> ins4 can be transformed into: %1 = shufflevector <4 x i8> x, <4 x i8> y, <4 x i32> <i32 0, i32 3, i32 5, i32 6> %2 = mul <4 x i8> %1, %1 ret <4 x i8> %2 Mask will return the Shuffle Mask equivalent to the extracted elements.
 
static std::optional< unsigned > getExtractIndex (Instruction *E)
 
static Value * isOneOf (const InstructionsState &S, Value *Op)
 Chooses the correct key for scheduling data.
 
static bool isValidForAlternation (unsigned Opcode)
 
static InstructionsState getSameOpcode (ArrayRef< Value * > VL, const TargetLibraryInfo &TLI, unsigned BaseIndex=0)
 
static bool areCompatibleCmpOps (Value *BaseOp0, Value *BaseOp1, Value *Op0, Value *Op1, const TargetLibraryInfo &TLI)
 Checks if the provided operands of 2 cmp instructions are compatible, i.e.
 
static bool isCmpSameOrSwapped (const CmpInst *BaseCI, const CmpInst *CI, const TargetLibraryInfo &TLI)
 
static bool allSameType (ArrayRef< Value * > VL)
 
static bool doesInTreeUserNeedToExtract (Value *Scalar, Instruction *UserInst, TargetLibraryInfo *TLI)
 
static MemoryLocation getLocation (Instruction *I)
 
static bool isSimple (Instruction *I)
 
static void addMask (SmallVectorImpl< int > &Mask, ArrayRef< int > SubMask, bool ExtendingManyInputs=false)
 Shuffles Mask in accordance with the given SubMask.
 
static void fixupOrderingIndices (MutableArrayRef< unsigned > Order)
 Order may have elements assigned special value (size) which is out of bounds.
 
static void llvm::inversePermutation (ArrayRef< unsigned > Indices, SmallVectorImpl< int > &Mask)
 
static void llvm::reorderScalars (SmallVectorImpl< Value * > &Scalars, ArrayRef< int > Mask)
 Reorders the list of scalars in accordance with the given Mask.
 
static bool llvm::areAllOperandsNonInsts (Value *V)
 Checks if the provided value does not require scheduling.
 
static bool llvm::isUsedOutsideBlock (Value *V)
 Checks if the provided value does not require scheduling.
 
static bool llvm::doesNotNeedToBeScheduled (Value *V)
 Checks if the specified value does not require scheduling.
 
static bool llvm::doesNotNeedToSchedule (ArrayRef< Value * > VL)
 Checks if the specified array of instructions does not require scheduling.
 
static void reorderReuses (SmallVectorImpl< int > &Reuses, ArrayRef< int > Mask)
 Reorders the given Reuses mask according to the given Mask.
 
static void reorderOrder (SmallVectorImpl< unsigned > &Order, ArrayRef< int > Mask, bool BottomOrder=false)
 Reorders the given Order according to the given Mask.
 
static bool arePointersCompatible (Value *Ptr1, Value *Ptr2, const TargetLibraryInfo &TLI, bool CompareOpcodes=true)
 
template<typename T >
static Align computeCommonAlignment (ArrayRef< Value * > VL)
 Calculates minimal alignment as a common alignment.
 
static bool isReverseOrder (ArrayRef< unsigned > Order)
 Check if Order represents reverse order.
 
static LoadsState canVectorizeLoads (ArrayRef< Value * > VL, const Value *VL0, const TargetTransformInfo &TTI, const DataLayout &DL, ScalarEvolution &SE, LoopInfo &LI, const TargetLibraryInfo &TLI, SmallVectorImpl< unsigned > &Order, SmallVectorImpl< Value * > &PointerOps)
 Checks if the given array of loads can be represented as a vectorized, scatter or just simple gather.
 
static bool clusterSortPtrAccesses (ArrayRef< Value * > VL, Type *ElemTy, const DataLayout &DL, ScalarEvolution &SE, SmallVectorImpl< unsigned > &SortedIndices)
 
static bool areTwoInsertFromSameBuildVector (InsertElementInst *VU, InsertElementInst *V, function_ref< Value *(InsertElementInst *)> GetBaseOperand)
 Check if two insertelement instructions are from the same buildvector.
 
static bool isRepeatedNonIdentityClusteredMask (ArrayRef< int > Mask, unsigned Sz)
 Checks if the given mask is a "clustered" mask with the same clusters of size Sz, which are not identity submasks.
 
static void combineOrders (MutableArrayRef< unsigned > Order, ArrayRef< unsigned > SecondaryOrder)
 
static LLVM_DUMP_METHOD void dumpOrder (const BoUpSLP::OrdersType &Order)
 
static bool needToScheduleSingleInstruction (ArrayRef< Value * > VL)
 
static std::pair< size_t, size_t > generateKeySubkey (Value *V, const TargetLibraryInfo *TLI, function_ref< hash_code(size_t, LoadInst *)> LoadsSubkeyGenerator, bool AllowAlternate)
 Generates key/subkey pair for the given value to provide effective sorting of the values and better detection of the vectorizable values sequences.
 
static bool isAlternateInstruction (const Instruction *I, const Instruction *MainOp, const Instruction *AltOp, const TargetLibraryInfo &TLI)
 Checks if the specified instruction I is an alternate operation for the given MainOp and AltOp instructions.
 
static std::pair< InstructionCost, InstructionCost > getVectorCallCosts (CallInst *CI, FixedVectorType *VecTy, TargetTransformInfo *TTI, TargetLibraryInfo *TLI)
 
static InstructionCost getShuffleCost (const TargetTransformInfo &TTI, TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask=std::nullopt, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, int Index=0, VectorType *SubTp=nullptr, ArrayRef< const Value * > Args=std::nullopt)
 Returns the cost of the shuffle instructions with the given Kind, vector type Tp and optional Mask.
 
static std::pair< InstructionCost, InstructionCost > getGEPCosts (const TargetTransformInfo &TTI, ArrayRef< Value * > Ptrs, Value *BasePtr, unsigned Opcode, TTI::TargetCostKind CostKind, Type *ScalarTy, VectorType *VecTy)
 Calculate the scalar and the vector costs from vectorizing set of GEPs.
 
static bool isLoadCombineCandidateImpl (Value *Root, unsigned NumElts, TargetTransformInfo *TTI, bool MustMatchOrInst)
 
static bool isFirstInsertElement (const InsertElementInst *IE1, const InsertElementInst *IE2)
 Checks if the IE1 instruction is followed by the IE2 instruction in the buildvector sequence.
 
template<typename T >
static T * performExtractsShuffleAction (MutableArrayRef< std::pair< T *, SmallVector< int > > > ShuffleMask, Value *Base, function_ref< unsigned(T *)> GetVF, function_ref< std::pair< T *, bool >(T *, ArrayRef< int >, bool)> ResizeAction, function_ref< T *(ArrayRef< int >, ArrayRef< T * >)> Action)
 Does the analysis of the provided shuffle masks and performs the requested actions on the vectors with the given shuffle masks.
 
static std::optional< unsigned > getAggregateSize (Instruction *InsertInst)
 
static void findBuildAggregate_rec (Instruction *LastInsertInst, TargetTransformInfo *TTI, SmallVectorImpl< Value * > &BuildVectorOpds, SmallVectorImpl< Value * > &InsertElts, unsigned OperandOffset)
 
static bool findBuildAggregate (Instruction *LastInsertInst, TargetTransformInfo *TTI, SmallVectorImpl< Value * > &BuildVectorOpds, SmallVectorImpl< Value * > &InsertElts)
 Recognize construction of vectors like ra = insertelement <4 x float> poison, float s0, i32 0 rb = insertelement <4 x float> ra, float s1, i32 1 rc = insertelement <4 x float> rb, float s2, i32 2 rd = insertelement <4 x float> rc, float s3, i32 3 starting from the last insertelement or insertvalue instruction.
 
static Instruction * getReductionInstr (const DominatorTree *DT, PHINode *P, BasicBlock *ParentBB, LoopInfo *LI)
 Try and get a reduction instruction from a phi node.
 
static bool matchRdxBop (Instruction *I, Value *&V0, Value *&V1)
 
static Instruction * tryGetSecondaryReductionRoot (PHINode *Phi, Instruction *Root)
 We could have an initial reduction that is not an add.
 
static Instruction * getNonPhiOperand (Instruction *I, PHINode *Phi)
 Returns the first operand of I that does not match Phi.
 
static bool isReductionCandidate (Instruction *I)
 Returns true if I is a candidate instruction for reduction vectorization.
 
template<typename T >
static bool tryToVectorizeSequence (SmallVectorImpl< T * > &Incoming, function_ref< bool(T *, T *)> Comparator, function_ref< bool(T *, T *)> AreCompatible, function_ref< bool(ArrayRef< T * >, bool)> TryToVectorizeHelper, bool MaxVFOnly, BoUpSLP &R)
 
template<bool IsCompatibility>
static bool compareCmp (Value *V, Value *V2, TargetLibraryInfo &TLI, const DominatorTree &DT)
 Compare two cmp instructions.
 

Variables

static cl::opt< bool > RunSLPVectorization ("vectorize-slp", cl::init(true), cl::Hidden, cl::desc("Run the SLP vectorization passes"))
 
static cl::opt< int > SLPCostThreshold ("slp-threshold", cl::init(0), cl::Hidden, cl::desc("Only vectorize if you gain more than this " "number "))
 
static cl::opt< bool > ShouldVectorizeHor ("slp-vectorize-hor", cl::init(true), cl::Hidden, cl::desc("Attempt to vectorize horizontal reductions"))
 
static cl::opt< bool > ShouldStartVectorizeHorAtStore ("slp-vectorize-hor-store", cl::init(false), cl::Hidden, cl::desc("Attempt to vectorize horizontal reductions feeding into a store"))
 
static cl::opt< bool > AllowHorRdxIdenityOptimization ("slp-optimize-identity-hor-reduction-ops", cl::init(true), cl::Hidden, cl::desc("Allow optimization of original scalar identity operations on " "matched horizontal reductions."))
 
static cl::opt< int > MaxVectorRegSizeOption ("slp-max-reg-size", cl::init(128), cl::Hidden, cl::desc("Attempt to vectorize for this register size in bits"))
 
static cl::opt< unsigned > MaxVFOption ("slp-max-vf", cl::init(0), cl::Hidden, cl::desc("Maximum SLP vectorization factor (0=unlimited)"))
 
static cl::opt< int > ScheduleRegionSizeBudget ("slp-schedule-budget", cl::init(100000), cl::Hidden, cl::desc("Limit the size of the SLP scheduling region per block"))
 Limits the size of scheduling regions in a block.
 
static cl::opt< int > MinVectorRegSizeOption ("slp-min-reg-size", cl::init(128), cl::Hidden, cl::desc("Attempt to vectorize for this register size in bits"))
 
static cl::opt< unsigned > RecursionMaxDepth ("slp-recursion-max-depth", cl::init(12), cl::Hidden, cl::desc("Limit the recursion depth when building a vectorizable tree"))
 
static cl::opt< unsigned > MinTreeSize ("slp-min-tree-size", cl::init(3), cl::Hidden, cl::desc("Only vectorize small trees if they are fully vectorizable"))
 
static cl::opt< int > LookAheadMaxDepth ("slp-max-look-ahead-depth", cl::init(2), cl::Hidden, cl::desc("The maximum look-ahead depth for operand reordering scores"))
 
static cl::opt< int > RootLookAheadMaxDepth ("slp-max-root-look-ahead-depth", cl::init(2), cl::Hidden, cl::desc("The maximum look-ahead depth for searching best rooting option"))
 
static cl::opt< unsigned > MinProfitableStridedLoads ("slp-min-strided-loads", cl::init(2), cl::Hidden, cl::desc("The minimum number of loads, which should be considered strided, " "if the stride is > 1 or is runtime value"))
 
static cl::opt< unsigned > MaxProfitableLoadStride ("slp-max-stride", cl::init(8), cl::Hidden, cl::desc("The maximum stride, considered to be profitable."))
 
static cl::opt< bool > ViewSLPTree ("view-slp-tree", cl::Hidden, cl::desc("Display the SLP trees with Graphviz"))
 
static const unsigned AliasedCheckLimit = 10
 
static const unsigned MaxMemDepDistance = 160
 
static const int MinScheduleRegionSize = 16
 If the ScheduleRegionSizeBudget is exhausted, we allow small scheduling regions to be handled.
 

Macro Definition Documentation

◆ DEBUG_TYPE

#define DEBUG_TYPE   "SLP"

Definition at line 106 of file SLPVectorizer.cpp.

◆ SV_NAME

#define SV_NAME   "slp-vectorizer"

Definition at line 105 of file SLPVectorizer.cpp.

Function Documentation

◆ addMask()

static void addMask ( SmallVectorImpl< int > &  Mask,
ArrayRef< int >  SubMask,
bool  ExtendingManyInputs = false 
)
static

Shuffles Mask in accordance with the given SubMask.

Parameters
ExtendingManyInputs  Supports reshuffling of the mask with not only one but two input vectors.

Definition at line 833 of file SLPVectorizer.cpp.

References assert(), llvm::ArrayRef< T >::begin(), E, llvm::ArrayRef< T >::empty(), llvm::ArrayRef< T >::end(), I, Idx, llvm::PoisonMaskElem, and llvm::ArrayRef< T >::size().

Referenced by llvm::slpvectorizer::BoUpSLP::getReorderingData(), and llvm::slpvectorizer::BoUpSLP::reorderTopToBottom().

◆ allConstant()

static bool allConstant ( ArrayRef< Value * >  VL)
static
Returns
True if all of the values in VL are constants (but not globals/constant expressions).

Definition at line 274 of file SLPVectorizer.cpp.

References llvm::all_of(), and isConstant().

Referenced by llvm::slpvectorizer::BoUpSLP::getReorderingData(), and llvm::slpvectorizer::BoUpSLP::isTreeTinyAndNotFullyVectorizable().

◆ allSameBlock()

static bool allSameBlock ( ArrayRef< Value * >  VL)
static
Returns
true if all of the instructions in VL are in the same block or false otherwise.

Definition at line 253 of file SLPVectorizer.cpp.

References llvm::all_of(), E, llvm::BasicBlock::getParent(), llvm::Instruction::getParent(), I, isVectorLikeInstWithConstOps(), and llvm::ArrayRef< T >::size().

◆ allSameType()

static bool allSameType ( ArrayRef< Value * >  VL)
static
Returns
true if all of the values in VL have the same type or false otherwise.

Definition at line 778 of file SLPVectorizer.cpp.

References llvm::all_of(), llvm::ArrayRef< T >::drop_front(), and llvm::ArrayRef< T >::front().

Referenced by llvm::slpvectorizer::BoUpSLP::buildTree(), and llvm::slpvectorizer::BoUpSLP::getReorderingData().

◆ areCompatibleCmpOps()

static bool areCompatibleCmpOps ( Value *  BaseOp0,
Value *  BaseOp1,
Value *  Op0,
Value *  Op1,
const TargetLibraryInfo &  TLI 
)
static

Checks if the provided operands of 2 cmp instructions are compatible, i.e.

compatible instructions or constants, or just some other regular values.

Definition at line 603 of file SLPVectorizer.cpp.

References getOpcode(), getSameOpcode(), and isConstant().

Referenced by isCmpSameOrSwapped().

◆ arePointersCompatible()

static bool arePointersCompatible ( Value *  Ptr1,
Value *  Ptr2,
const TargetLibraryInfo &  TLI,
bool  CompareOpcodes = true 
)
static

Definition at line 3962 of file SLPVectorizer.cpp.

References getOpcode(), getSameOpcode(), llvm::getUnderlyingObject(), and isConstant().

Referenced by canVectorizeLoads().

◆ areTwoInsertFromSameBuildVector()

static bool areTwoInsertFromSameBuildVector ( InsertElementInst *  VU,
InsertElementInst *  V,
function_ref< Value *(InsertElementInst *)>  GetBaseOperand 
)
static

◆ buildUseMask()

static SmallBitVector buildUseMask ( int  VF,
ArrayRef< int >  Mask,
UseMask  MaskArg 
)
static

Prepares a use bitset for the given mask either for the first argument or for the second.

Definition at line 364 of file SLPVectorizer.cpp.

References llvm::enumerate(), Idx, and llvm::PoisonMaskElem.

Referenced by performExtractsShuffleAction().

◆ canVectorizeLoads()

static LoadsState canVectorizeLoads ( ArrayRef< Value * >  VL,
const Value *  VL0,
const TargetTransformInfo &  TTI,
const DataLayout &  DL,
ScalarEvolution &  SE,
LoopInfo &  LI,
const TargetLibraryInfo &  TLI,
SmallVectorImpl< unsigned > &  Order,
SmallVectorImpl< Value * > &  PointerOps 
)
static

◆ clusterSortPtrAccesses()

static bool clusterSortPtrAccesses ( ArrayRef< Value * >  VL,
Type *  ElemTy,
const DataLayout &  DL,
ScalarEvolution &  SE,
SmallVectorImpl< unsigned > &  SortedIndices 
)
static

◆ combineOrders()

static void combineOrders ( MutableArrayRef< unsigned >  Order,
ArrayRef< unsigned >  SecondaryOrder 
)
static

◆ compareCmp()

template<bool IsCompatibility>
static bool compareCmp ( Value *  V,
Value *  V2,
TargetLibraryInfo &  TLI,
const DominatorTree &  DT 
)
static

Compare two cmp instructions.

If IsCompatibility is true, function returns true if 2 cmps have same/swapped predicates and most compatible corresponding operands. If IsCompatibility is false, function implements strict weak ordering relation between two cmp instructions, returning true if the first instruction is "less" than the second, i.e. its predicate is less than the predicate of the second or the operands IDs are less than the operands IDs of the second cmp instruction.

Definition at line 16160 of file SLPVectorizer.cpp.

References assert(), E, llvm::DomTreeNodeBase< NodeT >::getDFSNumIn(), llvm::DominatorTreeBase< NodeT, IsPostDom >::getNode(), llvm::User::getOperand(), getSameOpcode(), llvm::CmpInst::getSwappedPredicate(), llvm::Value::getType(), llvm::Type::getTypeID(), llvm::Value::getValueID(), I, and isValidElementType().

◆ computeCommonAlignment()

template<typename T >
static Align computeCommonAlignment ( ArrayRef< Value * >  VL)
static

Calculates minimal alignment as a common alignment.

Definition at line 3983 of file SLPVectorizer.cpp.

References llvm::ArrayRef< T >::drop_front(), llvm::ArrayRef< T >::front(), and llvm::getAlign().

◆ doesInTreeUserNeedToExtract()

static bool doesInTreeUserNeedToExtract ( Value *  Scalar,
Instruction *  UserInst,
TargetLibraryInfo *  TLI 
)
static
Returns
True if in-tree use also needs extract. This refers to possible scalar operand in vectorized instruction.

Definition at line 785 of file SLPVectorizer.cpp.

References llvm::any_of(), llvm::CallBase::args(), llvm::enumerate(), llvm::Instruction::getOpcode(), llvm::LoadInst::getPointerOperand(), and llvm::getVectorIntrinsicIDForCall().

Referenced by llvm::slpvectorizer::BoUpSLP::buildExternalUses().

◆ dumpOrder()

static LLVM_DUMP_METHOD void dumpOrder ( const BoUpSLP::OrdersType &  Order)
static

Definition at line 5337 of file SLPVectorizer.cpp.

References llvm::dbgs(), and Idx.

◆ findBuildAggregate()

static bool findBuildAggregate ( Instruction *  LastInsertInst,
TargetTransformInfo *  TTI,
SmallVectorImpl< Value * > &  BuildVectorOpds,
SmallVectorImpl< Value * > &  InsertElts 
)
static

Recognize construction of vectors like
  %ra = insertelement <4 x float> poison, float %s0, i32 0
  %rb = insertelement <4 x float> %ra, float %s1, i32 1
  %rc = insertelement <4 x float> %rb, float %s2, i32 2
  %rd = insertelement <4 x float> %rc, float %s3, i32 3
starting from the last insertelement or insertvalue instruction.

Also recognize homogeneous aggregates like {<2 x float>, <2 x float>}, {{float, float}, {float, float}}, [2 x {float, float}] and so on. See llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll for examples.

Assume LastInsertInst is of InsertElementInst or InsertValueInst type.

Returns
true if it matches.

Definition at line 15789 of file SLPVectorizer.cpp.

References assert(), llvm::SmallVectorBase< Size_T >::empty(), llvm::erase(), findBuildAggregate_rec(), getAggregateSize(), llvm::SmallVectorImpl< T >::resize(), and llvm::SmallVectorBase< Size_T >::size().

◆ findBuildAggregate_rec()

static void findBuildAggregate_rec ( Instruction *  LastInsertInst,
TargetTransformInfo *  TTI,
SmallVectorImpl< Value * > &  BuildVectorOpds,
SmallVectorImpl< Value * > &  InsertElts,
unsigned  OperandOffset 
)
static

◆ fixupOrderingIndices()

static void fixupOrderingIndices ( MutableArrayRef< unsigned >  Order)
static

Order may have elements assigned special value (size) which is out of bounds.

Such indices only appear at positions that correspond to undef values (see canReuseExtract for details) and are used to keep undef values from affecting operand ordering. The first loop below simply finds all unused indices, and the next loop nest then assigns these indices to the undef value positions. As an example, Order below has two undef positions, which are assigned the values 3 and 7 respectively:
before: 6 9 5 4 9 2 1 0
after:  6 3 5 4 7 2 1 0

Definition at line 870 of file SLPVectorizer.cpp.

References assert(), llvm::SmallBitVector::count(), llvm::SmallBitVector::find_first(), llvm::SmallBitVector::find_next(), I, Idx, llvm::SmallBitVector::none(), llvm::SmallBitVector::reset(), llvm::SmallBitVector::set(), and llvm::ArrayRef< T >::size().

Referenced by llvm::slpvectorizer::BoUpSLP::getReorderingData(), llvm::slpvectorizer::BoUpSLP::reorderBottomToTop(), reorderOrder(), and llvm::slpvectorizer::BoUpSLP::reorderTopToBottom().

◆ generateKeySubkey()

static std::pair< size_t, size_t > generateKeySubkey ( Value *  V,
const TargetLibraryInfo *  TLI,
function_ref< hash_code(size_t, LoadInst *)>  LoadsSubkeyGenerator,
bool  AllowAlternate 
)
static

Generates key/subkey pair for the given value to provide effective sorting of the values and better detection of the vectorizable values sequences.

The keys/subkeys can be used for better sorting of the values themselves (keys) and in values subgroups (subkeys).

Definition at line 5414 of file SLPVectorizer.cpp.

References llvm::SmallBitVector::all(), generateKeySubkey(), llvm::CmpInst::getInversePredicate(), llvm::VFDatabase::getMappings(), llvm::CmpInst::getSwappedPredicate(), llvm::getVectorIntrinsicIDForCall(), llvm::hash_combine(), llvm::hash_value(), I, llvm::Instruction::isIntDivRem(), llvm::isTriviallyVectorizable(), isUndefVector(), isValidForAlternation(), and isVectorLikeInstWithConstOps().

Referenced by generateKeySubkey().

◆ getAggregateSize()

static std::optional< unsigned > getAggregateSize ( Instruction *  InsertInst)
static

Definition at line 15722 of file SLPVectorizer.cpp.

References llvm::Type::isSingleValueType(), and IV.

Referenced by findBuildAggregate().

◆ getExtractIndex()

static std::optional< unsigned > getExtractIndex ( Instruction *  E)
static
Returns
True if Extract{Value,Element} instruction extracts element Idx.

Definition at line 523 of file SLPVectorizer.cpp.

References assert(), and E.

Referenced by llvm::slpvectorizer::BoUpSLP::getReorderingData().

◆ getGEPCosts()

static std::pair< InstructionCost, InstructionCost > getGEPCosts ( const TargetTransformInfo &  TTI,
ArrayRef< Value * >  Ptrs,
Value *  BasePtr,
unsigned  Opcode,
TTI::TargetCostKind  CostKind,
Type *  ScalarTy,
VectorType *  VecTy 
)
static

◆ getInsertIndex()

static std::optional< unsigned > getInsertIndex ( const Value *  InsertInst,
unsigned  Offset = 0 
)
static
Returns
inserting index of InsertElement or InsertValue instruction, using Offset as base offset for index.

Definition at line 311 of file SLPVectorizer.cpp.

References I, IV, and llvm::Offset.

Referenced by areTwoInsertFromSameBuildVector(), findBuildAggregate_rec(), llvm::slpvectorizer::BoUpSLP::getReorderingData(), llvm::slpvectorizer::BoUpSLP::getTreeCost(), isFirstInsertElement(), and llvm::slpvectorizer::BoUpSLP::vectorizeTree().

◆ getLocation()

static MemoryLocation getLocation ( Instruction *  I)
static
Returns
the AA location that is being accessed by the instruction.

Definition at line 811 of file SLPVectorizer.cpp.

References llvm::MemoryLocation::get(), and I.

◆ getNonPhiOperand()

static Instruction * getNonPhiOperand ( Instruction *  I,
PHINode *  Phi 
)
static

Returns the first operand of I that does not match Phi.

If operand is not an instruction it returns nullptr.

Definition at line 15911 of file SLPVectorizer.cpp.

References I, and matchRdxBop().

◆ getReductionInstr()

static Instruction * getReductionInstr ( const DominatorTree *  DT,
PHINode *  P,
BasicBlock *  ParentBB,
LoopInfo *  LI 
)
static

Try and get a reduction instruction from a phi node.

Given a phi node P in a block ParentBB, consider possible reductions if they come from either ParentBB or a containing loop latch.

Returns
A candidate reduction value if possible, or nullptr if not possible.

Definition at line 15823 of file SLPVectorizer.cpp.

References llvm::DominatorTree::dominates(), llvm::LoopInfoBase< BlockT, LoopT >::getLoopFor(), llvm::LoopBase< BlockT, LoopT >::getLoopLatch(), and P.

◆ getSameOpcode()

static InstructionsState getSameOpcode ( ArrayRef< Value * >  VL,
const TargetLibraryInfo &  TLI,
unsigned  BaseIndex 
)
static

◆ getShuffleCost()

static InstructionCost getShuffleCost ( const TargetTransformInfo &  TTI,
TTI::ShuffleKind  Kind,
VectorType *  Tp,
ArrayRef< int >  Mask = std::nullopt,
TTI::TargetCostKind  CostKind = TTI::TCK_RecipThroughput,
int  Index = 0,
VectorType *  SubTp = nullptr,
ArrayRef< const Value * >  Args = std::nullopt 
)
static

Returns the cost of the shuffle instructions with the given Kind, vector type Tp and optional Mask.

Adds SLP-specific cost estimation for insert subvector pattern.

Definition at line 7168 of file SLPVectorizer.cpp.

References CostKind, llvm::FixedVectorType::get(), llvm::TargetTransformInfo::getShuffleCost(), llvm::ShuffleVectorInst::isInsertSubvectorMask(), llvm::TargetTransformInfo::SK_InsertSubvector, llvm::TargetTransformInfo::SK_PermuteTwoSrc, and llvm::TargetTransformInfo::TCK_RecipThroughput.

Referenced by llvm::slpvectorizer::BoUpSLP::getTreeCost().

◆ getVectorCallCosts()

static std::pair< InstructionCost, InstructionCost > getVectorCallCosts ( CallInst *  CI,
FixedVectorType *  VecTy,
TargetTransformInfo *  TTI,
TargetLibraryInfo *  TLI 
)
static

◆ isAlternateInstruction()

static bool isAlternateInstruction ( const Instruction *  I,
const Instruction *  MainOp,
const Instruction *  AltOp,
const TargetLibraryInfo &  TLI 
)
static

Checks if the specified instruction I is an alternate operation for the given MainOp and AltOp instructions.

Definition at line 6783 of file SLPVectorizer.cpp.

References assert(), llvm::Instruction::getOpcode(), llvm::CmpInst::getSwappedPredicate(), I, isCmpSameOrSwapped(), and P.

◆ isCmpSameOrSwapped()

static bool isCmpSameOrSwapped ( const CmpInst *  BaseCI,
const CmpInst *  CI,
const TargetLibraryInfo &  TLI 
)
static
Returns
true if a compare instruction CI has similar "look" and same predicate as BaseCI, "as is" or with its operands and predicate swapped, false otherwise.

Definition at line 617 of file SLPVectorizer.cpp.

References areCompatibleCmpOps(), assert(), llvm::User::getOperand(), llvm::CmpInst::getPredicate(), llvm::CmpInst::getSwappedPredicate(), and llvm::Value::getType().

Referenced by getSameOpcode(), and isAlternateInstruction().

◆ isCommutative()

static bool isCommutative ( Instruction *  I)
static
Returns
True if I is commutative, handles CmpInst and BinaryOperator.

Definition at line 298 of file SLPVectorizer.cpp.

References I.

Referenced by llvm::slpvectorizer::BoUpSLP::LookAheadHeuristics::getScoreAtLevelRec(), and llvm::FastISel::selectBinaryOp().

◆ isConstant()

static bool isConstant ( Value *  V)
static
Returns
True if the value is a constant (but not globals/constant expressions).

Definition at line 218 of file SLPVectorizer.cpp.

Referenced by allConstant(), areCompatibleCmpOps(), and isVectorLikeInstWithConstOps().

◆ isFirstInsertElement()

static bool isFirstInsertElement ( const InsertElementInst IE1,
const InsertElementInst IE2 
)
static

Checks if the IE1 instruction is followed by the IE2 instruction in the buildvector sequence.

Definition at line 9105 of file SLPVectorizer.cpp.

References getInsertIndex(), llvm::Value::hasOneUse(), and llvm_unreachable.

Referenced by llvm::slpvectorizer::BoUpSLP::getTreeCost(), and llvm::slpvectorizer::BoUpSLP::vectorizeTree().

◆ isFixedVectorShuffle()

static std::optional< TargetTransformInfo::ShuffleKind > isFixedVectorShuffle ( ArrayRef< Value * >  VL,
SmallVectorImpl< int > &  Mask 
)
static

Checks if the vector of instructions can be represented as a shuffle, like:

  x0 = extractelement <4 x i8> x, i32 0
  x3 = extractelement <4 x i8> x, i32 3
  y1 = extractelement <4 x i8> y, i32 1
  y2 = extractelement <4 x i8> y, i32 2
  x0x0 = mul i8 x0, x0
  x3x3 = mul i8 x3, x3
  y1y1 = mul i8 y1, y1
  y2y2 = mul i8 y2, y2
  ins1 = insertelement <4 x i8> poison, i8 x0x0, i32 0
  ins2 = insertelement <4 x i8> ins1, i8 x3x3, i32 1
  ins3 = insertelement <4 x i8> ins2, i8 y1y1, i32 2
  ins4 = insertelement <4 x i8> ins3, i8 y2y2, i32 3
  ret <4 x i8> ins4

can be transformed into:

  %1 = shufflevector <4 x i8> x, <4 x i8> y, <4 x i32> <i32 0, i32 3, i32 5, i32 6>
  %2 = mul <4 x i8> %1, %1
  ret <4 x i8> %2

Mask will return the Shuffle Mask equivalent to the extracted elements.

TODO: Can we split off and reuse the shuffle mask detection from ShuffleVectorInst/getShuffleCost?

Definition at line 454 of file SLPVectorizer.cpp.

References E, llvm::ArrayRef< T >::end(), llvm::find_if(), I, Idx, isUndefVector(), llvm::PoisonMaskElem, Select, llvm::ArrayRef< T >::size(), Size, llvm::TargetTransformInfo::SK_PermuteSingleSrc, llvm::TargetTransformInfo::SK_PermuteTwoSrc, llvm::TargetTransformInfo::SK_Select, and Unknown.

◆ isLoadCombineCandidateImpl()

static bool isLoadCombineCandidateImpl ( Value Root,
unsigned  NumElts,
TargetTransformInfo TTI,
bool  MustMatchOrInst 
)
static

◆ isOneOf()

static Value * isOneOf ( const InstructionsState &  S,
Value Op 
)
static

Chooses the correct key for scheduling data.

If Op has the same (or alternate) opcode as OpValue, the key is Op. Otherwise the key is OpValue.

Definition at line 578 of file SLPVectorizer.cpp.

References I.

◆ isReductionCandidate()

static bool isReductionCandidate ( Instruction I)
static

Returns true if I is a candidate instruction for reduction vectorization.

Definition at line 15920 of file SLPVectorizer.cpp.

References I, llvm::PatternMatch::m_Select(), llvm::PatternMatch::m_Value(), llvm::PatternMatch::match(), and matchRdxBop().

◆ isRepeatedNonIdentityClusteredMask()

static bool isRepeatedNonIdentityClusteredMask ( ArrayRef< int >  Mask,
unsigned  Sz 
)
static

Checks if the given mask is a "clustered" mask with the same clusters of size Sz, which are not identity submasks.

Definition at line 4511 of file SLPVectorizer.cpp.

References E, I, and llvm::ShuffleVectorInst::isIdentityMask().

◆ isReverseOrder()

static bool isReverseOrder ( ArrayRef< unsigned >  Order)
static

Check if Order represents reverse order.

Definition at line 3991 of file SLPVectorizer.cpp.

References llvm::all_of(), llvm::ArrayRef< T >::empty(), llvm::enumerate(), and llvm::ArrayRef< T >::size().

◆ isSimple()

static bool isSimple ( Instruction I)
static

◆ isSplat()

static bool isSplat ( ArrayRef< Value * >  VL)
static
Returns
True if all of the values in VL are identical or some of them are UndefValue.

Definition at line 282 of file SLPVectorizer.cpp.

◆ isUndefVector()

template<bool IsPoisonOnly = false>
static SmallBitVector isUndefVector ( const Value V,
const SmallBitVector UseMask = {} 
)
static

Checks if the given value is actually an undefined constant vector.

Also, if the UseMask is not empty, tries to check if the non-masked elements actually mask the insertelement buildvector, if any.

Definition at line 385 of file SLPVectorizer.cpp.

Referenced by generateKeySubkey(), llvm::slpvectorizer::BoUpSLP::LookAheadHeuristics::getShallowScore(), isFixedVectorShuffle(), and performExtractsShuffleAction().

◆ isValidElementType()

static bool isValidElementType ( Type Ty)
static

Predicate for the element types that the SLP vectorizer supports.

The most important thing to filter here are types which are invalid in LLVM vectors. We also filter target specific types which have absolutely no meaningful vectorization path such as x86_fp80 and ppc_f128. This just avoids spending time checking the cost model and realizing that they will be inevitably scalarized.

Definition at line 211 of file SLPVectorizer.cpp.

References llvm::Type::isPPC_FP128Ty(), and llvm::Type::isX86_FP80Ty().

Referenced by llvm::slpvectorizer::BoUpSLP::canMapToVector(), compareCmp(), llvm::slpvectorizer::BoUpSLP::findReusedOrderedScalars(), and llvm::slpvectorizer::BoUpSLP::LookAheadHeuristics::getShallowScore().

◆ isValidForAlternation()

static bool isValidForAlternation ( unsigned  Opcode)
static
Returns
true if Opcode is allowed as part of the main/alternate instruction for SLP vectorization.

Example of unsupported opcode is SDIV that can potentially cause UB if the "shuffled out" lane would result in division by zero.

Definition at line 590 of file SLPVectorizer.cpp.

References llvm::Instruction::isIntDivRem().

Referenced by generateKeySubkey(), and getSameOpcode().

◆ isVectorLikeInstWithConstOps()

static bool isVectorLikeInstWithConstOps ( Value V)
static

Checks if V is one of the vector-like instructions, i.e. undef, insertelement/extractelement with constant indices for fixed vector type, or extractvalue instruction.

Definition at line 225 of file SLPVectorizer.cpp.

References assert(), I, and isConstant().

Referenced by allSameBlock(), generateKeySubkey(), and getSameOpcode().

◆ matchRdxBop()

static bool matchRdxBop ( Instruction I,
Value *&  V0,
Value *&  V1 
)
static

◆ needToScheduleSingleInstruction()

static bool needToScheduleSingleInstruction ( ArrayRef< Value * >  VL)
static
Returns
true if the specified list of values has only one instruction that requires scheduling, false otherwise.

Definition at line 5395 of file SLPVectorizer.cpp.

References llvm::doesNotNeedToBeScheduled().

◆ performExtractsShuffleAction()

template<typename T >
static T * performExtractsShuffleAction ( MutableArrayRef< std::pair< T *, SmallVector< int > > >  ShuffleMask,
Value Base,
function_ref< unsigned(T *)>  GetVF,
function_ref< std::pair< T *, bool >(T *, ArrayRef< int >, bool)>  ResizeAction,
function_ref< T *(ArrayRef< int >, ArrayRef< T * >)>  Action 
)
static

Does the analysis of the provided shuffle masks and performs the requested actions on the vectors with the given shuffle masks.

It tries to do it in several steps.

  1. If the Base vector is not an undef vector, resize the very first mask to have a common VF and perform the action for 2 input vectors (including the non-undef Base). Other shuffle masks are combined with the result of the first stage and processed as a shuffle of 2 elements.
  2. If the Base is an undef vector and there is only 1 shuffle mask, perform the action only for 1 vector with the given mask, if it is not the identity mask.
  3. If > 2 masks are used, perform the remaining shuffle actions for 2 vectors, combining the masks properly between the steps.

Definition at line 9160 of file SLPVectorizer.cpp.

References llvm::SmallBitVector::all(), assert(), llvm::sampleprof::Base, buildUseMask(), E, I, Idx, isUndefVector(), llvm::PoisonMaskElem, and llvm::SmallBitVector::test().

◆ reorderOrder()

static void reorderOrder ( SmallVectorImpl< unsigned > &  Order,
ArrayRef< int >  Mask,
bool  BottomOrder = false 
)
static

◆ reorderReuses()

static void reorderReuses ( SmallVectorImpl< int > &  Reuses,
ArrayRef< int >  Mask 
)
static

Reorders the given Reuses mask according to the given Mask.

Reuses contains the original mask for the scalars reused in the node. The procedure transforms this mask in accordance with the given Mask.

Definition at line 3737 of file SLPVectorizer.cpp.

References assert(), llvm::SmallVectorTemplateCommon< T, typename >::begin(), E, llvm::SmallVectorTemplateCommon< T, typename >::end(), I, llvm::PoisonMaskElem, llvm::SmallVectorBase< Size_T >::size(), and llvm::SmallVectorImpl< T >::swap().

Referenced by llvm::slpvectorizer::BoUpSLP::reorderBottomToTop(), and reorderOrder().

◆ shortBundleName()

static std::string shortBundleName ( ArrayRef< Value * >  VL)
static

Print a short descriptor of the instruction bundle suitable for debug output.

Definition at line 242 of file SLPVectorizer.cpp.

References llvm::raw_ostream::flush(), llvm::ArrayRef< T >::front(), OS, and llvm::ArrayRef< T >::size().

Referenced by llvm::slpvectorizer::BoUpSLP::getTreeCost().

◆ STATISTIC()

STATISTIC ( NumVectorInstructions  ,
"Number of vector instructions generated"   
)

◆ tryGetSecondaryReductionRoot()

static Instruction * tryGetSecondaryReductionRoot ( PHINode Phi,
Instruction Root 
)
static

We could have an initial reduction that is not an add.

  r *= v1 + v2 + v3 + v4

In such a case, start looking for a tree rooted in the first '+'. Returns the new root if found, which may be nullptr if not an instruction.

Definition at line 15893 of file SLPVectorizer.cpp.

References assert(), llvm::User::getOperand(), LHS, and RHS.

◆ tryToVectorizeSequence()

template<typename T >
static bool tryToVectorizeSequence ( SmallVectorImpl< T * > &  Incoming,
function_ref< bool(T *, T *)>  Comparator,
function_ref< bool(T *, T *)>  AreCompatible,
function_ref< bool(ArrayRef< T * >, bool)>  TryToVectorizeHelper,
bool  MaxVFOnly,
BoUpSLP R 
)
static

Variable Documentation

◆ AliasedCheckLimit

const unsigned AliasedCheckLimit = 10
static

Definition at line 193 of file SLPVectorizer.cpp.

◆ AllowHorRdxIdenityOptimization

cl::opt< bool > AllowHorRdxIdenityOptimization("slp-optimize-identity-hor-reduction-ops", cl::init(true), cl::Hidden, cl::desc("Allow optimization of original scalar identity operations on " "matched horizontal reductions.")) ( "slp-optimize-identity-hor-reduction-ops"  ,
cl::init(true)  ,
cl::Hidden  ,
cl::desc("Allow optimization of original scalar identity operations on " "matched horizontal reductions.")   
)
static

◆ LookAheadMaxDepth

cl::opt< int > LookAheadMaxDepth("slp-max-look-ahead-depth", cl::init(2), cl::Hidden, cl::desc("The maximum look-ahead depth for operand reordering scores")) ( "slp-max-look-ahead-depth"  ,
cl::init(2)  ,
cl::Hidden  ,
cl::desc("The maximum look-ahead depth for operand reordering scores")   
)
static

◆ MaxMemDepDistance

const unsigned MaxMemDepDistance = 160
static

Definition at line 198 of file SLPVectorizer.cpp.

◆ MaxProfitableLoadStride

cl::opt< unsigned > MaxProfitableLoadStride("slp-max-stride", cl::init(8), cl::Hidden, cl::desc("The maximum stride, considered to be profitable.")) ( "slp-max-stride"  ,
cl::init(8)  ,
cl::Hidden  ,
cl::desc("The maximum stride, considered to be profitable.")   
)
static

Referenced by canVectorizeLoads().

◆ MaxVectorRegSizeOption

cl::opt< int > MaxVectorRegSizeOption("slp-max-reg-size", cl::init(128), cl::Hidden, cl::desc("Attempt to vectorize for this register size in bits")) ( "slp-max-reg-size"  ,
cl::init(128)  ,
cl::Hidden  ,
cl::desc("Attempt to vectorize for this register size in bits")   
)
static

◆ MaxVFOption

cl::opt< unsigned > MaxVFOption("slp-max-vf", cl::init(0), cl::Hidden, cl::desc("Maximum SLP vectorization factor (0=unlimited)")) ( "slp-max-vf"  ,
cl::init(0)  ,
cl::Hidden  ,
cl::desc("Maximum SLP vectorization factor (0=unlimited)")   
)
static

◆ MinProfitableStridedLoads

cl::opt< unsigned > MinProfitableStridedLoads("slp-min-strided-loads", cl::init(2), cl::Hidden, cl::desc("The minimum number of loads, which should be considered strided, " "if the stride is > 1 or is runtime value")) ( "slp-min-strided-loads"  ,
cl::init(2)  ,
cl::Hidden  ,
cl::desc("The minimum number of loads, which should be considered strided, " "if the stride is > 1 or is runtime value")   
)
static

Referenced by canVectorizeLoads().

◆ MinScheduleRegionSize

const int MinScheduleRegionSize = 16
static

If the ScheduleRegionSizeBudget is exhausted, we allow small scheduling regions to be handled.

Definition at line 202 of file SLPVectorizer.cpp.

◆ MinTreeSize

cl::opt< unsigned > MinTreeSize("slp-min-tree-size", cl::init(3), cl::Hidden, cl::desc("Only vectorize small trees if they are fully vectorizable")) ( "slp-min-tree-size"  ,
cl::init(3)  ,
cl::Hidden  ,
cl::desc("Only vectorize small trees if they are fully vectorizable")   
)
static

◆ MinVectorRegSizeOption

cl::opt< int > MinVectorRegSizeOption("slp-min-reg-size", cl::init(128), cl::Hidden, cl::desc("Attempt to vectorize for this register size in bits")) ( "slp-min-reg-size"  ,
cl::init(128)  ,
cl::Hidden  ,
cl::desc("Attempt to vectorize for this register size in bits")   
)
static

◆ RecursionMaxDepth

cl::opt< unsigned > RecursionMaxDepth("slp-recursion-max-depth", cl::init(12), cl::Hidden, cl::desc("Limit the recursion depth when building a vectorizable tree")) ( "slp-recursion-max-depth"  ,
cl::init(12)  ,
cl::Hidden  ,
cl::desc("Limit the recursion depth when building a vectorizable tree")   
)
static

◆ RootLookAheadMaxDepth

cl::opt< int > RootLookAheadMaxDepth("slp-max-root-look-ahead-depth", cl::init(2), cl::Hidden, cl::desc("The maximum look-ahead depth for searching best rooting option")) ( "slp-max-root-look-ahead-depth"  ,
cl::init(2)  ,
cl::Hidden  ,
cl::desc("The maximum look-ahead depth for searching best rooting option")   
)
static

◆ RunSLPVectorization

cl::opt< bool > RunSLPVectorization("vectorize-slp", cl::init(true), cl::Hidden, cl::desc("Run the SLP vectorization passes")) ( "vectorize-slp"  ,
cl::init(true)  ,
cl::Hidden  ,
cl::desc("Run the SLP vectorization passes")   
)
static

◆ ScheduleRegionSizeBudget

cl::opt< int > ScheduleRegionSizeBudget("slp-schedule-budget", cl::init(100000), cl::Hidden, cl::desc("Limit the size of the SLP scheduling region per block")) ( "slp-schedule-budget"  ,
cl::init(100000)  ,
cl::Hidden  ,
cl::desc("Limit the size of the SLP scheduling region per block")   
)
static

Limits the size of scheduling regions in a block.

It avoids long compile times for very large blocks where vector instructions are spread over a wide range. This limit is way higher than needed by real-world functions.

◆ ShouldStartVectorizeHorAtStore

cl::opt< bool > ShouldStartVectorizeHorAtStore("slp-vectorize-hor-store", cl::init(false), cl::Hidden, cl::desc( "Attempt to vectorize horizontal reductions feeding into a store")) ( "slp-vectorize-hor-store"  ,
cl::init(false)  ,
cl::Hidden  ,
cl::desc( "Attempt to vectorize horizontal reductions feeding into a store")   
)
static

◆ ShouldVectorizeHor

cl::opt< bool > ShouldVectorizeHor("slp-vectorize-hor", cl::init(true), cl::Hidden, cl::desc("Attempt to vectorize horizontal reductions")) ( "slp-vectorize-hor"  ,
cl::init(true)  ,
cl::Hidden  ,
cl::desc("Attempt to vectorize horizontal reductions")   
)
static

◆ SLPCostThreshold

cl::opt< int > SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden, cl::desc("Only vectorize if you gain more than this " "number ")) ( "slp-threshold"  ,
cl::init(0)  ,
cl::Hidden  ,
cl::desc("Only vectorize if you gain more than this " "number ")   
)
static

◆ ViewSLPTree

cl::opt< bool > ViewSLPTree("view-slp-tree", cl::Hidden, cl::desc("Display the SLP trees with Graphviz")) ( "view-slp-tree"  ,
cl::Hidden  ,
cl::desc("Display the SLP trees with Graphviz")   
)
static