LLVM  14.0.0git
Classes | Namespaces | Macros | Functions | Variables
SLPVectorizer.cpp File Reference
#include "llvm/Transforms/Vectorize/SLPVectorizer.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/NoFolder.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/DOTGraphTraits.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/InstructionCost.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/InjectTLIMappings.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Vectorize.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <memory>
#include <set>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

Go to the source code of this file.

Classes

class  llvm::slpvectorizer::BoUpSLP
 Bottom Up SLP Vectorizer. More...
 
struct  llvm::slpvectorizer::BoUpSLP::EdgeInfo
 This structure holds any data we need about the edges being traversed during buildTree_rec(). More...
 
class  llvm::slpvectorizer::BoUpSLP::VLOperands
 A helper data structure to hold the operands of a vector of instructions. More...
 
struct  llvm::slpvectorizer::BoUpSLP::BlockScheduling::ReadyList
 
struct  llvm::GraphTraits< BoUpSLP * >
 
struct  llvm::GraphTraits< BoUpSLP * >::ChildIteratorType
 Add the VectorizableTree to the index iterator to be able to return TreeEntry pointers. More...
 
class  llvm::GraphTraits< BoUpSLP * >::nodes_iterator
 For the node iterator we just need to turn the TreeEntry iterator into a TreeEntry* iterator so that it dereferences to NodeRef. More...
 
struct  llvm::DOTGraphTraits< BoUpSLP * >
 

Namespaces

 llvm
 ---------------------— PointerInfo ------------------------------------—
 
 llvm::slpvectorizer
 A private "module" namespace for types and utilities used by this pass.
 

Macros

#define SV_NAME   "slp-vectorizer"
 
#define DEBUG_TYPE   "SLP"
 

Functions

 STATISTIC (NumVectorInstructions, "Number of vector instructions generated")
 
static bool isValidElementType (Type *Ty)
 Predicate for the element types that the SLP vectorizer supports. More...
 
static bool isConstant (Value *V)
 
static bool isVectorLikeInstWithConstOps (Value *V)
 Checks if V is one of vector-like instructions, i.e. More...
 
static bool allSameBlock (ArrayRef< Value * > VL)
 
static bool allConstant (ArrayRef< Value * > VL)
 
static bool isSplat (ArrayRef< Value * > VL)
 
static bool isCommutative (Instruction *I)
 
static Optional< TargetTransformInfo::ShuffleKind > isShuffle (ArrayRef< Value * > VL, SmallVectorImpl< int > &Mask)
 Checks if the vector of instructions can be represented as a shuffle, like: x0 = extractelement <4 x i8> x, i32 0 x3 = extractelement <4 x i8> x, i32 3 y1 = extractelement <4 x i8> y, i32 1 y2 = extractelement <4 x i8> y, i32 2 x0x0 = mul i8 x0, x0 x3x3 = mul i8 x3, x3 y1y1 = mul i8 y1, y1 y2y2 = mul i8 y2, y2 ins1 = insertelement <4 x i8> poison, i8 x0x0, i32 0 ins2 = insertelement <4 x i8> ins1, i8 x3x3, i32 1 ins3 = insertelement <4 x i8> ins2, i8 y1y1, i32 2 ins4 = insertelement <4 x i8> ins3, i8 y2y2, i32 3 ret <4 x i8> ins4 can be transformed into: %1 = shufflevector <4 x i8> x, <4 x i8> y, <4 x i32> <i32 0, i32 3, i32 5, i32 6> %2 = mul <4 x i8> %1, %1 ret <4 x i8> %2 We convert this initially to something like: x0 = extractelement <4 x i8> x, i32 0 x3 = extractelement <4 x i8> x, i32 3 y1 = extractelement <4 x i8> y, i32 1 y2 = extractelement <4 x i8> y, i32 2 %1 = insertelement <4 x i8> poison, i8 x0, i32 0 %2 = insertelement <4 x i8> %1, i8 x3, i32 1 %3 = insertelement <4 x i8> %2, i8 y1, i32 2 %4 = insertelement <4 x i8> %3, i8 y2, i32 3 %5 = mul <4 x i8> %4, %4 %6 = extractelement <4 x i8> %5, i32 0 ins1 = insertelement <4 x i8> poison, i8 %6, i32 0 %7 = extractelement <4 x i8> %5, i32 1 ins2 = insertelement <4 x i8> ins1, i8 %7, i32 1 %8 = extractelement <4 x i8> %5, i32 2 ins3 = insertelement <4 x i8> ins2, i8 %8, i32 2 %9 = extractelement <4 x i8> %5, i32 3 ins4 = insertelement <4 x i8> ins3, i8 %9, i32 3 ret <4 x i8> ins4 InstCombiner transforms this into a shuffle and vector mul Mask will return the Shuffle Mask equivalent to the extracted elements. More...
 
static Value * isOneOf (const InstructionsState &S, Value *Op)
 Chooses the correct key for scheduling data. More...
 
static bool isValidForAlternation (unsigned Opcode)
 
static InstructionsState getSameOpcode (ArrayRef< Value * > VL, unsigned BaseIndex=0)
 
static bool allSameType (ArrayRef< Value * > VL)
 
static Optional< unsigned > getExtractIndex (Instruction *E)
 
static bool InTreeUserNeedToExtract (Value *Scalar, Instruction *UserInst, TargetLibraryInfo *TLI)
 
static MemoryLocation getLocation (Instruction *I, AAResults *AA)
 
static bool isSimple (Instruction *I)
 
static void addMask (SmallVectorImpl< int > &Mask, ArrayRef< int > SubMask)
 Shuffles Mask in accordance with the given SubMask. More...
 
static void fixupOrderingIndices (SmallVectorImpl< unsigned > &Order)
 Order may have elements assigned special value (size) which is out of bounds. More...
 
static void llvm::inversePermutation (ArrayRef< unsigned > Indices, SmallVectorImpl< int > &Mask)
 
static Optional< int > llvm::getInsertIndex (Value *InsertInst, unsigned Offset)
 
static void llvm::reorderScalars (SmallVectorImpl< Value * > &Scalars, ArrayRef< int > Mask)
 Reorders the list of scalars in accordance with the given Order and then the Mask. More...
 
static void reorderReuses (SmallVectorImpl< int > &Reuses, ArrayRef< int > Mask)
 Reorders the given Reuses mask according to the given Mask. More...
 
static void reorderOrder (SmallVectorImpl< unsigned > &Order, ArrayRef< int > Mask)
 Reorders the given Order according to the given Mask. More...
 
static LoadsState canVectorizeLoads (ArrayRef< Value * > VL, const Value *VL0, const TargetTransformInfo &TTI, const DataLayout &DL, ScalarEvolution &SE, SmallVectorImpl< unsigned > &Order, SmallVectorImpl< Value * > &PointerOps)
 Checks if the given array of loads can be represented as a vectorized, scatter or just simple gather. More...
 
static std::pair< InstructionCost, InstructionCost > getVectorCallCosts (CallInst *CI, FixedVectorType *VecTy, TargetTransformInfo *TTI, TargetLibraryInfo *TLI)
 
static InstructionCost computeExtractCost (ArrayRef< Value * > VL, FixedVectorType *VecTy, TargetTransformInfo::ShuffleKind ShuffleKind, ArrayRef< int > Mask, TargetTransformInfo &TTI)
 Compute the cost of creating a vector of type VecTy containing the extracted values from VL. More...
 
static void buildSuffleEntryMask (ArrayRef< Value * > VL, ArrayRef< unsigned > ReorderIndices, ArrayRef< int > ReusesIndices, const function_ref< bool(Instruction *)> IsAltOp, SmallVectorImpl< int > &Mask, SmallVectorImpl< Value * > *OpScalars=nullptr, SmallVectorImpl< Value * > *AltScalars=nullptr)
 Build shuffle mask for shuffle graph entries and lists of main and alternate operations operands. More...
 
static bool isLoadCombineCandidateImpl (Value *Root, unsigned NumElts, TargetTransformInfo *TTI, bool MustMatchOrInst)
 
static bool collectValuesToDemote (Value *V, SmallPtrSetImpl< Value * > &Expr, SmallVectorImpl< Value * > &ToDemote, SmallVectorImpl< Value * > &Roots)
 
static Optional< unsigned > getAggregateSize (Instruction *InsertInst)
 
static bool findBuildAggregate_rec (Instruction *LastInsertInst, TargetTransformInfo *TTI, SmallVectorImpl< Value * > &BuildVectorOpds, SmallVectorImpl< Value * > &InsertElts, unsigned OperandOffset)
 
static bool findBuildAggregate (Instruction *LastInsertInst, TargetTransformInfo *TTI, SmallVectorImpl< Value * > &BuildVectorOpds, SmallVectorImpl< Value * > &InsertElts)
 Recognize construction of vectors like ra = insertelement <4 x float> poison, float s0, i32 0 rb = insertelement <4 x float> ra, float s1, i32 1 rc = insertelement <4 x float> rb, float s2, i32 2 rd = insertelement <4 x float> rc, float s3, i32 3 starting from the last insertelement or insertvalue instruction. More...
 
static Value * getReductionValue (const DominatorTree *DT, PHINode *P, BasicBlock *ParentBB, LoopInfo *LI)
 Try and get a reduction value from a phi node. More...
 
static bool matchRdxBop (Instruction *I, Value *&V0, Value *&V1)
 
static bool tryToVectorizeHorReductionOrInstOperands (PHINode *P, Instruction *Root, BasicBlock *BB, BoUpSLP &R, TargetTransformInfo *TTI, const function_ref< bool(Instruction *, BoUpSLP &)> Vectorize)
 Attempt to reduce a horizontal reduction. More...
 

Variables

cl::opt< bool > RunSLPVectorization ("vectorize-slp", cl::init(true), cl::Hidden, cl::desc("Run the SLP vectorization passes"))
 
static cl::opt< int > SLPCostThreshold ("slp-threshold", cl::init(0), cl::Hidden, cl::desc("Only vectorize if you gain more than this " "number "))
 
static cl::opt< bool > ShouldVectorizeHor ("slp-vectorize-hor", cl::init(true), cl::Hidden, cl::desc("Attempt to vectorize horizontal reductions"))
 
static cl::opt< bool > ShouldStartVectorizeHorAtStore ("slp-vectorize-hor-store", cl::init(false), cl::Hidden, cl::desc("Attempt to vectorize horizontal reductions feeding into a store"))
 
static cl::opt< int > MaxVectorRegSizeOption ("slp-max-reg-size", cl::init(128), cl::Hidden, cl::desc("Attempt to vectorize for this register size in bits"))
 
static cl::opt< unsigned > MaxVFOption ("slp-max-vf", cl::init(0), cl::Hidden, cl::desc("Maximum SLP vectorization factor (0=unlimited)"))
 
static cl::opt< int > MaxStoreLookup ("slp-max-store-lookup", cl::init(32), cl::Hidden, cl::desc("Maximum depth of the lookup for consecutive stores."))
 
static cl::opt< int > ScheduleRegionSizeBudget ("slp-schedule-budget", cl::init(100000), cl::Hidden, cl::desc("Limit the size of the SLP scheduling region per block"))
 Limits the size of scheduling regions in a block. More...
 
static cl::opt< int > MinVectorRegSizeOption ("slp-min-reg-size", cl::init(128), cl::Hidden, cl::desc("Attempt to vectorize for this register size in bits"))
 
static cl::opt< unsigned > RecursionMaxDepth ("slp-recursion-max-depth", cl::init(12), cl::Hidden, cl::desc("Limit the recursion depth when building a vectorizable tree"))
 
static cl::opt< unsigned > MinTreeSize ("slp-min-tree-size", cl::init(3), cl::Hidden, cl::desc("Only vectorize small trees if they are fully vectorizable"))
 
static cl::opt< int > LookAheadMaxDepth ("slp-max-look-ahead-depth", cl::init(2), cl::Hidden, cl::desc("The maximum look-ahead depth for operand reordering scores"))
 
static cl::opt< unsigned > LookAheadUsersBudget ("slp-look-ahead-users-budget", cl::init(2), cl::Hidden, cl::desc("The maximum number of users to visit while visiting the " "predecessors. This prevents compilation time increase."))
 
static cl::opt< bool > ViewSLPTree ("view-slp-tree", cl::Hidden, cl::desc("Display the SLP trees with Graphviz"))
 
static const unsigned AliasedCheckLimit = 10
 
static const unsigned MaxMemDepDistance = 160
 
static const int MinScheduleRegionSize = 16
 If the ScheduleRegionSizeBudget is exhausted, we allow small scheduling regions to be handled. More...
 
static const char lv_name [] = "SLP Vectorizer"
 

Macro Definition Documentation

◆ DEBUG_TYPE

#define DEBUG_TYPE   "SLP"

Definition at line 108 of file SLPVectorizer.cpp.

◆ SV_NAME

#define SV_NAME   "slp-vectorizer"

Definition at line 107 of file SLPVectorizer.cpp.

Function Documentation

◆ addMask()

static void addMask ( SmallVectorImpl< int > &  Mask,
ArrayRef< int >  SubMask 
)
static

◆ allConstant()

static bool allConstant ( ArrayRef< Value * >  VL)
static
Returns
True if all of the values in VL are constants (but not globals/constant expressions).

Definition at line 251 of file SLPVectorizer.cpp.

References llvm::all_of(), and isConstant().

◆ allSameBlock()

static bool allSameBlock ( ArrayRef< Value * >  VL)
static
Returns
true if all of the instructions in VL are in the same block or false otherwise.

Definition at line 230 of file SLPVectorizer.cpp.

References llvm::all_of(), BB, E, llvm::Instruction::getParent(), I, isVectorLikeInstWithConstOps(), and llvm::ArrayRef< T >::size().

Referenced by llvm::slpvectorizer::BoUpSLP::reorderBottomToTop(), and llvm::slpvectorizer::BoUpSLP::reorderTopToBottom().

◆ allSameType()

static bool allSameType ( ArrayRef< Value * >  VL)
static
Returns
true if all of the values in VL have the same type or false otherwise.

Definition at line 485 of file SLPVectorizer.cpp.

References llvm::numbers::e, llvm::Intrinsic::getType(), i, and llvm::ArrayRef< T >::size().

Referenced by llvm::slpvectorizer::BoUpSLP::buildTree(), llvm::slpvectorizer::BoUpSLP::reorderBottomToTop(), and llvm::slpvectorizer::BoUpSLP::reorderTopToBottom().

◆ buildSuffleEntryMask()

static void buildSuffleEntryMask ( ArrayRef< Value * >  VL,
ArrayRef< unsigned >  ReorderIndices,
ArrayRef< int >  ReusesIndices,
const function_ref< bool(Instruction *)>  IsAltOp,
SmallVectorImpl< int > &  Mask,
SmallVectorImpl< Value * > *  OpScalars = nullptr,
SmallVectorImpl< Value * > *  AltScalars = nullptr 
)
static

Build shuffle mask for shuffle graph entries and lists of main and alternate operations operands.

Definition at line 4145 of file SLPVectorizer.cpp.

References llvm::ArrayRef< T >::empty(), I, llvm::inversePermutation(), llvm::BitmaskEnumDetail::Mask(), llvm::ArrayRef< T >::size(), transform, and llvm::UndefMaskElem.

◆ canVectorizeLoads()

static LoadsState canVectorizeLoads ( ArrayRef< Value * >  VL,
const Value *  VL0,
const TargetTransformInfo &  TTI,
const DataLayout &  DL,
ScalarEvolution &  SE,
SmallVectorImpl< unsigned > &  Order,
SmallVectorImpl< Value * > &  PointerOps 
)
static

◆ collectValuesToDemote()

static bool collectValuesToDemote ( Value *  V,
SmallPtrSetImpl< Value * > &  Expr,
SmallVectorImpl< Value * > &  ToDemote,
SmallVectorImpl< Value * > &  Roots 
)
static

◆ computeExtractCost()

static InstructionCost computeExtractCost ( ArrayRef< Value * >  VL,
FixedVectorType *  VecTy,
TargetTransformInfo::ShuffleKind  ShuffleKind,
ArrayRef< int >  Mask,
TargetTransformInfo &  TTI 
)
static

◆ findBuildAggregate()

static bool findBuildAggregate ( Instruction *  LastInsertInst,
TargetTransformInfo *  TTI,
SmallVectorImpl< Value * > &  BuildVectorOpds,
SmallVectorImpl< Value * > &  InsertElts 
)
static

Recognize construction of vectors like

  ra = insertelement <4 x float> poison, float s0, i32 0
  rb = insertelement <4 x float> ra, float s1, i32 1
  rc = insertelement <4 x float> rb, float s2, i32 2
  rd = insertelement <4 x float> rc, float s3, i32 3

starting from the last insertelement or insertvalue instruction.

Also recognize homogeneous aggregates like {<2 x float>, <2 x float>}, {{float, float}, {float, float}}, [2 x {float, float}] and so on. See llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll for examples.

Assume LastInsertInst is of InsertElementInst or InsertValueInst type.

Returns
true if it matches.

Definition at line 8677 of file SLPVectorizer.cpp.

References assert(), llvm::erase_value(), findBuildAggregate_rec(), getAggregateSize(), and llvm::SmallVectorImpl< T >::resize().

◆ findBuildAggregate_rec()

static bool findBuildAggregate_rec ( Instruction *  LastInsertInst,
TargetTransformInfo *  TTI,
SmallVectorImpl< Value * > &  BuildVectorOpds,
SmallVectorImpl< Value * > &  InsertElts,
unsigned  OperandOffset 
)
static

◆ fixupOrderingIndices()

static void fixupOrderingIndices ( SmallVectorImpl< unsigned > &  Order)
static

Order may have elements assigned special value (size) which is out of bounds.

Such indices only appear at positions which correspond to undef values (see canReuseExtract for details) and are used in order to keep undef values from affecting operand ordering. The first loop below simply finds all unused indices and then the next loop nest assigns these indices to the undef value positions. As an example below Order has two undef positions and they have assigned values 3 and 7 respectively: before: 6 9 5 4 9 2 1 0 after: 6 3 5 4 7 2 1 0

Returns
Fixed ordering.

Definition at line 590 of file SLPVectorizer.cpp.

References assert(), E, llvm::SmallBitVector::find_first(), llvm::SmallBitVector::find_next(), I, and llvm::SmallBitVector::set().

Referenced by reorderOrder().

◆ getAggregateSize()

static Optional<unsigned> getAggregateSize ( Instruction *  InsertInst)
static

◆ getExtractIndex()

static Optional<unsigned> getExtractIndex ( Instruction *  E)
static
Returns
True if Extract{Value,Element} instruction extracts element Idx.

Definition at line 495 of file SLPVectorizer.cpp.

References assert(), E, llvm::ExtractValueInst::getNumIndices(), llvm::ExtractValueInst::idx_begin(), and llvm::None.

Referenced by computeExtractCost().

◆ getLocation()

static MemoryLocation getLocation ( Instruction *  I,
AAResults *  AA 
)
static
Returns
the AA location that is being accessed by the instruction.

Definition at line 541 of file SLPVectorizer.cpp.

References llvm::MemoryLocation::get(), I, and SI.

◆ getReductionValue()

static Value* getReductionValue ( const DominatorTree *  DT,
PHINode *  P,
BasicBlock *  ParentBB,
LoopInfo *  LI 
)
static

Try and get a reduction value from a phi node.

Given a phi node P in a block ParentBB, consider possible reductions if they come from either ParentBB or a containing loop latch.

Returns
A candidate reduction value if possible, or nullptr if not possible.

Definition at line 8713 of file SLPVectorizer.cpp.

References llvm::DominatorTree::dominates(), llvm::SLPVectorizerPass::DT, llvm::LoopInfoBase< BlockT, LoopT >::getLoopFor(), llvm::LoopBase< BlockT, LoopT >::getLoopLatch(), llvm::SLPVectorizerPass::LI, and P.

◆ getSameOpcode()

static InstructionsState getSameOpcode ( ArrayRef< Value * >  VL,
unsigned  BaseIndex = 0 
)
static
Returns
analysis of the Instructions in VL described in InstructionsState, the Opcode that we suppose the whole list could be vectorized even if its structure is diverse.

Definition at line 434 of file SLPVectorizer.cpp.

References llvm::any_of(), assert(), E, isValidForAlternation(), and llvm::ArrayRef< T >::size().

◆ getVectorCallCosts()

static std::pair<InstructionCost, InstructionCost> getVectorCallCosts ( CallInst *  CI,
FixedVectorType *  VecTy,
TargetTransformInfo *  TTI,
TargetLibraryInfo *  TLI 
)
static

◆ InTreeUserNeedToExtract()

static bool InTreeUserNeedToExtract ( Value *  Scalar,
Instruction *  UserInst,
TargetLibraryInfo *  TLI 
)
static

◆ isCommutative()

static bool isCommutative ( Instruction *  I)
static
Returns
True if I is commutative, handles CmpInst and BinaryOperator.

Definition at line 266 of file SLPVectorizer.cpp.

References I.

Referenced by llvm::FastISel::selectBinaryOp().

◆ isConstant()

static bool isConstant ( Value *  V)
static
Returns
True if the value is a constant (but not globals/constant expressions).

Definition at line 206 of file SLPVectorizer.cpp.

Referenced by allConstant(), and isVectorLikeInstWithConstOps().

◆ isLoadCombineCandidateImpl()

static bool isLoadCombineCandidateImpl ( Value *  Root,
unsigned  NumElts,
TargetTransformInfo *  TTI,
bool  MustMatchOrInst 
)
static

◆ isOneOf()

static Value* isOneOf ( const InstructionsState &  S,
Value *  Op 
)
static

Chooses the correct key for scheduling data.

If Op has the same (or alternate) opcode as OpValue, the key is Op. Otherwise the key is OpValue.

Definition at line 412 of file SLPVectorizer.cpp.

References I, and S.

◆ isShuffle()

static Optional<TargetTransformInfo::ShuffleKind> isShuffle ( ArrayRef< Value * >  VL,
SmallVectorImpl< int > &  Mask 
)
static

Checks if the vector of instructions can be represented as a shuffle, like:

  x0 = extractelement <4 x i8> x, i32 0
  x3 = extractelement <4 x i8> x, i32 3
  y1 = extractelement <4 x i8> y, i32 1
  y2 = extractelement <4 x i8> y, i32 2
  x0x0 = mul i8 x0, x0
  x3x3 = mul i8 x3, x3
  y1y1 = mul i8 y1, y1
  y2y2 = mul i8 y2, y2
  ins1 = insertelement <4 x i8> poison, i8 x0x0, i32 0
  ins2 = insertelement <4 x i8> ins1, i8 x3x3, i32 1
  ins3 = insertelement <4 x i8> ins2, i8 y1y1, i32 2
  ins4 = insertelement <4 x i8> ins3, i8 y2y2, i32 3
  ret <4 x i8> ins4

can be transformed into:

  %1 = shufflevector <4 x i8> x, <4 x i8> y, <4 x i32> <i32 0, i32 3, i32 5, i32 6>
  %2 = mul <4 x i8> %1, %1
  ret <4 x i8> %2

We convert this initially to something like:

  x0 = extractelement <4 x i8> x, i32 0
  x3 = extractelement <4 x i8> x, i32 3
  y1 = extractelement <4 x i8> y, i32 1
  y2 = extractelement <4 x i8> y, i32 2
  %1 = insertelement <4 x i8> poison, i8 x0, i32 0
  %2 = insertelement <4 x i8> %1, i8 x3, i32 1
  %3 = insertelement <4 x i8> %2, i8 y1, i32 2
  %4 = insertelement <4 x i8> %3, i8 y2, i32 3
  %5 = mul <4 x i8> %4, %4
  %6 = extractelement <4 x i8> %5, i32 0
  ins1 = insertelement <4 x i8> poison, i8 %6, i32 0
  %7 = extractelement <4 x i8> %5, i32 1
  ins2 = insertelement <4 x i8> ins1, i8 %7, i32 1
  %8 = extractelement <4 x i8> %5, i32 2
  ins3 = insertelement <4 x i8> ins2, i8 %8, i32 2
  %9 = extractelement <4 x i8> %5, i32 3
  ins4 = insertelement <4 x i8> ins3, i8 %9, i32 3
  ret <4 x i8> ins4

InstCombiner transforms this into a shuffle and vector mul. Mask will return the Shuffle Mask equivalent to the extracted elements.

TODO: Can we split off and reuse the shuffle mask detection from TargetTransformInfo::getInstructionThroughput?

Definition at line 320 of file SLPVectorizer.cpp.

References E, I, llvm::BitmaskEnumDetail::Mask(), llvm::None, llvm::MCID::Select, llvm::Check::Size, llvm::ArrayRef< T >::size(), llvm::TargetTransformInfo::SK_PermuteSingleSrc, llvm::TargetTransformInfo::SK_PermuteTwoSrc, llvm::TargetTransformInfo::SK_Select, and llvm::UndefMaskElem.

◆ isSimple()

static bool isSimple ( Instruction *  I)
static
Returns
True if the instruction is not a volatile or atomic load/store.

Definition at line 550 of file SLPVectorizer.cpp.

References I, MI, and SI.

Referenced by llvm::DOTGraphTraits< DOTFuncInfo * >::getNodeLabel(), llvm::X86TargetLowering::shouldReduceLoadWidth(), llvm::MBFIWrapper::view(), and llvm::MachineBlockFrequencyInfo::view().

◆ isSplat()

static bool isSplat ( ArrayRef< Value * >  VL)
static
Returns
True if all of the values in VL are identical.

Definition at line 258 of file SLPVectorizer.cpp.

References llvm::numbers::e, i, and llvm::ArrayRef< T >::size().

Referenced by llvm::PPC::get_VSPLTI_elt(), and llvm::InstCombinerImpl::visitShuffleVectorInst().

◆ isValidElementType()

static bool isValidElementType ( Type *  Ty)
static

Predicate for the element types that the SLP vectorizer supports.

The most important thing to filter here are types which are invalid in LLVM vectors. We also filter target specific types which have absolutely no meaningful vectorization path such as x86_fp80 and ppc_f128. This just avoids spending time checking the cost model and realizing that they will be inevitably scalarized.

Definition at line 199 of file SLPVectorizer.cpp.

References llvm::Type::isPPC_FP128Ty(), llvm::VectorType::isValidElementType(), and llvm::Type::isX86_FP80Ty().

Referenced by llvm::slpvectorizer::BoUpSLP::canMapToVector().

◆ isValidForAlternation()

static bool isValidForAlternation ( unsigned  Opcode)
static
Returns
true if Opcode is allowed as part of the main/alternate instruction for SLP vectorization.

Example of unsupported opcode is SDIV that can potentially cause UB if the "shuffled out" lane would result in division by zero.

Definition at line 424 of file SLPVectorizer.cpp.

References llvm::Instruction::isIntDivRem().

Referenced by getSameOpcode().

◆ isVectorLikeInstWithConstOps()

static bool isVectorLikeInstWithConstOps ( Value *  V)
static

Checks if V is one of vector-like instructions, i.e.

undef, insertelement/extractelement with constant indices for fixed vector type or extractvalue instruction.

Definition at line 213 of file SLPVectorizer.cpp.

References assert(), I, and isConstant().

Referenced by allSameBlock().

◆ matchRdxBop()

static bool matchRdxBop ( Instruction *  I,
Value *&  V0,
Value *&  V1 
)
static

◆ reorderOrder()

static void reorderOrder ( SmallVectorImpl< unsigned > &  Order,
ArrayRef< int >  Mask 
)
static

Reorders the given Order according to the given Mask.

Order - is the original order of the scalars. Procedure transforms the provided order in accordance with the given Mask. If the resulting Order is just an identity order, Order is cleared.

Definition at line 2647 of file SLPVectorizer.cpp.

References assert(), llvm::SmallVectorImpl< T >::assign(), llvm::SmallVectorImpl< T >::clear(), E, fixupOrderingIndices(), I, llvm::inversePermutation(), llvm::ShuffleVectorInst::isIdentityMask(), llvm::BitmaskEnumDetail::Mask(), reorderReuses(), llvm::SmallVectorImpl< T >::resize(), and llvm::UndefMaskElem.

◆ reorderReuses()

static void reorderReuses ( SmallVectorImpl< int > &  Reuses,
ArrayRef< int >  Mask 
)
static

Reorders the given Reuses mask according to the given Mask.

Reuses contains original mask for the scalars reused in the node. Procedure transform this mask in accordance with the given Mask.

Definition at line 2633 of file SLPVectorizer.cpp.

References assert(), E, I, llvm::BitmaskEnumDetail::Mask(), llvm::SmallVectorImpl< T >::swap(), and llvm::UndefMaskElem.

Referenced by reorderOrder().

◆ STATISTIC()

STATISTIC ( NumVectorInstructions  ,
"Number of vector instructions generated"  
)

◆ tryToVectorizeHorReductionOrInstOperands()

static bool tryToVectorizeHorReductionOrInstOperands ( PHINode *  P,
Instruction *  Root,
BasicBlock *  BB,
BoUpSLP &  R,
TargetTransformInfo *  TTI,
const function_ref< bool(Instruction *, BoUpSLP &)>  Vectorize 
)
static

Attempt to reduce a horizontal reduction.

If it is legal to match a horizontal reduction feeding the phi node P with reduction operators Root (or one of its operands) in a basic block BB, then check if it can be done. If horizontal reduction is not found and root instruction is a binary operation, vectorization of the operands is attempted.

Returns
true if a horizontal reduction was matched and reduced or operands of one of the binary instruction were vectorized.
false if a horizontal reduction was not matched (or not possible) or no vectorization of any binary operation feeding Root instruction was performed.

Definition at line 8786 of file SLPVectorizer.cpp.

References BB, llvm::Instruction::getParent(), llvm::ARMII::HorizontalReduction, I, llvm::SmallVectorImpl< T >::insert(), llvm::PatternMatch::m_Select(), llvm::PatternMatch::m_Value(), llvm::PatternMatch::match(), matchRdxBop(), llvm::User::operand_values(), P, RecursionMaxDepth, and ShouldVectorizeHor.

Variable Documentation

◆ AliasedCheckLimit

const unsigned AliasedCheckLimit = 10
static

Definition at line 181 of file SLPVectorizer.cpp.

◆ LookAheadMaxDepth

cl::opt<int> LookAheadMaxDepth("slp-max-look-ahead-depth", cl::init(2), cl::Hidden, cl::desc("The maximum look-ahead depth for operand reordering scores"))
static

◆ LookAheadUsersBudget

cl::opt<unsigned> LookAheadUsersBudget("slp-look-ahead-users-budget", cl::init(2), cl::Hidden, cl::desc("The maximum number of users to visit while visiting the " "predecessors. This prevents compilation time increase."))
static

◆ lv_name

const char lv_name[] = "SLP Vectorizer"
static

Definition at line 9456 of file SLPVectorizer.cpp.

◆ MaxMemDepDistance

const unsigned MaxMemDepDistance = 160
static

Definition at line 186 of file SLPVectorizer.cpp.

◆ MaxStoreLookup

cl::opt<int> MaxStoreLookup("slp-max-store-lookup", cl::init(32), cl::Hidden, cl::desc("Maximum depth of the lookup for consecutive stores."))
static

◆ MaxVectorRegSizeOption

cl::opt<int> MaxVectorRegSizeOption("slp-max-reg-size", cl::init(128), cl::Hidden, cl::desc("Attempt to vectorize for this register size in bits"))
static

◆ MaxVFOption

cl::opt<unsigned> MaxVFOption("slp-max-vf", cl::init(0), cl::Hidden, cl::desc("Maximum SLP vectorization factor (0=unlimited)"))
static

◆ MinScheduleRegionSize

const int MinScheduleRegionSize = 16
static

If the ScheduleRegionSizeBudget is exhausted, we allow small scheduling regions to be handled.

Definition at line 190 of file SLPVectorizer.cpp.

◆ MinTreeSize

cl::opt<unsigned> MinTreeSize("slp-min-tree-size", cl::init(3), cl::Hidden, cl::desc("Only vectorize small trees if they are fully vectorizable"))
static

◆ MinVectorRegSizeOption

cl::opt<int> MinVectorRegSizeOption("slp-min-reg-size", cl::init(128), cl::Hidden, cl::desc("Attempt to vectorize for this register size in bits"))
static

◆ RecursionMaxDepth

cl::opt<unsigned> RecursionMaxDepth("slp-recursion-max-depth", cl::init(12), cl::Hidden, cl::desc("Limit the recursion depth when building a vectorizable tree"))
static

◆ RunSLPVectorization

cl::opt<bool> RunSLPVectorization("vectorize-slp", cl::init(true), cl::Hidden, cl::desc("Run the SLP vectorization passes"))

◆ ScheduleRegionSizeBudget

cl::opt<int> ScheduleRegionSizeBudget("slp-schedule-budget", cl::init(100000), cl::Hidden, cl::desc("Limit the size of the SLP scheduling region per block"))
static

Limits the size of scheduling regions in a block.

It avoids long compile times for very large blocks where vector instructions are spread over a wide range. This limit is way higher than needed by real-world functions.

◆ ShouldStartVectorizeHorAtStore

cl::opt<bool> ShouldStartVectorizeHorAtStore("slp-vectorize-hor-store", cl::init(false), cl::Hidden, cl::desc( "Attempt to vectorize horizontal reductions feeding into a store"))
static

◆ ShouldVectorizeHor

cl::opt<bool> ShouldVectorizeHor("slp-vectorize-hor", cl::init(true), cl::Hidden, cl::desc("Attempt to vectorize horizontal reductions"))
static

◆ SLPCostThreshold

cl::opt<int> SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden, cl::desc("Only vectorize if you gain more than this " "number "))
static

◆ ViewSLPTree

cl::opt<bool> ViewSLPTree("view-slp-tree", cl::Hidden, cl::desc("Display the SLP trees with Graphviz"))
static