LLVM  6.0.0svn
Classes | Namespaces | Macros | Functions | Variables
SLPVectorizer.cpp File Reference
#include "llvm/Transforms/Vectorize/SLPVectorizer.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/NoFolder.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/DOTGraphTraits.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Vectorize.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <memory>
#include <set>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

Go to the source code of this file.

Classes

class  llvm::slpvectorizer::BoUpSLP
 Bottom Up SLP Vectorizer. More...
 
struct  llvm::slpvectorizer::BoUpSLP::BlockScheduling::ReadyList
 
struct  llvm::GraphTraits< BoUpSLP * >
 
struct  llvm::GraphTraits< GraphType >::ChildIteratorType
 Add the VectorizableTree to the index iterator to be able to return TreeEntry pointers. More...
 
struct  llvm::DOTGraphTraits< BoUpSLP * >
 

Namespaces

 llvm
 Compute iterated dominance frontiers using a linear time algorithm.
 
 llvm::slpvectorizer
 A private "module" namespace for types and utilities used by this pass.
 

Macros

#define SV_NAME   "slp-vectorizer"
 
#define DEBUG_TYPE   "SLP"
 

Functions

 STATISTIC (NumVectorInstructions, "Number of vector instructions generated")
 
static bool isValidElementType (Type *Ty)
 Predicate for the element types that the SLP vectorizer supports. More...
 
static bool allSameBlock (ArrayRef< Value *> VL)
 
static bool allConstant (ArrayRef< Value *> VL)
 
static bool isSplat (ArrayRef< Value *> VL)
 
static Optional< TargetTransformInfo::ShuffleKind > isShuffle (ArrayRef< Value *> VL)
 Checks if the vector of instructions can be represented as a shuffle, like: x0 = extractelement <4 x i8> x, i32 0 x3 = extractelement <4 x i8> x, i32 3 y1 = extractelement <4 x i8> y, i32 1 y2 = extractelement <4 x i8> y, i32 2 x0x0 = mul i8 x0, x0 x3x3 = mul i8 x3, x3 y1y1 = mul i8 y1, y1 y2y2 = mul i8 y2, y2 ins1 = insertelement <4 x i8> undef, i8 x0x0, i32 0 ins2 = insertelement <4 x i8> ins1, i8 x3x3, i32 1 ins3 = insertelement <4 x i8> ins2, i8 y1y1, i32 2 ins4 = insertelement <4 x i8> ins3, i8 y2y2, i32 3 ret <4 x i8> ins4 can be transformed into: %1 = shufflevector <4 x i8> x, <4 x i8> y, <4 x i32> <i32 0, i32 3, i32 5, i32 6> %2 = mul <4 x i8> %1, %1 ret <4 x i8> %2 We convert this initially to something like: x0 = extractelement <4 x i8> x, i32 0 x3 = extractelement <4 x i8> x, i32 3 y1 = extractelement <4 x i8> y, i32 1 y2 = extractelement <4 x i8> y, i32 2 %1 = insertelement <4 x i8> undef, i8 x0, i32 0 %2 = insertelement <4 x i8> %1, i8 x3, i32 1 %3 = insertelement <4 x i8> %2, i8 y1, i32 2 %4 = insertelement <4 x i8> %3, i8 y2, i32 3 %5 = mul <4 x i8> %4, %4 %6 = extractelement <4 x i8> %5, i32 0 ins1 = insertelement <4 x i8> undef, i8 %6, i32 0 %7 = extractelement <4 x i8> %5, i32 1 ins2 = insertelement <4 x i8> ins1, i8 %7, i32 1 %8 = extractelement <4 x i8> %5, i32 2 ins3 = insertelement <4 x i8> ins2, i8 %8, i32 2 %9 = extractelement <4 x i8> %5, i32 3 ins4 = insertelement <4 x i8> ins3, i8 %9, i32 3 ret <4 x i8> ins4 InstCombiner transforms this into a shuffle and vector mul. More...
 
static unsigned getAltOpcode (unsigned Op)
 
static bool isOdd (unsigned Value)
 
static bool sameOpcodeOrAlt (unsigned Opcode, unsigned AltOpcode, unsigned CheckedOpcode)
 
static Value * isOneOf (Value *OpValue, Value *Op)
 Chooses the correct key for scheduling data. More...
 
static RawInstructionsData getMainOpcode (ArrayRef< Value *> VL)
 Checks the list of the vectorized instructions VL and returns info about this list. More...
 
static InstructionsState getSameOpcode (ArrayRef< Value *> VL)
 
static bool allSameType (ArrayRef< Value *> VL)
 
static bool matchExtractIndex (Instruction *E, unsigned Idx, unsigned Opcode)
 
static bool InTreeUserNeedToExtract (Value *Scalar, Instruction *UserInst, TargetLibraryInfo *TLI)
 
static MemoryLocation getLocation (Instruction *I, AliasAnalysis *AA)
 
static bool isSimple (Instruction *I)
 
static bool shouldReorderOperands (int i, unsigned Opcode, Instruction &I, ArrayRef< Value *> Left, ArrayRef< Value *> Right, bool AllSameOpcodeLeft, bool AllSameOpcodeRight, bool SplatLeft, bool SplatRight, Value *&VLeft, Value *&VRight)
 
static bool collectValuesToDemote (Value *V, SmallPtrSetImpl< Value *> &Expr, SmallVectorImpl< Value *> &ToDemote, SmallVectorImpl< Value *> &Roots)
 
static bool hasValueBeenRAUWed (ArrayRef< Value *> VL, ArrayRef< WeakTrackingVH > VH, unsigned SliceBegin, unsigned SliceSize)
 Check that the Values in the slice in VL array are still existent in the WeakTrackingVH array. More...
 
static Value * createRdxShuffleMask (unsigned VecLen, unsigned NumEltsToRdx, bool IsPairwise, bool IsLeft, IRBuilder<> &Builder)
 Generate a shuffle mask to be used in a reduction tree. More...
 
static bool findBuildVector (InsertElementInst *LastInsertElem, SmallVectorImpl< Value *> &BuildVector, SmallVectorImpl< Value *> &BuildVectorOpds)
 Recognize construction of vectors like ra = insertelement <4 x float> undef, float s0, i32 0 rb = insertelement <4 x float> ra, float s1, i32 1 rc = insertelement <4 x float> rb, float s2, i32 2 rd = insertelement <4 x float> rc, float s3, i32 3 starting from the last insertelement instruction. More...
 
static bool findBuildAggregate (InsertValueInst *IV, SmallVectorImpl< Value *> &BuildVector, SmallVectorImpl< Value *> &BuildVectorOpds)
 Like findBuildVector, but looks for construction of aggregate. More...
 
static bool PhiTypeSorterFunc (Value *V, Value *V2)
 
static Value * getReductionValue (const DominatorTree *DT, PHINode *P, BasicBlock *ParentBB, LoopInfo *LI)
 Try and get a reduction value from a phi node. More...
 
static bool tryToVectorizeHorReductionOrInstOperands (PHINode *P, Instruction *Root, BasicBlock *BB, BoUpSLP &R, TargetTransformInfo *TTI, const function_ref< bool(Instruction *, BoUpSLP &)> Vectorize)
 Attempt to reduce a horizontal reduction. More...
 

Variables

static cl::opt< int > SLPCostThreshold ("slp-threshold", cl::init(0), cl::Hidden, cl::desc("Only vectorize if you gain more than this " "number "))
 
static cl::opt< bool > ShouldVectorizeHor ("slp-vectorize-hor", cl::init(true), cl::Hidden, cl::desc("Attempt to vectorize horizontal reductions"))
 
static cl::opt< bool > ShouldStartVectorizeHorAtStore ("slp-vectorize-hor-store", cl::init(false), cl::Hidden, cl::desc("Attempt to vectorize horizontal reductions feeding into a store"))
 
static cl::opt< int > MaxVectorRegSizeOption ("slp-max-reg-size", cl::init(128), cl::Hidden, cl::desc("Attempt to vectorize for this register size in bits"))
 
static cl::opt< int > ScheduleRegionSizeBudget ("slp-schedule-budget", cl::init(100000), cl::Hidden, cl::desc("Limit the size of the SLP scheduling region per block"))
 Limits the size of scheduling regions in a block. More...
 
static cl::opt< int > MinVectorRegSizeOption ("slp-min-reg-size", cl::init(128), cl::Hidden, cl::desc("Attempt to vectorize for this register size in bits"))
 
static cl::opt< unsigned > RecursionMaxDepth ("slp-recursion-max-depth", cl::init(12), cl::Hidden, cl::desc("Limit the recursion depth when building a vectorizable tree"))
 
static cl::opt< unsigned > MinTreeSize ("slp-min-tree-size", cl::init(3), cl::Hidden, cl::desc("Only vectorize small trees if they are fully vectorizable"))
 
static cl::opt< bool > ViewSLPTree ("view-slp-tree", cl::Hidden, cl::desc("Display the SLP trees with Graphviz"))
 
static const unsigned AliasedCheckLimit = 10
 
static const unsigned MaxMemDepDistance = 160
 
static const int MinScheduleRegionSize = 16
 If the ScheduleRegionSizeBudget is exhausted, we allow small scheduling regions to be handled. More...
 
static const char lv_name [] = "SLP Vectorizer"
 

Macro Definition Documentation

◆ DEBUG_TYPE

#define DEBUG_TYPE   "SLP"

Definition at line 105 of file SLPVectorizer.cpp.

◆ SV_NAME

#define SV_NAME   "slp-vectorizer"

Definition at line 104 of file SLPVectorizer.cpp.

Referenced by createRdxShuffleMask(), and hasValueBeenRAUWed().

Function Documentation

◆ allConstant()

static bool allConstant ( ArrayRef< Value *>  VL)
static
Returns
True if all of the values in VL are constants.

Definition at line 195 of file SLPVectorizer.cpp.

Referenced by llvm::slpvectorizer::BoUpSLP::canMapToVector(), and llvm::DOTGraphTraits< BoUpSLP * >::getNodeAttributes().

◆ allSameBlock()

static bool allSameBlock ( ArrayRef< Value *>  VL)
static
Returns
true if all of the instructions in VL are in the same block or false otherwise.

Definition at line 178 of file SLPVectorizer.cpp.

References llvm::dyn_cast(), llvm::Instruction::getParent(), I, and llvm::ArrayRef< T >::size().

Referenced by llvm::slpvectorizer::BoUpSLP::canMapToVector(), and llvm::DOTGraphTraits< BoUpSLP * >::getNodeAttributes().

◆ allSameType()

static bool allSameType ( ArrayRef< Value *>  VL)
static
Returns
true if all of the values in VL have the same type or false otherwise.

Definition at line 445 of file SLPVectorizer.cpp.

References getType(), and llvm::ArrayRef< T >::size().

Referenced by llvm::slpvectorizer::BoUpSLP::canMapToVector(), and llvm::DOTGraphTraits< BoUpSLP * >::getNodeAttributes().

◆ collectValuesToDemote()

static bool collectValuesToDemote ( Value *  V,
SmallPtrSetImpl< Value *> &  Expr,
SmallVectorImpl< Value *> &  ToDemote,
SmallVectorImpl< Value *> &  Roots 
)
static

◆ createRdxShuffleMask()

static Value* createRdxShuffleMask ( unsigned  VecLen,
unsigned  NumEltsToRdx,
bool  IsPairwise,
bool  IsLeft,
IRBuilder<> &  Builder 
)
static

Generate a shuffle mask to be used in a reduction tree.

Parameters
VecLen: The length of the vector to be reduced.
NumEltsToRdx: The number of elements that should be reduced in the vector.
IsPairwise: Whether the reduction is a pairwise or splitting reduction. A pairwise reduction will generate a mask of <0,2,...> or <1,3,...> while a splitting reduction will generate <2,3,undef,undef> for a vector of 4 and NumEltsToRdx = 2.
IsLeft: True will generate a mask of even elements, odd otherwise.

Definition at line 4769 of file SLPVectorizer.cpp.

References llvm::MCID::Add, llvm::SmallVectorImpl< T >::append(), assert(), B, llvm::SmallVectorTemplateCommon< T >::back(), clear(), llvm::MapVector< KeyT, ValueT, MapType, VectorType >::count(), llvm::IRBuilder< T, Inserter >::CreateBinOp(), llvm::IRBuilder< T, Inserter >::CreateExtractElement(), llvm::IRBuilder< T, Inserter >::CreateFCmpOGT(), llvm::IRBuilder< T, Inserter >::CreateFCmpOLT(), llvm::IRBuilder< T, Inserter >::CreateICmpSGT(), llvm::IRBuilder< T, Inserter >::CreateICmpSLT(), llvm::IRBuilder< T, Inserter >::CreateICmpUGT(), llvm::IRBuilder< T, Inserter >::CreateICmpULT(), llvm::IRBuilder< T, Inserter >::CreateSelect(), llvm::IRBuilder< T, Inserter >::CreateShuffleVector(), llvm::createSimpleTargetReduction(), llvm::dbgs(), DEBUG, llvm::dyn_cast(), llvm::SmallVectorBase::empty(), llvm::MapVector< KeyT, ValueT, MapType, VectorType >::end(), llvm::MapVector< KeyT, ValueT, MapType, VectorType >::erase(), llvm::MapVector< KeyT, ValueT, MapType, VectorType >::find(), llvm::VectorType::get(), llvm::ConstantVector::get(), llvm::UndefValue::get(), llvm::TargetTransformInfo::getArithmeticInstrCost(), llvm::TargetTransformInfo::getArithmeticReductionCost(), llvm::TargetTransformInfo::getCmpSelInstrCost(), llvm::Instruction::getDebugLoc(), llvm::IRBuilderBase::getInt32(), llvm::IRBuilderBase::getInt32Ty(), llvm::TargetTransformInfo::getMinMaxReductionCost(), llvm::Instruction::getOpcode(), llvm::User::getOperand(), llvm::Instruction::getParent(), llvm::Value::getType(), llvm::Value::hasNUses(), llvm::Value::hasOneUse(), I, llvm::is_contained(), llvm::Instruction::isAssociative(), isAssociative(), llvm::TargetTransformInfo::ReductionFlags::IsMaxOp, llvm::isPowerOf2_32(), llvm::TargetTransformInfo::ReductionFlags::IsSigned, isValidElementType(), Kind, llvm_unreachable, llvm::PatternMatch::m_BinOp(), llvm::PatternMatch::m_OrdFMax(), llvm::PatternMatch::m_OrdFMin(), llvm::PatternMatch::m_SMax(), llvm::PatternMatch::m_SMin(), llvm::PatternMatch::m_UMax(), 
llvm::PatternMatch::m_UMin(), llvm::PatternMatch::m_UnordFMax(), llvm::PatternMatch::m_UnordFMin(), llvm::PatternMatch::m_Value(), llvm::makeArrayRef(), llvm::CmpInst::makeCmpResultType(), llvm::PatternMatch::match(), llvm::TargetTransformInfo::ReductionFlags::NoNaN, llvm::User::operands(), llvm::operator!=(), llvm::operator==(), P, llvm::SmallVectorTemplateBase< T, isPodLike< T >::value >::pop_back(), llvm::PowerOf2Floor(), llvm::propagateIRFlags(), llvm::SmallVectorTemplateBase< T, isPodLike< T >::value >::push_back(), llvm::ArrayRef< T >::rbegin(), llvm::ArrayRef< T >::rend(), llvm::Value::replaceAllUsesWith(), llvm::MCID::Select, llvm::IRBuilderBase::SetCurrentDebugLocation(), llvm::FastMathFlags::setFast(), llvm::IRBuilderBase::setFastMathFlags(), SI, llvm::SmallVectorTemplateCommon< T >::size(), SLPCostThreshold, and SV_NAME.

◆ findBuildAggregate()

static bool findBuildAggregate ( InsertValueInst *  IV,
SmallVectorImpl< Value *> &  BuildVector,
SmallVectorImpl< Value *> &  BuildVectorOpds 
)
static

◆ findBuildVector()

static bool findBuildVector ( InsertElementInst *  LastInsertElem,
SmallVectorImpl< Value *> &  BuildVector,
SmallVectorImpl< Value *> &  BuildVectorOpds 
)
static

Recognize construction of vectors like

  ra = insertelement <4 x float> undef, float s0, i32 0
  rb = insertelement <4 x float> ra, float s1, i32 1
  rc = insertelement <4 x float> rb, float s2, i32 2
  rd = insertelement <4 x float> rc, float s3, i32 3

starting from the last insertelement instruction.

Returns true if it matches

Definition at line 5608 of file SLPVectorizer.cpp.

References llvm::SmallVectorTemplateCommon< T, typename >::begin(), llvm::dyn_cast(), llvm::SmallVectorTemplateCommon< T, typename >::end(), llvm::User::getOperand(), llvm::Value::hasOneUse(), llvm::SmallVectorTemplateBase< T, isPodLike >::push_back(), and llvm::reverse().

Referenced by tryToVectorizeHorReductionOrInstOperands().

◆ getAltOpcode()

static unsigned getAltOpcode ( unsigned  Op)
static
Returns
Opcode that can be clubbed with Op to create an alternate sequence which can later be merged as a ShuffleVector instruction.

Definition at line 325 of file SLPVectorizer.cpp.

References llvm::MCID::Add.

Referenced by getSameOpcode(), llvm::slpvectorizer::BoUpSLP::getTreeCost(), isOneOf(), and shouldReorderOperands().

◆ getLocation()

static MemoryLocation getLocation ( Instruction *  I,
AliasAnalysis *  AA 
)
static

◆ getMainOpcode()

static RawInstructionsData getMainOpcode ( ArrayRef< Value *>  VL)
static

Checks the list of the vectorized instructions VL and returns info about this list.

Definition at line 379 of file SLPVectorizer.cpp.

References llvm::dyn_cast(), E, llvm::Instruction::getOpcode(), I, and llvm::ArrayRef< T >::size().

Referenced by getSameOpcode().

◆ getReductionValue()

static Value* getReductionValue ( const DominatorTree *  DT,
PHINode *  P,
BasicBlock *  ParentBB,
LoopInfo *  LI 
)
static

Try and get a reduction value from a phi node.

Given a phi node P in a block ParentBB, consider possible reductions if they come from either ParentBB or a containing loop latch.

Returns
A candidate reduction value if possible, or nullptr if not possible.

Definition at line 5660 of file SLPVectorizer.cpp.

References llvm::DominatorTree::dominates(), llvm::dyn_cast(), llvm::PHINode::getIncomingBlock(), llvm::PHINode::getIncomingValue(), llvm::LoopInfoBase< BlockT, LoopT >::getLoopFor(), llvm::LoopBase< BlockT, LoopT >::getLoopLatch(), llvm::Instruction::getParent(), and getParent().

Referenced by tryToVectorizeHorReductionOrInstOperands().

◆ getSameOpcode()

static InstructionsState getSameOpcode ( ArrayRef< Value *>  VL)
static
Returns
An analysis of the instructions in VL, described by an InstructionsState: the opcode with which we suppose the whole list could be vectorized, even if its structure is diverse.

Definition at line 421 of file SLPVectorizer.cpp.

References E, getAltOpcode(), getMainOpcode(), I, isOdd(), and llvm::ArrayRef< T >::size().

Referenced by llvm::slpvectorizer::BoUpSLP::canMapToVector(), llvm::DOTGraphTraits< BoUpSLP * >::getNodeAttributes(), and shouldReorderOperands().

◆ hasValueBeenRAUWed()

static bool hasValueBeenRAUWed ( ArrayRef< Value *>  VL,
ArrayRef< WeakTrackingVH >  VH,
unsigned  SliceBegin,
unsigned  SliceSize 
)
static

Check that the Values in the slice in VL array are still existent in the WeakTrackingVH array.

Vectorization of part of the VL array may cause later values in the VL array to become invalid. We track when this has happened in the WeakTrackingVH array.

Definition at line 4348 of file SLPVectorizer.cpp.

References assert(), B, llvm::ArrayRef< T >::begin(), llvm::detail::DenseSetImpl< ValueT, SmallDenseMap< ValueT, detail::DenseSetEmpty, InlineBuckets, ValueInfoT, detail::DenseSetPair< ValueT > >, ValueInfoT >::count(), llvm::SetVector< T, Vector, Set >::count(), llvm::dbgs(), DEBUG, llvm::dyn_cast(), llvm::ArrayRef< T >::empty(), llvm::ArrayRef< T >::end(), llvm::lltok::equal, GEP, llvm::VectorType::get(), llvm::Instruction::getOpcode(), llvm::User::getOperand(), llvm::Instruction::getParent(), getType(), llvm::Value::getType(), llvm::GetUnderlyingObject(), llvm::Value::hasOneUse(), I, llvm::SetVector< T, Vector, Set >::insert(), llvm::detail::DenseSetImpl< ValueT, SmallDenseMap< ValueT, detail::DenseSetEmpty, InlineBuckets, ValueInfoT, detail::DenseSetPair< ValueT > >, ValueInfoT >::insert(), llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, InlineBuckets, KeyInfoT, BucketT >, KeyT, ValueT, KeyInfoT, BucketT >::insert(), llvm::isConsecutiveAccess(), llvm::isPowerOf2_32(), isValidElementType(), llvm::max(), llvm::None, P, llvm::PowerOf2Floor(), llvm::Type::print(), llvm::SmallVectorImpl< T >::resize(), llvm::reverse(), SI, llvm::SmallVectorTemplateCommon< T, typename >::size(), llvm::ArrayRef< T >::size(), llvm::AMDGPU::HSAMD::Kernel::Arg::Key::Size, llvm::ArrayRef< T >::slice(), SLPCostThreshold, llvm::raw_string_ostream::str(), and SV_NAME.

◆ InTreeUserNeedToExtract()

static bool InTreeUserNeedToExtract ( Value *  Scalar,
Instruction *  UserInst,
TargetLibraryInfo *  TLI 
)
static

◆ isOdd()

static bool isOdd ( unsigned  Value)
static

Definition at line 340 of file SLPVectorizer.cpp.

Referenced by getSameOpcode(), and shouldReorderOperands().

◆ isOneOf()

static Value* isOneOf ( Value *  OpValue,
Value *  Op 
)
static

Chooses the correct key for scheduling data.

If Op has the same (or alternate) opcode as OpValue, the key is Op. Otherwise the key is OpValue.

Definition at line 352 of file SLPVectorizer.cpp.

References llvm::dyn_cast(), getAltOpcode(), I, and sameOpcodeOrAlt().

Referenced by llvm::slpvectorizer::BoUpSLP::optimizeGatherSequence(), and shouldReorderOperands().

◆ isShuffle()

static Optional<TargetTransformInfo::ShuffleKind> isShuffle ( ArrayRef< Value *>  VL)
static

Checks if the vector of instructions can be represented as a shuffle, like:

  x0 = extractelement <4 x i8> x, i32 0
  x3 = extractelement <4 x i8> x, i32 3
  y1 = extractelement <4 x i8> y, i32 1
  y2 = extractelement <4 x i8> y, i32 2
  x0x0 = mul i8 x0, x0
  x3x3 = mul i8 x3, x3
  y1y1 = mul i8 y1, y1
  y2y2 = mul i8 y2, y2
  ins1 = insertelement <4 x i8> undef, i8 x0x0, i32 0
  ins2 = insertelement <4 x i8> ins1, i8 x3x3, i32 1
  ins3 = insertelement <4 x i8> ins2, i8 y1y1, i32 2
  ins4 = insertelement <4 x i8> ins3, i8 y2y2, i32 3
  ret <4 x i8> ins4

can be transformed into:

  %1 = shufflevector <4 x i8> x, <4 x i8> y, <4 x i32> <i32 0, i32 3, i32 5, i32 6>
  %2 = mul <4 x i8> %1, %1
  ret <4 x i8> %2

We convert this initially to something like:

  x0 = extractelement <4 x i8> x, i32 0
  x3 = extractelement <4 x i8> x, i32 3
  y1 = extractelement <4 x i8> y, i32 1
  y2 = extractelement <4 x i8> y, i32 2
  %1 = insertelement <4 x i8> undef, i8 x0, i32 0
  %2 = insertelement <4 x i8> %1, i8 x3, i32 1
  %3 = insertelement <4 x i8> %2, i8 y1, i32 2
  %4 = insertelement <4 x i8> %3, i8 y2, i32 3
  %5 = mul <4 x i8> %4, %4
  %6 = extractelement <4 x i8> %5, i32 0
  ins1 = insertelement <4 x i8> undef, i8 %6, i32 0
  %7 = extractelement <4 x i8> %5, i32 1
  ins2 = insertelement <4 x i8> ins1, i8 %7, i32 1
  %8 = extractelement <4 x i8> %5, i32 2
  ins3 = insertelement <4 x i8> ins2, i8 %8, i32 2
  %9 = extractelement <4 x i8> %5, i32 3
  ins4 = insertelement <4 x i8> ins3, i8 %9, i32 3
  ret <4 x i8> ins4

InstCombiner transforms this into a shuffle and vector mul.

Definition at line 250 of file SLPVectorizer.cpp.

References llvm::dyn_cast(), E, I, llvm::None, llvm::ArrayRef< T >::size(), llvm::AMDGPU::HSAMD::Kernel::Arg::Key::Size, llvm::TargetTransformInfo::SK_Alternate, llvm::TargetTransformInfo::SK_PermuteSingleSrc, and llvm::TargetTransformInfo::SK_PermuteTwoSrc.

Referenced by llvm::slpvectorizer::BoUpSLP::canMapToVector().

◆ isSimple()

static bool isSimple ( Instruction *  I)
static

◆ isSplat()

static bool isSplat ( ArrayRef< Value *>  VL)
static

◆ isValidElementType()

static bool isValidElementType ( Type *  Ty)
static

Predicate for the element types that the SLP vectorizer supports.

The most important thing to filter here are types which are invalid in LLVM vectors. We also filter target specific types which have absolutely no meaningful vectorization path such as x86_fp80 and ppc_fp128. This just avoids spending time checking the cost model and realizing that they will be inevitably scalarized.

Definition at line 171 of file SLPVectorizer.cpp.

References llvm::Type::isPPC_FP128Ty(), llvm::VectorType::isValidElementType(), and llvm::Type::isX86_FP80Ty().

Referenced by llvm::slpvectorizer::BoUpSLP::canMapToVector(), createRdxShuffleMask(), llvm::ArrayType::get(), llvm::VectorType::get(), llvm::PointerType::get(), llvm::VectorType::getDoubleElementsVectorType(), llvm::PointerType::getElementType(), llvm::DOTGraphTraits< BoUpSLP * >::getNodeAttributes(), hasValueBeenRAUWed(), llvm::PointerType::isLoadableOrStorableType(), and llvm::StructType::setBody().

◆ matchExtractIndex()

static bool matchExtractIndex ( Instruction *  E,
unsigned  Idx,
unsigned  Opcode 
)
static
Returns
True if Extract{Value,Element} instruction extracts element Idx.

Definition at line 455 of file SLPVectorizer.cpp.

References assert(), llvm::dyn_cast(), E, llvm::ExtractValueInst::getNumIndices(), llvm::User::getOperand(), llvm::ConstantInt::getZExtValue(), and llvm::ExtractValueInst::idx_begin().

Referenced by llvm::slpvectorizer::BoUpSLP::canMapToVector().

◆ PhiTypeSorterFunc()

static bool PhiTypeSorterFunc ( Value *  V,
Value *  V2 
)
static

Definition at line 5649 of file SLPVectorizer.cpp.

References llvm::Value::getType().

Referenced by tryToVectorizeHorReductionOrInstOperands().

◆ sameOpcodeOrAlt()

static bool sameOpcodeOrAlt ( unsigned  Opcode,
unsigned  AltOpcode,
unsigned  CheckedOpcode 
)
static

◆ shouldReorderOperands()

static bool shouldReorderOperands ( int  i,
unsigned  Opcode,
Instruction &  I,
ArrayRef< Value *>  Left,
ArrayRef< Value *>  Right,
bool  AllSameOpcodeLeft,
bool  AllSameOpcodeRight,
bool  SplatLeft,
bool  SplatRight,
Value *&  VLeft,
Value *&  VRight 
)
static

Definition at line 2546 of file SLPVectorizer.cpp.

References llvm::MCID::Add, llvm::PHINode::addIncoming(), llvm::all_of(), AS, assert(), llvm::ArrayRef< T >::back(), llvm::ArrayRef< T >::begin(), llvm::MCID::Call, llvm::dbgs(), DEBUG, llvm::dyn_cast(), llvm::ArrayRef< T >::empty(), llvm::ArrayRef< T >::end(), F(), llvm::VectorType::get(), llvm::PointerType::get(), llvm::ConstantVector::get(), llvm::UndefValue::get(), llvm::LoadInst::getAlignment(), llvm::StoreInst::getAlignment(), getAltOpcode(), llvm::CallInst::getArgOperand(), llvm::CallInst::getCalledFunction(), llvm::Instruction::getDebugLoc(), llvm::Intrinsic::getDeclaration(), llvm::BasicBlock::getFirstNonPHI(), llvm::PHINode::getIncomingBlock(), llvm::PHINode::getIncomingValueForBlock(), llvm::Function::getIntrinsicID(), llvm::ilist_node_impl< OptionsT >::getIterator(), llvm::CallInst::getNumArgOperands(), llvm::SequentialType::getNumElements(), llvm::PHINode::getNumIncomingValues(), llvm::Instruction::getOpcode(), llvm::CastInst::getOpcode(), llvm::User::getOperand(), llvm::OperandBundleUser< InstrTy, OpIteratorTy >::getOperandBundlesAsDefs(), llvm::Instruction::getParent(), getParent(), llvm::LoadInst::getPointerAddressSpace(), llvm::StoreInst::getPointerAddressSpace(), llvm::LoadInst::getPointerOperand(), llvm::StoreInst::getPointerOperand(), llvm::Type::getPointerTo(), llvm::PPC::getPredicate(), getSameOpcode(), llvm::BasicBlock::getTerminator(), llvm::Value::getType(), llvm::StoreInst::getValueOperand(), llvm::getVectorIntrinsicIDForCall(), llvm::hasVectorInstrinsicScalarOpd(), I, llvm::SmallSet< T, N, C >::insert(), llvm::Instruction::isBinaryOp(), llvm::Instruction::isCommutative(), llvm::isConsecutiveAccess(), isOdd(), isOneOf(), llvm::AArch64CC::LE, llvm_unreachable, llvm::SPII::Load, llvm::make_range(), llvm::BitmaskEnumDetail::Mask(), llvm::Intrinsic::not_intrinsic, llvm::propagateIRFlags(), llvm::propagateMetadata(), llvm::SmallVectorTemplateBase< T, isPodLike >::push_back(), llvm::SmallVectorTemplateBase< T, isPodLike< T >::value 
>::push_back(), llvm::RISCVFenceField::R, sameOpcodeOrAlt(), llvm::MCID::Select, llvm::LoadInst::setAlignment(), llvm::StoreInst::setAlignment(), SI, llvm::ArrayRef< T >::size(), llvm::SPII::Store, std::swap(), and llvm::RegState::Undef.

◆ STATISTIC()

STATISTIC ( NumVectorInstructions  ,
"Number of vector instructions generated"   
)

◆ tryToVectorizeHorReductionOrInstOperands()

static bool tryToVectorizeHorReductionOrInstOperands ( PHINode *  P,
Instruction *  Root,
BasicBlock *  BB,
BoUpSLP &  R,
TargetTransformInfo *  TTI,
const function_ref< bool(Instruction *, BoUpSLP &)>  Vectorize 
)
static

Attempt to reduce a horizontal reduction.

If it is legal to match a horizontal reduction feeding the phi node P with reduction operators Root (or one of its operands) in a basic block BB, then check if it can be done. If horizontal reduction is not found and root instruction is a binary operation, vectorization of the operands is attempted.

Returns
true if a horizontal reduction was matched and reduced or operands of one of the binary instructions were vectorized.
false if a horizontal reduction was not matched (or not possible) or no vectorization of any binary operation feeding Root instruction was performed.

Definition at line 5716 of file SLPVectorizer.cpp.

References assert(), llvm::SmallVectorTemplateCommon< T >::begin(), llvm::ArrayRef< T >::begin(), llvm::BasicBlock::begin(), llvm::SmallSet< T, N, C >::clear(), llvm::SmallVectorImpl< T >::clear(), llvm::SmallSet< T, N, C >::count(), llvm::detail::DenseSetImpl< ValueT, SmallDenseMap< ValueT, detail::DenseSetEmpty, InlineBuckets, ValueInfoT, detail::DenseSetPair< ValueT > >, ValueInfoT >::count(), llvm::dbgs(), DEBUG, llvm::dyn_cast(), E, llvm::SmallVectorTemplateCommon< T >::end(), llvm::ArrayRef< T >::end(), llvm::BasicBlock::end(), llvm::errs(), findBuildAggregate(), findBuildVector(), GEP, llvm::Module::getDataLayout(), llvm::BasicBlock::getModule(), llvm::PHINode::getNumIncomingValues(), llvm::User::getOperand(), llvm::Instruction::getParent(), getReductionValue(), llvm::Value::getType(), I, llvm::SmallSet< T, N, C >::insert(), llvm::detail::DenseSetImpl< ValueT, SmallDenseMap< ValueT, detail::DenseSetEmpty, InlineBuckets, ValueInfoT, detail::DenseSetPair< ValueT > >, ValueInfoT >::insert(), llvm::SmallVectorImpl< T >::insert(), llvm::makeArrayRef(), llvm::None, PhiTypeSorterFunc(), llvm::SmallVectorTemplateBase< T, isPodLike< T >::value >::push_back(), RecursionMaxDepth, llvm::SetVector< T, Vector, Set >::remove(), llvm::reverse(), ShouldStartVectorizeHorAtStore, ShouldVectorizeHor, and SI.

Variable Documentation

◆ AliasedCheckLimit

const unsigned AliasedCheckLimit = 10
static

Definition at line 153 of file SLPVectorizer.cpp.

◆ lv_name

const char lv_name[] = "SLP Vectorizer"
static

Definition at line 6113 of file SLPVectorizer.cpp.

◆ MaxMemDepDistance

const unsigned MaxMemDepDistance = 160
static

Definition at line 158 of file SLPVectorizer.cpp.

◆ MaxVectorRegSizeOption

cl::opt<int> MaxVectorRegSizeOption("slp-max-reg-size", cl::init(128), cl::Hidden, cl::desc("Attempt to vectorize for this register size in bits"))
static

◆ MinScheduleRegionSize

const int MinScheduleRegionSize = 16
static

If the ScheduleRegionSizeBudget is exhausted, we allow small scheduling regions to be handled.

Definition at line 162 of file SLPVectorizer.cpp.

◆ MinTreeSize

cl::opt<unsigned> MinTreeSize("slp-min-tree-size", cl::init(3), cl::Hidden, cl::desc("Only vectorize small trees if they are fully vectorizable"))
static

◆ MinVectorRegSizeOption

cl::opt<int> MinVectorRegSizeOption("slp-min-reg-size", cl::init(128), cl::Hidden, cl::desc("Attempt to vectorize for this register size in bits"))
static

◆ RecursionMaxDepth

cl::opt<unsigned> RecursionMaxDepth("slp-recursion-max-depth", cl::init(12), cl::Hidden, cl::desc("Limit the recursion depth when building a vectorizable tree"))
static

◆ ScheduleRegionSizeBudget

cl::opt<int> ScheduleRegionSizeBudget("slp-schedule-budget", cl::init(100000), cl::Hidden, cl::desc("Limit the size of the SLP scheduling region per block"))
static

Limits the size of scheduling regions in a block.

It avoids long compile times for very large blocks where vector instructions are spread over a wide range. This limit is way higher than needed by real-world functions.

Referenced by llvm::slpvectorizer::BoUpSLP::BlockScheduling::ReadyList::insert().

◆ ShouldStartVectorizeHorAtStore

cl::opt<bool> ShouldStartVectorizeHorAtStore("slp-vectorize-hor-store", cl::init(false), cl::Hidden, cl::desc( "Attempt to vectorize horizontal reductions feeding into a store"))
static

◆ ShouldVectorizeHor

cl::opt<bool> ShouldVectorizeHor("slp-vectorize-hor", cl::init(true), cl::Hidden, cl::desc("Attempt to vectorize horizontal reductions"))
static

◆ SLPCostThreshold

cl::opt<int> SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden, cl::desc("Only vectorize if you gain more than this " "number "))
static

◆ ViewSLPTree

cl::opt<bool> ViewSLPTree("view-slp-tree", cl::Hidden, cl::desc("Display the SLP trees with Graphviz"))
static