LLVM  6.0.0svn
Namespaces | Macros | Functions | Variables
LoopVectorize.cpp File Reference
#include "llvm/Transforms/Vectorize/LoopVectorize.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Pass.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/LoopVersioning.h"
#include "llvm/Transforms/Vectorize.h"
#include <algorithm>
#include <map>
#include <tuple>

Go to the source code of this file.

Namespaces

 llvm
 Compute iterated dominance frontiers using a linear time algorithm.
 

Macros

#define LV_NAME   "loop-vectorize"
 
#define DEBUG_TYPE   LV_NAME
 

Functions

 STATISTIC (LoopsVectorized, "Number of loops vectorized")
 
 STATISTIC (LoopsAnalyzed, "Number of loops analyzed for vectorization")
 
static OptimizationRemarkAnalysis createMissedAnalysis (const char *PassName, StringRef RemarkName, Loop *TheLoop, Instruction *I=nullptr)
 Create an analysis remark that explains why vectorization failed. More...
 
static void cse (BasicBlock *BB)
 Perform CSE (common subexpression elimination) of induction variable instructions. More...
 
static unsigned getScalarizationOverhead (Instruction *I, unsigned VF, const TargetTransformInfo &TTI)
 Estimate the overhead of scalarizing an instruction. More...
 
static unsigned getVectorCallCost (CallInst *CI, unsigned VF, const TargetTransformInfo &TTI, const TargetLibraryInfo *TLI, bool &NeedToScalarize)
 
static unsigned getVectorIntrinsicCost (CallInst *CI, unsigned VF, const TargetTransformInfo &TTI, const TargetLibraryInfo *TLI)
 
static Type * smallestIntegerVectorType (Type *T1, Type *T2)
 
static Type * largestIntegerVectorType (Type *T1, Type *T2)
 
static bool mayDivideByZero (Instruction &I)
 A helper function for checking whether an integer division-related instruction may divide by zero (in which case it must be predicated if executed conditionally in the scalar code). More...
 
static bool canIfConvertPHINodes (BasicBlock *BB)
 Check whether it is safe to if-convert this phi node. More...
 
static Type * convertPointerToIntegerType (const DataLayout &DL, Type *Ty)
 
static Type * getWiderType (const DataLayout &DL, Type *Ty0, Type *Ty1)
 
static bool hasOutsideLoopUser (const Loop *TheLoop, Instruction *Inst, SmallPtrSetImpl< Value *> &AllowedExit)
 Check that the instruction has outside loop users and is not an identified reduction variable. More...
 
static const SCEV * getAddressAccessSCEV (Value *Ptr, LoopVectorizationLegality *Legal, ScalarEvolution *SE, const Loop *TheLoop)
 Gets the address-access SCEV after verifying that the access pattern is loop invariant except for its dependence on the induction variable. More...
 
static bool isStrideMul (Instruction *I, LoopVectorizationLegality *Legal)
 
Pass * llvm::createLoopVectorizePass (bool NoUnrolling=false, bool AlwaysVectorize=true)
 
static void AddRuntimeUnrollDisableMetaData (Loop *L)
 

Variables

static cl::opt< bool > EnableIfConversion ("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization."))
 
static cl::opt< unsigned > TinyTripCountVectorThreshold ("vectorizer-min-trip-count", cl::init(16), cl::Hidden, cl::desc("Loops with a constant trip count that is smaller than this " "value are vectorized only if no scalar iteration overheads " "are incurred."))
 Loops with a known constant trip count below this number are vectorized only if no scalar iteration overheads are incurred. More...
 
static cl::opt< bool > MaximizeBandwidth ("vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden, cl::desc("Maximize bandwidth when selecting vectorization factor which " "will be determined by the smallest type in loop."))
 
static cl::opt< bool > EnableInterleavedMemAccesses ("enable-interleaved-mem-accesses", cl::init(false), cl::Hidden, cl::desc("Enable vectorization on interleaved memory accesses in a loop"))
 
static cl::opt< unsigned > MaxInterleaveGroupFactor ("max-interleave-group-factor", cl::Hidden, cl::desc("Maximum factor for an interleaved access group (default = 8)"), cl::init(8))
 Maximum factor for an interleaved memory access. More...
 
static const unsigned TinyTripCountInterleaveThreshold = 128
 We don't interleave loops with a known constant trip count below this number. More...
 
static cl::opt< unsigned > ForceTargetNumScalarRegs ("force-target-num-scalar-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of scalar registers."))
 
static cl::opt< unsigned > ForceTargetNumVectorRegs ("force-target-num-vector-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of vector registers."))
 
static const unsigned MaxInterleaveFactor = 16
 Maximum vectorization interleave count. More...
 
static cl::opt< unsigned > ForceTargetMaxScalarInterleaveFactor ("force-target-max-scalar-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for " "scalar loops."))
 
static cl::opt< unsigned > ForceTargetMaxVectorInterleaveFactor ("force-target-max-vector-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for " "vectorized loops."))
 
static cl::opt< unsigned > ForceTargetInstructionCost ("force-target-instruction-cost", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's expected cost for " "an instruction to a single constant value. Mostly " "useful for getting consistent testing."))
 
static cl::opt< unsigned > SmallLoopCost ("small-loop-cost", cl::init(20), cl::Hidden, cl::desc("The cost of a loop that is considered 'small' by the interleaver."))
 
static cl::opt< bool > LoopVectorizeWithBlockFrequency ("loop-vectorize-with-block-frequency", cl::init(false), cl::Hidden, cl::desc("Enable the use of the block frequency analysis to access PGO " "heuristics minimizing code growth in cold regions and being more " "aggressive in hot regions."))
 
static cl::opt< bool > EnableLoadStoreRuntimeInterleave ("enable-loadstore-runtime-interleave", cl::init(true), cl::Hidden, cl::desc("Enable runtime interleaving until load/store ports are saturated"))
 
static cl::opt< unsigned > NumberOfStoresToPredicate ("vectorize-num-stores-pred", cl::init(1), cl::Hidden, cl::desc("Max number of stores to be predicated behind an if."))
 The number of stores in a loop that are allowed to need predication. More...
 
static cl::opt< bool > EnableIndVarRegisterHeur ("enable-ind-var-reg-heur", cl::init(true), cl::Hidden, cl::desc("Count the induction variable only once when interleaving"))
 
static cl::opt< bool > EnableCondStoresVectorization ("enable-cond-stores-vec", cl::init(true), cl::Hidden, cl::desc("Enable if predication of stores during vectorization."))
 
static cl::opt< unsigned > MaxNestedScalarReductionIC ("max-nested-scalar-reduction-interleave", cl::init(2), cl::Hidden, cl::desc("The maximum interleave count to use when interleaving a scalar " "reduction in a nested loop."))
 
static cl::opt< unsigned > PragmaVectorizeMemoryCheckThreshold ("pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum allowed number of runtime memory checks with a " "vectorize(enable) pragma."))
 
static cl::opt< unsigned > VectorizeSCEVCheckThreshold ("vectorize-scev-check-threshold", cl::init(16), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed."))
 
static cl::opt< unsigned > PragmaVectorizeSCEVCheckThreshold ("pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed with a " "vectorize(enable) pragma"))
 
static const char lv_name [] = "Loop Vectorization"
 

Macro Definition Documentation

◆ DEBUG_TYPE

#define DEBUG_TYPE   LV_NAME

Definition at line 108 of file LoopVectorize.cpp.

Referenced by createMissedAnalysis().

◆ LV_NAME

#define LV_NAME   "loop-vectorize"

Function Documentation

◆ AddRuntimeUnrollDisableMetaData()

static void AddRuntimeUnrollDisableMetaData ( Loop * L)
static

◆ canIfConvertPHINodes()

static bool canIfConvertPHINodes ( BasicBlock * BB)
static

◆ convertPointerToIntegerType()

static Type* convertPointerToIntegerType ( const DataLayout & DL,
Type * Ty 
)
static

◆ createMissedAnalysis()

static OptimizationRemarkAnalysis createMissedAnalysis ( const char * PassName,
StringRef  RemarkName,
Loop * TheLoop,
Instruction * I = nullptr 
)
static

Create an analysis remark that explains why vectorization failed.

PassName is the name of the pass (e.g. can be AlwaysPrint). RemarkName is the identifier for the remark. If I is passed it is an instruction that prevents vectorization. Otherwise TheLoop is used for the location of the remark.

Returns
the remark object that can be streamed to.

Definition at line 228 of file LoopVectorize.cpp.

References A, llvm::abs(), llvm::MCID::Add, llvm::LoopBase< BlockT, LoopT >::addBasicBlockToLoop(), llvm::LoopBase< BlockT, LoopT >::addChildLoop(), addFastMathFlag(), llvm::PHINode::addIncoming(), llvm::AnalysisUsage::addPreserved(), llvm::AnalysisUsage::addRequired(), llvm::AMDGPU::CodeObject::Kernel::Arg::Key::Align, llvm::OptimizationRemarkAnalysis::AlwaysPrint, llvm::any_of(), llvm::AMDGPU::CodeObject::Kernel::Key::Args, assert(), B, llvm::AMDGPUISD::BFI, llvm::LoopAccessInfo::blockNeedsPredication(), C, llvm::Instruction::clone(), llvm::DILocation::cloneWithDuplicationFactor(), llvm::concatenateVectors(), Context, llvm::DenseMapBase< DenseMap< KeyT, ValueT, KeyInfoT, BucketT >, KeyT, ValueT, KeyInfoT, BucketT >::count(), llvm::CmpInst::Create(), llvm::PHINode::Create(), llvm::BranchInst::Create(), llvm::createInterleaveMask(), llvm::CastInst::CreatePointerCast(), llvm::createStrideMask(), llvm::dbgs(), DEBUG, DEBUG_TYPE, llvm::Loop::dump(), llvm::dyn_cast(), llvm::OptimizationRemarkEmitter::emit(), llvm::LoopBase< BlockT, LoopT >::empty(), llvm::WebAssembly::End, llvm::StringRef::endswith(), llvm::DenseMapBase< DenseMap< KeyT, ValueT, KeyInfoT, BucketT >, KeyT, ValueT, KeyInfoT, BucketT >::erase(), llvm::Instruction::eraseFromParent(), llvm::SCEVExpander::expandCodeFor(), F, llvm::MapVector< KeyT, ValueT, MapType, VectorType >::find(), Flags, llvm::raw_ostream::flush(), function, llvm::ConstantInt::get(), llvm::ConstantFP::get(), llvm::VectorType::get(), llvm::ConstantAsMetadata::get(), llvm::ConstantVector::get(), llvm::MDString::get(), llvm::MDNode::get(), llvm::UndefValue::get(), llvm::DataLayout::getABITypeAlignment(), getAlignment(), llvm::PHINode::getBasicBlockIndex(), llvm::CastInst::getCastOpcode(), llvm::ScalarEvolution::getContext(), llvm::Module::getDataLayout(), llvm::Instruction::getDebugLoc(), llvm::LoopAccessInfo::getDepChecker(), llvm::MemoryDepChecker::getDependences(), llvm::LoopBase< BlockT, LoopT >::getExitBlock(), 
llvm::BasicBlock::getFirstInsertionPt(), llvm::LoopBase< BlockT, LoopT >::getHeader(), llvm::PHINode::getIncomingValueForBlock(), llvm::InductionDescriptor::getInductionOpcode(), llvm::Type::getInt32Ty(), llvm::LoopBase< BlockT, LoopT >::getLoopLatch(), llvm::LoopBase< BlockT, LoopT >::getLoopPreheader(), llvm::LoopAccessInfo::getMaxSafeDepDistBytes(), llvm::Instruction::getModule(), llvm::BasicBlock::getModule(), llvm::Module::getModuleIdentifier(), llvm::Value::getName(), llvm::User::getNumOperands(), llvm::MDNode::getNumOperands(), llvm::Instruction::getOpcode(), llvm::User::getOperand(), llvm::MDNode::getOperand(), llvm::Instruction::getParent(), llvm::BasicBlock::getParent(), llvm::GlobalValue::getParent(), llvm::LoopBase< BlockT, LoopT >::getParentLoop(), llvm::PassRegistry::getPassRegistry(), getPointerOperand(), llvm::Type::getPointerTo(), llvm::Type::getPrimitiveSizeInBits(), llvm::getPtrStride(), llvm::Type::getScalarType(), llvm::PredicatedScalarEvolution::getSE(), llvm::ConstantInt::getSigned(), llvm::ConstantVector::getSplat(), llvm::Loop::getStartLoc(), llvm::InductionDescriptor::getStartValue(), llvm::InductionDescriptor::getStep(), llvm::MDString::getString(), llvm::BasicBlock::getTerminator(), llvm::SCEV::getType(), llvm::Value::getType(), llvm::DataLayout::getTypeAllocSize(), llvm::DataLayout::getTypeAllocSizeInBits(), llvm::DataLayout::getTypeSizeInBits(), llvm::DataLayout::getTypeStoreSize(), llvm::Type::getVectorNumElements(), llvm::ScaledNumbers::getWidth(), llvm::ConstantInt::getZExtValue(), H, llvm::hash_combine(), llvm::hash_combine_range(), I, llvm::CmpInst::ICMP_EQ, llvm::CmpInst::ICMP_ULE, llvm::CmpInst::ICMP_ULT, llvm::initializeLoopVectorizePass(), llvm::SmallSet< T, N, C >::insert(), llvm::DenseMapBase< DenseMap< KeyT, ValueT, KeyInfoT, BucketT >, KeyT, ValueT, KeyInfoT, BucketT >::insert(), llvm::Type::isAggregateType(), isEqual(), llvm::Type::isFloatingPointTy(), llvm::Instruction::isIdenticalTo(), llvm::Type::isIntegerTy(), 
llvm::VectorizerParams::isInterleaveForced(), llvm::TargetTransformInfo::isLegalMaskedGather(), llvm::TargetTransformInfo::isLegalMaskedLoad(), llvm::TargetTransformInfo::isLegalMaskedScatter(), llvm::TargetTransformInfo::isLegalMaskedStore(), llvm::isPowerOf2_32(), llvm::HexagonMCInstrInfo::isPredicated(), llvm::TargetTransformInfo::isTruncateFree(), llvm::Type::isVectorTy(), llvm::Type::isVoidTy(), Kind, List, llvm_unreachable, llvm::DenseMapBase< DenseMap< KeyT, ValueT, KeyInfoT, BucketT >, KeyT, ValueT, KeyInfoT, BucketT >::lookup(), LV_NAME, llvm::make_range(), llvm::BitmaskEnumDetail::Mask(), llvm::max(), MaxInterleaveFactor, MaxInterleaveGroupFactor, llvm::VectorizerParams::MaxVectorWidth, Name, llvm::StringRef::npos, op, llvm::User::op_begin(), llvm::User::op_end(), P, llvm::HexagonII::PostInc, PragmaVectorizeMemoryCheckThreshold, llvm::cl::Prefix, llvm::propagateMetadata(), Ptr, llvm::SmallVectorTemplateBase< T, isPodLike >::push_back(), llvm::SmallVectorTemplateBase< T, isPodLike< T >::value >::push_back(), llvm::ReplaceInstWithInst(), llvm::MDNode::replaceOperandWith(), llvm::LoopVectorizePass::runImpl(), llvm::VectorizerParams::RuntimeMemoryCheckThreshold, llvm::NVPTX::PTXLdStInstCode::Scalar, llvm::IRBuilderBase::SetCurrentDebugLocation(), llvm::PHINode::setIncomingValue(), llvm::Loop::setLoopID(), llvm::Value::setName(), llvm::User::setOperand(), llvm::FastMathFlags::setUnsafeAlgebra(), SI, llvm::cl::Sink, llvm::SmallVectorTemplateCommon< T >::size(), llvm::ArrayRef< T >::size(), llvm::AMDGPU::CodeObject::Kernel::Arg::Key::Size, llvm::BasicBlock::splitBasicBlock(), llvm::StringRef::startswith(), llvm::Twine::str(), llvm::StringRef::substr(), llvm::InductionDescriptor::transform(), llvm::Value::users(), llvm::User::value_op_begin(), llvm::User::value_op_end(), llvm::VectorizerParams::VectorizationFactor, VectorizationFactor, and llvm::VectorizerParams::VectorizationInterleave.

Referenced by canIfConvertPHINodes(), hasOutsideLoopUser(), and llvm::LoopVectorizePass::processLoop().

◆ cse()

static void cse ( BasicBlock * BB)
static

◆ getAddressAccessSCEV()

static const SCEV* getAddressAccessSCEV ( Value * Ptr,
LoopVectorizationLegality *  Legal,
ScalarEvolution * SE,
const Loop * TheLoop 
)
static

Gets the address-access SCEV after verifying that the access pattern is loop invariant except for its dependence on the induction variable.

This SCEV can be sent to the Target in order to estimate the address calculation cost.

Definition at line 7004 of file LoopVectorize.cpp.

References llvm::dyn_cast(), llvm::User::getNumOperands(), llvm::ScalarEvolution::getSCEV(), llvm::ScalarEvolution::isLoopInvariant(), and Ptr.

Referenced by isStrideMul().

◆ getScalarizationOverhead()

static unsigned getScalarizationOverhead ( Instruction * I,
unsigned  VF,
const TargetTransformInfo & TTI 
)
static

Estimate the overhead of scalarizing an instruction.

This is a convenience wrapper for the type-based getScalarizationOverhead API.

Definition at line 3674 of file LoopVectorize.cpp.

References llvm::TargetTransformInfo::getOperandsScalarizationOverhead(), llvm::TargetTransformInfo::getScalarizationOverhead(), llvm::Value::getType(), I, llvm::Type::isVoidTy(), llvm::User::operand_values(), and llvm::TargetTransformInfo::supportsEfficientVectorElementLoadStore().

Referenced by getVectorCallCost(), and isStrideMul().

◆ getVectorCallCost()

static unsigned getVectorCallCost ( CallInst * CI,
unsigned  VF,
const TargetTransformInfo & TTI,
const TargetLibraryInfo * TLI,
bool & NeedToScalarize 
)
static

◆ getVectorIntrinsicCost()

static unsigned getVectorIntrinsicCost ( CallInst * CI,
unsigned  VF,
const TargetTransformInfo & TTI,
const TargetLibraryInfo * TLI 
)
static

◆ getWiderType()

static Type* getWiderType ( const DataLayout & DL,
Type * Ty0,
Type * Ty1 
)
static

◆ hasOutsideLoopUser()

static bool hasOutsideLoopUser ( const Loop * TheLoop,
Instruction * Inst,
SmallPtrSetImpl< Value *> &  AllowedExit 
)
static

Check that the instruction has outside loop users and is not an identified reduction variable.

Definition at line 5245 of file LoopVectorize.cpp.

References A, llvm::AMDGPU::CodeObject::Kernel::Arg::Key::Align, llvm::all_of(), assert(), B, llvm::SetVector< T, Vector, Set >::begin(), llvm::sys::path::begin(), llvm::LoopBlocksDFS::beginRPO(), llvm::LoopAccessInfo::blockNeedsPredication(), C, llvm::computeMinimumValueSizes(), llvm::LoopBase< BlockT, LoopT >::contains(), convertPointerToIntegerType(), llvm::SmallSet< T, N, C >::count(), llvm::DenseMapBase< DenseMap< KeyT, ValueT, KeyInfoT, BucketT >, KeyT, ValueT, KeyInfoT, BucketT >::count(), llvm::SetVector< T, Vector, Set >::count(), llvm::SmallPtrSetImpl< PtrType >::count(), createMissedAnalysis(), llvm::dbgs(), DEBUG, DFS(), llvm::dyn_cast(), E, llvm::SmallVectorBase::empty(), llvm::MapVector< KeyT, ValueT, MapType, VectorType >::empty(), EnableCondStoresVectorization, EnableIndVarRegisterHeur, EnableLoadStoreRuntimeInterleave, llvm::SetVector< T, Vector, Set >::end(), llvm::sys::path::end(), llvm::LoopBlocksDFS::endRPO(), llvm::SmallSet< T, N, C >::erase(), F, ForceTargetInstructionCost, ForceTargetMaxScalarInterleaveFactor, ForceTargetMaxVectorInterleaveFactor, ForceTargetNumScalarRegs, ForceTargetNumVectorRegs, llvm::SCEVConstant::getAPInt(), llvm::InductionDescriptor::getConstIntStepValue(), llvm::Module::getDataLayout(), llvm::PointerType::getElementType(), llvm::Function::getFnAttribute(), llvm::PHINode::getIncomingValueForBlock(), llvm::InductionDescriptor::getKind(), llvm::RecurrenceDescriptor::getLoopExitInstr(), llvm::Instruction::getModule(), llvm::PHINode::getNumIncomingValues(), llvm::Instruction::getOpcode(), llvm::User::getOperand(), llvm::Instruction::getParent(), llvm::BasicBlock::getParent(), getPointerOperand(), llvm::getPtrStride(), llvm::RecurrenceDescriptor::getRecurrenceType(), llvm::Type::getScalarType(), llvm::APInt::getSExtValue(), llvm::InductionDescriptor::getStartValue(), llvm::BasicBlock::getTerminator(), llvm::Value::getType(), llvm::DataLayout::getTypeSizeInBits(), llvm::RecurrenceDescriptor::getUnsafeAlgebraInst(), 
llvm::InductionDescriptor::getUnsafeAlgebraInst(), llvm::Attribute::getValueAsString(), llvm::getVectorIntrinsicIDForCall(), getWiderType(), llvm::Value::hasOneUse(), llvm::RecurrenceDescriptor::hasUnsafeAlgebra(), llvm::InductionDescriptor::hasUnsafeAlgebra(), llvm::Instruction::hasUnsafeAlgebra(), llvm::hasVectorInstrinsicScalarOpd(), I, llvm::InductionDescriptor::IK_IntInduction, llvm::InductionDescriptor::IK_PtrInduction, llvm::SmallSet< T, N, C >::insert(), llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::insert(), llvm::SetVector< T, Vector, Set >::insert(), llvm::SmallPtrSetImpl< PtrType >::insert(), llvm::Instruction::isBinaryOp(), llvm::RecurrenceDescriptor::isFirstOrderRecurrence(), llvm::Type::isFloatingPointTy(), llvm::InductionDescriptor::isInductionPHI(), llvm::Type::isIntegerTy(), llvm::ConstantInt::isOne(), llvm::Type::isPointerTy(), llvm::HexagonMCInstrInfo::isPredicated(), llvm::RecurrenceDescriptor::isReductionPHI(), llvm::VectorType::isValidElementType(), llvm::Type::isVoidTy(), List, llvm::SPII::Load, llvm::make_range(), llvm::max(), MaximizeBandwidth, MaxNestedScalarReductionIC, mayDivideByZero(), llvm::Instruction::mayReadFromMemory(), llvm::Instruction::mayThrow(), llvm::Instruction::mayWriteToMemory(), llvm::None, NumberOfStoresToPredicate, llvm::User::operand_values(), llvm::User::operands(), llvm::LoopBlocksDFS::perform(), llvm::SmallVectorImpl< T >::pop_back_val(), llvm::PowerOf2Floor(), Ptr, llvm::SmallVectorTemplateBase< T, isPodLike< T >::value >::push_back(), llvm::SmallVectorTemplateBase< T, isPodLike >::push_back(), llvm::MapVector< KeyT, ValueT, MapType, VectorType >::rbegin(), llvm::RegUsage, llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::remove(), llvm::MapVector< KeyT, ValueT, MapType, VectorType >::rend(), llvm::replaceSymbolicStrideSCEV(), SI, llvm::SmallSet< T, N, C >::size(), llvm::SetVector< T, Vector, Set >::size(), llvm::SmallPtrSetImplBase::size(), llvm::ArrayRef< T >::size(), 
llvm::AMDGPU::CodeObject::Kernel::Arg::Key::Size, llvm::BasicBlock::size(), SmallLoopCost, llvm::ARM_MB::ST, llvm::SPII::Store, llvm::Value::users(), VectorizationFactor, and llvm::AArch64CC::VS.

◆ isStrideMul()

static bool isStrideMul ( Instruction * I,
LoopVectorizationLegality *  Legal 
)
static

Definition at line 7027 of file LoopVectorize.cpp.

References llvm::MCID::Add, AS, assert(), C, llvm::MCID::Call, llvm::dyn_cast(), llvm::SmallVectorBase::empty(), first, llvm::IntegerType::get(), llvm::VectorType::get(), getAddressAccessSCEV(), llvm::LoadInst::getAlignment(), llvm::SelectInst::getCondition(), llvm::Type::getContext(), llvm::Value::getContext(), llvm::CastInst::getDestTy(), llvm::Type::getInt1Ty(), llvm::PHINode::getNumIncomingValues(), llvm::Instruction::getOpcode(), llvm::User::getOperand(), llvm::Instruction::getParent(), llvm::LoadInst::getPointerAddressSpace(), getPointerOperand(), getScalarizationOverhead(), llvm::Type::getScalarType(), llvm::getSplatValue(), llvm::CastInst::getSrcTy(), llvm::BranchInst::getSuccessor(), llvm::Value::getType(), llvm::ConstantInt::getValue(), getVectorCallCost(), getVectorIntrinsicCost(), llvm::getVectorIntrinsicIDForCall(), I, llvm::SmallPtrSetImpl< PtrType >::insert(), llvm::BranchInst::isConditional(), llvm::APInt::isPowerOf2(), llvm::Type::isVectorTy(), largestIntegerVectorType(), LLVM_FALLTHROUGH, llvm::SPII::Load, N, llvm::TargetTransformInfo::OK_AnyValue, llvm::TargetTransformInfo::OK_NonUniformConstantValue, llvm::TargetTransformInfo::OK_UniformConstantValue, llvm::TargetTransformInfo::OK_UniformValue, llvm::TargetTransformInfo::OP_None, llvm::TargetTransformInfo::OP_PowerOf2, llvm::User::operand_values(), llvm::User::operands(), llvm::SmallVectorImpl< T >::pop_back_val(), Ptr, llvm::SmallVectorTemplateBase< T, isPodLike >::push_back(), llvm::SmallVectorTemplateBase< T, isPodLike< T >::value >::push_back(), llvm::MCID::Select, SI, llvm::TargetTransformInfo::SK_Broadcast, llvm::TargetTransformInfo::SK_ExtractSubvector, llvm::TargetTransformInfo::SK_Reverse, smallestIntegerVectorType(), and llvm::SPII::Store.

◆ largestIntegerVectorType()

static Type* largestIntegerVectorType ( Type * T1,
Type * T2 
)
static

Definition at line 3770 of file LoopVectorize.cpp.

References addFastMathFlag(), llvm::PHINode::addIncoming(), llvm::all_of(), assert(), B, llvm::SetVector< T, Vector, Set >::begin(), C, llvm::SetVector< T, Vector, Set >::clear(), llvm::SmallPtrSetImpl< PtrType >::count(), llvm::PHINode::Create(), llvm::IRBuilder< T, Inserter >::CreateBinOp(), llvm::IRBuilder< T, Inserter >::CreateExtractElement(), llvm::IRBuilder< T, Inserter >::CreateICmp(), llvm::IRBuilder< T, Inserter >::CreateInsertElement(), llvm::RecurrenceDescriptor::createMinMaxOp(), llvm::IRBuilder< T, Inserter >::CreateSelect(), llvm::IRBuilder< T, Inserter >::CreateSExtOrTrunc(), llvm::IRBuilder< T, Inserter >::CreateShuffleVector(), llvm::createTargetReduction(), llvm::IRBuilder< T, Inserter >::CreateZExtOrTrunc(), cse(), DEBUG, llvm::dyn_cast(), E, llvm::SetVector< T, Vector, Set >::end(), llvm::Instruction::eraseFromParent(), llvm::IntegerType::get(), llvm::ConstantInt::get(), llvm::VectorType::get(), llvm::ConstantVector::get(), llvm::UndefValue::get(), llvm::PHINode::getBasicBlockIndex(), llvm::BranchInst::getCondition(), llvm::BasicBlock::getContext(), llvm::Type::getContext(), llvm::Module::getDataLayout(), llvm::PHINode::getIncomingBlock(), llvm::PHINode::getIncomingValue(), llvm::PHINode::getIncomingValueForBlock(), llvm::PHINode::getIncomingValueNumForOperand(), llvm::Type::getInt1Ty(), llvm::Instruction::getModule(), llvm::PHINode::getNumIncomingValues(), llvm::User::getOperand(), llvm::Instruction::getParent(), llvm::RecurrenceDescriptor::getRecurrenceBinOp(), llvm::RecurrenceDescriptor::getRecurrenceIdentity(), llvm::RecurrenceDescriptor::getRecurrenceKind(), llvm::Type::getScalarType(), llvm::BasicBlock::getSingleSuccessor(), llvm::ConstantVector::getSplat(), llvm::BranchInst::getSuccessor(), llvm::BasicBlock::getTerminator(), llvm::Value::getType(), llvm::Type::getVectorElementType(), llvm::Type::getVectorNumElements(), I, llvm::ARM_PROC::IE, llvm::InductionDescriptor::IK_FpInduction, llvm::InductionDescriptor::IK_IntInduction, 
llvm::InductionDescriptor::IK_NoInduction, llvm::InductionDescriptor::IK_PtrInduction, llvm::tgtok::In, llvm::SmallPtrSetImpl< PtrType >::insert(), llvm::is_contained(), llvm::BranchInst::isConditional(), llvm::Type::isPointerTy(), llvm_unreachable, llvm::Instruction::mayHaveSideEffects(), llvm::Instruction::moveBefore(), llvm::User::op_begin(), llvm::User::op_end(), P, llvm::pred_begin(), llvm::pred_end(), llvm::predecessors(), llvm::Value::replaceAllUsesWith(), llvm::RecurrenceDescriptor::RK_FloatMinMax, llvm::RecurrenceDescriptor::RK_IntegerMinMax, llvm::PHINode::setIncomingValue(), llvm::Value::setName(), SI, smallestIntegerVectorType(), llvm::SplitBlockAndInsertIfThen(), T1, llvm::Value::takeName(), llvm::Value::use_empty(), llvm::Value::uses(), and Zero.

Referenced by isStrideMul().

◆ mayDivideByZero()

static bool mayDivideByZero ( Instruction & I)
static

◆ smallestIntegerVectorType()

static Type* smallestIntegerVectorType ( Type * T1,
Type * T2 
)
static

Definition at line 3765 of file LoopVectorize.cpp.

References llvm::Type::getVectorElementType(), and T1.

Referenced by isStrideMul(), and largestIntegerVectorType().

◆ STATISTIC() [1/2]

STATISTIC ( LoopsVectorized  ,
"Number of loops vectorized"   
)

◆ STATISTIC() [2/2]

STATISTIC ( LoopsAnalyzed  ,
"Number of loops analyzed for vectorization"   
)

Variable Documentation

◆ EnableCondStoresVectorization

cl::opt<bool> EnableCondStoresVectorization("enable-cond-stores-vec", cl::init(true), cl::Hidden, cl::desc("Enable if predication of stores during vectorization."))
static

Referenced by hasOutsideLoopUser().

◆ EnableIfConversion

cl::opt<bool> EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization."))
static

Referenced by canIfConvertPHINodes().

◆ EnableIndVarRegisterHeur

cl::opt<bool> EnableIndVarRegisterHeur("enable-ind-var-reg-heur", cl::init(true), cl::Hidden, cl::desc("Count the induction variable only once when interleaving"))
static

Referenced by hasOutsideLoopUser().

◆ EnableInterleavedMemAccesses

cl::opt<bool> EnableInterleavedMemAccesses("enable-interleaved-mem-accesses", cl::init(false), cl::Hidden, cl::desc("Enable vectorization on interleaved memory accesses in a loop"))
static

Referenced by canIfConvertPHINodes().

◆ EnableLoadStoreRuntimeInterleave

cl::opt<bool> EnableLoadStoreRuntimeInterleave("enable-loadstore-runtime-interleave", cl::init(true), cl::Hidden, cl::desc( "Enable runtime interleaving until load/store ports are saturated"))
static

Referenced by hasOutsideLoopUser().

◆ ForceTargetInstructionCost

cl::opt<unsigned> ForceTargetInstructionCost("force-target-instruction-cost", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's expected cost for " "an instruction to a single constant value. Mostly " "useful for getting consistent testing."))
static

Referenced by hasOutsideLoopUser().

◆ ForceTargetMaxScalarInterleaveFactor

cl::opt<unsigned> ForceTargetMaxScalarInterleaveFactor("force-target-max-scalar-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for " "scalar loops."))
static

Referenced by hasOutsideLoopUser().

◆ ForceTargetMaxVectorInterleaveFactor

cl::opt<unsigned> ForceTargetMaxVectorInterleaveFactor("force-target-max-vector-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for " "vectorized loops."))
static

Referenced by hasOutsideLoopUser().

◆ ForceTargetNumScalarRegs

cl::opt<unsigned> ForceTargetNumScalarRegs("force-target-num-scalar-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of scalar registers."))
static

Referenced by hasOutsideLoopUser().

◆ ForceTargetNumVectorRegs

cl::opt<unsigned> ForceTargetNumVectorRegs("force-target-num-vector-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of vector registers."))
static

Referenced by hasOutsideLoopUser().

◆ LoopVectorizeWithBlockFrequency

cl::opt<bool> LoopVectorizeWithBlockFrequency("loop-vectorize-with-block-frequency", cl::init(false), cl::Hidden, cl::desc("Enable the use of the block frequency analysis to access PGO " "heuristics minimizing code growth in cold regions and being more " "aggressive in hot regions."))
static

◆ lv_name

const char lv_name[] = "Loop Vectorization"
static

Definition at line 7554 of file LoopVectorize.cpp.

◆ MaximizeBandwidth

cl::opt<bool> MaximizeBandwidth("vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden, cl::desc("Maximize bandwidth when selecting vectorization factor which " "will be determined by the smallest type in loop."))
static

Referenced by hasOutsideLoopUser().

◆ MaxInterleaveFactor

const unsigned MaxInterleaveFactor = 16
static

Maximum vectorization interleave count.

Definition at line 153 of file LoopVectorize.cpp.

Referenced by createMissedAnalysis().

◆ MaxInterleaveGroupFactor

cl::opt<unsigned> MaxInterleaveGroupFactor("max-interleave-group-factor", cl::Hidden, cl::desc("Maximum factor for an interleaved access group (default = 8)"), cl::init(8))
static

Maximum factor for an interleaved memory access.

Referenced by createMissedAnalysis().

◆ MaxNestedScalarReductionIC

cl::opt<unsigned> MaxNestedScalarReductionIC("max-nested-scalar-reduction-interleave", cl::init(2), cl::Hidden, cl::desc("The maximum interleave count to use when interleaving a scalar " "reduction in a nested loop."))
static

Referenced by hasOutsideLoopUser().

◆ NumberOfStoresToPredicate

cl::opt<unsigned> NumberOfStoresToPredicate("vectorize-num-stores-pred", cl::init(1), cl::Hidden, cl::desc("Max number of stores to be predicated behind an if."))
static

The number of stores in a loop that are allowed to need predication.

Referenced by hasOutsideLoopUser().

◆ PragmaVectorizeMemoryCheckThreshold

cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold("pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum allowed number of runtime memory checks with a " "vectorize(enable) pragma."))
static

Referenced by createMissedAnalysis().

◆ PragmaVectorizeSCEVCheckThreshold

cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold("pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed with a " "vectorize(enable) pragma"))
static

Referenced by canIfConvertPHINodes().

◆ SmallLoopCost

cl::opt<unsigned> SmallLoopCost("small-loop-cost", cl::init(20), cl::Hidden, cl::desc( "The cost of a loop that is considered 'small' by the interleaver."))
static

Referenced by hasOutsideLoopUser().

◆ TinyTripCountInterleaveThreshold

const unsigned TinyTripCountInterleaveThreshold = 128
static

We don't interleave loops with a known constant trip count below this number.

Definition at line 142 of file LoopVectorize.cpp.

◆ TinyTripCountVectorThreshold

cl::opt<unsigned> TinyTripCountVectorThreshold("vectorizer-min-trip-count", cl::init(16), cl::Hidden, cl::desc("Loops with a constant trip count that is smaller than this " "value are vectorized only if no scalar iteration overheads " "are incurred."))
static

Loops with a known constant trip count below this number are vectorized only if no scalar iteration overheads are incurred.

Referenced by llvm::LoopVectorizePass::processLoop().

◆ VectorizeSCEVCheckThreshold

cl::opt<unsigned> VectorizeSCEVCheckThreshold("vectorize-scev-check-threshold", cl::init(16), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed."))
static

Referenced by canIfConvertPHINodes().