|
LLVM
3.7.0
|
#include "llvm/Transforms/Vectorize.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Pass.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include <algorithm>
#include <map>
#include <tuple>

Go to the source code of this file.
Namespaces | |
| llvm | |
| Compute iterated dominance frontiers using a linear time algorithm. | |
Macros | |
| #define | LV_NAME "loop-vectorize" |
| #define | DEBUG_TYPE LV_NAME |
Functions | |
| STATISTIC (LoopsVectorized,"Number of loops vectorized") | |
| STATISTIC (LoopsAnalyzed,"Number of loops analyzed for vectorization") | |
| static Constant * | getInterleavedMask (IRBuilder<> &Builder, unsigned VF, unsigned NumVec) |
| static Constant * | getStridedMask (IRBuilder<> &Builder, unsigned Start, unsigned Stride, unsigned VF) |
| static Constant * | getSequentialMask (IRBuilder<> &Builder, unsigned NumInt, unsigned NumUndef) |
| static Value * | ConcatenateTwoVectors (IRBuilder<> &Builder, Value *V1, Value *V2) |
| static Value * | ConcatenateVectors (IRBuilder<> &Builder, ArrayRef< Value * > InputList) |
| static Instruction * | getFirstInst (Instruction *FirstInst, Value *V, Instruction *Loc) |
| static bool | isPredicatedBlock (unsigned BlockNum) |
| Check whether this block is a predicated block. More... | |
| static void | cse (SmallVector< BasicBlock *, 4 > &BBs) |
| Perform cse of induction variable instructions. More... | |
| static Value * | addFastMathFlag (Value *V) |
| Adds a 'fast' flag to floating point operations. More... | |
| static unsigned | getScalarizationOverhead (Type *Ty, bool Insert, bool Extract, const TargetTransformInfo &TTI) |
| Estimate the overhead of scalarizing a value. More... | |
| static unsigned | getVectorCallCost (CallInst *CI, unsigned VF, const TargetTransformInfo &TTI, const TargetLibraryInfo *TLI, bool &NeedToScalarize) |
| static unsigned | getVectorIntrinsicCost (CallInst *CI, unsigned VF, const TargetTransformInfo &TTI, const TargetLibraryInfo *TLI) |
| static bool | canIfConvertPHINodes (BasicBlock *BB) |
| Check whether it is safe to if-convert this phi node. More... | |
| static Type * | convertPointerToIntegerType (const DataLayout &DL, Type *Ty) |
| static Type * | getWiderType (const DataLayout &DL, Type *Ty0, Type *Ty1) |
| static bool | hasOutsideLoopUser (const Loop *TheLoop, Instruction *Inst, SmallPtrSetImpl< Value * > &Reductions) |
| Check that the instruction has outside loop users and is not an identified reduction variable. More... | |
| static bool | isLikelyComplexAddressComputation (Value *Ptr, LoopVectorizationLegality *Legal, ScalarEvolution *SE, const Loop *TheLoop) |
| Check whether the address computation for a non-consecutive memory access looks like an unlikely candidate for being merged into the indexing mode. More... | |
| static bool | isStrideMul (Instruction *I, LoopVectorizationLegality *Legal) |
| Pass * | llvm::createLoopVectorizePass (bool NoUnrolling=false, bool AlwaysVectorize=true) |
Variables | |
| static cl::opt< bool > | EnableIfConversion ("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization.")) |
| static cl::opt< unsigned > | TinyTripCountVectorThreshold ("vectorizer-min-trip-count", cl::init(16), cl::Hidden, cl::desc("Don't vectorize loops with a constant ""trip count that is smaller than this ""value.")) |
| We don't vectorize loops with a known constant trip count below this number. More... | |
| static cl::opt< bool > | EnableMemAccessVersioning ("enable-mem-access-versioning", cl::init(true), cl::Hidden, cl::desc("Enable symblic stride memory access versioning")) |
| This enables versioning on the strides of symbolically striding memory accesses in code like the following. More... | |
| static cl::opt< bool > | EnableInterleavedMemAccesses ("enable-interleaved-mem-accesses", cl::init(false), cl::Hidden, cl::desc("Enable vectorization on interleaved memory accesses in a loop")) |
| static cl::opt< unsigned > | MaxInterleaveGroupFactor ("max-interleave-group-factor", cl::Hidden, cl::desc("Maximum factor for an interleaved access group (default = 8)"), cl::init(8)) |
| Maximum factor for an interleaved memory access. More... | |
| static const unsigned | TinyTripCountInterleaveThreshold = 128 |
| We don't interleave loops with a known constant trip count below this number. More... | |
| static cl::opt< unsigned > | ForceTargetNumScalarRegs ("force-target-num-scalar-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of scalar registers.")) |
| static cl::opt< unsigned > | ForceTargetNumVectorRegs ("force-target-num-vector-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of vector registers.")) |
| static const unsigned | MaxInterleaveFactor = 16 |
| Maximum vectorization interleave count. More... | |
| static cl::opt< unsigned > | ForceTargetMaxScalarInterleaveFactor ("force-target-max-scalar-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for ""scalar loops.")) |
| static cl::opt< unsigned > | ForceTargetMaxVectorInterleaveFactor ("force-target-max-vector-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for ""vectorized loops.")) |
| static cl::opt< unsigned > | ForceTargetInstructionCost ("force-target-instruction-cost", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's expected cost for ""an instruction to a single constant value. Mostly ""useful for getting consistent testing.")) |
| static cl::opt< unsigned > | SmallLoopCost ("small-loop-cost", cl::init(20), cl::Hidden, cl::desc("The cost of a loop that is considered 'small' by the interleaver.")) |
| static cl::opt< bool > | LoopVectorizeWithBlockFrequency ("loop-vectorize-with-block-frequency", cl::init(false), cl::Hidden, cl::desc("Enable the use of the block frequency analysis to access PGO ""heuristics minimizing code growth in cold regions and being more ""aggressive in hot regions.")) |
| static cl::opt< bool > | EnableLoadStoreRuntimeInterleave ("enable-loadstore-runtime-interleave", cl::init(true), cl::Hidden, cl::desc("Enable runtime interleaving until load/store ports are saturated")) |
| static cl::opt< unsigned > | NumberOfStoresToPredicate ("vectorize-num-stores-pred", cl::init(1), cl::Hidden, cl::desc("Max number of stores to be predicated behind an if.")) |
| The number of stores in a loop that are allowed to need predication. More... | |
| static cl::opt< bool > | EnableIndVarRegisterHeur ("enable-ind-var-reg-heur", cl::init(true), cl::Hidden, cl::desc("Count the induction variable only once when interleaving")) |
| static cl::opt< bool > | EnableCondStoresVectorization ("enable-cond-stores-vec", cl::init(false), cl::Hidden, cl::desc("Enable if predication of stores during vectorization.")) |
| static cl::opt< unsigned > | MaxNestedScalarReductionIC ("max-nested-scalar-reduction-interleave", cl::init(2), cl::Hidden, cl::desc("The maximum interleave count to use when interleaving a scalar ""reduction in a nested loop.")) |
| static const char | lv_name [] = "Loop Vectorization" |
| #define DEBUG_TYPE LV_NAME |
Definition at line 109 of file LoopVectorize.cpp.
| #define LV_NAME "loop-vectorize" |
Definition at line 108 of file LoopVectorize.cpp.
Adds a 'fast' flag to floating point operations.
Definition at line 2926 of file LoopVectorize.cpp.
References llvm::FastMathFlags::setUnsafeAlgebra().
|
static |
Check whether it is safe to if-convert this phi node.
Phi nodes with constant expressions that can trap are not safe to if-convert.
Definition at line 3728 of file LoopVectorize.cpp.
References llvm::BasicBlock::begin(), llvm::dyn_cast(), llvm::BasicBlock::end(), llvm::PHINode::getIncomingValue(), llvm::PHINode::getNumIncomingValues(), and I.
Definition at line 1936 of file LoopVectorize.cpp.
References llvm::IRBuilder< preserveNames, T, Inserter >::CreateShuffleVector(), llvm::dyn_cast(), llvm::UndefValue::get(), llvm::VectorType::getNumElements(), llvm::Type::getScalarType(), getSequentialMask(), and llvm::Value::getType().
Referenced by ConcatenateVectors().
Definition at line 1960 of file LoopVectorize.cpp.
References llvm::SmallVectorImpl< T >::append(), llvm::ArrayRef< T >::begin(), ConcatenateTwoVectors(), llvm::ArrayRef< T >::end(), llvm::Value::getType(), llvm::SmallVectorTemplateBase< T, isPodLike< T >::value >::push_back(), llvm::SmallVectorTemplateCommon< T >::size(), and llvm::ArrayRef< T >::size().
|
static |
Definition at line 3892 of file LoopVectorize.cpp.
References llvm::Type::getContext(), llvm::Type::getInt32Ty(), llvm::DataLayout::getIntPtrType(), llvm::Type::getScalarSizeInBits(), and llvm::Type::isPointerTy().
Referenced by getWiderType().
|
static |
Perform cse of induction variable instructions.
Definition at line 2896 of file LoopVectorize.cpp.
References llvm::BasicBlock::begin(), llvm::BasicBlock::end(), llvm::Instruction::eraseFromParent(), llvm::tgtok::In, isPredicatedBlock(), llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, InlineBuckets, KeyInfoT, BucketT >, KeyT, ValueT, KeyInfoT, BucketT >::lookup(), llvm::Value::replaceAllUsesWith(), and llvm::SmallVectorTemplateCommon< T >::size().
|
static |
Definition at line 2412 of file LoopVectorize.cpp.
References llvm::Instruction::getParent().
Definition at line 1896 of file LoopVectorize.cpp.
References llvm::ConstantVector::get(), llvm::IRBuilderBase::getInt32(), and llvm::SmallVectorTemplateBase< T, isPodLike< T >::value >::push_back().
|
static |
Estimate the overhead of scalarizing a value.
Insert and Extract are set if the result needs to be inserted and/or extracted from vectors.
Definition at line 2937 of file LoopVectorize.cpp.
References ExtractElement(), llvm::TargetTransformInfo::getVectorInstrCost(), llvm::Type::getVectorNumElements(), llvm::Type::isVectorTy(), and llvm::Type::isVoidTy().
Referenced by getVectorCallCost().
|
static |
Definition at line 1920 of file LoopVectorize.cpp.
References llvm::ConstantVector::get(), llvm::UndefValue::get(), llvm::IRBuilderBase::getInt32(), llvm::IRBuilderBase::getInt32Ty(), llvm::SmallVectorTemplateBase< T, isPodLike< T >::value >::push_back(), and llvm::RegState::Undef.
Referenced by ConcatenateTwoVectors().
|
static |
Definition at line 1908 of file LoopVectorize.cpp.
References llvm::ConstantVector::get(), llvm::IRBuilderBase::getInt32(), and llvm::SmallVectorTemplateBase< T, isPodLike< T >::value >::push_back().
|
static |
Definition at line 2959 of file LoopVectorize.cpp.
References llvm::CallInst::arg_operands(), llvm::CallInst::getCalledFunction(), llvm::TargetTransformInfo::getCallInstrCost(), llvm::Value::getName(), getScalarizationOverhead(), llvm::Value::getType(), llvm::TargetLibraryInfo::isFunctionVectorizable(), llvm::CallInst::isNoBuiltin(), llvm::SmallVectorTemplateBase< T, isPodLike< T >::value >::push_back(), and llvm::SmallVectorTemplateCommon< T >::size().
|
static |
Definition at line 3010 of file LoopVectorize.cpp.
References llvm::CallInst::getArgOperand(), llvm::getIntrinsicIDForCall(), llvm::TargetTransformInfo::getIntrinsicInstrCost(), llvm::CallInst::getNumArgOperands(), llvm::Value::getType(), and llvm::SmallVectorTemplateBase< T, isPodLike< T >::value >::push_back().
|
static |
Definition at line 3904 of file LoopVectorize.cpp.
References convertPointerToIntegerType(), and llvm::Type::getScalarSizeInBits().
|
static |
Check that the instruction has outside loop users and is not an identified reduction variable.
Definition at line 3914 of file LoopVectorize.cpp.
References llvm::LoopBase< N, M >::contains(), llvm::SmallPtrSetImpl< PtrType >::count(), llvm::dbgs(), DEBUG, and llvm::Value::users().
|
static |
Check whether the address computation for a non-consecutive memory access looks like an unlikely candidate for being merged into the indexing mode.
We look for a GEP which has one index that is an induction variable and all other indices are loop invariant. If the stride of this access is also within a small bound, we decide that this address computation can likely be merged into the addressing mode. In all other cases, we identify the address computation as complex.
Definition at line 4939 of file LoopVectorize.cpp.
References llvm::dyn_cast(), llvm::APInt::getBitWidth(), llvm::User::getNumOperands(), llvm::User::getOperand(), llvm::ScalarEvolution::getSCEV(), llvm::APInt::getSExtValue(), llvm::SCEVAddRecExpr::getStepRecurrence(), llvm::SCEVConstant::getValue(), llvm::ConstantInt::getValue(), and llvm::ScalarEvolution::isLoopInvariant().
Check whether this block is a predicated block.
Due to if-predication of stores we might create a sequence of "if(pred) a[i] = ...;" blocks. We start with one vectorized basic block. For every conditional block we split this vectorized block. Therefore, every second block will be a predicated one.
Definition at line 2891 of file LoopVectorize.cpp.
Referenced by cse().
|
static |
Definition at line 4983 of file LoopVectorize.cpp.
References llvm::User::getOperand().
STATISTIC (LoopsVectorized, "Number of loops vectorized")
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
This enables versioning on the strides of symbolically striding memory accesses in code like the following.
for (i = 0; i < N; ++i)
  A[i * Stride1] += B[i * Stride2] ...

Will be roughly translated to

if (Stride1 == 1 && Stride2 == 1) {
  for (i = 0; i < N; i+=4)
    A[i:i+3] += ...
} else
  ...
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
Definition at line 5240 of file LoopVectorize.cpp.
Maximum vectorization interleave count.
Definition at line 164 of file LoopVectorize.cpp.
|
static |
Maximum factor for an interleaved memory access.
|
static |
|
static |
The number of stores in a loop that are allowed to need predication.
|
static |
We don't interleave loops with a known constant trip count below this number.
Definition at line 153 of file LoopVectorize.cpp.
1.8.6