|
LLVM
4.0.0
|
#include "llvm/Transforms/Vectorize/LoopVectorize.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Pass.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/LoopVersioning.h"
#include "llvm/Transforms/Vectorize.h"
#include <algorithm>
#include <map>
#include <tuple>
Go to the source code of this file.
Namespaces | |
| llvm | |
| Compute iterated dominance frontiers using a linear time algorithm. | |
Macros | |
| #define | LV_NAME "loop-vectorize" |
| #define | DEBUG_TYPE LV_NAME |
Functions | |
| STATISTIC (LoopsVectorized,"Number of loops vectorized") | |
| STATISTIC (LoopsAnalyzed,"Number of loops analyzed for vectorization") | |
| static OptimizationRemarkAnalysis | createMissedAnalysis (const char *PassName, StringRef RemarkName, Loop *TheLoop, Instruction *I=nullptr) |
| Create an analysis remark that explains why vectorization failed. More... | |
| static Constant * | getInterleavedMask (IRBuilder<> &Builder, unsigned VF, unsigned NumVec) |
| static Constant * | getStridedMask (IRBuilder<> &Builder, unsigned Start, unsigned Stride, unsigned VF) |
| static Constant * | getSequentialMask (IRBuilder<> &Builder, unsigned NumInt, unsigned NumUndef) |
| static Value * | ConcatenateTwoVectors (IRBuilder<> &Builder, Value *V1, Value *V2) |
| static Value * | ConcatenateVectors (IRBuilder<> &Builder, ArrayRef< Value * > InputList) |
| static void | cse (BasicBlock *BB) |
| Perform cse of induction variable instructions. More... | |
| static Value * | addFastMathFlag (Value *V) |
| Adds a 'fast' flag to floating point operations. More... | |
| static unsigned | getScalarizationOverhead (Type *Ty, bool Insert, bool Extract, const TargetTransformInfo &TTI) |
| Estimate the overhead of scalarizing a value based on its type. More... | |
| static unsigned | getScalarizationOverhead (SmallVectorImpl< Type * > &OpTys, Type *RetTy, const TargetTransformInfo &TTI) |
| Estimate the overhead of scalarizing an Instruction based on the types of its operands and return value. More... | |
| static unsigned | getScalarizationOverhead (Instruction *I, unsigned VF, const TargetTransformInfo &TTI) |
| Estimate the overhead of scalarizing an instruction. More... | |
| static unsigned | getVectorCallCost (CallInst *CI, unsigned VF, const TargetTransformInfo &TTI, const TargetLibraryInfo *TLI, bool &NeedToScalarize) |
| static unsigned | getVectorIntrinsicCost (CallInst *CI, unsigned VF, const TargetTransformInfo &TTI, const TargetLibraryInfo *TLI) |
| static Type * | smallestIntegerVectorType (Type *T1, Type *T2) |
| static Type * | largestIntegerVectorType (Type *T1, Type *T2) |
| static bool | mayDivideByZero (Instruction &I) |
| A helper function for checking whether an integer division-related instruction may divide by zero (in which case it must be predicated if executed conditionally in the scalar code). More... | |
| static bool | canIfConvertPHINodes (BasicBlock *BB) |
| Check whether it is safe to if-convert this phi node. More... | |
| static Type * | convertPointerToIntegerType (const DataLayout &DL, Type *Ty) |
| static Type * | getWiderType (const DataLayout &DL, Type *Ty0, Type *Ty1) |
| static bool | hasOutsideLoopUser (const Loop *TheLoop, Instruction *Inst, SmallPtrSetImpl< Value * > &AllowedExit) |
| Check that the instruction has outside loop users and is not an identified reduction variable. More... | |
| static const SCEV * | getAddressAccessSCEV (Value *Ptr, LoopVectorizationLegality *Legal, ScalarEvolution *SE, const Loop *TheLoop) |
| Gets Address Access SCEV after verifying that the access pattern is loop invariant except the induction variable dependence. More... | |
| static bool | isStrideMul (Instruction *I, LoopVectorizationLegality *Legal) |
| Pass * | llvm::createLoopVectorizePass (bool NoUnrolling=false, bool AlwaysVectorize=true) |
| static void | AddRuntimeUnrollDisableMetaData (Loop *L) |
Variables | |
| static cl::opt< bool > | EnableIfConversion ("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization.")) |
| static cl::opt< unsigned > | TinyTripCountVectorThreshold ("vectorizer-min-trip-count", cl::init(16), cl::Hidden, cl::desc("Don't vectorize loops with a constant ""trip count that is smaller than this ""value.")) |
| We don't vectorize loops with a known constant trip count below this number. More... | |
| static cl::opt< bool > | MaximizeBandwidth ("vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden, cl::desc("Maximize bandwidth when selecting vectorization factor which ""will be determined by the smallest type in loop.")) |
| static cl::opt< bool > | EnableInterleavedMemAccesses ("enable-interleaved-mem-accesses", cl::init(false), cl::Hidden, cl::desc("Enable vectorization on interleaved memory accesses in a loop")) |
| static cl::opt< unsigned > | MaxInterleaveGroupFactor ("max-interleave-group-factor", cl::Hidden, cl::desc("Maximum factor for an interleaved access group (default = 8)"), cl::init(8)) |
| Maximum factor for an interleaved memory access. More... | |
| static const unsigned | TinyTripCountInterleaveThreshold = 128 |
| We don't interleave loops with a known constant trip count below this number. More... | |
| static cl::opt< unsigned > | ForceTargetNumScalarRegs ("force-target-num-scalar-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of scalar registers.")) |
| static cl::opt< unsigned > | ForceTargetNumVectorRegs ("force-target-num-vector-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of vector registers.")) |
| static const unsigned | MaxInterleaveFactor = 16 |
| Maximum vectorization interleave count. More... | |
| static cl::opt< unsigned > | ForceTargetMaxScalarInterleaveFactor ("force-target-max-scalar-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for ""scalar loops.")) |
| static cl::opt< unsigned > | ForceTargetMaxVectorInterleaveFactor ("force-target-max-vector-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for ""vectorized loops.")) |
| static cl::opt< unsigned > | ForceTargetInstructionCost ("force-target-instruction-cost", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's expected cost for ""an instruction to a single constant value. Mostly ""useful for getting consistent testing.")) |
| static cl::opt< unsigned > | SmallLoopCost ("small-loop-cost", cl::init(20), cl::Hidden, cl::desc("The cost of a loop that is considered 'small' by the interleaver.")) |
| static cl::opt< bool > | LoopVectorizeWithBlockFrequency ("loop-vectorize-with-block-frequency", cl::init(false), cl::Hidden, cl::desc("Enable the use of the block frequency analysis to access PGO ""heuristics minimizing code growth in cold regions and being more ""aggressive in hot regions.")) |
| static cl::opt< bool > | EnableLoadStoreRuntimeInterleave ("enable-loadstore-runtime-interleave", cl::init(true), cl::Hidden, cl::desc("Enable runtime interleaving until load/store ports are saturated")) |
| static cl::opt< unsigned > | NumberOfStoresToPredicate ("vectorize-num-stores-pred", cl::init(1), cl::Hidden, cl::desc("Max number of stores to be predicated behind an if.")) |
| The number of stores in a loop that are allowed to need predication. More... | |
| static cl::opt< bool > | EnableIndVarRegisterHeur ("enable-ind-var-reg-heur", cl::init(true), cl::Hidden, cl::desc("Count the induction variable only once when interleaving")) |
| static cl::opt< bool > | EnableCondStoresVectorization ("enable-cond-stores-vec", cl::init(true), cl::Hidden, cl::desc("Enable if predication of stores during vectorization.")) |
| static cl::opt< unsigned > | MaxNestedScalarReductionIC ("max-nested-scalar-reduction-interleave", cl::init(2), cl::Hidden, cl::desc("The maximum interleave count to use when interleaving a scalar ""reduction in a nested loop.")) |
| static cl::opt< unsigned > | PragmaVectorizeMemoryCheckThreshold ("pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum allowed number of runtime memory checks with a ""vectorize(enable) pragma.")) |
| static cl::opt< unsigned > | VectorizeSCEVCheckThreshold ("vectorize-scev-check-threshold", cl::init(16), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed.")) |
| static cl::opt< unsigned > | PragmaVectorizeSCEVCheckThreshold ("pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed with a ""vectorize(enable) pragma")) |
| static const char | lv_name [] = "Loop Vectorization" |
| #define DEBUG_TYPE LV_NAME |
Definition at line 106 of file LoopVectorize.cpp.
| #define LV_NAME "loop-vectorize" |
Definition at line 105 of file LoopVectorize.cpp.
Referenced by llvm::LoopVectorizePass::processLoop().
Adds a 'fast' flag to floating point operations.
Definition at line 3593 of file LoopVectorize.cpp.
References fuzzer::Flags and llvm::FastMathFlags::setUnsafeAlgebra().
|
static |
Definition at line 7314 of file LoopVectorize.cpp.
References llvm::dyn_cast(), llvm::MDString::get(), llvm::MDNode::get(), llvm::LoopBase< N, M >::getHeader(), llvm::Loop::getLoopID(), llvm::MDNode::getNumOperands(), llvm::MDNode::getOperand(), llvm::MDString::getString(), i, llvm::SmallVectorTemplateBase< T, isPodLike< T >::value >::push_back(), llvm::MDNode::replaceOperandWith(), llvm::Loop::setLoopID(), and llvm::StringRef::startswith().
Referenced by llvm::LoopVectorizePass::processLoop().
|
static |
Check whether it is safe to if-convert this phi node.
Phi nodes with constant expressions that can trap are not safe to if-convert.
Definition at line 4995 of file LoopVectorize.cpp.
References llvm::dyn_cast(), I, and llvm::PHINode::incoming_values().
Definition at line 2598 of file LoopVectorize.cpp.
References assert(), llvm::IRBuilder< T, Inserter >::CreateShuffleVector(), llvm::dyn_cast(), llvm::UndefValue::get(), llvm::SequentialType::getNumElements(), llvm::Type::getScalarType(), getSequentialMask(), llvm::Value::getType(), and llvm::BitmaskEnumDetail::Mask().
Referenced by ConcatenateVectors().
Definition at line 2622 of file LoopVectorize.cpp.
References llvm::SmallVectorImpl< T >::append(), assert(), llvm::ArrayRef< T >::begin(), ConcatenateTwoVectors(), llvm::ArrayRef< T >::end(), llvm::Value::getType(), i, llvm::SmallVectorTemplateBase< T, isPodLike >::push_back(), llvm::SmallVectorTemplateCommon< T, typename >::size(), and llvm::ArrayRef< T >::size().
|
static |
Definition at line 5172 of file LoopVectorize.cpp.
References llvm::Type::getContext(), llvm::Type::getInt32Ty(), llvm::DataLayout::getIntPtrType(), llvm::Type::getScalarSizeInBits(), and llvm::Type::isPointerTy().
Referenced by getWiderType().
|
static |
Create an analysis remark that explains why vectorization failed.
PassName is the name of the pass (it can be, e.g., AlwaysPrint). RemarkName is the identifier for the remark. If I is passed, it is an instruction that prevents vectorization; otherwise, TheLoop is used for the location of the remark.
Definition at line 225 of file LoopVectorize.cpp.
References llvm::LoopBase< N, M >::getHeader(), llvm::Loop::getStartLoc(), and I.
Referenced by llvm::LoopVectorizePass::processLoop().
|
static |
Perform cse of induction variable instructions.
Definition at line 3571 of file LoopVectorize.cpp.
References llvm::BasicBlock::begin(), E, llvm::BasicBlock::end(), llvm::Instruction::eraseFromParent(), llvm::tgtok::In, llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, InlineBuckets, KeyInfoT, BucketT >, KeyT, ValueT, KeyInfoT, BucketT >::lookup(), and llvm::Value::replaceAllUsesWith().
|
static |
Gets Address Access SCEV after verifying that the access pattern is loop invariant except the induction variable dependence.
This SCEV can be sent to the Target in order to estimate the address calculation cost.
Definition at line 6807 of file LoopVectorize.cpp.
References llvm::dyn_cast(), llvm::User::getNumOperands(), llvm::User::getOperand(), llvm::ScalarEvolution::getSCEV(), i, llvm::ScalarEvolution::isLoopInvariant(), and Ptr.
Definition at line 2558 of file LoopVectorize.cpp.
References llvm::ConstantVector::get(), llvm::IRBuilderBase::getInt32(), i, llvm::BitmaskEnumDetail::Mask(), and llvm::SmallVectorTemplateBase< T, isPodLike< T >::value >::push_back().
|
static |
Estimate the overhead of scalarizing a value based on its type.
Insert and Extract are set if the result needs to be inserted and/or extracted from vectors.
Definition at line 3605 of file LoopVectorize.cpp.
References assert(), E, llvm::TargetTransformInfo::getVectorInstrCost(), llvm::Type::getVectorNumElements(), I, llvm::Type::isVectorTy(), and llvm::Type::isVoidTy().
Referenced by getScalarizationOverhead() and getVectorCallCost().
|
static |
Estimate the overhead of scalarizing an Instruction based on the types of its operands and return value.
Definition at line 3625 of file LoopVectorize.cpp.
References getScalarizationOverhead().
|
static |
Estimate the overhead of scalarizing an instruction.
This is a convenience wrapper for the type-based getScalarizationOverhead API.
Definition at line 3639 of file LoopVectorize.cpp.
References llvm::User::getNumOperands(), llvm::User::getOperand(), getScalarizationOverhead(), and llvm::Value::getType().
|
static |
Definition at line 2582 of file LoopVectorize.cpp.
References llvm::ConstantVector::get(), llvm::UndefValue::get(), llvm::IRBuilderBase::getInt32(), llvm::IRBuilderBase::getInt32Ty(), i, llvm::BitmaskEnumDetail::Mask(), llvm::SmallVectorTemplateBase< T, isPodLike< T >::value >::push_back(), and llvm::RegState::Undef.
Referenced by ConcatenateTwoVectors().
|
static |
Definition at line 2570 of file LoopVectorize.cpp.
References llvm::ConstantVector::get(), llvm::IRBuilderBase::getInt32(), i, llvm::BitmaskEnumDetail::Mask(), and llvm::SmallVectorTemplateBase< T, isPodLike< T >::value >::push_back().
|
static |
Definition at line 3658 of file LoopVectorize.cpp.
References llvm::CallInst::arg_operands(), llvm::CallInst::getCalledFunction(), llvm::TargetTransformInfo::getCallInstrCost(), llvm::Value::getName(), getScalarizationOverhead(), llvm::Value::getType(), llvm::TargetLibraryInfo::isFunctionVectorizable(), llvm::CallInst::isNoBuiltin(), and llvm::SmallVectorTemplateBase< T, isPodLike< T >::value >::push_back().
|
static |
Definition at line 3706 of file LoopVectorize.cpp.
References llvm::CallInst::arg_operands(), assert(), llvm::TargetTransformInfo::getIntrinsicInstrCost(), llvm::Value::getType(), llvm::getVectorIntrinsicIDForCall(), and llvm::SmallVectorTemplateBase< T, isPodLike >::push_back().
|
static |
Definition at line 5184 of file LoopVectorize.cpp.
References convertPointerToIntegerType() and llvm::Type::getScalarSizeInBits().
|
static |
Check that the instruction has outside loop users and is not an identified reduction variable.
Definition at line 5194 of file LoopVectorize.cpp.
References llvm::LoopBase< N, M >::contains(), llvm::SmallPtrSetImpl< PtrType >::count(), llvm::dbgs(), DEBUG, and llvm::Value::users().
|
static |
Definition at line 6830 of file LoopVectorize.cpp.
References llvm::User::getOperand().
Definition at line 3729 of file LoopVectorize.cpp.
References llvm::Type::getVectorElementType().
|
static |
A helper function for checking whether an integer division-related instruction may divide by zero (in which case it must be predicated if executed conditionally in the scalar code).
TODO: It may be worthwhile to generalize and check isKnownNonZero(). Non-zero divisors that are not compile-time constants will not be converted into multiplication, so we will still end up scalarizing the division, but can do so without predication.
Definition at line 4695 of file LoopVectorize.cpp.
References assert(), llvm::dyn_cast(), llvm::Instruction::getOpcode(), llvm::User::getOperand(), and llvm::ConstantInt::isZero().
Definition at line 3724 of file LoopVectorize.cpp.
References llvm::Type::getVectorElementType().
| STATISTIC | ( | LoopsVectorized | , |
| "Number of loops vectorized" | |||
| ) |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
Referenced by llvm::LoopVectorizePass::processLoop().
|
static |
Definition at line 7165 of file LoopVectorize.cpp.
|
static |
Maximum vectorization interleave count.
Definition at line 150 of file LoopVectorize.cpp.
|
static |
Maximum factor for an interleaved memory access.
|
static |
|
static |
The number of stores in a loop that are allowed to need predication.
|
static |
|
static |
|
static |
We don't interleave loops with a known constant trip count below this number.
Definition at line 139 of file LoopVectorize.cpp.
|
static |
We don't vectorize loops with a known constant trip count below this number.
Referenced by llvm::LoopVectorizePass::processLoop().
1.8.6