LLVM  mainline
Namespaces | Defines | Functions | Variables
LoopVectorize.cpp File Reference
#include "llvm/Transforms/Vectorize.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Pass.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include <algorithm>
#include <functional>
#include <map>
#include <tuple>

Go to the source code of this file.

Namespaces

namespace  llvm
 

Compute iterated dominance frontiers using a linear time algorithm.


Defines

#define LV_NAME   "loop-vectorize"
#define DEBUG_TYPE   LV_NAME

Functions

 STATISTIC (LoopsVectorized,"Number of loops vectorized")
 STATISTIC (LoopsAnalyzed,"Number of loops analyzed for vectorization")
static ConstantgetInterleavedMask (IRBuilder<> &Builder, unsigned VF, unsigned NumVec)
static ConstantgetStridedMask (IRBuilder<> &Builder, unsigned Start, unsigned Stride, unsigned VF)
static ConstantgetSequentialMask (IRBuilder<> &Builder, unsigned NumInt, unsigned NumUndef)
static ValueConcatenateTwoVectors (IRBuilder<> &Builder, Value *V1, Value *V2)
static ValueConcatenateVectors (IRBuilder<> &Builder, ArrayRef< Value * > InputList)
static bool isPredicatedBlock (unsigned BlockNum)
 Check whether this block is a predicated block.
static void cse (SmallVector< BasicBlock *, 4 > &BBs)
 Perform cse of induction variable instructions.
static ValueaddFastMathFlag (Value *V)
 Adds a 'fast' flag to floating point operations.
static unsigned getScalarizationOverhead (Type *Ty, bool Insert, bool Extract, const TargetTransformInfo &TTI)
 Estimate the overhead of scalarizing a value.
static unsigned getVectorCallCost (CallInst *CI, unsigned VF, const TargetTransformInfo &TTI, const TargetLibraryInfo *TLI, bool &NeedToScalarize)
static unsigned getVectorIntrinsicCost (CallInst *CI, unsigned VF, const TargetTransformInfo &TTI, const TargetLibraryInfo *TLI)
static TypesmallestIntegerVectorType (Type *T1, Type *T2)
static TypelargestIntegerVectorType (Type *T1, Type *T2)
static bool canIfConvertPHINodes (BasicBlock *BB)
 Check whether it is safe to if-convert this phi node.
static TypeconvertPointerToIntegerType (const DataLayout &DL, Type *Ty)
static TypegetWiderType (const DataLayout &DL, Type *Ty0, Type *Ty1)
static bool hasOutsideLoopUser (const Loop *TheLoop, Instruction *Inst, SmallPtrSetImpl< Value * > &Reductions)
 Check that the instruction has outside loop users and is not an identified reduction variable.
static bool isLikelyComplexAddressComputation (Value *Ptr, LoopVectorizationLegality *Legal, ScalarEvolution *SE, const Loop *TheLoop)
 Check whether the address computation for a non-consecutive memory access looks like an unlikely candidate for being merged into the indexing mode.
static bool isStrideMul (Instruction *I, LoopVectorizationLegality *Legal)
Passllvm::createLoopVectorizePass (bool NoUnrolling=false, bool AlwaysVectorize=true)

Variables

static cl::opt< boolEnableIfConversion ("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization."))
static cl::opt< unsignedTinyTripCountVectorThreshold ("vectorizer-min-trip-count", cl::init(16), cl::Hidden, cl::desc("Don't vectorize loops with a constant ""trip count that is smaller than this ""value."))
 We don't vectorize loops with a known constant trip count below this number.
static cl::opt< boolMaximizeBandwidth ("vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden, cl::desc("Maximize bandwidth when selecting vectorization factor which ""will be determined by the smallest type in loop."))
static cl::opt< boolEnableMemAccessVersioning ("enable-mem-access-versioning", cl::init(true), cl::Hidden, cl::desc("Enable symbolic stride memory access versioning"))
 This enables versioning on the strides of symbolically striding memory accesses in code like the following.
static cl::opt< boolEnableInterleavedMemAccesses ("enable-interleaved-mem-accesses", cl::init(false), cl::Hidden, cl::desc("Enable vectorization on interleaved memory accesses in a loop"))
static cl::opt< unsignedMaxInterleaveGroupFactor ("max-interleave-group-factor", cl::Hidden, cl::desc("Maximum factor for an interleaved access group (default = 8)"), cl::init(8))
 Maximum factor for an interleaved memory access.
static const unsigned TinyTripCountInterleaveThreshold = 128
 We don't interleave loops with a known constant trip count below this number.
static cl::opt< unsignedForceTargetNumScalarRegs ("force-target-num-scalar-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of scalar registers."))
static cl::opt< unsignedForceTargetNumVectorRegs ("force-target-num-vector-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of vector registers."))
static const unsigned MaxInterleaveFactor = 16
 Maximum vectorization interleave count.
static cl::opt< unsignedForceTargetMaxScalarInterleaveFactor ("force-target-max-scalar-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for ""scalar loops."))
static cl::opt< unsignedForceTargetMaxVectorInterleaveFactor ("force-target-max-vector-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for ""vectorized loops."))
static cl::opt< unsignedForceTargetInstructionCost ("force-target-instruction-cost", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's expected cost for ""an instruction to a single constant value. Mostly ""useful for getting consistent testing."))
static cl::opt< unsignedSmallLoopCost ("small-loop-cost", cl::init(20), cl::Hidden, cl::desc("The cost of a loop that is considered 'small' by the interleaver."))
static cl::opt< boolLoopVectorizeWithBlockFrequency ("loop-vectorize-with-block-frequency", cl::init(false), cl::Hidden, cl::desc("Enable the use of the block frequency analysis to access PGO ""heuristics minimizing code growth in cold regions and being more ""aggressive in hot regions."))
static cl::opt< boolEnableLoadStoreRuntimeInterleave ("enable-loadstore-runtime-interleave", cl::init(true), cl::Hidden, cl::desc("Enable runtime interleaving until load/store ports are saturated"))
static cl::opt< unsignedNumberOfStoresToPredicate ("vectorize-num-stores-pred", cl::init(1), cl::Hidden, cl::desc("Max number of stores to be predicated behind an if."))
 The number of stores in a loop that are allowed to need predication.
static cl::opt< boolEnableIndVarRegisterHeur ("enable-ind-var-reg-heur", cl::init(true), cl::Hidden, cl::desc("Count the induction variable only once when interleaving"))
static cl::opt< boolEnableCondStoresVectorization ("enable-cond-stores-vec", cl::init(false), cl::Hidden, cl::desc("Enable if predication of stores during vectorization."))
static cl::opt< unsignedMaxNestedScalarReductionIC ("max-nested-scalar-reduction-interleave", cl::init(2), cl::Hidden, cl::desc("The maximum interleave count to use when interleaving a scalar ""reduction in a nested loop."))
static cl::opt< unsignedPragmaVectorizeMemoryCheckThreshold ("pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum allowed number of runtime memory checks with a ""vectorize(enable) pragma."))
static cl::opt< unsignedVectorizeSCEVCheckThreshold ("vectorize-scev-check-threshold", cl::init(16), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed."))
static cl::opt< unsignedPragmaVectorizeSCEVCheckThreshold ("pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed with a ""vectorize(enable) pragma"))
static const char lv_name [] = "Loop Vectorization"

Define Documentation

#define DEBUG_TYPE   LV_NAME

Definition at line 112 of file LoopVectorize.cpp.

#define LV_NAME   "loop-vectorize"

Definition at line 111 of file LoopVectorize.cpp.


Function Documentation

static Value* addFastMathFlag ( Value V) [static]

Adds a 'fast' flag to floating point operations.

Definition at line 3047 of file LoopVectorize.cpp.

References llvm::FastMathFlags::setUnsafeAlgebra().

static bool canIfConvertPHINodes ( BasicBlock BB) [static]

Check whether it is safe to if-convert this phi node.

Phi nodes with constant expressions that can trap are not safe to if convert.

Definition at line 3988 of file LoopVectorize.cpp.

References llvm::BasicBlock::begin(), llvm::dyn_cast(), E, llvm::BasicBlock::end(), llvm::PHINode::getIncomingValue(), llvm::PHINode::getNumIncomingValues(), and I.

static Value* ConcatenateTwoVectors ( IRBuilder<> &  Builder,
Value V1,
Value V2 
) [static]
static Value* ConcatenateVectors ( IRBuilder<> &  Builder,
ArrayRef< Value * >  InputList 
) [static]
static Type* convertPointerToIntegerType ( const DataLayout DL,
Type Ty 
) [static]
static void cse ( SmallVector< BasicBlock *, 4 > &  BBs) [static]
static Constant* getInterleavedMask ( IRBuilder<> &  Builder,
unsigned  VF,
unsigned  NumVec 
) [static]
static unsigned getScalarizationOverhead ( Type Ty,
bool  Insert,
bool  Extract,
const TargetTransformInfo TTI 
) [static]

Estimate the overhead of scalarizing a value.

Insert and Extract are set if the result needs to be inserted and/or extracted from vectors.

Definition at line 3058 of file LoopVectorize.cpp.

References ExtractElement(), llvm::TargetTransformInfo::getVectorInstrCost(), llvm::Type::getVectorNumElements(), llvm::InsertElement, llvm::Type::isVectorTy(), and llvm::Type::isVoidTy().

Referenced by getVectorCallCost().

static Constant* getSequentialMask ( IRBuilder<> &  Builder,
unsigned  NumInt,
unsigned  NumUndef 
) [static]
static Constant* getStridedMask ( IRBuilder<> &  Builder,
unsigned  Start,
unsigned  Stride,
unsigned  VF 
) [static]
static unsigned getVectorCallCost ( CallInst CI,
unsigned  VF,
const TargetTransformInfo TTI,
const TargetLibraryInfo TLI,
bool NeedToScalarize 
) [static]
static unsigned getVectorIntrinsicCost ( CallInst CI,
unsigned  VF,
const TargetTransformInfo TTI,
const TargetLibraryInfo TLI 
) [static]
static Type* getWiderType ( const DataLayout DL,
Type Ty0,
Type Ty1 
) [static]
static bool hasOutsideLoopUser ( const Loop TheLoop,
Instruction Inst,
SmallPtrSetImpl< Value * > &  Reductions 
) [static]

Check that the instruction has outside loop users and is not an identified reduction variable.

Definition at line 4192 of file LoopVectorize.cpp.

References llvm::LoopBase< BlockT, LoopT >::contains(), llvm::SmallPtrSetImpl< PtrType >::count(), llvm::dbgs(), DEBUG, and llvm::Value::users().

static bool isLikelyComplexAddressComputation ( Value Ptr,
LoopVectorizationLegality *  Legal,
ScalarEvolution SE,
const Loop TheLoop 
) [static]

Check whether the address computation for a non-consecutive memory access looks like an unlikely candidate for being merged into the indexing mode.

We look for a GEP which has one index that is an induction variable and all other indices are loop invariant. If the stride of this access is also within a small bound we decide that this address computation can likely be merged into the addressing mode. In all other cases, we identify the address computation as complex.

Definition at line 5274 of file LoopVectorize.cpp.

References llvm::dyn_cast(), llvm::SCEVConstant::getAPInt(), llvm::APInt::getBitWidth(), llvm::User::getNumOperands(), llvm::User::getOperand(), llvm::ScalarEvolution::getSCEV(), llvm::APInt::getSExtValue(), llvm::SCEVAddRecExpr::getStepRecurrence(), and llvm::ScalarEvolution::isLoopInvariant().

static bool isPredicatedBlock ( unsigned  BlockNum) [static]

Check whether this block is a predicated block.

Due to if predication of stores we might create a sequence of "if(pred) a[i] = ...; " blocks. We start with one vectorized basic block. For every conditional block we split this vectorized block. Therefore, every second block will be a predicated one.

Definition at line 3012 of file LoopVectorize.cpp.

Referenced by cse().

static bool isStrideMul ( Instruction I,
LoopVectorizationLegality *  Legal 
) [static]

Definition at line 5318 of file LoopVectorize.cpp.

References llvm::User::getOperand().

static Type* largestIntegerVectorType ( Type T1,
Type T2 
) [static]
static Type* smallestIntegerVectorType ( Type T1,
Type T2 
) [static]
STATISTIC ( LoopsVectorized  ,
"Number of loops vectorized"   
)
STATISTIC ( LoopsAnalyzed  ,
"Number of loops analyzed for vectorization"   
)

Variable Documentation

cl::opt<bool> EnableCondStoresVectorization("enable-cond-stores-vec", cl::init(false), cl::Hidden, cl::desc("Enable if predication of stores during vectorization.")) [static]
cl::opt<bool> EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization.")) [static]
cl::opt<bool> EnableIndVarRegisterHeur("enable-ind-var-reg-heur", cl::init(true), cl::Hidden, cl::desc("Count the induction variable only once when interleaving")) [static]
cl::opt<bool> EnableInterleavedMemAccesses("enable-interleaved-mem-accesses", cl::init(false), cl::Hidden, cl::desc("Enable vectorization on interleaved memory accesses in a loop")) [static]
cl::opt<bool> EnableLoadStoreRuntimeInterleave("enable-loadstore-runtime-interleave", cl::init(true), cl::Hidden, cl::desc("Enable runtime interleaving until load/store ports are saturated")) [static]
cl::opt<bool> EnableMemAccessVersioning("enable-mem-access-versioning", cl::init(true), cl::Hidden, cl::desc("Enable symbolic stride memory access versioning")) [static]

This enables versioning on the strides of symbolically striding memory accesses in code like the following.

for (i = 0; i < N; ++i) A[i * Stride1] += B[i * Stride2] ...

Will be roughly translated to if (Stride1 == 1 && Stride2 == 1) { for (i = 0; i < N; i+=4) A[i:i+3] += ... } else ...

cl::opt<unsigned> ForceTargetInstructionCost("force-target-instruction-cost", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's expected cost for ""an instruction to a single constant value. Mostly ""useful for getting consistent testing.")) [static]
cl::opt<unsigned> ForceTargetMaxScalarInterleaveFactor("force-target-max-scalar-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for ""scalar loops.")) [static]
cl::opt<unsigned> ForceTargetMaxVectorInterleaveFactor("force-target-max-vector-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for ""vectorized loops.")) [static]
cl::opt<unsigned> ForceTargetNumScalarRegs("force-target-num-scalar-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of scalar registers.")) [static]
cl::opt<unsigned> ForceTargetNumVectorRegs("force-target-num-vector-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of vector registers.")) [static]
cl::opt<bool> LoopVectorizeWithBlockFrequency("loop-vectorize-with-block-frequency", cl::init(false), cl::Hidden, cl::desc("Enable the use of the block frequency analysis to access PGO ""heuristics minimizing code growth in cold regions and being more ""aggressive in hot regions.")) [static]
const char lv_name[] = "Loop Vectorization" [static]

Definition at line 5601 of file LoopVectorize.cpp.

cl::opt<bool> MaximizeBandwidth("vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden, cl::desc("Maximize bandwidth when selecting vectorization factor which ""will be determined by the smallest type in loop.")) [static]

Maximum vectorization interleave count.

Definition at line 172 of file LoopVectorize.cpp.

cl::opt<unsigned> MaxInterleaveGroupFactor("max-interleave-group-factor", cl::Hidden, cl::desc("Maximum factor for an interleaved access group (default = 8)"), cl::init(8)) [static]

Maximum factor for an interleaved memory access.

cl::opt<unsigned> MaxNestedScalarReductionIC("max-nested-scalar-reduction-interleave", cl::init(2), cl::Hidden, cl::desc("The maximum interleave count to use when interleaving a scalar ""reduction in a nested loop.")) [static]
cl::opt<unsigned> NumberOfStoresToPredicate("vectorize-num-stores-pred", cl::init(1), cl::Hidden, cl::desc("Max number of stores to be predicated behind an if.")) [static]

The number of stores in a loop that are allowed to need predication.

cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold("pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum allowed number of runtime memory checks with a ""vectorize(enable) pragma.")) [static]
cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold("pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed with a ""vectorize(enable) pragma")) [static]
cl::opt<unsigned> SmallLoopCost("small-loop-cost", cl::init(20), cl::Hidden, cl::desc("The cost of a loop that is considered 'small' by the interleaver.")) [static]

We don't interleave loops with a known constant trip count below this number.

Definition at line 161 of file LoopVectorize.cpp.

cl::opt<unsigned> TinyTripCountVectorThreshold("vectorizer-min-trip-count", cl::init(16), cl::Hidden, cl::desc("Don't vectorize loops with a constant ""trip count that is smaller than this ""value.")) [static]

We don't vectorize loops with a known constant trip count below this number.

cl::opt<unsigned> VectorizeSCEVCheckThreshold("vectorize-scev-check-threshold", cl::init(16), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed.")) [static]