LLVM  3.7.0
Namespaces | Macros | Functions | Variables
LoopVectorize.cpp File Reference
#include "llvm/Transforms/Vectorize.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Pass.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include <algorithm>
#include <map>
#include <tuple>

Go to the source code of this file.

Namespaces

 llvm
 Compute iterated dominance frontiers using a linear time algorithm.
 

Macros

#define LV_NAME   "loop-vectorize"
 
#define DEBUG_TYPE   LV_NAME
 

Functions

 STATISTIC (LoopsVectorized,"Number of loops vectorized")
 
 STATISTIC (LoopsAnalyzed,"Number of loops analyzed for vectorization")
 
static ConstantgetInterleavedMask (IRBuilder<> &Builder, unsigned VF, unsigned NumVec)
 
static ConstantgetStridedMask (IRBuilder<> &Builder, unsigned Start, unsigned Stride, unsigned VF)
 
static ConstantgetSequentialMask (IRBuilder<> &Builder, unsigned NumInt, unsigned NumUndef)
 
static ValueConcatenateTwoVectors (IRBuilder<> &Builder, Value *V1, Value *V2)
 
static ValueConcatenateVectors (IRBuilder<> &Builder, ArrayRef< Value * > InputList)
 
static InstructiongetFirstInst (Instruction *FirstInst, Value *V, Instruction *Loc)
 
static bool isPredicatedBlock (unsigned BlockNum)
 Check whether this block is a predicated block. More...
 
static void cse (SmallVector< BasicBlock *, 4 > &BBs)
 Perform cse of induction variable instructions. More...
 
static ValueaddFastMathFlag (Value *V)
 Adds a 'fast' flag to floating point operations. More...
 
static unsigned getScalarizationOverhead (Type *Ty, bool Insert, bool Extract, const TargetTransformInfo &TTI)
 Estimate the overhead of scalarizing a value. More...
 
static unsigned getVectorCallCost (CallInst *CI, unsigned VF, const TargetTransformInfo &TTI, const TargetLibraryInfo *TLI, bool &NeedToScalarize)
 
static unsigned getVectorIntrinsicCost (CallInst *CI, unsigned VF, const TargetTransformInfo &TTI, const TargetLibraryInfo *TLI)
 
static bool canIfConvertPHINodes (BasicBlock *BB)
 Check whether it is safe to if-convert this phi node. More...
 
static TypeconvertPointerToIntegerType (const DataLayout &DL, Type *Ty)
 
static TypegetWiderType (const DataLayout &DL, Type *Ty0, Type *Ty1)
 
static bool hasOutsideLoopUser (const Loop *TheLoop, Instruction *Inst, SmallPtrSetImpl< Value * > &Reductions)
 Check that the instruction has outside loop users and is not an identified reduction variable. More...
 
static bool isLikelyComplexAddressComputation (Value *Ptr, LoopVectorizationLegality *Legal, ScalarEvolution *SE, const Loop *TheLoop)
 Check whether the address computation for a non-consecutive memory access looks like an unlikely candidate for being merged into the indexing mode. More...
 
static bool isStrideMul (Instruction *I, LoopVectorizationLegality *Legal)
 
Passllvm::createLoopVectorizePass (bool NoUnrolling=false, bool AlwaysVectorize=true)
 

Variables

static cl::opt< boolEnableIfConversion ("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization."))
 
static cl::opt< unsignedTinyTripCountVectorThreshold ("vectorizer-min-trip-count", cl::init(16), cl::Hidden, cl::desc("Don't vectorize loops with a constant ""trip count that is smaller than this ""value."))
 We don't vectorize loops with a known constant trip count below this number. More...
 
static cl::opt< boolEnableMemAccessVersioning ("enable-mem-access-versioning", cl::init(true), cl::Hidden, cl::desc("Enable symblic stride memory access versioning"))
 This enables versioning on the strides of symbolically striding memory accesses in code like the following. More...
 
static cl::opt< boolEnableInterleavedMemAccesses ("enable-interleaved-mem-accesses", cl::init(false), cl::Hidden, cl::desc("Enable vectorization on interleaved memory accesses in a loop"))
 
static cl::opt< unsignedMaxInterleaveGroupFactor ("max-interleave-group-factor", cl::Hidden, cl::desc("Maximum factor for an interleaved access group (default = 8)"), cl::init(8))
 Maximum factor for an interleaved memory access. More...
 
static const unsigned TinyTripCountInterleaveThreshold = 128
 We don't interleave loops with a known constant trip count below this number. More...
 
static cl::opt< unsignedForceTargetNumScalarRegs ("force-target-num-scalar-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of scalar registers."))
 
static cl::opt< unsignedForceTargetNumVectorRegs ("force-target-num-vector-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of vector registers."))
 
static const unsigned MaxInterleaveFactor = 16
 Maximum vectorization interleave count. More...
 
static cl::opt< unsignedForceTargetMaxScalarInterleaveFactor ("force-target-max-scalar-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for ""scalar loops."))
 
static cl::opt< unsignedForceTargetMaxVectorInterleaveFactor ("force-target-max-vector-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for ""vectorized loops."))
 
static cl::opt< unsignedForceTargetInstructionCost ("force-target-instruction-cost", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's expected cost for ""an instruction to a single constant value. Mostly ""useful for getting consistent testing."))
 
static cl::opt< unsignedSmallLoopCost ("small-loop-cost", cl::init(20), cl::Hidden, cl::desc("The cost of a loop that is considered 'small' by the interleaver."))
 
static cl::opt< boolLoopVectorizeWithBlockFrequency ("loop-vectorize-with-block-frequency", cl::init(false), cl::Hidden, cl::desc("Enable the use of the block frequency analysis to access PGO ""heuristics minimizing code growth in cold regions and being more ""aggressive in hot regions."))
 
static cl::opt< boolEnableLoadStoreRuntimeInterleave ("enable-loadstore-runtime-interleave", cl::init(true), cl::Hidden, cl::desc("Enable runtime interleaving until load/store ports are saturated"))
 
static cl::opt< unsignedNumberOfStoresToPredicate ("vectorize-num-stores-pred", cl::init(1), cl::Hidden, cl::desc("Max number of stores to be predicated behind an if."))
 The number of stores in a loop that are allowed to need predication. More...
 
static cl::opt< boolEnableIndVarRegisterHeur ("enable-ind-var-reg-heur", cl::init(true), cl::Hidden, cl::desc("Count the induction variable only once when interleaving"))
 
static cl::opt< boolEnableCondStoresVectorization ("enable-cond-stores-vec", cl::init(false), cl::Hidden, cl::desc("Enable if predication of stores during vectorization."))
 
static cl::opt< unsignedMaxNestedScalarReductionIC ("max-nested-scalar-reduction-interleave", cl::init(2), cl::Hidden, cl::desc("The maximum interleave count to use when interleaving a scalar ""reduction in a nested loop."))
 
static const char lv_name [] = "Loop Vectorization"
 

Macro Definition Documentation

#define DEBUG_TYPE   LV_NAME

Definition at line 109 of file LoopVectorize.cpp.

#define LV_NAME   "loop-vectorize"

Definition at line 108 of file LoopVectorize.cpp.

Function Documentation

static Value* addFastMathFlag ( Value V)
static

Adds a 'fast' flag to floating point operations.

Definition at line 2926 of file LoopVectorize.cpp.

References llvm::FastMathFlags::setUnsafeAlgebra().

static bool canIfConvertPHINodes ( BasicBlock BB)
static

Check whether it is safe to if-convert this phi node.

Phi nodes with constant expressions that can trap are not safe to if convert.

Definition at line 3728 of file LoopVectorize.cpp.

References llvm::BasicBlock::begin(), llvm::dyn_cast(), llvm::BasicBlock::end(), llvm::PHINode::getIncomingValue(), llvm::PHINode::getNumIncomingValues(), and I.

static Value* ConcatenateTwoVectors ( IRBuilder<> &  Builder,
Value V1,
Value V2 
)
static
static Value* ConcatenateVectors ( IRBuilder<> &  Builder,
ArrayRef< Value * >  InputList 
)
static
static Type* convertPointerToIntegerType ( const DataLayout DL,
Type Ty 
)
static
static void cse ( SmallVector< BasicBlock *, 4 > &  BBs)
static
static Instruction* getFirstInst ( Instruction FirstInst,
Value V,
Instruction Loc 
)
static

Definition at line 2412 of file LoopVectorize.cpp.

References llvm::Instruction::getParent().

static Constant* getInterleavedMask ( IRBuilder<> &  Builder,
unsigned  VF,
unsigned  NumVec 
)
static
static unsigned getScalarizationOverhead ( Type Ty,
bool  Insert,
bool  Extract,
const TargetTransformInfo TTI 
)
static

Estimate the overhead of scalarizing a value.

Insert and Extract are set if the result needs to be inserted and/or extracted from vectors.

Definition at line 2937 of file LoopVectorize.cpp.

References ExtractElement(), llvm::TargetTransformInfo::getVectorInstrCost(), llvm::Type::getVectorNumElements(), llvm::Type::isVectorTy(), and llvm::Type::isVoidTy().

Referenced by getVectorCallCost().

static Constant* getSequentialMask ( IRBuilder<> &  Builder,
unsigned  NumInt,
unsigned  NumUndef 
)
static
static Constant* getStridedMask ( IRBuilder<> &  Builder,
unsigned  Start,
unsigned  Stride,
unsigned  VF 
)
static
static unsigned getVectorCallCost ( CallInst CI,
unsigned  VF,
const TargetTransformInfo TTI,
const TargetLibraryInfo TLI,
bool NeedToScalarize 
)
static
static unsigned getVectorIntrinsicCost ( CallInst CI,
unsigned  VF,
const TargetTransformInfo TTI,
const TargetLibraryInfo TLI 
)
static
static Type* getWiderType ( const DataLayout DL,
Type Ty0,
Type Ty1 
)
static
static bool hasOutsideLoopUser ( const Loop TheLoop,
Instruction Inst,
SmallPtrSetImpl< Value * > &  Reductions 
)
static

Check that the instruction has outside loop users and is not an identified reduction variable.

Definition at line 3914 of file LoopVectorize.cpp.

References llvm::LoopBase< N, M >::contains(), llvm::SmallPtrSetImpl< PtrType >::count(), llvm::dbgs(), DEBUG, and llvm::Value::users().

static bool isLikelyComplexAddressComputation ( Value Ptr,
LoopVectorizationLegality *  Legal,
ScalarEvolution SE,
const Loop TheLoop 
)
static

Check whether the address computation for a non-consecutive memory access looks like an unlikely candidate for being merged into the indexing mode.

We look for a GEP which has one index that is an induction variable and all other indices are loop invariant. If the stride of this access is also within a small bound we decide that this address computation can likely be merged into the addressing mode. In all other cases, we identify the address computation as complex.

Definition at line 4939 of file LoopVectorize.cpp.

References llvm::dyn_cast(), llvm::APInt::getBitWidth(), llvm::User::getNumOperands(), llvm::User::getOperand(), llvm::ScalarEvolution::getSCEV(), llvm::APInt::getSExtValue(), llvm::SCEVAddRecExpr::getStepRecurrence(), llvm::SCEVConstant::getValue(), llvm::ConstantInt::getValue(), and llvm::ScalarEvolution::isLoopInvariant().

static bool isPredicatedBlock ( unsigned  BlockNum)
static

Check whether this block is a predicated block.

Due to if predication of stores we might create a sequence of "if(pred) a[i] = ...; " blocks. We start with one vectorized basic block. For every conditional block we split this vectorized block. Therefore, every second block will be a predicated one.

Definition at line 2891 of file LoopVectorize.cpp.

Referenced by cse().

static bool isStrideMul ( Instruction I,
LoopVectorizationLegality *  Legal 
)
static

Definition at line 4983 of file LoopVectorize.cpp.

References llvm::User::getOperand().

STATISTIC ( LoopsVectorized  ,
"Number of loops vectorized"   
)
STATISTIC ( LoopsAnalyzed  ,
"Number of loops analyzed for vectorization"   
)

Variable Documentation

cl::opt<bool> EnableCondStoresVectorization("enable-cond-stores-vec", cl::init(false), cl::Hidden, cl::desc("Enable if predication of stores during vectorization."))
static
cl::opt<bool> EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization."))
static
cl::opt<bool> EnableIndVarRegisterHeur("enable-ind-var-reg-heur", cl::init(true), cl::Hidden, cl::desc("Count the induction variable only once when interleaving"))
static
cl::opt<bool> EnableInterleavedMemAccesses("enable-interleaved-mem-accesses", cl::init(false), cl::Hidden, cl::desc("Enable vectorization on interleaved memory accesses in a loop"))
static
cl::opt<bool> EnableLoadStoreRuntimeInterleave("enable-loadstore-runtime-interleave", cl::init(true), cl::Hidden, cl::desc("Enable runtime interleaving until load/store ports are saturated"))
static
cl::opt<bool> EnableMemAccessVersioning("enable-mem-access-versioning", cl::init(true), cl::Hidden, cl::desc("Enable symblic stride memory access versioning"))
static

This enables versioning on the strides of symbolically striding memory accesses in code like the following.

for (i = 0; i < N; ++i) A[i * Stride1] += B[i * Stride2] ...

Will be roughly translated to if (Stride1 == 1 && Stride2 == 1) { for (i = 0; i < N; i+=4) A[i:i+3] += ... } else ...

cl::opt<unsigned> ForceTargetInstructionCost("force-target-instruction-cost", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's expected cost for ""an instruction to a single constant value. Mostly ""useful for getting consistent testing."))
static
cl::opt<unsigned> ForceTargetMaxScalarInterleaveFactor("force-target-max-scalar-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for ""scalar loops."))
static
cl::opt<unsigned> ForceTargetMaxVectorInterleaveFactor("force-target-max-vector-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for ""vectorized loops."))
static
cl::opt<unsigned> ForceTargetNumScalarRegs("force-target-num-scalar-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of scalar registers."))
static
cl::opt<unsigned> ForceTargetNumVectorRegs("force-target-num-vector-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of vector registers."))
static
cl::opt<bool> LoopVectorizeWithBlockFrequency("loop-vectorize-with-block-frequency", cl::init(false), cl::Hidden, cl::desc("Enable the use of the block frequency analysis to access PGO ""heuristics minimizing code growth in cold regions and being more ""aggressive in hot regions."))
static
const char lv_name[] = "Loop Vectorization"
static

Definition at line 5240 of file LoopVectorize.cpp.

const unsigned MaxInterleaveFactor = 16
static

Maximum vectorization interleave count.

Definition at line 164 of file LoopVectorize.cpp.

cl::opt<unsigned> MaxInterleaveGroupFactor("max-interleave-group-factor", cl::Hidden, cl::desc("Maximum factor for an interleaved access group (default = 8)"), cl::init(8))
static

Maximum factor for an interleaved memory access.

cl::opt<unsigned> MaxNestedScalarReductionIC("max-nested-scalar-reduction-interleave", cl::init(2), cl::Hidden, cl::desc("The maximum interleave count to use when interleaving a scalar ""reduction in a nested loop."))
static
cl::opt<unsigned> NumberOfStoresToPredicate("vectorize-num-stores-pred", cl::init(1), cl::Hidden, cl::desc("Max number of stores to be predicated behind an if."))
static

The number of stores in a loop that are allowed to need predication.

cl::opt<unsigned> SmallLoopCost("small-loop-cost", cl::init(20), cl::Hidden, cl::desc("The cost of a loop that is considered 'small' by the interleaver."))
static
const unsigned TinyTripCountInterleaveThreshold = 128
static

We don't interleave loops with a known constant trip count below this number.

Definition at line 153 of file LoopVectorize.cpp.

cl::opt<unsigned> TinyTripCountVectorThreshold("vectorizer-min-trip-count", cl::init(16), cl::Hidden, cl::desc("Don't vectorize loops with a constant ""trip count that is smaller than this ""value."))
static

We don't vectorize loops with a known constant trip count below this number.