LLVM  mainline
Namespaces | Defines | Functions | Variables
LoopVectorize.cpp File Reference
#include "llvm/Transforms/Vectorize.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Pass.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/VectorUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include <algorithm>
#include <map>
#include <tuple>

Go to the source code of this file.

Namespaces

namespace  llvm
 

Compute iterated dominance frontiers using a linear time algorithm.


Defines

#define LV_NAME   "loop-vectorize"
#define DEBUG_TYPE   LV_NAME

Functions

 STATISTIC (LoopsVectorized,"Number of loops vectorized")
 STATISTIC (LoopsAnalyzed,"Number of loops analyzed for vectorization")
static unsigned getGEPInductionOperand (const GetElementPtrInst *Gep)
 Find the operand of the GEP that should be checked for consecutive stores.
static ConstantgetInterleavedMask (IRBuilder<> &Builder, unsigned VF, unsigned NumVec)
static ConstantgetStridedMask (IRBuilder<> &Builder, unsigned Start, unsigned Stride, unsigned VF)
static ConstantgetSequentialMask (IRBuilder<> &Builder, unsigned NumInt, unsigned NumUndef)
static ValueConcatenateTwoVectors (IRBuilder<> &Builder, Value *V1, Value *V2)
static ValueConcatenateVectors (IRBuilder<> &Builder, ArrayRef< Value * > InputList)
static InstructiongetFirstInst (Instruction *FirstInst, Value *V, Instruction *Loc)
static bool isPredicatedBlock (unsigned BlockNum)
 Check whether this block is a predicated block.
static void cse (SmallVector< BasicBlock *, 4 > &BBs)
 Perform cse of induction variable instructions.
static ValueaddFastMathFlag (Value *V)
 Adds a 'fast' flag to floating point operations.
static unsigned getScalarizationOverhead (Type *Ty, bool Insert, bool Extract, const TargetTransformInfo &TTI)
 Estimate the overhead of scalarizing a value.
static unsigned getVectorCallCost (CallInst *CI, unsigned VF, const TargetTransformInfo &TTI, const TargetLibraryInfo *TLI, bool &NeedToScalarize)
static unsigned getVectorIntrinsicCost (CallInst *CI, unsigned VF, const TargetTransformInfo &TTI, const TargetLibraryInfo *TLI)
static bool canIfConvertPHINodes (BasicBlock *BB)
 Check whether it is safe to if-convert this phi node.
static TypeconvertPointerToIntegerType (const DataLayout &DL, Type *Ty)
static TypegetWiderType (const DataLayout &DL, Type *Ty0, Type *Ty1)
static bool hasOutsideLoopUser (const Loop *TheLoop, Instruction *Inst, SmallPtrSetImpl< Value * > &Reductions)
 Check that the instruction has outside loop users and is not an identified reduction variable.
static ValuestripGetElementPtr (Value *Ptr, ScalarEvolution *SE, Loop *Lp)
 Remove GEPs whose indices but the last one are loop invariant and return the induction operand of the gep pointer.
static ValuegetUniqueCastUse (Value *Ptr, Loop *Lp, Type *Ty)
 Look for a cast use of the passed value.
static ValuegetStrideFromPointer (Value *Ptr, ScalarEvolution *SE, Loop *Lp)
 Get the stride of a pointer access in a loop.
static bool isLikelyComplexAddressComputation (Value *Ptr, LoopVectorizationLegality *Legal, ScalarEvolution *SE, const Loop *TheLoop)
 Check whether the address computation for a non-consecutive memory access looks like an unlikely candidate for being merged into the indexing mode.
static bool isStrideMul (Instruction *I, LoopVectorizationLegality *Legal)
Passllvm::createLoopVectorizePass (bool NoUnrolling=false, bool AlwaysVectorize=true)

Variables

static cl::opt< boolEnableIfConversion ("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization."))
static cl::opt< unsignedTinyTripCountVectorThreshold ("vectorizer-min-trip-count", cl::init(16), cl::Hidden, cl::desc("Don't vectorize loops with a constant ""trip count that is smaller than this ""value."))
 We don't vectorize loops with a known constant trip count below this number.
static cl::opt< boolEnableMemAccessVersioning ("enable-mem-access-versioning", cl::init(true), cl::Hidden, cl::desc("Enable symblic stride memory access versioning"))
 This enables versioning on the strides of symbolically striding memory accesses in code like the following.
static cl::opt< boolEnableInterleavedMemAccesses ("enable-interleaved-mem-accesses", cl::init(false), cl::Hidden, cl::desc("Enable vectorization on interleaved memory accesses in a loop"))
static cl::opt< unsignedMaxInterleaveGroupFactor ("max-interleave-group-factor", cl::Hidden, cl::desc("Maximum factor for an interleaved access group (default = 8)"), cl::init(8))
 Maximum factor for an interleaved memory access.
static const unsigned TinyTripCountUnrollThreshold = 128
 We don't unroll loops with a known constant trip count below this number.
static cl::opt< unsignedForceTargetNumScalarRegs ("force-target-num-scalar-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of scalar registers."))
static cl::opt< unsignedForceTargetNumVectorRegs ("force-target-num-vector-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of vector registers."))
static const unsigned MaxInterleaveFactor = 16
 Maximum vectorization interleave count.
static cl::opt< unsignedForceTargetMaxScalarInterleaveFactor ("force-target-max-scalar-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for ""scalar loops."))
static cl::opt< unsignedForceTargetMaxVectorInterleaveFactor ("force-target-max-vector-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for ""vectorized loops."))
static cl::opt< unsignedForceTargetInstructionCost ("force-target-instruction-cost", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's expected cost for ""an instruction to a single constant value. Mostly ""useful for getting consistent testing."))
static cl::opt< unsignedSmallLoopCost ("small-loop-cost", cl::init(20), cl::Hidden, cl::desc("The cost of a loop that is considered 'small' by the unroller."))
static cl::opt< boolLoopVectorizeWithBlockFrequency ("loop-vectorize-with-block-frequency", cl::init(false), cl::Hidden, cl::desc("Enable the use of the block frequency analysis to access PGO ""heuristics minimizing code growth in cold regions and being more ""aggressive in hot regions."))
static cl::opt< boolEnableLoadStoreRuntimeUnroll ("enable-loadstore-runtime-unroll", cl::init(true), cl::Hidden, cl::desc("Enable runtime unrolling until load/store ports are saturated"))
static cl::opt< unsignedNumberOfStoresToPredicate ("vectorize-num-stores-pred", cl::init(1), cl::Hidden, cl::desc("Max number of stores to be predicated behind an if."))
 The number of stores in a loop that are allowed to need predication.
static cl::opt< boolEnableIndVarRegisterHeur ("enable-ind-var-reg-heur", cl::init(true), cl::Hidden, cl::desc("Count the induction variable only once when unrolling"))
static cl::opt< boolEnableCondStoresVectorization ("enable-cond-stores-vec", cl::init(false), cl::Hidden, cl::desc("Enable if predication of stores during vectorization."))
static cl::opt< unsignedMaxNestedScalarReductionUF ("max-nested-scalar-reduction-unroll", cl::init(2), cl::Hidden, cl::desc("The maximum unroll factor to use when unrolling a scalar ""reduction in a nested loop."))
static const char lv_name [] = "Loop Vectorization"

Define Documentation

#define DEBUG_TYPE   LV_NAME

Definition at line 109 of file LoopVectorize.cpp.

#define LV_NAME   "loop-vectorize"

Definition at line 108 of file LoopVectorize.cpp.


Function Documentation

static Value* addFastMathFlag ( Value V) [static]

Adds a 'fast' flag to floating point operations.

Definition at line 2951 of file LoopVectorize.cpp.

References llvm::FastMathFlags::setUnsafeAlgebra().

static bool canIfConvertPHINodes ( BasicBlock BB) [static]

Check whether it is safe to if-convert this phi node.

Phi nodes with constant expressions that can trap are not safe to if convert.

Definition at line 3751 of file LoopVectorize.cpp.

References llvm::BasicBlock::begin(), llvm::dyn_cast(), llvm::BasicBlock::end(), llvm::PHINode::getIncomingValue(), llvm::PHINode::getNumIncomingValues(), and I.

static Value* ConcatenateTwoVectors ( IRBuilder<> &  Builder,
Value V1,
Value V2 
) [static]
static Value* ConcatenateVectors ( IRBuilder<> &  Builder,
ArrayRef< Value * >  InputList 
) [static]
static Type* convertPointerToIntegerType ( const DataLayout DL,
Type Ty 
) [static]
static void cse ( SmallVector< BasicBlock *, 4 > &  BBs) [static]
static Instruction* getFirstInst ( Instruction FirstInst,
Value V,
Instruction Loc 
) [static]

Definition at line 2425 of file LoopVectorize.cpp.

References llvm::Instruction::getParent().

Find the operand of the GEP that should be checked for consecutive stores.

This ignores trailing indices that have no effect on the final pointer.

Definition at line 1764 of file LoopVectorize.cpp.

References advance(), llvm::DL, llvm::gep_type_begin(), llvm::Module::getDataLayout(), llvm::Instruction::getModule(), llvm::User::getNumOperands(), llvm::User::getOperand(), llvm::Type::getScalarType(), llvm::GetElementPtrInst::getType(), llvm::DataLayout::getTypeAllocSize(), llvm::PatternMatch::m_Zero(), and llvm::PatternMatch::match().

Referenced by stripGetElementPtr().

static Constant* getInterleavedMask ( IRBuilder<> &  Builder,
unsigned  VF,
unsigned  NumVec 
) [static]
static unsigned getScalarizationOverhead ( Type Ty,
bool  Insert,
bool  Extract,
const TargetTransformInfo TTI 
) [static]

Estimate the overhead of scalarizing a value.

Insert and Extract are set if the result needs to be inserted and/or extracted from vectors.

Definition at line 2962 of file LoopVectorize.cpp.

References ExtractElement(), llvm::TargetTransformInfo::getVectorInstrCost(), llvm::Type::getVectorNumElements(), llvm::InsertElement, llvm::Type::isVectorTy(), and llvm::Type::isVoidTy().

Referenced by getVectorCallCost().

static Constant* getSequentialMask ( IRBuilder<> &  Builder,
unsigned  NumInt,
unsigned  NumUndef 
) [static]
static Constant* getStridedMask ( IRBuilder<> &  Builder,
unsigned  Start,
unsigned  Stride,
unsigned  VF 
) [static]
static Value* getStrideFromPointer ( Value Ptr,
ScalarEvolution SE,
Loop Lp 
) [static]
static Value* getUniqueCastUse ( Value Ptr,
Loop Lp,
Type Ty 
) [static]

Look for a cast use of the passed value.

Definition at line 4151 of file LoopVectorize.cpp.

References llvm::dyn_cast(), llvm::Value::getType(), and llvm::Value::users().

Referenced by getStrideFromPointer().

static unsigned getVectorCallCost ( CallInst CI,
unsigned  VF,
const TargetTransformInfo TTI,
const TargetLibraryInfo TLI,
bool NeedToScalarize 
) [static]
static unsigned getVectorIntrinsicCost ( CallInst CI,
unsigned  VF,
const TargetTransformInfo TTI,
const TargetLibraryInfo TLI 
) [static]
static Type* getWiderType ( const DataLayout DL,
Type Ty0,
Type Ty1 
) [static]
static bool hasOutsideLoopUser ( const Loop TheLoop,
Instruction Inst,
SmallPtrSetImpl< Value * > &  Reductions 
) [static]

Check that the instruction has outside loop users and is not an identified reduction variable.

Definition at line 3936 of file LoopVectorize.cpp.

References llvm::LoopBase< BlockT, LoopT >::contains(), llvm::SmallPtrSetImpl< PtrType >::count(), llvm::dbgs(), DEBUG, and llvm::Value::users().

static bool isLikelyComplexAddressComputation ( Value Ptr,
LoopVectorizationLegality *  Legal,
ScalarEvolution SE,
const Loop TheLoop 
) [static]

Check whether the address computation for a non-consecutive memory access looks like an unlikely candidate for being merged into the indexing mode.

We look for a GEP which has one index that is an induction variable and all other indices are loop invariant. If the stride of this access is also within a small bound we decide that this address computation can likely be merged into the addressing mode. In all other cases, we identify the address computation as complex.

Definition at line 5073 of file LoopVectorize.cpp.

References llvm::dyn_cast(), llvm::APInt::getBitWidth(), llvm::User::getNumOperands(), llvm::User::getOperand(), llvm::ScalarEvolution::getSCEV(), llvm::APInt::getSExtValue(), llvm::SCEVAddRecExpr::getStepRecurrence(), llvm::SCEVConstant::getValue(), llvm::ConstantInt::getValue(), and llvm::ScalarEvolution::isLoopInvariant().

static bool isPredicatedBlock ( unsigned  BlockNum) [static]

Check whether this block is a predicated block.

Due to if predication of stores we might create a sequence of "if(pred) a[i] = ...; " blocks. We start with one vectorized basic block. For every conditional block we split this vectorized block. Therefore, every second block will be a predicated one.

Definition at line 2916 of file LoopVectorize.cpp.

Referenced by cse().

static bool isStrideMul ( Instruction I,
LoopVectorizationLegality *  Legal 
) [static]

Definition at line 5117 of file LoopVectorize.cpp.

References llvm::User::getOperand().

STATISTIC ( LoopsVectorized  ,
"Number of loops vectorized"   
)
STATISTIC ( LoopsAnalyzed  ,
"Number of loops analyzed for vectorization"   
)
static Value* stripGetElementPtr ( Value Ptr,
ScalarEvolution SE,
Loop Lp 
) [static]

Remove GEPs whose indices but the last one are loop invariant and return the induction operand of the gep pointer.

Definition at line 4134 of file LoopVectorize.cpp.

References llvm::dyn_cast(), getGEPInductionOperand(), llvm::User::getNumOperands(), llvm::User::getOperand(), llvm::ScalarEvolution::getSCEV(), and llvm::ScalarEvolution::isLoopInvariant().

Referenced by getStrideFromPointer().


Variable Documentation

cl::opt<bool> EnableCondStoresVectorization("enable-cond-stores-vec", cl::init(false), cl::Hidden, cl::desc("Enable if predication of stores during vectorization.")) [static]
cl::opt<bool> EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization.")) [static]
cl::opt<bool> EnableIndVarRegisterHeur("enable-ind-var-reg-heur", cl::init(true), cl::Hidden, cl::desc("Count the induction variable only once when unrolling")) [static]
cl::opt<bool> EnableInterleavedMemAccesses("enable-interleaved-mem-accesses", cl::init(false), cl::Hidden, cl::desc("Enable vectorization on interleaved memory accesses in a loop")) [static]
cl::opt<bool> EnableLoadStoreRuntimeUnroll("enable-loadstore-runtime-unroll", cl::init(true), cl::Hidden, cl::desc("Enable runtime unrolling until load/store ports are saturated")) [static]
cl::opt<bool> EnableMemAccessVersioning("enable-mem-access-versioning", cl::init(true), cl::Hidden, cl::desc("Enable symblic stride memory access versioning")) [static]

This enables versioning on the strides of symbolically striding memory accesses in code like the following.

for (i = 0; i < N; ++i) A[i * Stride1] += B[i * Stride2] ...

Will be roughly translated to if (Stride1 == 1 && Stride2 == 1) { for (i = 0; i < N; i+=4) A[i:i+3] += ... } else ...

cl::opt<unsigned> ForceTargetInstructionCost("force-target-instruction-cost", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's expected cost for ""an instruction to a single constant value. Mostly ""useful for getting consistent testing.")) [static]
cl::opt<unsigned> ForceTargetMaxScalarInterleaveFactor("force-target-max-scalar-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for ""scalar loops.")) [static]
cl::opt<unsigned> ForceTargetMaxVectorInterleaveFactor("force-target-max-vector-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for ""vectorized loops.")) [static]
cl::opt<unsigned> ForceTargetNumScalarRegs("force-target-num-scalar-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of scalar registers.")) [static]
cl::opt<unsigned> ForceTargetNumVectorRegs("force-target-num-vector-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of vector registers.")) [static]
cl::opt<bool> LoopVectorizeWithBlockFrequency("loop-vectorize-with-block-frequency", cl::init(false), cl::Hidden, cl::desc("Enable the use of the block frequency analysis to access PGO ""heuristics minimizing code growth in cold regions and being more ""aggressive in hot regions.")) [static]
const char lv_name[] = "Loop Vectorization" [static]

Definition at line 5374 of file LoopVectorize.cpp.

Maximum vectorization interleave count.

Definition at line 163 of file LoopVectorize.cpp.

cl::opt<unsigned> MaxInterleaveGroupFactor("max-interleave-group-factor", cl::Hidden, cl::desc("Maximum factor for an interleaved access group (default = 8)"), cl::init(8)) [static]

Maximum factor for an interleaved memory access.

cl::opt<unsigned> MaxNestedScalarReductionUF("max-nested-scalar-reduction-unroll", cl::init(2), cl::Hidden, cl::desc("The maximum unroll factor to use when unrolling a scalar ""reduction in a nested loop.")) [static]
cl::opt<unsigned> NumberOfStoresToPredicate("vectorize-num-stores-pred", cl::init(1), cl::Hidden, cl::desc("Max number of stores to be predicated behind an if.")) [static]

The number of stores in a loop that are allowed to need predication.

cl::opt<unsigned> SmallLoopCost("small-loop-cost", cl::init(20), cl::Hidden, cl::desc("The cost of a loop that is considered 'small' by the unroller.")) [static]

We don't unroll loops with a known constant trip count below this number.

Definition at line 152 of file LoopVectorize.cpp.

cl::opt<unsigned> TinyTripCountVectorThreshold("vectorizer-min-trip-count", cl::init(16), cl::Hidden, cl::desc("Don't vectorize loops with a constant ""trip count that is smaller than this ""value.")) [static]

We don't vectorize loops with a known constant trip count below this number.