LLVM  4.0.0
Namespaces | Macros | Functions | Variables
LoopVectorize.cpp File Reference
#include "llvm/Transforms/Vectorize/LoopVectorize.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Pass.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/LoopVersioning.h"
#include "llvm/Transforms/Vectorize.h"
#include <algorithm>
#include <map>
#include <tuple>

Go to the source code of this file.

Namespaces

 llvm
 Compute iterated dominance frontiers using a linear time algorithm.
 

Macros

#define LV_NAME   "loop-vectorize"
 
#define DEBUG_TYPE   LV_NAME
 

Functions

 STATISTIC (LoopsVectorized,"Number of loops vectorized")
 
 STATISTIC (LoopsAnalyzed,"Number of loops analyzed for vectorization")
 
static OptimizationRemarkAnalysis createMissedAnalysis (const char *PassName, StringRef RemarkName, Loop *TheLoop, Instruction *I=nullptr)
 Create an analysis remark that explains why vectorization failed. More...
 
static ConstantgetInterleavedMask (IRBuilder<> &Builder, unsigned VF, unsigned NumVec)
 
static ConstantgetStridedMask (IRBuilder<> &Builder, unsigned Start, unsigned Stride, unsigned VF)
 
static ConstantgetSequentialMask (IRBuilder<> &Builder, unsigned NumInt, unsigned NumUndef)
 
static ValueConcatenateTwoVectors (IRBuilder<> &Builder, Value *V1, Value *V2)
 
static ValueConcatenateVectors (IRBuilder<> &Builder, ArrayRef< Value * > InputList)
 
static void cse (BasicBlock *BB)
 Perform cse of induction variable instructions. More...
 
static ValueaddFastMathFlag (Value *V)
 Adds a 'fast' flag to floating point operations. More...
 
static unsigned getScalarizationOverhead (Type *Ty, bool Insert, bool Extract, const TargetTransformInfo &TTI)
 Estimate the overhead of scalarizing a value based on its type. More...
 
static unsigned getScalarizationOverhead (SmallVectorImpl< Type * > &OpTys, Type *RetTy, const TargetTransformInfo &TTI)
 Estimate the overhead of scalarizing an Instruction based on the types of its operands and return value. More...
 
static unsigned getScalarizationOverhead (Instruction *I, unsigned VF, const TargetTransformInfo &TTI)
 Estimate the overhead of scalarizing an instruction. More...
 
static unsigned getVectorCallCost (CallInst *CI, unsigned VF, const TargetTransformInfo &TTI, const TargetLibraryInfo *TLI, bool &NeedToScalarize)
 
static unsigned getVectorIntrinsicCost (CallInst *CI, unsigned VF, const TargetTransformInfo &TTI, const TargetLibraryInfo *TLI)
 
static TypesmallestIntegerVectorType (Type *T1, Type *T2)
 
static TypelargestIntegerVectorType (Type *T1, Type *T2)
 
static bool mayDivideByZero (Instruction &I)
 A helper function for checking whether an integer division-related instruction may divide by zero (in which case it must be predicated if executed conditionally in the scalar code). More...
 
static bool canIfConvertPHINodes (BasicBlock *BB)
 Check whether it is safe to if-convert this phi node. More...
 
static TypeconvertPointerToIntegerType (const DataLayout &DL, Type *Ty)
 
static TypegetWiderType (const DataLayout &DL, Type *Ty0, Type *Ty1)
 
static bool hasOutsideLoopUser (const Loop *TheLoop, Instruction *Inst, SmallPtrSetImpl< Value * > &AllowedExit)
 Check that the instruction has outside loop users and is not an identified reduction variable. More...
 
static const SCEVgetAddressAccessSCEV (Value *Ptr, LoopVectorizationLegality *Legal, ScalarEvolution *SE, const Loop *TheLoop)
 Gets Address Access SCEV after verifying that the access pattern is loop invariant except the induction variable dependence. More...
 
static bool isStrideMul (Instruction *I, LoopVectorizationLegality *Legal)
 
Passllvm::createLoopVectorizePass (bool NoUnrolling=false, bool AlwaysVectorize=true)
 
static void AddRuntimeUnrollDisableMetaData (Loop *L)
 

Variables

static cl::opt< boolEnableIfConversion ("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization."))
 
static cl::opt< unsignedTinyTripCountVectorThreshold ("vectorizer-min-trip-count", cl::init(16), cl::Hidden, cl::desc("Don't vectorize loops with a constant ""trip count that is smaller than this ""value."))
 We don't vectorize loops with a known constant trip count below this number. More...
 
static cl::opt< boolMaximizeBandwidth ("vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden, cl::desc("Maximize bandwidth when selecting vectorization factor which ""will be determined by the smallest type in loop."))
 
static cl::opt< boolEnableInterleavedMemAccesses ("enable-interleaved-mem-accesses", cl::init(false), cl::Hidden, cl::desc("Enable vectorization on interleaved memory accesses in a loop"))
 
static cl::opt< unsignedMaxInterleaveGroupFactor ("max-interleave-group-factor", cl::Hidden, cl::desc("Maximum factor for an interleaved access group (default = 8)"), cl::init(8))
 Maximum factor for an interleaved memory access. More...
 
static const unsigned TinyTripCountInterleaveThreshold = 128
 We don't interleave loops with a known constant trip count below this number. More...
 
static cl::opt< unsignedForceTargetNumScalarRegs ("force-target-num-scalar-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of scalar registers."))
 
static cl::opt< unsignedForceTargetNumVectorRegs ("force-target-num-vector-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of vector registers."))
 
static const unsigned MaxInterleaveFactor = 16
 Maximum vectorization interleave count. More...
 
static cl::opt< unsignedForceTargetMaxScalarInterleaveFactor ("force-target-max-scalar-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for ""scalar loops."))
 
static cl::opt< unsignedForceTargetMaxVectorInterleaveFactor ("force-target-max-vector-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for ""vectorized loops."))
 
static cl::opt< unsignedForceTargetInstructionCost ("force-target-instruction-cost", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's expected cost for ""an instruction to a single constant value. Mostly ""useful for getting consistent testing."))
 
static cl::opt< unsignedSmallLoopCost ("small-loop-cost", cl::init(20), cl::Hidden, cl::desc("The cost of a loop that is considered 'small' by the interleaver."))
 
static cl::opt< boolLoopVectorizeWithBlockFrequency ("loop-vectorize-with-block-frequency", cl::init(false), cl::Hidden, cl::desc("Enable the use of the block frequency analysis to access PGO ""heuristics minimizing code growth in cold regions and being more ""aggressive in hot regions."))
 
static cl::opt< boolEnableLoadStoreRuntimeInterleave ("enable-loadstore-runtime-interleave", cl::init(true), cl::Hidden, cl::desc("Enable runtime interleaving until load/store ports are saturated"))
 
static cl::opt< unsignedNumberOfStoresToPredicate ("vectorize-num-stores-pred", cl::init(1), cl::Hidden, cl::desc("Max number of stores to be predicated behind an if."))
 The number of stores in a loop that are allowed to need predication. More...
 
static cl::opt< boolEnableIndVarRegisterHeur ("enable-ind-var-reg-heur", cl::init(true), cl::Hidden, cl::desc("Count the induction variable only once when interleaving"))
 
static cl::opt< boolEnableCondStoresVectorization ("enable-cond-stores-vec", cl::init(true), cl::Hidden, cl::desc("Enable if predication of stores during vectorization."))
 
static cl::opt< unsignedMaxNestedScalarReductionIC ("max-nested-scalar-reduction-interleave", cl::init(2), cl::Hidden, cl::desc("The maximum interleave count to use when interleaving a scalar ""reduction in a nested loop."))
 
static cl::opt< unsignedPragmaVectorizeMemoryCheckThreshold ("pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum allowed number of runtime memory checks with a ""vectorize(enable) pragma."))
 
static cl::opt< unsignedVectorizeSCEVCheckThreshold ("vectorize-scev-check-threshold", cl::init(16), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed."))
 
static cl::opt< unsignedPragmaVectorizeSCEVCheckThreshold ("pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed with a ""vectorize(enable) pragma"))
 
static const char lv_name [] = "Loop Vectorization"
 

Macro Definition Documentation

#define DEBUG_TYPE   LV_NAME

Definition at line 106 of file LoopVectorize.cpp.

#define LV_NAME   "loop-vectorize"

Definition at line 105 of file LoopVectorize.cpp.

Referenced by llvm::LoopVectorizePass::processLoop().

Function Documentation

static Value* addFastMathFlag ( Value V)
static

Adds a 'fast' flag to floating point operations.

Definition at line 3593 of file LoopVectorize.cpp.

References fuzzer::Flags, and llvm::FastMathFlags::setUnsafeAlgebra().

static void AddRuntimeUnrollDisableMetaData ( Loop L)
static
static bool canIfConvertPHINodes ( BasicBlock BB)
static

Check whether it is safe to if-convert this phi node.

Phi nodes with constant expressions that can trap are not safe to if convert.

Definition at line 4995 of file LoopVectorize.cpp.

References llvm::dyn_cast(), I, and llvm::PHINode::incoming_values().

static Value* ConcatenateTwoVectors ( IRBuilder<> &  Builder,
Value V1,
Value V2 
)
static
static Value* ConcatenateVectors ( IRBuilder<> &  Builder,
ArrayRef< Value * >  InputList 
)
static
static Type* convertPointerToIntegerType ( const DataLayout DL,
Type Ty 
)
static
static OptimizationRemarkAnalysis createMissedAnalysis ( const char *  PassName,
StringRef  RemarkName,
Loop TheLoop,
Instruction I = nullptr 
)
static

Create an analysis remark that explains why vectorization failed.

PassName is the name of the pass (e.g. can be AlwaysPrint). RemarkName is the identifier for the remark. If I is passed it is an instruction that prevents vectorization. Otherwise TheLoop is used for the location of the remark.

Returns
the remark object that can be streamed to.

Definition at line 225 of file LoopVectorize.cpp.

References llvm::LoopBase< N, M >::getHeader(), llvm::Loop::getStartLoc(), and I.

Referenced by llvm::LoopVectorizePass::processLoop().

static void cse ( BasicBlock BB)
static
static const SCEV* getAddressAccessSCEV ( Value Ptr,
LoopVectorizationLegality *  Legal,
ScalarEvolution SE,
const Loop TheLoop 
)
static

Gets Address Access SCEV after verifying that the access pattern is loop invariant except the induction variable dependence.

This SCEV can be sent to the Target in order to estimate the address calculation cost.

Definition at line 6807 of file LoopVectorize.cpp.

References llvm::dyn_cast(), llvm::User::getNumOperands(), llvm::User::getOperand(), llvm::ScalarEvolution::getSCEV(), i, llvm::ScalarEvolution::isLoopInvariant(), and Ptr.

static Constant* getInterleavedMask ( IRBuilder<> &  Builder,
unsigned  VF,
unsigned  NumVec 
)
static
static unsigned getScalarizationOverhead ( Type Ty,
bool  Insert,
bool  Extract,
const TargetTransformInfo TTI 
)
static

Estimate the overhead of scalarizing a value based on its type.

Insert and Extract are set if the result needs to be inserted and/or extracted from vectors.

Definition at line 3605 of file LoopVectorize.cpp.

References assert(), E, llvm::TargetTransformInfo::getVectorInstrCost(), llvm::Type::getVectorNumElements(), I, llvm::Type::isVectorTy(), and llvm::Type::isVoidTy().

Referenced by getScalarizationOverhead(), and getVectorCallCost().

static unsigned getScalarizationOverhead ( SmallVectorImpl< Type * > &  OpTys,
Type RetTy,
const TargetTransformInfo TTI 
)
static

Estimate the overhead of scalarizing an Instruction based on the types of its operands and return value.

Definition at line 3625 of file LoopVectorize.cpp.

References getScalarizationOverhead().

static unsigned getScalarizationOverhead ( Instruction I,
unsigned  VF,
const TargetTransformInfo TTI 
)
static

Estimate the overhead of scalarizing an instruction.

This is a convenience wrapper for the type-based getScalarizationOverhead API.

Definition at line 3639 of file LoopVectorize.cpp.

References llvm::User::getNumOperands(), llvm::User::getOperand(), getScalarizationOverhead(), and llvm::Value::getType().

static Constant* getSequentialMask ( IRBuilder<> &  Builder,
unsigned  NumInt,
unsigned  NumUndef 
)
static
static Constant* getStridedMask ( IRBuilder<> &  Builder,
unsigned  Start,
unsigned  Stride,
unsigned  VF 
)
static
static unsigned getVectorCallCost ( CallInst CI,
unsigned  VF,
const TargetTransformInfo TTI,
const TargetLibraryInfo TLI,
bool NeedToScalarize 
)
static
static unsigned getVectorIntrinsicCost ( CallInst CI,
unsigned  VF,
const TargetTransformInfo TTI,
const TargetLibraryInfo TLI 
)
static
static Type* getWiderType ( const DataLayout DL,
Type Ty0,
Type Ty1 
)
static
static bool hasOutsideLoopUser ( const Loop TheLoop,
Instruction Inst,
SmallPtrSetImpl< Value * > &  AllowedExit 
)
static

Check that the instruction has outside loop users and is not an identified reduction variable.

Definition at line 5194 of file LoopVectorize.cpp.

References llvm::LoopBase< N, M >::contains(), llvm::SmallPtrSetImpl< PtrType >::count(), llvm::dbgs(), DEBUG, and llvm::Value::users().

static bool isStrideMul ( Instruction I,
LoopVectorizationLegality *  Legal 
)
static

Definition at line 6830 of file LoopVectorize.cpp.

References llvm::User::getOperand().

static Type* largestIntegerVectorType ( Type T1,
Type T2 
)
static

Definition at line 3729 of file LoopVectorize.cpp.

References llvm::Type::getVectorElementType().

static bool mayDivideByZero ( Instruction I)
static

A helper function for checking whether an integer division-related instruction may divide by zero (in which case it must be predicated if executed conditionally in the scalar code).

TODO: It may be worthwhile to generalize and check isKnownNonZero(). Non-zero divisors that are non compile-time constants will not be converted into multiplication, so we will still end up scalarizing the division, but can do so w/o predication.

Definition at line 4695 of file LoopVectorize.cpp.

References assert(), llvm::dyn_cast(), llvm::Instruction::getOpcode(), llvm::User::getOperand(), and llvm::ConstantInt::isZero().

static Type* smallestIntegerVectorType ( Type T1,
Type T2 
)
static

Definition at line 3724 of file LoopVectorize.cpp.

References llvm::Type::getVectorElementType().

STATISTIC ( LoopsVectorized  ,
"Number of loops vectorized"   
)
STATISTIC ( LoopsAnalyzed  ,
"Number of loops analyzed for vectorization"   
)

Variable Documentation

cl::opt<bool> EnableCondStoresVectorization("enable-cond-stores-vec", cl::init(true), cl::Hidden, cl::desc("Enable if predication of stores during vectorization."))
static
cl::opt<bool> EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization."))
static
cl::opt<bool> EnableIndVarRegisterHeur("enable-ind-var-reg-heur", cl::init(true), cl::Hidden, cl::desc("Count the induction variable only once when interleaving"))
static
cl::opt<bool> EnableInterleavedMemAccesses("enable-interleaved-mem-accesses", cl::init(false), cl::Hidden, cl::desc("Enable vectorization on interleaved memory accesses in a loop"))
static
cl::opt<bool> EnableLoadStoreRuntimeInterleave("enable-loadstore-runtime-interleave", cl::init(true), cl::Hidden, cl::desc("Enable runtime interleaving until load/store ports are saturated"))
static
cl::opt<unsigned> ForceTargetInstructionCost("force-target-instruction-cost", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's expected cost for ""an instruction to a single constant value. Mostly ""useful for getting consistent testing."))
static
cl::opt<unsigned> ForceTargetMaxScalarInterleaveFactor("force-target-max-scalar-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for ""scalar loops."))
static
cl::opt<unsigned> ForceTargetMaxVectorInterleaveFactor("force-target-max-vector-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for ""vectorized loops."))
static
cl::opt<unsigned> ForceTargetNumScalarRegs("force-target-num-scalar-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of scalar registers."))
static
cl::opt<unsigned> ForceTargetNumVectorRegs("force-target-num-vector-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of vector registers."))
static
cl::opt<bool> LoopVectorizeWithBlockFrequency("loop-vectorize-with-block-frequency", cl::init(false), cl::Hidden, cl::desc("Enable the use of the block frequency analysis to access PGO ""heuristics minimizing code growth in cold regions and being more ""aggressive in hot regions."))
static
const char lv_name[] = "Loop Vectorization"
static

Definition at line 7165 of file LoopVectorize.cpp.

cl::opt<bool> MaximizeBandwidth("vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden, cl::desc("Maximize bandwidth when selecting vectorization factor which ""will be determined by the smallest type in loop."))
static
const unsigned MaxInterleaveFactor = 16
static

Maximum vectorization interleave count.

Definition at line 150 of file LoopVectorize.cpp.

cl::opt<unsigned> MaxInterleaveGroupFactor("max-interleave-group-factor", cl::Hidden, cl::desc("Maximum factor for an interleaved access group (default = 8)"), cl::init(8))
static

Maximum factor for an interleaved memory access.

cl::opt<unsigned> MaxNestedScalarReductionIC("max-nested-scalar-reduction-interleave", cl::init(2), cl::Hidden, cl::desc("The maximum interleave count to use when interleaving a scalar ""reduction in a nested loop."))
static
cl::opt<unsigned> NumberOfStoresToPredicate("vectorize-num-stores-pred", cl::init(1), cl::Hidden, cl::desc("Max number of stores to be predicated behind an if."))
static

The number of stores in a loop that are allowed to need predication.

cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold("pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum allowed number of runtime memory checks with a ""vectorize(enable) pragma."))
static
cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold("pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed with a ""vectorize(enable) pragma"))
static
cl::opt<unsigned> SmallLoopCost("small-loop-cost", cl::init(20), cl::Hidden, cl::desc("The cost of a loop that is considered 'small' by the interleaver."))
static
const unsigned TinyTripCountInterleaveThreshold = 128
static

We don't interleave loops with a known constant trip count below this number.

Definition at line 139 of file LoopVectorize.cpp.

cl::opt<unsigned> TinyTripCountVectorThreshold("vectorizer-min-trip-count", cl::init(16), cl::Hidden, cl::desc("Don't vectorize loops with a constant ""trip count that is smaller than this ""value."))
static

We don't vectorize loops with a known constant trip count below this number.

Referenced by llvm::LoopVectorizePass::processLoop().

cl::opt<unsigned> VectorizeSCEVCheckThreshold("vectorize-scev-check-threshold", cl::init(16), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed."))
static