LLVM API Documentation

Namespaces | Defines | Functions | Variables
LoopVectorize.cpp File Reference
#include "llvm/Transforms/Vectorize.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Pass.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/VectorUtils.h"
#include <algorithm>
#include <map>
#include <tuple>

Go to the source code of this file.

Namespaces

namespace  llvm
 

List of target independent CodeGen pass IDs.


Defines

#define LV_NAME   "loop-vectorize"
#define DEBUG_TYPE   LV_NAME

Functions

 STATISTIC (LoopsVectorized,"Number of loops vectorized")
 STATISTIC (LoopsAnalyzed,"Number of loops analyzed for vectorization")
static ValuestripIntegerCast (Value *V)
static const SCEVreplaceSymbolicStrideSCEV (ScalarEvolution *SE, ValueToValueMap &PtrToStride, Value *Ptr, Value *OrigPtr=nullptr)
 Replaces the symbolic stride in a pointer SCEV expression by one.
static unsigned getGEPInductionOperand (const DataLayout *DL, const GetElementPtrInst *Gep)
 Find the operand of the GEP that should be checked for consecutive stores. This ignores trailing indices that have no effect on the final pointer.
static InstructiongetFirstInst (Instruction *FirstInst, Value *V, Instruction *Loc)
static unsigned getReductionBinOp (LoopVectorizationLegality::ReductionKind Kind)
 This function translates the reduction kind to an LLVM binary operator.
ValuecreateMinMaxOp (IRBuilder<> &Builder, LoopVectorizationLegality::MinMaxReductionKind RK, Value *Left, Value *Right)
static bool isPredicatedBlock (unsigned BlockNum)
 Check whether this block is a predicated block. Due to if predication of stores we might create a sequence of "if(pred) a[i] = ...; " blocks. We start with one vectorized basic block. For every conditional block we split this vectorized block. Therefore, every second block will be a predicated one.
static void cse (SmallVector< BasicBlock *, 4 > &BBs)
 Perform cse of induction variable instructions.
static ValueaddFastMathFlag (Value *V)
 Adds a 'fast' flag to floating point operations.
static bool canIfConvertPHINodes (BasicBlock *BB)
 Check whether it is safe to if-convert this phi node.
static TypeconvertPointerToIntegerType (const DataLayout &DL, Type *Ty)
static TypegetWiderType (const DataLayout &DL, Type *Ty0, Type *Ty1)
static bool hasOutsideLoopUser (const Loop *TheLoop, Instruction *Inst, SmallPtrSet< Value *, 4 > &Reductions)
 Check that the instruction has outside loop users and is not an identified reduction variable.
static ValuestripGetElementPtr (Value *Ptr, ScalarEvolution *SE, const DataLayout *DL, Loop *Lp)
 Remove GEPs whose indices but the last one are loop invariant and return the induction operand of the gep pointer.
static ValuegetUniqueCastUse (Value *Ptr, Loop *Lp, Type *Ty)
 Look for a cast use of the passed value.
static ValuegetStrideFromPointer (Value *Ptr, ScalarEvolution *SE, const DataLayout *DL, Loop *Lp)
 Get the stride of a pointer access in a loop. Looks for symbolic strides "a[i*stride]". Returns the symbolic stride as a pointer to the Value, or null otherwise.
static bool hasComputableBounds (ScalarEvolution *SE, ValueToValueMap &Strides, Value *Ptr)
 Check whether a pointer can participate in a runtime bounds check.
static int isStridedPtr (ScalarEvolution *SE, const DataLayout *DL, Value *Ptr, const Loop *Lp, ValueToValueMap &StridesMap)
 Check the stride of the pointer and ensure that it does not wrap in the address space.
static bool isInBoundsGep (Value *Ptr)
static bool hasMultipleUsesOf (Instruction *I, SmallPtrSet< Instruction *, 8 > &Insts)
static bool areAllUsesIn (Instruction *I, SmallPtrSet< Instruction *, 8 > &Set)
static bool isLikelyComplexAddressComputation (Value *Ptr, LoopVectorizationLegality *Legal, ScalarEvolution *SE, const Loop *TheLoop)
 Check whether the address computation for a non-consecutive memory access looks like an unlikely candidate for being merged into the indexing mode.
static bool isStrideMul (Instruction *I, LoopVectorizationLegality *Legal)
Passllvm::createLoopVectorizePass (bool NoUnrolling=false, bool AlwaysVectorize=true)

Variables

static cl::opt< unsignedVectorizationFactor ("force-vector-width", cl::init(0), cl::Hidden, cl::desc("Sets the SIMD width. Zero is autoselect."))
static cl::opt< unsignedVectorizationUnroll ("force-vector-unroll", cl::init(0), cl::Hidden, cl::desc("Sets the vectorization unroll count. ""Zero is autoselect."))
static cl::opt< boolEnableIfConversion ("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization."))
static cl::opt< unsignedTinyTripCountVectorThreshold ("vectorizer-min-trip-count", cl::init(16), cl::Hidden, cl::desc("Don't vectorize loops with a constant ""trip count that is smaller than this ""value."))
 We don't vectorize loops with a known constant trip count below this number.
static cl::opt< boolEnableMemAccessVersioning ("enable-mem-access-versioning", cl::init(true), cl::Hidden, cl::desc("Enable symblic stride memory access versioning"))
static const unsigned TinyTripCountUnrollThreshold = 128
 We don't unroll loops with a known constant trip count below this number.
static const unsigned RuntimeMemoryCheckThreshold = 8
static const unsigned MaxVectorWidth = 64
 Maximum simd width.
static cl::opt< unsignedForceTargetNumScalarRegs ("force-target-num-scalar-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of scalar registers."))
static cl::opt< unsignedForceTargetNumVectorRegs ("force-target-num-vector-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of vector registers."))
static const unsigned MaxUnrollFactor = 16
 Maximum vectorization unroll count.
static cl::opt< unsignedForceTargetMaxScalarUnrollFactor ("force-target-max-scalar-unroll", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max unroll factor for scalar ""loops."))
static cl::opt< unsignedForceTargetMaxVectorUnrollFactor ("force-target-max-vector-unroll", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max unroll factor for ""vectorized loops."))
static cl::opt< unsignedForceTargetInstructionCost ("force-target-instruction-cost", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's expected cost for ""an instruction to a single constant value. Mostly ""useful for getting consistent testing."))
static cl::opt< unsignedSmallLoopCost ("small-loop-cost", cl::init(20), cl::Hidden, cl::desc("The cost of a loop that is considered 'small' by the unroller."))
static cl::opt< boolLoopVectorizeWithBlockFrequency ("loop-vectorize-with-block-frequency", cl::init(false), cl::Hidden, cl::desc("Enable the use of the block frequency analysis to access PGO ""heuristics minimizing code growth in cold regions and being more ""aggressive in hot regions."))
static cl::opt< boolEnableLoadStoreRuntimeUnroll ("enable-loadstore-runtime-unroll", cl::init(true), cl::Hidden, cl::desc("Enable runtime unrolling until load/store ports are saturated"))
static cl::opt< unsignedNumberOfStoresToPredicate ("vectorize-num-stores-pred", cl::init(1), cl::Hidden, cl::desc("Max number of stores to be predicated behind an if."))
 The number of stores in a loop that are allowed to need predication.
static cl::opt< boolEnableIndVarRegisterHeur ("enable-ind-var-reg-heur", cl::init(true), cl::Hidden, cl::desc("Count the induction variable only once when unrolling"))
static cl::opt< boolEnableCondStoresVectorization ("enable-cond-stores-vec", cl::init(false), cl::Hidden, cl::desc("Enable if predication of stores during vectorization."))
static const char lv_name [] = "Loop Vectorization"

Define Documentation

#define DEBUG_TYPE   LV_NAME

Definition at line 101 of file LoopVectorize.cpp.

#define LV_NAME   "loop-vectorize"

Definition at line 100 of file LoopVectorize.cpp.


Function Documentation

static Value* addFastMathFlag ( Value V) [static]

Adds a 'fast' flag to floating point operations.

Definition at line 2652 of file LoopVectorize.cpp.

References llvm::FastMathFlags::setUnsafeAlgebra().

static bool areAllUsesIn ( Instruction I,
SmallPtrSet< Instruction *, 8 > &  Set 
) [static]
static bool canIfConvertPHINodes ( BasicBlock BB) [static]

Check whether it is safe to if-convert this phi node.

Phi nodes with constant expressions that can trap are not safe to if convert.

Definition at line 3369 of file LoopVectorize.cpp.

References llvm::BasicBlock::begin(), llvm::dyn_cast(), llvm::BasicBlock::end(), llvm::PHINode::getIncomingValue(), llvm::PHINode::getNumIncomingValues(), and I.

static Type* convertPointerToIntegerType ( const DataLayout DL,
Type Ty 
) [static]
Value* createMinMaxOp ( IRBuilder<> &  Builder,
LoopVectorizationLegality::MinMaxReductionKind  RK,
Value Left,
Value Right 
)
static void cse ( SmallVector< BasicBlock *, 4 > &  BBs) [static]
static Instruction* getFirstInst ( Instruction FirstInst,
Value V,
Instruction Loc 
) [static]

Definition at line 1921 of file LoopVectorize.cpp.

References llvm::Instruction::getParent().

static unsigned getGEPInductionOperand ( const DataLayout DL,
const GetElementPtrInst Gep 
) [static]

Find the operand of the GEP that should be checked for consecutive stores. This ignores trailing indices that have no effect on the final pointer.

Definition at line 1521 of file LoopVectorize.cpp.

References advance(), llvm::gep_type_begin(), llvm::User::getNumOperands(), llvm::User::getOperand(), llvm::Type::getScalarType(), llvm::GetElementPtrInst::getType(), llvm::DataLayout::getTypeAllocSize(), llvm::PatternMatch::m_Zero(), and llvm::PatternMatch::match().

Referenced by stripGetElementPtr().

static unsigned getReductionBinOp ( LoopVectorizationLegality::ReductionKind  Kind) [static]

This function translates the reduction kind to an LLVM binary operator.

Definition at line 2522 of file LoopVectorize.cpp.

References llvm::APIntOps::And(), llvm::FCmp, llvm_unreachable, llvm::APIntOps::Or(), and llvm::APIntOps::Xor().

static Value* getStrideFromPointer ( Value Ptr,
ScalarEvolution SE,
const DataLayout DL,
Loop Lp 
) [static]
static Value* getUniqueCastUse ( Value Ptr,
Loop Lp,
Type Ty 
) [static]

Look for a cast use of the passed value.

Definition at line 3771 of file LoopVectorize.cpp.

References llvm::dyn_cast(), llvm::Value::getType(), and llvm::Value::users().

Referenced by getStrideFromPointer().

static Type* getWiderType ( const DataLayout DL,
Type Ty0,
Type Ty1 
) [static]
static bool hasComputableBounds ( ScalarEvolution SE,
ValueToValueMap Strides,
Value Ptr 
) [static]

Check whether a pointer can participate in a runtime bounds check.

Definition at line 4006 of file LoopVectorize.cpp.

References llvm::dyn_cast(), llvm::SCEVAddRecExpr::isAffine(), and replaceSymbolicStrideSCEV().

static bool hasMultipleUsesOf ( Instruction I,
SmallPtrSet< Instruction *, 8 > &  Insts 
) [static]
static bool hasOutsideLoopUser ( const Loop TheLoop,
Instruction Inst,
SmallPtrSet< Value *, 4 > &  Reductions 
) [static]

Check that the instruction has outside loop users and is not an identified reduction variable.

Definition at line 3535 of file LoopVectorize.cpp.

References llvm::LoopBase< BlockT, LoopT >::contains(), llvm::SmallPtrSetImpl< PtrType >::count(), llvm::dbgs(), DEBUG, and llvm::Value::users().

static bool isInBoundsGep ( Value Ptr) [static]

Definition at line 4335 of file LoopVectorize.cpp.

Referenced by isStridedPtr().

static bool isLikelyComplexAddressComputation ( Value Ptr,
LoopVectorizationLegality *  Legal,
ScalarEvolution SE,
const Loop TheLoop 
) [static]

Check whether the address computation for a non-consecutive memory access looks like an unlikely candidate for being merged into the indexing mode.

We look for a GEP which has one index that is an induction variable and all other indices are loop invariant. If the stride of this access is also within a small bound we decide that this address computation can likely be merged into the addressing mode. In all other cases, we identify the address computation as complex.

Definition at line 5708 of file LoopVectorize.cpp.

References llvm::dyn_cast(), llvm::APInt::getBitWidth(), llvm::User::getNumOperands(), llvm::User::getOperand(), llvm::ScalarEvolution::getSCEV(), llvm::APInt::getSExtValue(), llvm::SCEVAddRecExpr::getStepRecurrence(), llvm::SCEVConstant::getValue(), llvm::ConstantInt::getValue(), and llvm::ScalarEvolution::isLoopInvariant().

static bool isPredicatedBlock ( unsigned  BlockNum) [static]

Check whether this block is a predicated block. Due to if predication of stores we might create a sequence of "if(pred) a[i] = ...; " blocks. We start with one vectorized basic block. For every conditional block we split this vectorized block. Therefore, every second block will be a predicated one.

Definition at line 2617 of file LoopVectorize.cpp.

Referenced by cse().

static int isStridedPtr ( ScalarEvolution SE,
const DataLayout DL,
Value Ptr,
const Loop Lp,
ValueToValueMap StridesMap 
) [static]
static bool isStrideMul ( Instruction I,
LoopVectorizationLegality *  Legal 
) [static]

Definition at line 5752 of file LoopVectorize.cpp.

References llvm::User::getOperand().

static const SCEV* replaceSymbolicStrideSCEV ( ScalarEvolution SE,
ValueToValueMap PtrToStride,
Value Ptr,
Value OrigPtr = nullptr 
) [static]

Replaces the symbolic stride in a pointer SCEV expression by one.

If OrigPtr is not null, use it to look up the stride value instead of Ptr.

Definition at line 1428 of file LoopVectorize.cpp.

References llvm::dbgs(), DEBUG, llvm::DenseMapBase< DerivedT, KeyT, ValueT, KeyInfoT >::end(), llvm::DenseMapBase< DerivedT, KeyT, ValueT, KeyInfoT >::find(), llvm::ConstantInt::get(), llvm::ScalarEvolution::getSCEV(), llvm::Value::getType(), llvm::SCEVParameterRewriter::rewrite(), and stripIntegerCast().

Referenced by hasComputableBounds(), and isStridedPtr().

STATISTIC ( LoopsVectorized  ,
"Number of loops vectorized"   
)
STATISTIC ( LoopsAnalyzed  ,
"Number of loops analyzed for vectorization"   
)
static Value* stripGetElementPtr ( Value Ptr,
ScalarEvolution SE,
const DataLayout DL,
Loop Lp 
) [static]

Remove GEPs whose indices but the last one are loop invariant and return the induction operand of the gep pointer.

Definition at line 3753 of file LoopVectorize.cpp.

References llvm::dyn_cast(), getGEPInductionOperand(), llvm::User::getNumOperands(), llvm::User::getOperand(), llvm::ScalarEvolution::getSCEV(), and llvm::ScalarEvolution::isLoopInvariant().

Referenced by getStrideFromPointer().

static Value* stripIntegerCast ( Value V) [static]

Definition at line 1417 of file LoopVectorize.cpp.

Referenced by replaceSymbolicStrideSCEV().


Variable Documentation

cl::opt<bool> EnableCondStoresVectorization("enable-cond-stores-vec", cl::init(false), cl::Hidden, cl::desc("Enable if predication of stores during vectorization.")) [static]
cl::opt<bool> EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization.")) [static]
cl::opt<bool> EnableIndVarRegisterHeur("enable-ind-var-reg-heur", cl::init(true), cl::Hidden, cl::desc("Count the induction variable only once when unrolling")) [static]
cl::opt<bool> EnableLoadStoreRuntimeUnroll("enable-loadstore-runtime-unroll", cl::init(true), cl::Hidden, cl::desc("Enable runtime unrolling until load/store ports are saturated")) [static]
cl::opt<bool> EnableMemAccessVersioning("enable-mem-access-versioning", cl::init(true), cl::Hidden, cl::desc("Enable symblic stride memory access versioning")) [static]

This enables versioning on the strides of symbolically striding memory accesses in code like the following. for (i = 0; i < N; ++i) A[i * Stride1] += B[i * Stride2] ...

Will be roughly translated to if (Stride1 == 1 && Stride2 == 1) { for (i = 0; i < N; i+=4) A[i:i+3] += ... } else ...

cl::opt<unsigned> ForceTargetInstructionCost("force-target-instruction-cost", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's expected cost for ""an instruction to a single constant value. Mostly ""useful for getting consistent testing.")) [static]
cl::opt<unsigned> ForceTargetMaxScalarUnrollFactor("force-target-max-scalar-unroll", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max unroll factor for scalar ""loops.")) [static]
cl::opt<unsigned> ForceTargetMaxVectorUnrollFactor("force-target-max-vector-unroll", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max unroll factor for ""vectorized loops.")) [static]
cl::opt<unsigned> ForceTargetNumScalarRegs("force-target-num-scalar-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of scalar registers.")) [static]
cl::opt<unsigned> ForceTargetNumVectorRegs("force-target-num-vector-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of vector registers.")) [static]
cl::opt<bool> LoopVectorizeWithBlockFrequency("loop-vectorize-with-block-frequency", cl::init(false), cl::Hidden, cl::desc("Enable the use of the block frequency analysis to access PGO ""heuristics minimizing code growth in cold regions and being more ""aggressive in hot regions.")) [static]
const char lv_name[] = "Loop Vectorization" [static]

Definition at line 5959 of file LoopVectorize.cpp.

Maximum vectorization unroll count.

Definition at line 161 of file LoopVectorize.cpp.

const unsigned MaxVectorWidth = 64 [static]

Maximum simd width.

Definition at line 150 of file LoopVectorize.cpp.

cl::opt<unsigned> NumberOfStoresToPredicate("vectorize-num-stores-pred", cl::init(1), cl::Hidden, cl::desc("Max number of stores to be predicated behind an if.")) [static]

The number of stores in a loop that are allowed to need predication.

When performing memory disambiguation checks at runtime do not make more than this number of comparisons.

Definition at line 147 of file LoopVectorize.cpp.

cl::opt<unsigned> SmallLoopCost("small-loop-cost", cl::init(20), cl::Hidden, cl::desc("The cost of a loop that is considered 'small' by the unroller.")) [static]

We don't unroll loops with a known constant trip count below this number.

Definition at line 143 of file LoopVectorize.cpp.

cl::opt<unsigned> TinyTripCountVectorThreshold("vectorizer-min-trip-count", cl::init(16), cl::Hidden, cl::desc("Don't vectorize loops with a constant ""trip count that is smaller than this ""value.")) [static]

We don't vectorize loops with a known constant trip count below this number.

cl::opt<unsigned> VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden, cl::desc("Sets the SIMD width. Zero is autoselect.")) [static]
cl::opt<unsigned> VectorizationUnroll("force-vector-unroll", cl::init(0), cl::Hidden, cl::desc("Sets the vectorization unroll count. ""Zero is autoselect.")) [static]