LLVM  6.0.0svn
Classes | Namespaces | Macros | Functions | Variables
LoopVectorize.cpp File Reference
#include "llvm/Transforms/Vectorize/LoopVectorize.h"
#include "VPlan.h"
#include "VPlanBuilder.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/LoopVersioning.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <iterator>
#include <limits>
#include <memory>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

Go to the source code of this file.

Classes

class  llvm::InnerLoopVectorizer
 InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization factor (VF). More...
 
class  llvm::InnerLoopUnroller
 
class  llvm::LoopVectorizationPlanner
 LoopVectorizationPlanner - drives the vectorization process after having passed Legality checks. More...
 
struct  llvm::LoopVectorizationPlanner::VFRange
 A range of powers-of-2 vectorization factors with fixed start and adjustable end. More...
 

Namespaces

 llvm
 Compute iterated dominance frontiers using a linear time algorithm.
 

Macros

#define LV_NAME   "loop-vectorize"
 
#define DEBUG_TYPE   LV_NAME
 

Functions

 STATISTIC (LoopsVectorized, "Number of loops vectorized")
 
 STATISTIC (LoopsAnalyzed, "Number of loops analyzed for vectorization")
 
static OptimizationRemarkAnalysis createMissedAnalysis (const char *PassName, StringRef RemarkName, Loop *TheLoop, Instruction *I=nullptr)
 Create an analysis remark that explains why vectorization failed. More...
 
static bool hasCyclesInLoopBody (const Loop &L)
 Returns true if the given loop body has a cycle, excluding the loop itself. More...
 
static Type * ToVectorTy (Type *Scalar, unsigned VF)
 A helper function for converting Scalar types to vector types. More...
 
static Value * getPointerOperand (Value *I)
 A helper function that returns the pointer operand of a load or store instruction. More...
 
static Type * getMemInstValueType (Value *I)
 A helper function that returns the type of loaded or stored value. More...
 
static unsigned getMemInstAlignment (Value *I)
 A helper function that returns the alignment of load or store instruction. More...
 
static unsigned getMemInstAddressSpace (Value *I)
 A helper function that returns the address space of the pointer operand of load or store instruction. More...
 
static bool hasIrregularType (Type *Ty, const DataLayout &DL, unsigned VF)
 A helper function that returns true if the given type is irregular. More...
 
static unsigned getReciprocalPredBlockProb ()
 A helper function that returns the reciprocal of the block probability of predicated blocks. More...
 
static Value * addFastMathFlag (Value *V)
 A helper function that adds a 'fast' flag to floating-point operations. More...
 
static Constant * getSignedIntOrFpConstant (Type *Ty, int64_t C)
 A helper function that returns an integer or floating-point constant with value C. More...
 
static Instruction * getDebugLocFromInstOrOperands (Instruction *I)
 Look for a meaningful debug location on the instruction or its operands. More...
 
static std::string getDebugLocString (const Loop *L)
 
static void emitMissedWarning (Function *F, Loop *L, const LoopVectorizeHints &LH, OptimizationRemarkEmitter *ORE)
 
static void addAcyclicInnerLoop (Loop &L, SmallVectorImpl< Loop *> &V)
 
static void cse (BasicBlock *BB)
 Perform cse of induction variable instructions. More...
 
static unsigned getScalarizationOverhead (Instruction *I, unsigned VF, const TargetTransformInfo &TTI)
 Estimate the overhead of scalarizing an instruction. More...
 
static unsigned getVectorCallCost (CallInst *CI, unsigned VF, const TargetTransformInfo &TTI, const TargetLibraryInfo *TLI, bool &NeedToScalarize)
 
static unsigned getVectorIntrinsicCost (CallInst *CI, unsigned VF, const TargetTransformInfo &TTI, const TargetLibraryInfo *TLI)
 
static Type * smallestIntegerVectorType (Type *T1, Type *T2)
 
static Type * largestIntegerVectorType (Type *T1, Type *T2)
 
static bool mayDivideByZero (Instruction &I)
 A helper function for checking whether an integer division-related instruction may divide by zero (in which case it must be predicated if executed conditionally in the scalar code). More...
 
static bool canIfConvertPHINodes (BasicBlock *BB)
 Check whether it is safe to if-convert this phi node. More...
 
static Type * convertPointerToIntegerType (const DataLayout &DL, Type *Ty)
 
static Type * getWiderType (const DataLayout &DL, Type *Ty0, Type *Ty1)
 
static bool hasOutsideLoopUser (const Loop *TheLoop, Instruction *Inst, SmallPtrSetImpl< Value *> &AllowedExit)
 Check that the instruction has outside loop users and is not an identified reduction variable. More...
 
static const SCEV * getAddressAccessSCEV (Value *Ptr, LoopVectorizationLegality *Legal, ScalarEvolution *SE, const Loop *TheLoop)
 Gets Address Access SCEV after verifying that the access pattern is loop invariant except the induction variable dependence. More...
 
static bool isStrideMul (Instruction *I, LoopVectorizationLegality *Legal)
 
Pass * llvm::createLoopVectorizePass (bool NoUnrolling=false, bool AlwaysVectorize=true)
 
static void AddRuntimeUnrollDisableMetaData (Loop *L)
 

Variables

static cl::opt< bool > EnableIfConversion ("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization."))
 
static cl::opt< unsigned > TinyTripCountVectorThreshold ("vectorizer-min-trip-count", cl::init(16), cl::Hidden, cl::desc("Loops with a constant trip count that is smaller than this " "value are vectorized only if no scalar iteration overheads " "are incurred."))
 Loops with a known constant trip count below this number are vectorized only if no scalar iteration overheads are incurred. More...
 
static cl::opt< bool > MaximizeBandwidth ("vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden, cl::desc("Maximize bandwidth when selecting vectorization factor which " "will be determined by the smallest type in loop."))
 
static cl::opt< bool > EnableInterleavedMemAccesses ("enable-interleaved-mem-accesses", cl::init(false), cl::Hidden, cl::desc("Enable vectorization on interleaved memory accesses in a loop"))
 
static cl::opt< unsigned > MaxInterleaveGroupFactor ("max-interleave-group-factor", cl::Hidden, cl::desc("Maximum factor for an interleaved access group (default = 8)"), cl::init(8))
 Maximum factor for an interleaved memory access. More...
 
static const unsigned TinyTripCountInterleaveThreshold = 128
 We don't interleave loops with a known constant trip count below this number. More...
 
static cl::opt< unsigned > ForceTargetNumScalarRegs ("force-target-num-scalar-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of scalar registers."))
 
static cl::opt< unsigned > ForceTargetNumVectorRegs ("force-target-num-vector-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of vector registers."))
 
static const unsigned MaxInterleaveFactor = 16
 Maximum vectorization interleave count. More...
 
static cl::opt< unsigned > ForceTargetMaxScalarInterleaveFactor ("force-target-max-scalar-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for " "scalar loops."))
 
static cl::opt< unsigned > ForceTargetMaxVectorInterleaveFactor ("force-target-max-vector-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for " "vectorized loops."))
 
static cl::opt< unsigned > ForceTargetInstructionCost ("force-target-instruction-cost", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's expected cost for " "an instruction to a single constant value. Mostly " "useful for getting consistent testing."))
 
static cl::opt< unsigned > SmallLoopCost ("small-loop-cost", cl::init(20), cl::Hidden, cl::desc("The cost of a loop that is considered 'small' by the interleaver."))
 
static cl::opt< bool > LoopVectorizeWithBlockFrequency ("loop-vectorize-with-block-frequency", cl::init(false), cl::Hidden, cl::desc("Enable the use of the block frequency analysis to access PGO " "heuristics minimizing code growth in cold regions and being more " "aggressive in hot regions."))
 
static cl::opt< bool > EnableLoadStoreRuntimeInterleave ("enable-loadstore-runtime-interleave", cl::init(true), cl::Hidden, cl::desc("Enable runtime interleaving until load/store ports are saturated"))
 
static cl::opt< unsigned > NumberOfStoresToPredicate ("vectorize-num-stores-pred", cl::init(1), cl::Hidden, cl::desc("Max number of stores to be predicated behind an if."))
 The number of stores in a loop that are allowed to need predication. More...
 
static cl::opt< bool > EnableIndVarRegisterHeur ("enable-ind-var-reg-heur", cl::init(true), cl::Hidden, cl::desc("Count the induction variable only once when interleaving"))
 
static cl::opt< bool > EnableCondStoresVectorization ("enable-cond-stores-vec", cl::init(true), cl::Hidden, cl::desc("Enable if predication of stores during vectorization."))
 
static cl::opt< unsigned > MaxNestedScalarReductionIC ("max-nested-scalar-reduction-interleave", cl::init(2), cl::Hidden, cl::desc("The maximum interleave count to use when interleaving a scalar " "reduction in a nested loop."))
 
static cl::opt< unsigned > PragmaVectorizeMemoryCheckThreshold ("pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum allowed number of runtime memory checks with a " "vectorize(enable) pragma."))
 
static cl::opt< unsigned > VectorizeSCEVCheckThreshold ("vectorize-scev-check-threshold", cl::init(16), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed."))
 
static cl::opt< unsigned > PragmaVectorizeSCEVCheckThreshold ("pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed with a " "vectorize(enable) pragma"))
 
static const char lv_name [] = "Loop Vectorization"
 

Macro Definition Documentation

◆ DEBUG_TYPE

#define DEBUG_TYPE   LV_NAME

Definition at line 143 of file LoopVectorize.cpp.

Referenced by canIfConvertPHINodes(), and emitMissedWarning().

◆ LV_NAME

#define LV_NAME   "loop-vectorize"

Function Documentation

◆ addAcyclicInnerLoop()

static void addAcyclicInnerLoop ( Loop & L,
SmallVectorImpl< Loop *> &  V 
)
static

◆ addFastMathFlag()

static Value* addFastMathFlag ( Value * V)
static

◆ AddRuntimeUnrollDisableMetaData()

static void AddRuntimeUnrollDisableMetaData ( Loop * L)
static

Definition at line 7625 of file LoopVectorize.cpp.

References assert(), llvm::VPTransformState::Builder, Context, llvm::IRBuilder< T, Inserter >::CreateSelect(), llvm::dyn_cast(), E, llvm::ArrayRef< T >::empty(), llvm::WebAssembly::End, llvm::VPTransformState::get(), llvm::MDString::get(), llvm::MDNode::get(), llvm::BasicBlock::getContext(), llvm::LoopBase< BlockT, LoopT >::getHeader(), llvm::PHINode::getIncomingValue(), llvm::ilist_node_impl< OptionsT >::getIterator(), llvm::Loop::getLoopID(), llvm::PHINode::getNumIncomingValues(), llvm::MDNode::getNumOperands(), llvm::MDNode::getOperand(), llvm::InnerLoopVectorizer::getOrCreateVectorValue(), llvm::VPRecipeBase::getVPRecipeID(), I, llvm::VPTransformState::ILV, llvm::tgtok::In, llvm::VPTransformState::Instance, llvm::make_range(), llvm::BitmaskEnumDetail::Mask(), llvm::RISCVFenceField::O, print(), llvm::Value::printAsOperand(), llvm::SmallVectorTemplateBase< T, isPodLike< T >::value >::push_back(), llvm::MDNode::replaceOperandWith(), llvm::InnerLoopVectorizer::setDebugLocFromInst(), llvm::Loop::setLoopID(), llvm::VectorizerValueMap::setVectorValue(), llvm::ArrayRef< T >::size(), llvm::VPTransformState::UF, llvm::Value::use_empty(), llvm::VPTransformState::ValueMap, llvm::InnerLoopVectorizer::vectorizeInterleaveGroup(), llvm::InnerLoopVectorizer::vectorizeMemoryInstruction(), llvm::VPTransformState::VF, llvm::InnerLoopVectorizer::widenInstruction(), llvm::InnerLoopVectorizer::widenIntOrFpInduction(), and llvm::InnerLoopVectorizer::widenPHIInstruction().

Referenced by llvm::LoopVectorizePass::processLoop().

◆ canIfConvertPHINodes()

static bool canIfConvertPHINodes ( BasicBlock * BB)
static

◆ convertPointerToIntegerType()

static Type* convertPointerToIntegerType ( const DataLayout & DL,
Type * Ty 
)
static

◆ createMissedAnalysis()

static OptimizationRemarkAnalysis createMissedAnalysis ( const char * PassName,
StringRef  RemarkName,
Loop * TheLoop,
Instruction * I = nullptr 
)
static

Create an analysis remark that explains why vectorization failed.

PassName is the name of the pass (e.g. can be AlwaysPrint). RemarkName is the identifier for the remark. If I is passed it is an instruction that prevents vectorization. Otherwise TheLoop is used for the location of the remark.

Returns
the remark object that can be streamed to.

Definition at line 263 of file LoopVectorize.cpp.

References llvm::LoopBase< BlockT, LoopT >::getHeader(), llvm::Loop::getStartLoc(), and I.

Referenced by canIfConvertPHINodes(), emitMissedWarning(), hasOutsideLoopUser(), and llvm::LoopVectorizePass::processLoop().

◆ cse()

static void cse ( BasicBlock * BB)
static

◆ emitMissedWarning()

static void emitMissedWarning ( Function * F,
Loop * L,
const LoopVectorizeHints &  LH,
OptimizationRemarkEmitter * ORE 
)
static

◆ getAddressAccessSCEV()

static const SCEV* getAddressAccessSCEV ( Value * Ptr,
LoopVectorizationLegality *  Legal,
ScalarEvolution * SE,
const Loop * TheLoop 
)
static

Gets Address Access SCEV after verifying that the access pattern is loop invariant except the induction variable dependence.

This SCEV can be sent to the Target in order to estimate the address calculation cost.

Definition at line 6902 of file LoopVectorize.cpp.

References llvm::dyn_cast(), llvm::User::getNumOperands(), llvm::ScalarEvolution::getSCEV(), and llvm::ScalarEvolution::isLoopInvariant().

Referenced by isStrideMul().

◆ getDebugLocFromInstOrOperands()

static Instruction* getDebugLocFromInstOrOperands ( Instruction * I)
static

Look for a meaningful debug location on the instruction or its operands.

Definition at line 774 of file LoopVectorize.cpp.

References llvm::Empty, llvm::Instruction::getDebugLoc(), I, llvm::User::op_begin(), and llvm::User::op_end().

Referenced by llvm::InnerLoopVectorizer::createInductionVariable(), and llvm::InnerLoopVectorizer::createVectorizedLoopSkeleton().

◆ getDebugLocString()

static std::string getDebugLocString ( const Loop * L)
static

◆ getMemInstAddressSpace()

static unsigned getMemInstAddressSpace ( Value * I)
static

A helper function that returns the address space of the pointer operand of load or store instruction.

Definition at line 356 of file LoopVectorize.cpp.

References assert(), and I.

Referenced by hasOutsideLoopUser(), isStrideMul(), llvm::InnerLoopVectorizer::vectorizeInterleaveGroup(), and llvm::InnerLoopVectorizer::vectorizeMemoryInstruction().

◆ getMemInstAlignment()

static unsigned getMemInstAlignment ( Value * I)
static

A helper function that returns the alignment of load or store instruction.

Definition at line 346 of file LoopVectorize.cpp.

References assert(), getAlignment(), and I.

Referenced by hasOutsideLoopUser(), isStrideMul(), and llvm::InnerLoopVectorizer::vectorizeMemoryInstruction().

◆ getMemInstValueType()

static Type* getMemInstValueType ( Value * I)
static

A helper function that returns the type of loaded or stored value.

Definition at line 337 of file LoopVectorize.cpp.

References assert(), and I.

Referenced by isStrideMul(), llvm::InnerLoopVectorizer::vectorizeInterleaveGroup(), and llvm::InnerLoopVectorizer::vectorizeMemoryInstruction().

◆ getPointerOperand()

static Value* getPointerOperand ( Value * I)
static

A helper function that returns the pointer operand of a load or store instruction.

Definition at line 328 of file LoopVectorize.cpp.

References SI.

Referenced by canIfConvertPHINodes(), llvm::createLoopVectorizePass(), emitMissedWarning(), hasOutsideLoopUser(), isStrideMul(), llvm::InnerLoopVectorizer::vectorizeInterleaveGroup(), and llvm::InnerLoopVectorizer::vectorizeMemoryInstruction().

◆ getReciprocalPredBlockProb()

static unsigned getReciprocalPredBlockProb ( )
static

A helper function that returns the reciprocal of the block probability of predicated blocks.

If we return X, we are assuming the predicated block will execute once for every X iterations of the loop header.

TODO: We should use actual block probability here, if available. Currently, we always assume predicated blocks have a 50% chance of executing.

Definition at line 386 of file LoopVectorize.cpp.

Referenced by hasOutsideLoopUser(), and isStrideMul().

◆ getScalarizationOverhead()

static unsigned getScalarizationOverhead ( Instruction * I,
unsigned  VF,
const TargetTransformInfo & TTI 
)
static

◆ getSignedIntOrFpConstant()

static Constant* getSignedIntOrFpConstant ( Type * Ty,
int64_t  C 
)
static

A helper function that returns an integer or floating-point constant with value C.

Definition at line 400 of file LoopVectorize.cpp.

References llvm::ConstantFP::get(), llvm::ConstantInt::getSigned(), and llvm::Type::isIntegerTy().

Referenced by llvm::InnerLoopVectorizer::buildScalarSteps(), and llvm::InnerLoopVectorizer::createVectorIntOrFpInductionPHI().

◆ getVectorCallCost()

static unsigned getVectorCallCost ( CallInst * CI,
unsigned  VF,
const TargetTransformInfo & TTI,
const TargetLibraryInfo * TLI,
bool & NeedToScalarize 
)
static

◆ getVectorIntrinsicCost()

static unsigned getVectorIntrinsicCost ( CallInst * CI,
unsigned  VF,
const TargetTransformInfo & TTI,
const TargetLibraryInfo * TLI 
)
static

◆ getWiderType()

static Type* getWiderType ( const DataLayout & DL,
Type * Ty0,
Type * Ty1 
)
static

◆ hasCyclesInLoopBody()

static bool hasCyclesInLoopBody ( const Loop & L)
static

Returns true if the given loop body has a cycle, excluding the loop itself.

Definition at line 297 of file LoopVectorize.cpp.

References llvm::dbgs(), DEBUG, llvm::Loop::dump(), llvm::LoopBase< BlockT, LoopT >::empty(), and llvm::make_range().

Referenced by addAcyclicInnerLoop().

◆ hasIrregularType()

static bool hasIrregularType ( Type * Ty,
const DataLayout & DL,
unsigned  VF 
)
static

A helper function that returns true if the given type is irregular.

The type is irregular if its allocated size doesn't equal the store size of an element of the corresponding vector type at the given vectorization factor.

Definition at line 367 of file LoopVectorize.cpp.

References llvm::VectorType::get(), llvm::DataLayout::getTypeAllocSize(), llvm::DataLayout::getTypeAllocSizeInBits(), llvm::DataLayout::getTypeSizeInBits(), and llvm::DataLayout::getTypeStoreSize().

Referenced by hasOutsideLoopUser().

◆ hasOutsideLoopUser()

static bool hasOutsideLoopUser ( const Loop * TheLoop,
Instruction * Inst,
SmallPtrSetImpl< Value *> &  AllowedExit 
)
static

Check that the instruction has outside loop users and is not an identified reduction variable.

Definition at line 5137 of file LoopVectorize.cpp.

References llvm::PredicatedScalarEvolution::addPredicate(), llvm::AMDGPU::HSAMD::Kernel::Arg::Key::Align, llvm::all_of(), assert(), B, llvm::SetVector< T, Vector, Set >::begin(), llvm::sys::path::begin(), llvm::LoopBlocksDFS::beginRPO(), llvm::LoopAccessInfo::blockNeedsPredication(), C, llvm::computeMinimumValueSizes(), llvm::LoopBase< BlockT, LoopT >::contains(), convertPointerToIntegerType(), llvm::InnerLoopVectorizer::Cost, llvm::SmallSet< T, N, C >::count(), llvm::DenseMapBase< DenseMap< KeyT, ValueT, KeyInfoT, BucketT >, KeyT, ValueT, KeyInfoT, BucketT >::count(), llvm::SetVector< T, Vector, Set >::count(), llvm::SmallPtrSetImpl< PtrType >::count(), createMissedAnalysis(), llvm::dbgs(), DEBUG, DFS(), llvm::InnerLoopVectorizer::DT, llvm::dyn_cast(), E, llvm::OptimizationRemarkEmitter::emit(), llvm::SmallVectorBase::empty(), llvm::MapVector< KeyT, ValueT, MapType, VectorType >::empty(), llvm::TargetTransformInfo::enableAggressiveInterleaving(), EnableCondStoresVectorization, EnableIndVarRegisterHeur, EnableLoadStoreRuntimeInterleave, llvm::SetVector< T, Vector, Set >::end(), llvm::sys::path::end(), llvm::LoopBlocksDFS::endRPO(), llvm::SmallSet< T, N, C >::erase(), ForceTargetInstructionCost, ForceTargetMaxScalarInterleaveFactor, ForceTargetMaxVectorInterleaveFactor, ForceTargetNumScalarRegs, ForceTargetNumVectorRegs, llvm::SCEVConstant::getAPInt(), llvm::TargetTransformInfo::getCFInstrCost(), llvm::InductionDescriptor::getConstIntStepValue(), llvm::Module::getDataLayout(), llvm::PointerType::getElementType(), llvm::Function::getFnAttribute(), llvm::PHINode::getIncomingValueForBlock(), llvm::InductionDescriptor::getKind(), llvm::RecurrenceDescriptor::getLoopExitInstr(), llvm::TargetTransformInfo::getMaxInterleaveFactor(), getMemInstAddressSpace(), getMemInstAlignment(), llvm::ScalarEvolution::getMinusSCEV(), llvm::Instruction::getModule(), llvm::TargetTransformInfo::getNumberOfRegisters(), llvm::PHINode::getNumIncomingValues(), llvm::Instruction::getOpcode(), 
llvm::User::getOperand(), llvm::Instruction::getParent(), llvm::BasicBlock::getParent(), getPointerOperand(), llvm::getPtrStride(), getReciprocalPredBlockProb(), llvm::RecurrenceDescriptor::getRecurrenceType(), llvm::TargetTransformInfo::getRegisterBitWidth(), llvm::TargetTransformInfo::getScalarizationOverhead(), llvm::Type::getScalarType(), llvm::PredicatedScalarEvolution::getSCEV(), llvm::PredicatedScalarEvolution::getSE(), llvm::APInt::getSExtValue(), llvm::ScalarEvolution::getSmallConstantTripCount(), llvm::InductionDescriptor::getStartValue(), llvm::BasicBlock::getTerminator(), llvm::Value::getType(), llvm::DataLayout::getTypeSizeInBits(), llvm::PredicatedScalarEvolution::getUnionPredicate(), llvm::RecurrenceDescriptor::getUnsafeAlgebraInst(), llvm::InductionDescriptor::getUnsafeAlgebraInst(), llvm::Attribute::getValueAsString(), llvm::getVectorIntrinsicIDForCall(), getWiderType(), llvm::TargetTransformInfo::hasBranchDivergence(), hasIrregularType(), llvm::Value::hasOneUse(), llvm::RecurrenceDescriptor::hasUnsafeAlgebra(), llvm::InductionDescriptor::hasUnsafeAlgebra(), llvm::hasVectorInstrinsicScalarOpd(), I, llvm::InductionDescriptor::IK_IntInduction, llvm::InductionDescriptor::IK_PtrInduction, llvm::InnerLoopVectorizer::Induction, llvm::SmallSet< T, N, C >::insert(), llvm::SetVector< T, Vector, Set >::insert(), llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::insert(), llvm::SmallPtrSetImpl< PtrType >::insert(), llvm::SCEVUnionPredicate::isAlwaysTrue(), llvm::Instruction::isBinaryOp(), llvm::Instruction::isFast(), llvm::RecurrenceDescriptor::isFirstOrderRecurrence(), llvm::Type::isFloatingPointTy(), llvm::TargetLibraryInfo::isFunctionVectorizable(), llvm::InductionDescriptor::isInductionPHI(), llvm::Type::isIntegerTy(), llvm::ConstantInt::isOne(), llvm::Type::isPointerTy(), llvm::isPowerOf2_32(), llvm::HexagonMCInstrInfo::isPredicated(), llvm::RecurrenceDescriptor::isReductionPHI(), llvm::VectorType::isValidElementType(), 
llvm::Type::isVoidTy(), llvm::InnerLoopVectorizer::Legal, llvm::InnerLoopVectorizer::LI, List, llvm::SPII::Load, llvm::make_range(), llvm::max(), MaximizeBandwidth, MaxNestedScalarReductionIC, mayDivideByZero(), llvm::Instruction::mayReadFromMemory(), llvm::Instruction::mayThrow(), llvm::Instruction::mayWriteToMemory(), llvm::None, NumberOfStoresToPredicate, llvm::User::operand_values(), llvm::User::operands(), llvm::InnerLoopVectorizer::ORE, llvm::LoopBlocksDFS::perform(), llvm::SmallVectorImpl< T >::pop_back_val(), llvm::PowerOf2Floor(), llvm::InnerLoopVectorizer::PSE, llvm::SmallVectorTemplateBase< T, isPodLike< T >::value >::push_back(), llvm::SmallVectorTemplateBase< T, isPodLike >::push_back(), llvm::MapVector< KeyT, ValueT, MapType, VectorType >::rbegin(), llvm::RegUsage, llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::remove(), llvm::MapVector< KeyT, ValueT, MapType, VectorType >::rend(), llvm::replaceSymbolicStrideSCEV(), SI, llvm::SmallSet< T, N, C >::size(), llvm::SetVector< T, Vector, Set >::size(), llvm::SmallPtrSetImplBase::size(), llvm::ArrayRef< T >::size(), llvm::AMDGPU::HSAMD::Kernel::Arg::Key::Size, llvm::BasicBlock::size(), SmallLoopCost, llvm::ARM_MB::ST, llvm::SPII::Store, llvm::InnerLoopVectorizer::TLI, ToVectorTy(), llvm::InnerLoopVectorizer::TTI, llvm::Value::users(), VectorizationFactor, llvm::InnerLoopVectorizer::VF, and llvm::AArch64CC::VS.

◆ isStrideMul()

static bool isStrideMul ( Instruction * I,
LoopVectorizationLegality *  Legal 
)
static

Definition at line 6925 of file LoopVectorize.cpp.

References llvm::MCID::Add, AS, assert(), C, llvm::MCID::Call, llvm::InnerLoopVectorizer::Cost, llvm::dyn_cast(), llvm::SmallVectorBase::empty(), first, llvm::IntegerType::get(), llvm::VectorType::get(), getAddressAccessSCEV(), llvm::TargetTransformInfo::getAddressComputationCost(), llvm::LoadInst::getAlignment(), llvm::TargetTransformInfo::getArithmeticInstrCost(), llvm::TargetTransformInfo::getCastInstrCost(), llvm::TargetTransformInfo::getCFInstrCost(), llvm::TargetTransformInfo::getCmpSelInstrCost(), llvm::SelectInst::getCondition(), llvm::Type::getContext(), llvm::Value::getContext(), llvm::TargetTransformInfo::getGatherScatterOpCost(), llvm::Type::getInt1Ty(), llvm::TargetTransformInfo::getInterleavedMemoryOpCost(), llvm::TargetTransformInfo::getMaskedMemoryOpCost(), getMemInstAddressSpace(), getMemInstAlignment(), getMemInstValueType(), llvm::TargetTransformInfo::getMemoryOpCost(), llvm::TargetTransformInfo::getNumberOfParts(), llvm::PHINode::getNumIncomingValues(), llvm::Instruction::getOpcode(), llvm::User::getOperand(), llvm::Instruction::getParent(), llvm::LoadInst::getPointerAddressSpace(), getPointerOperand(), getReciprocalPredBlockProb(), llvm::TargetTransformInfo::getScalarizationOverhead(), getScalarizationOverhead(), llvm::Type::getScalarType(), llvm::PredicatedScalarEvolution::getSE(), llvm::TargetTransformInfo::getShuffleCost(), llvm::getSplatValue(), llvm::BranchInst::getSuccessor(), llvm::Value::getType(), llvm::ConstantInt::getValue(), getVectorCallCost(), getVectorIntrinsicCost(), llvm::getVectorIntrinsicIDForCall(), I, llvm::SmallPtrSetImpl< PtrType >::insert(), llvm::BranchInst::isConditional(), llvm::APInt::isPowerOf2(), llvm::Type::isVectorTy(), largestIntegerVectorType(), llvm::InnerLoopVectorizer::Legal, llvm::InnerLoopVectorizer::LI, LLVM_FALLTHROUGH, llvm::SPII::Load, llvm::max(), N, llvm::TargetTransformInfo::OK_AnyValue, llvm::TargetTransformInfo::OK_NonUniformConstantValue, llvm::TargetTransformInfo::OK_UniformConstantValue, 
llvm::TargetTransformInfo::OK_UniformValue, llvm::TargetTransformInfo::OP_None, llvm::TargetTransformInfo::OP_PowerOf2, llvm::User::operand_values(), llvm::User::operands(), llvm::SmallVectorImpl< T >::pop_back_val(), llvm::TargetTransformInfo::prefersVectorizedAddressing(), llvm::InnerLoopVectorizer::PSE, llvm::SmallVectorTemplateBase< T, isPodLike >::push_back(), llvm::SmallVectorTemplateBase< T, isPodLike< T >::value >::push_back(), llvm::MCID::Select, SI, llvm::TargetTransformInfo::SK_Broadcast, llvm::TargetTransformInfo::SK_ExtractSubvector, llvm::TargetTransformInfo::SK_Reverse, smallestIntegerVectorType(), llvm::SPII::Store, llvm::InnerLoopVectorizer::TLI, ToVectorTy(), llvm::InnerLoopVectorizer::TTI, and llvm::InnerLoopVectorizer::VF.

◆ largestIntegerVectorType()

static Type* largestIntegerVectorType ( Type * T1,
Type * T2 
)
static

Definition at line 3930 of file LoopVectorize.cpp.

References llvm::Type::getVectorElementType(), and T1.

Referenced by isStrideMul().

◆ mayDivideByZero()

static bool mayDivideByZero ( Instruction & I)
static

A helper function for checking whether an integer division-related instruction may divide by zero (in which case it must be predicated if executed conditionally in the scalar code).

TODO: It may be worthwhile to generalize and check isKnownNonZero(). Non-zero divisors that are non compile-time constants will not be converted into multiplication, so we will still end up scalarizing the division, but can do so w/o predication.

Definition at line 4615 of file LoopVectorize.cpp.

References assert(), llvm::dyn_cast(), llvm::Instruction::getOpcode(), llvm::User::getOperand(), and llvm::ConstantInt::isZero().

Referenced by hasOutsideLoopUser().

◆ smallestIntegerVectorType()

static Type* smallestIntegerVectorType ( Type * T1,
Type * T2 
)
static

◆ STATISTIC() [1/2]

STATISTIC ( LoopsVectorized  ,
"Number of loops vectorized"   
)

◆ STATISTIC() [2/2]

STATISTIC ( LoopsAnalyzed  ,
"Number of loops analyzed for vectorization"   
)

◆ ToVectorTy()

static Type* ToVectorTy ( Type * Scalar,
unsigned  VF 
)
static

A helper function for converting Scalar types to vector types.

If the incoming type is void, we return void. If the VF is 1, we return the scalar type.

Definition at line 316 of file LoopVectorize.cpp.

References llvm::VectorType::get(), and llvm::Type::isVoidTy().

Referenced by emitMissedWarning(), getScalarizationOverhead(), getVectorCallCost(), hasOutsideLoopUser(), isStrideMul(), and llvm::InnerLoopVectorizer::widenInstruction().

Variable Documentation

◆ EnableCondStoresVectorization

cl::opt<bool> EnableCondStoresVectorization("enable-cond-stores-vec", cl::init(true), cl::Hidden, cl::desc("Enable if predication of stores during vectorization."))
static

Referenced by hasOutsideLoopUser().

◆ EnableIfConversion

cl::opt<bool> EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization."))
static

Referenced by canIfConvertPHINodes().

◆ EnableIndVarRegisterHeur

cl::opt<bool> EnableIndVarRegisterHeur("enable-ind-var-reg-heur", cl::init(true), cl::Hidden, cl::desc("Count the induction variable only once when interleaving"))
static

Referenced by hasOutsideLoopUser().

◆ EnableInterleavedMemAccesses

cl::opt<bool> EnableInterleavedMemAccesses("enable-interleaved-mem-accesses", cl::init(false), cl::Hidden, cl::desc("Enable vectorization on interleaved memory accesses in a loop"))
static

Referenced by canIfConvertPHINodes().

◆ EnableLoadStoreRuntimeInterleave

cl::opt<bool> EnableLoadStoreRuntimeInterleave("enable-loadstore-runtime-interleave", cl::init(true), cl::Hidden, cl::desc( "Enable runtime interleaving until load/store ports are saturated"))
static

Referenced by hasOutsideLoopUser().

◆ ForceTargetInstructionCost

cl::opt<unsigned> ForceTargetInstructionCost("force-target-instruction-cost", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's expected cost for " "an instruction to a single constant value. Mostly " "useful for getting consistent testing."))
static

Referenced by hasOutsideLoopUser().

◆ ForceTargetMaxScalarInterleaveFactor

cl::opt<unsigned> ForceTargetMaxScalarInterleaveFactor("force-target-max-scalar-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for " "scalar loops."))
static

Referenced by hasOutsideLoopUser().

◆ ForceTargetMaxVectorInterleaveFactor

cl::opt<unsigned> ForceTargetMaxVectorInterleaveFactor("force-target-max-vector-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for " "vectorized loops."))
static

Referenced by hasOutsideLoopUser().

◆ ForceTargetNumScalarRegs

cl::opt<unsigned> ForceTargetNumScalarRegs("force-target-num-scalar-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of scalar registers."))
static

Referenced by hasOutsideLoopUser().

◆ ForceTargetNumVectorRegs

cl::opt<unsigned> ForceTargetNumVectorRegs("force-target-num-vector-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of vector registers."))
static

Referenced by hasOutsideLoopUser().

◆ LoopVectorizeWithBlockFrequency

cl::opt<bool> LoopVectorizeWithBlockFrequency("loop-vectorize-with-block-frequency", cl::init(false), cl::Hidden, cl::desc("Enable the use of the block frequency analysis to access PGO " "heuristics minimizing code growth in cold regions and being more " "aggressive in hot regions."))
static

◆ lv_name

const char lv_name[] = "Loop Vectorization"
static

Definition at line 7452 of file LoopVectorize.cpp.

◆ MaximizeBandwidth

cl::opt<bool> MaximizeBandwidth("vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden, cl::desc("Maximize bandwidth when selecting vectorization factor which " "will be determined by the smallest type in loop."))
static

Referenced by hasOutsideLoopUser().

◆ MaxInterleaveFactor

const unsigned MaxInterleaveFactor = 16
static

Maximum vectorization interleave count.

Definition at line 188 of file LoopVectorize.cpp.

Referenced by llvm::InnerLoopVectorizer::addMetadata().

◆ MaxInterleaveGroupFactor

cl::opt<unsigned> MaxInterleaveGroupFactor("max-interleave-group-factor", cl::Hidden, cl::desc("Maximum factor for an interleaved access group (default = 8)"), cl::init(8))
static

Maximum factor for an interleaved memory access.

Referenced by llvm::InnerLoopVectorizer::addMetadata().

◆ MaxNestedScalarReductionIC

cl::opt<unsigned> MaxNestedScalarReductionIC("max-nested-scalar-reduction-interleave", cl::init(2), cl::Hidden, cl::desc("The maximum interleave count to use when interleaving a scalar " "reduction in a nested loop."))
static

Referenced by hasOutsideLoopUser().

◆ NumberOfStoresToPredicate

cl::opt<unsigned> NumberOfStoresToPredicate("vectorize-num-stores-pred", cl::init(1), cl::Hidden, cl::desc("Max number of stores to be predicated behind an if."))
static

The number of stores in a loop that are allowed to need predication.

Referenced by hasOutsideLoopUser().

◆ PragmaVectorizeMemoryCheckThreshold

cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold("pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum allowed number of runtime memory checks with a " "vectorize(enable) pragma."))
static

◆ PragmaVectorizeSCEVCheckThreshold

cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold("pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed with a " "vectorize(enable) pragma"))
static

Referenced by canIfConvertPHINodes().

◆ SmallLoopCost

cl::opt<unsigned> SmallLoopCost("small-loop-cost", cl::init(20), cl::Hidden, cl::desc( "The cost of a loop that is considered 'small' by the interleaver."))
static

Referenced by hasOutsideLoopUser().

◆ TinyTripCountInterleaveThreshold

const unsigned TinyTripCountInterleaveThreshold = 128
static

We don't interleave loops with a known constant trip count below this number.

Definition at line 177 of file LoopVectorize.cpp.

◆ TinyTripCountVectorThreshold

cl::opt<unsigned> TinyTripCountVectorThreshold("vectorizer-min-trip-count", cl::init(16), cl::Hidden, cl::desc("Loops with a constant trip count that is smaller than this " "value are vectorized only if no scalar iteration overheads " "are incurred."))
static

Loops with a known constant trip count below this number are vectorized only if no scalar iteration overheads are incurred.

Referenced by llvm::LoopVectorizePass::processLoop().

◆ VectorizeSCEVCheckThreshold

cl::opt<unsigned> VectorizeSCEVCheckThreshold("vectorize-scev-check-threshold", cl::init(16), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed."))
static

Referenced by canIfConvertPHINodes().