#include "llvm/Transforms/Vectorize/LoopVectorize.h"
#include "LoopVectorizationPlanner.h"
#include "VPRecipeBuilder.h"
#include "VPlan.h"
#include "VPlanAnalysis.h"
#include "VPlanCFG.h"
#include "VPlanHelpers.h"
#include "VPlanPatternMatch.h"
#include "VPlanTransforms.h"
#include "VPlanUtils.h"
#include "VPlanVerifier.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/InstructionCost.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/NativeFormatting.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/InjectTLIMappings.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/LoopVersioning.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdint>
#include <functional>
#include <iterator>
#include <limits>
#include <memory>
#include <string>
#include <tuple>
#include <utility>

Classes
class	llvm::InnerLoopVectorizer
	InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization factor (VF). More...
struct	llvm::EpilogueLoopVectorizationInfo
	Encapsulate information regarding vectorization of a loop and its epilogue. More...
class	llvm::InnerLoopAndEpilogueVectorizer
	An extension of the inner loop vectorizer that creates a skeleton for a vectorized loop that has its epilogue (residual) also vectorized. More...
class	llvm::EpilogueVectorizerMainLoop
	A specialized derived class of inner loop vectorizer that performs vectorization of main loops in the process of vectorizing loops and their epilogues. More...
class	llvm::EpilogueVectorizerEpilogueLoop
class	llvm::LoopVectorizationCostModel
	LoopVectorizationCostModel - estimates the expected speedups due to vectorization. More...

Namespaces
namespace	llvm
	This is an optimization pass for GlobalISel generic memory operations.

Macros
#define	LV_NAME "loop-vectorize"
#define	DEBUG_TYPE LV_NAME

Enumerations
enum class	TailFoldingPolicyTy { None = 0 , PreferFoldTail , MustFoldTail }
	Option tail-folding-policy controls the tail-folding strategy and lists all available options. More...
enum	llvm::EpilogueLowering { llvm::CM_EpilogueAllowed , llvm::CM_EpilogueNotAllowedOptSize , llvm::CM_EpilogueNotAllowedLowTripLoop , llvm::CM_EpilogueNotNeededFoldTail , llvm::CM_EpilogueNotAllowedFoldTail }
enum class	llvm::AliasMaskingStatus { llvm::NotDecided , llvm::Disabled , llvm::Enabled }

Functions
	STATISTIC (LoopsVectorized, "Number of loops vectorized")
	STATISTIC (LoopsAnalyzed, "Number of loops analyzed for vectorization")
	STATISTIC (LoopsEpilogueVectorized, "Number of epilogues vectorized")
	STATISTIC (LoopsEarlyExitVectorized, "Number of early exit loops vectorized")
	STATISTIC (LoopsPartialAliasVectorized, "Number of partial aliasing loops vectorized")
static bool	hasIrregularType (Type *Ty, const DataLayout &DL)
	A helper function that returns true if the given type is irregular.
static ElementCount	getSmallConstantTripCount (ScalarEvolution *SE, const Loop *L)
	A version of ScalarEvolution::getSmallConstantTripCount that returns an ElementCount to include loops whose trip count is a function of vscale.
static unsigned	getMaxTCFromNonZeroRange (PredicatedScalarEvolution &PSE, Loop *L)
	Get the maximum trip count for `L` from the SCEV unsigned range, excluding zero from the range.
static std::optional< ElementCount >	getSmallBestKnownTC (PredicatedScalarEvolution &PSE, Loop *L, bool CanUseConstantMax=true, bool CanExcludeZeroTrips=false)
	Returns "best known" trip count, which is either a valid positive trip count or std::nullopt when an estimate cannot be made (including when the trip count would overflow), for the specified loop `L` as defined by the following procedure: 1) Returns exact trip count if it is known.
static DebugLoc	getDebugLocFromInstOrOperands (Instruction *I)
	Look for a meaningful debug location on the instruction or its operands.
Value *	llvm::getRuntimeVF (IRBuilderBase &B, Type *Ty, ElementCount VF)
	Return the runtime value for VF.
static bool	useActiveLaneMask (TailFoldingStyle Style)
static bool	useActiveLaneMaskForControlFlow (TailFoldingStyle Style)
static bool	isExplicitVecOuterLoop (Loop *OuterLp, OptimizationRemarkEmitter *ORE)
static void	collectSupportedLoops (Loop &L, LoopInfo *LI, OptimizationRemarkEmitter *ORE, SmallVectorImpl< Loop * > &V)
static bool	isIndvarOverflowCheckKnownFalse (const LoopVectorizationCostModel *Cost, ElementCount VF, std::optional< unsigned > UF=std::nullopt)
	For the given VF and UF and maximum trip count computed for the loop, return whether the induction variable might overflow in the vectorized loop.
static bool	useMaskedInterleavedAccesses (const TargetTransformInfo &TTI)
static VPIRBasicBlock *	replaceVPBBWithIRVPBB (VPBasicBlock *VPBB, BasicBlock *IRBB, VPlan *Plan=nullptr)
	Replace `VPBB` with a VPIRBasicBlock wrapping `IRBB`.
static void	addFullyUnrolledInstructionsToIgnore (Loop *L, const LoopVectorizationLegality::InductionList &IL, SmallPtrSetImpl< Instruction * > &InstsToIgnore)
	Knowing that loop `L` executes a single vector iteration, add instructions that will get simplified and thus should not have any cost to `InstsToIgnore`.
static void	legacyCSE (BasicBlock *BB)
	FIXME: This legacy common-subexpression-elimination routine is scheduled for removal, in favor of the VPlan-based one.
static unsigned	estimateElementCount (ElementCount VF, std::optional< unsigned > VScale)
	This function attempts to return a value that represents the ElementCount at runtime.
static bool	hasVectorLibraryVariantFor (const CallInst &CI, ElementCount VF, bool MaskRequired, const TargetLibraryInfo *TLI)
	Returns true iff `CI` has a library vector variant usable at `VF`: a mapping with matching VF, masked if required, whose vector function is declared in the module.
static Type *	maybeVectorizeType (Type *Ty, ElementCount VF)
static Intrinsic::ID	getMaskedDivRemIntrinsic (unsigned Opcode)
static bool	willGenerateVectors (VPlan &Plan, ElementCount VF, const TargetTransformInfo &TTI)
	Check if any recipe of `Plan` will generate a vector value, which will be assigned a vector register.
static bool	hasReplicatorRegion (VPlan &Plan)
static bool	hasFindLastReductionPhi (VPlan &Plan)
	Returns true if the VPlan contains a VPReductionPHIRecipe with FindLast recurrence kind.
static bool	hasUnsupportedHeaderPhiRecipe (VPlan &Plan)
	Returns true if the VPlan contains header phi recipes that are not currently supported for epilogue vectorization.
static const SCEV *	getAddressAccessSCEV (Value *Ptr, PredicatedScalarEvolution &PSE, const Loop *TheLoop)
	Gets the address access SCEV for Ptr, if it should be used for cost modeling according to isAddressSCEVForCost.
static void	printOptimizedVPlan (VPlan &)
static EpilogueLowering	getEpilogueLowering (Function *F, Loop *L, LoopVectorizeHints &Hints, bool OptForSize, TargetTransformInfo *TTI, TargetLibraryInfo *TLI, LoopVectorizationLegality &LVL, InterleavedAccessInfo *IAI)
static EpilogueLowering	getEpilogueTailLowering (const LoopVectorizationCostModel &MainCM, const Loop *L, OptimizationRemarkEmitter *ORE)
	Determine how to lower the epilogue for the vector epilogue loop.
static void	checkMixedPrecision (Loop *L, OptimizationRemarkEmitter *ORE)
static InstructionCost	calculateEarlyExitCost (VPCostContext &CostCtx, VPlan &Plan, ElementCount VF)
	For loops with uncountable early exits, find the cost of doing work when exiting the loop early, such as calculating the final exit values of variables used outside the loop.
static bool	isOutsideLoopWorkProfitable (GeneratedRTChecks &Checks, VectorizationFactor &VF, Loop *L, PredicatedScalarEvolution &PSE, VPCostContext &CostCtx, VPlan &Plan, EpilogueLowering SEL, std::optional< unsigned > VScale)
	This function determines whether or not it's still profitable to vectorize the loop given the extra work we have to do outside of the loop:
static SmallVector< VPInstruction * >	preparePlanForMainVectorLoop (VPlan &MainPlan, VPlan &EpiPlan)
	Prepare `MainPlan` for vectorizing the main vector loop during epilogue vectorization.
static SmallVector< Instruction * >	preparePlanForEpilogueVectorLoop (VPlan &MainPlan, VPlan &Plan, Loop *L, const SCEV2ValueTy &ExpandedSCEVs, EpilogueLoopVectorizationInfo &EPI, LoopVectorizationCostModel &CM, VFSelectionContext &Config, ScalarEvolution &SE)
	Prepare `Plan` for vectorizing the epilogue loop.
static void	fixScalarResumeValuesFromBypass (BasicBlock *BypassBlock, Loop *L, VPlan &BestEpiPlan, ArrayRef< VPInstruction * > ResumeValues)
static void	connectEpilogueVectorLoop (VPlan &EpiPlan, Loop *L, EpilogueLoopVectorizationInfo &EPI, DominatorTree *DT, GeneratedRTChecks &Checks, ArrayRef< Instruction * > InstsToMove, ArrayRef< VPInstruction * > ResumeValues)
	Connect the epilogue vector loop generated for `EpiPlan` to the main vector loop, after both plans have executed, updating branches from the iteration and runtime checks of the main loop, as well as updating various phis.

Variables
const char	VerboseDebug [] = DEBUG_TYPE "-verbose"
static cl::opt< bool >	EnableEpilogueVectorization ("enable-epilogue-vectorization", cl::init(true), cl::Hidden, cl::desc("Enable vectorization of epilogue loops."))
static cl::opt< unsigned >	EpilogueVectorizationForceVF ("epilogue-vectorization-force-VF", cl::init(1), cl::Hidden, cl::desc("When epilogue vectorization is enabled, and a value greater than " "1 is specified, forces the given VF for all applicable epilogue " "loops."))
static cl::opt< unsigned >	EpilogueVectorizationMinVF ("epilogue-vectorization-minimum-VF", cl::Hidden, cl::desc("Only loops with vectorization factor equal to or larger than " "the specified value are considered for epilogue vectorization."))
static cl::opt< unsigned >	TinyTripCountVectorThreshold ("vectorizer-min-trip-count", cl::init(16), cl::Hidden, cl::desc("Loops with a constant trip count that is smaller than this " "value are vectorized only if no scalar iteration overheads " "are incurred."))
	Loops with a known constant trip count below this number are vectorized only if no scalar iteration overheads are incurred.
static cl::opt< unsigned >	VectorizeMemoryCheckThreshold ("vectorize-memory-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum allowed number of runtime memory checks"))
static cl::opt< bool >	ForcePartialAliasingVectorization ("force-partial-aliasing-vectorization", cl::init(false), cl::Hidden, cl::desc("Replace pointer diff checks with alias masks."))
static cl::opt< TailFoldingPolicyTy >	TailFoldingPolicy ("tail-folding-policy", cl::init(TailFoldingPolicyTy::None), cl::Hidden, cl::desc("Tail-folding preferences over creating an epilogue loop."), cl::values(clEnumValN(TailFoldingPolicyTy::None, "dont-fold-tail", "Don't tail-fold loops."), clEnumValN(TailFoldingPolicyTy::PreferFoldTail, "prefer-fold-tail", "prefer tail-folding, otherwise create an epilogue when " "appropriate."), clEnumValN(TailFoldingPolicyTy::MustFoldTail, "must-fold-tail", "always tail-fold, don't attempt vectorization if " "tail-folding fails.")))
static cl::opt< TailFoldingPolicyTy >	EpilogueTailFoldingPolicy ("epilogue-tail-folding-policy", cl::Hidden, cl::desc("Epilogue-tail-folding preferences over creating an epilogue loop."), cl::values(clEnumValN(TailFoldingPolicyTy::None, "dont-fold-tail", "Don't tail-fold loops."), clEnumValN(TailFoldingPolicyTy::PreferFoldTail, "prefer-fold-tail", "prefer tail-folding, otherwise create an epilogue when " "appropriate.")))
static cl::opt< TailFoldingStyle >	ForceTailFoldingStyle ("force-tail-folding-style", cl::desc("Force the tail folding style"), cl::init(TailFoldingStyle::None), cl::values(clEnumValN(TailFoldingStyle::None, "none", "Disable tail folding"), clEnumValN(TailFoldingStyle::Data, "data", "Create lane mask for data only, using active.lane.mask intrinsic"), clEnumValN(TailFoldingStyle::DataWithoutLaneMask, "data-without-lane-mask", "Create lane mask with compare/stepvector"), clEnumValN(TailFoldingStyle::DataAndControlFlow, "data-and-control", "Create lane mask using active.lane.mask intrinsic, and use " "it for both data and control flow"), clEnumValN(TailFoldingStyle::DataWithEVL, "data-with-evl", "Use predicated EVL instructions for tail folding. If EVL " "is unsupported, fallback to data-without-lane-mask.")))
static cl::opt< bool >	EnableInterleavedMemAccesses ("enable-interleaved-mem-accesses", cl::init(false), cl::Hidden, cl::desc("Enable vectorization on interleaved memory accesses in a loop"))
static cl::opt< bool >	EnableMaskedInterleavedMemAccesses ("enable-masked-interleaved-mem-accesses", cl::init(false), cl::Hidden, cl::desc("Enable vectorization on masked interleaved memory accesses in a loop"))
	An interleave-group may need masking if it resides in a block that needs predication, or in order to mask away gaps.
static cl::opt< unsigned >	ForceTargetNumScalarRegs ("force-target-num-scalar-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of scalar registers."))
static cl::opt< unsigned >	ForceTargetNumVectorRegs ("force-target-num-vector-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of vector registers."))
static cl::opt< unsigned >	ForceTargetMaxScalarInterleaveFactor ("force-target-max-scalar-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for " "scalar loops."))
static cl::opt< unsigned >	ForceTargetMaxVectorInterleaveFactor ("force-target-max-vector-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for " "vectorized loops."))
static cl::opt< unsigned >	SmallLoopCost ("small-loop-cost", cl::init(20), cl::Hidden, cl::desc("The cost of a loop that is considered 'small' by the interleaver."))
static cl::opt< bool >	LoopVectorizeWithBlockFrequency ("loop-vectorize-with-block-frequency", cl::init(true), cl::Hidden, cl::desc("Enable the use of the block frequency analysis to access PGO " "heuristics minimizing code growth in cold regions and being more " "aggressive in hot regions."))
static cl::opt< bool >	EnableLoadStoreRuntimeInterleave ("enable-loadstore-runtime-interleave", cl::init(true), cl::Hidden, cl::desc("Enable runtime interleaving until load/store ports are saturated"))
cl::opt< unsigned >	NumberOfStoresToPredicate ("vectorize-num-stores-pred", cl::init(1), cl::Hidden, cl::desc("Max number of stores to be predicated behind an if."))
	The number of stores in a loop that are allowed to need predication.
static cl::opt< unsigned >	VectorizeSCEVCheckThreshold ("vectorize-scev-check-threshold", cl::init(16), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed."))
static cl::opt< unsigned >	PragmaVectorizeSCEVCheckThreshold ("pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed with a " "vectorize(enable) pragma"))
static cl::opt< bool >	EnableIndVarRegisterHeur ("enable-ind-var-reg-heur", cl::init(true), cl::Hidden, cl::desc("Count the induction variable only once when interleaving"))
static cl::opt< unsigned >	MaxNestedScalarReductionIC ("max-nested-scalar-reduction-interleave", cl::init(2), cl::Hidden, cl::desc("The maximum interleave count to use when interleaving a scalar " "reduction in a nested loop."))
static cl::opt< bool >	ForceOrderedReductions ("force-ordered-reductions", cl::init(false), cl::Hidden, cl::desc("Enable the vectorisation of loops with in-order (strict) " "FP reductions"))
static cl::opt< bool >	PreferPredicatedReductionSelect ("prefer-predicated-reduction-select", cl::init(false), cl::Hidden, cl::desc("Prefer predicating a reduction operation over an after loop select."))
cl::opt< bool >	VPlanBuildOuterloopStressTest ("vplan-build-outerloop-stress-test", cl::init(false), cl::Hidden, cl::desc("Build VPlan for every supported loop nest in the function and bail " "out right after the build (stress test the VPlan H-CFG construction " "in the VPlan-native vectorization path)."))
static cl::opt< cl::boolOrDefault >	ForceMaskedDivRem ("force-widen-divrem-via-masked-intrinsic", cl::Hidden, cl::desc("Override cost based masked intrinsic widening " "for div/rem instructions"))
static cl::opt< bool >	EnableEarlyExitVectorization ("enable-early-exit-vectorization", cl::init(true), cl::Hidden, cl::desc("Enable vectorization of early exit loops with uncountable exits."))
static cl::opt< bool >	EnableEarlyExitVectorizationWithSideEffects ("enable-early-exit-vectorization-with-side-effects", cl::init(false), cl::Hidden, cl::desc("Enable vectorization of early exit loops with uncountable exits " "and side effects"))
static constexpr uint32_t	MinItersBypassWeights [] = {1, 127}

Macro Definition Documentation

◆ DEBUG_TYPE

#define DEBUG_TYPE LV_NAME

Definition at line 164 of file LoopVectorize.cpp.

◆ LV_NAME

#define LV_NAME "loop-vectorize"

Definition at line 163 of file LoopVectorize.cpp.

Enumeration Type Documentation

◆ TailFoldingPolicyTy

enum class TailFoldingPolicyTy

strong

Option tail-folding-policy controls the tail-folding strategy and lists all available options.

The vectorizer will attempt to fold the tail-loop into the vector loop (main/epilogue loops) and predicate the instructions accordingly. If tail-folding fails, there are different fallback strategies depending on these values:

Enumerator
None
PreferFoldTail
MustFoldTail

Definition at line 213 of file LoopVectorize.cpp.

Function Documentation

◆ addFullyUnrolledInstructionsToIgnore()

void addFullyUnrolledInstructionsToIgnore	(	Loop *	L,
		const LoopVectorizationLegality::InductionList &	IL,
		SmallPtrSetImpl< Instruction * > &	InstsToIgnore )

static

Knowing that loop L executes a single vector iteration, add instructions that will get simplified and thus should not have any cost to InstsToIgnore.

Definition at line 2019 of file LoopVectorize.cpp.

References llvm::all_of(), llvm::cast(), llvm::SmallPtrSetImpl< PtrType >::insert(), IV, and llvm::Value::users().

◆ calculateEarlyExitCost()

InstructionCost calculateEarlyExitCost	(	VPCostContext &	CostCtx,
		VPlan &	Plan,
		ElementCount	VF )

static

For loops with uncountable early exits, find the cost of doing work when exiting the loop early, such as calculating the final exit values of variables used outside the loop.

TODO: This is currently overly pessimistic because the loop may not take the early exit, but better to keep this conservative for now. In future, it might be possible to relax this by using branch probabilities.

Definition at line 7252 of file LoopVectorize.cpp.

References llvm::dbgs(), llvm::VPlan::getExitBlocks(), llvm::VPlan::getMiddleBlock(), and LLVM_DEBUG.

Referenced by isOutsideLoopWorkProfitable().

◆ checkMixedPrecision()

void checkMixedPrecision	(	Loop *	L,
		OptimizationRemarkEmitter *	ORE )

static

Definition at line 7205 of file LoopVectorize.cpp.

References llvm::dyn_cast(), llvm::OptimizationRemarkEmitter::emit(), llvm::SmallVectorTemplateCommon< T, typename >::empty(), I, llvm::SmallPtrSetImpl< PtrType >::insert(), llvm::isa(), LV_NAME, llvm::SmallVectorImpl< T >::pop_back_val(), and llvm::SmallVectorTemplateBase< T, bool >::push_back().

Referenced by llvm::LoopVectorizePass::processLoop().

◆ collectSupportedLoops()

void collectSupportedLoops	(	Loop &	L,
		LoopInfo *	LI,
		OptimizationRemarkEmitter *	ORE,
		SmallVectorImpl< Loop * > &	V )

static

Definition at line 1905 of file LoopVectorize.cpp.

References collectSupportedLoops(), llvm::containsIrreducibleCFG(), llvm::EnableVPlanNativePath, isExplicitVecOuterLoop(), llvm::LoopBlocksRPO::perform(), and VPlanBuildOuterloopStressTest.

Referenced by collectSupportedLoops(), and llvm::LoopVectorizePass::runImpl().

◆ connectEpilogueVectorLoop()

void connectEpilogueVectorLoop	(	VPlan &	EpiPlan,
		Loop *	L,
		EpilogueLoopVectorizationInfo &	EPI,
		DominatorTree *	DT,
		GeneratedRTChecks &	Checks,
		ArrayRef< Instruction * >	InstsToMove,
		ArrayRef< VPInstruction * >	ResumeValues )

static

Connect the epilogue vector loop generated for EpiPlan to the main vector loop, after both plans have executed, updating branches from the iteration and runtime checks of the main loop, as well as updating various phis.

InstsToMove contains instructions that need to be moved to the preheader of the epilogue vector loop.

Definition at line 7731 of file LoopVectorize.cpp.

References llvm::GenericDomTreeUpdater< DerivedT, DomTreeT, PostDomTreeT >::applyUpdates(), assert(), llvm::cast(), llvm::DominatorTreeBase< BasicBlock, false >::Delete, llvm::EpilogueLoopVectorizationInfo::EpilogueIterationCountCheck, fixScalarResumeValuesFromBypass(), llvm::VPlan::getEntry(), llvm::BasicBlock::getFirstNonPHIIt(), llvm::VPlan::getScalarPreheader(), llvm::BasicBlock::getSinglePredecessor(), llvm::BasicBlock::getTerminator(), I, llvm::DominatorTreeBase< BasicBlock, false >::Insert, llvm::EpilogueLoopVectorizationInfo::MainLoopIterationCountCheck, llvm::make_early_inc_range(), llvm::make_pointer_range(), llvm::none_of(), llvm::BasicBlock::phis(), and llvm::User::replaceUsesOfWith().

Referenced by llvm::LoopVectorizePass::processLoop().

◆ estimateElementCount()

unsigned estimateElementCount	(	ElementCount	VF,
		std::optional< unsigned >	VScale )

static

This function attempts to return a value that represents the ElementCount at runtime.

For fixed-width VFs we know this precisely at compile time, but for scalable VFs we calculate it based on an estimate of the vscale value.

Definition at line 2091 of file LoopVectorize.cpp.

References assert(), llvm::details::FixedOrScalableQuantity< LeafTy, ValueTy >::getKnownMinValue(), and llvm::details::FixedOrScalableQuantity< LeafTy, ValueTy >::isScalable().

Referenced by llvm::LoopVectorizationPlanner::executePlan(), llvm::LoopVectorizationCostModel::isEpilogueVectorizationProfitable(), isOutsideLoopWorkProfitable(), preparePlanForEpilogueVectorLoop(), llvm::LoopVectorizationPlanner::selectBestEpiloguePlan(), and llvm::LoopVectorizationPlanner::selectInterleaveCount().

◆ fixScalarResumeValuesFromBypass()

void fixScalarResumeValuesFromBypass	(	BasicBlock *	BypassBlock,
		Loop *	L,
		VPlan &	BestEpiPlan,
		ArrayRef< VPInstruction * >	ResumeValues )

static

Definition at line 7695 of file LoopVectorize.cpp.

References llvm::cast(), llvm::VPlan::getScalarHeader(), llvm::VPlan::getScalarPreheader(), llvm::BasicBlock::phis(), llvm::VPBasicBlock::phis(), llvm::predecessors(), and llvm::zip().

Referenced by connectEpilogueVectorLoop().

◆ getAddressAccessSCEV()

const SCEV * getAddressAccessSCEV	(	Value *	Ptr,
		PredicatedScalarEvolution &	PSE,
		const Loop *	TheLoop )

static

Gets the address access SCEV for Ptr, if it should be used for cost modeling according to isAddressSCEVForCost.

This SCEV can be sent to the Target in order to estimate the address calculation cost.

Definition at line 4204 of file LoopVectorize.cpp.

References llvm::PredicatedScalarEvolution::getSCEV(), llvm::PredicatedScalarEvolution::getSE(), and llvm::vputils::isAddressSCEVForCost().

Referenced by llvm::VPReplicateRecipe::computeCost().

◆ getDebugLocFromInstOrOperands()

DebugLoc getDebugLocFromInstOrOperands ( Instruction * I )

static

Look for a meaningful debug location on the instruction or its operands.

Definition at line 721 of file LoopVectorize.cpp.

References DenseMapInfo< LocallyHashedType >::Empty, for(), getDebugLocFromInstOrOperands(), llvm::DebugLoc::getUnknown(), and I.

Referenced by getDebugLocFromInstOrOperands().

◆ getEpilogueLowering()

EpilogueLowering getEpilogueLowering	(	Function *	F,
		Loop *	L,
		LoopVectorizeHints &	Hints,
		bool	OptForSize,
		TargetTransformInfo *	TTI,
		TargetLibraryInfo *	TLI,
		LoopVectorizationLegality &	LVL,
		InterleavedAccessInfo *	IAI )

static

Definition at line 7124 of file LoopVectorize.cpp.

References llvm::CM_EpilogueAllowed, llvm::CM_EpilogueNotAllowedFoldTail, llvm::CM_EpilogueNotAllowedOptSize, llvm::CM_EpilogueNotNeededFoldTail, F, llvm::LoopVectorizeHints::FK_Disabled, llvm::LoopVectorizeHints::FK_Enabled, llvm::LoopVectorizeHints::getForce(), llvm::LoopVectorizeHints::getPredicate(), MustFoldTail, None, PreferFoldTail, and TailFoldingPolicy.

Referenced by llvm::LoopVectorizePass::processLoop().

◆ getEpilogueTailLowering()

EpilogueLowering getEpilogueTailLowering	(	const LoopVectorizationCostModel &	MainCM,
		const Loop *	L,
		OptimizationRemarkEmitter *	ORE )

static

Determine how to lower the epilogue for the vector epilogue loop.

Check if there are any conflicts that prevent tail-folding the epilogue.

Returns: CM_EpilogueNotNeededFoldTail if epilogue tail-folding is possible, otherwise CM_EpilogueAllowed.

Definition at line 7167 of file LoopVectorize.cpp.

References llvm::CM_EpilogueAllowed, llvm::CM_EpilogueNotNeededFoldTail, llvm::dbgs(), EnableEpilogueVectorization, EpilogueTailFoldingPolicy, llvm::LoopVectorizationCostModel::isEpilogueAllowed(), LLVM_DEBUG, PreferFoldTail, llvm::LoopVectorizationUtils::reportVectorizationInfo(), and llvm::LoopVectorizationCostModel::requiresScalarEpilogue().

Referenced by llvm::LoopVectorizePass::processLoop().

◆ getMaskedDivRemIntrinsic()

Intrinsic::ID getMaskedDivRemIntrinsic ( unsigned Opcode )

static

Definition at line 2514 of file LoopVectorize.cpp.

References llvm_unreachable.

Referenced by llvm::LoopVectorizationCostModel::getDivRemSpeculationCost().

◆ getMaxTCFromNonZeroRange()

unsigned getMaxTCFromNonZeroRange	(	PredicatedScalarEvolution &	PSE,
		Loop *	L )

static

Get the maximum trip count for L from the SCEV unsigned range, excluding zero from the range.

Only valid when not folding the tail, as the minimum iteration count check guards against a zero trip count. Returns 0 if unknown.

Definition at line 454 of file LoopVectorize.cpp.

References llvm::APInt::getActiveBits(), llvm::PredicatedScalarEvolution::getBackedgeTakenCount(), llvm::PredicatedScalarEvolution::getSE(), llvm::ScalarEvolution::getTripCountFromExitCount(), llvm::SCEV::getType(), llvm::ConstantRange::getUnsignedMax(), llvm::ScalarEvolution::getUnsignedRange(), llvm::APInt::getZExtValue(), llvm::isa(), and llvm::APInt::isZero().

Referenced by llvm::LoopVectorizationCostModel::computeMaxVF(), and getSmallBestKnownTC().

◆ getSmallBestKnownTC()

std::optional< ElementCount > getSmallBestKnownTC	(	PredicatedScalarEvolution &	PSE,
		Loop *	L,
		bool	CanUseConstantMax = true,
		bool	CanExcludeZeroTrips = false )

static

Returns "best known" trip count, which is either a valid positive trip count or std::nullopt when an estimate cannot be made (including when the trip count would overflow), for the specified loop L as defined by the following procedure: 1) Returns exact trip count if it is known.

2) Returns expected trip count according to profile data if any. 3) Returns upper bound estimate if known, and if CanUseConstantMax. 4) Returns the maximum trip count from the SCEV range excluding zero, if CanUseConstantMax and CanExcludeZeroTrips. 5) Returns std::nullopt if all of the above failed.

Definition at line 479 of file LoopVectorize.cpp.

References llvm::ElementCount::getFixed(), llvm::getLoopEstimatedTripCount(), getMaxTCFromNonZeroRange(), llvm::PredicatedScalarEvolution::getSE(), llvm::PredicatedScalarEvolution::getSmallConstantMaxTripCount(), getSmallConstantTripCount(), and LoopVectorizeWithBlockFrequency.

Referenced by llvm::LoopVectorizationCostModel::computeMaxVF(), isOutsideLoopWorkProfitable(), llvm::LoopVectorizePass::processLoop(), and llvm::LoopVectorizationPlanner::selectInterleaveCount().

◆ getSmallConstantTripCount()

ElementCount getSmallConstantTripCount	(	ScalarEvolution *	SE,
		const Loop *	L )

static

A version of ScalarEvolution::getSmallConstantTripCount that returns an ElementCount to include loops whose trip count is a function of vscale.

Definition at line 428 of file LoopVectorize.cpp.

References llvm::cast(), llvm::APInt::getActiveBits(), llvm::ScalarEvolution::getBackedgeTakenCount(), llvm::ElementCount::getFixed(), llvm::ElementCount::getScalable(), llvm::ScalarEvolution::getSmallConstantTripCount(), llvm::ScalarEvolution::getTripCountFromExitCount(), llvm::SCEV::getType(), llvm::APInt::getZExtValue(), hasNoUnsignedWrap(), llvm::isa(), llvm::SCEVPatternMatch::m_scev_APInt(), llvm::SCEVPatternMatch::m_scev_Mul(), llvm::SCEVPatternMatch::m_SCEVVScale(), and llvm::SCEVPatternMatch::match().

Referenced by llvm::LoopVectorizationCostModel::computeMaxVF(), getSmallBestKnownTC(), and llvm::LoopVectorizationPlanner::selectInterleaveCount().

◆ hasFindLastReductionPhi()

bool hasFindLastReductionPhi ( VPlan & Plan )

static

Returns true if the VPlan contains a VPReductionPHIRecipe with FindLast recurrence kind.

Definition at line 3362 of file LoopVectorize.cpp.

References llvm::any_of(), llvm::VPBlockBase::getEntryBasicBlock(), llvm::VPlan::getVectorLoopRegion(), and llvm::VPBasicBlock::phis().

Referenced by llvm::LoopVectorizationPlanner::selectInterleaveCount().

◆ hasIrregularType()

bool hasIrregularType	(	Type *	Ty,
		const DataLayout &	DL )

static

A helper function that returns true if the given type is irregular.

The type is irregular if its allocated size doesn't equal the store size of an element of the corresponding vector type.

Definition at line 419 of file LoopVectorize.cpp.

References DL.

Referenced by llvm::LoopVectorizationCostModel::interleavedAccessCanBeWidened(), and llvm::LoopVectorizationCostModel::memoryInstructionCanBeWidened().

◆ hasReplicatorRegion()

bool hasReplicatorRegion ( VPlan & Plan )

static

Definition at line 3354 of file LoopVectorize.cpp.

References llvm::any_of(), llvm::VPBlockUtils::blocksOnly(), llvm::VPRegionBlock::getEntry(), llvm::VPlan::getVectorLoopRegion(), and llvm::vp_depth_first_shallow().

Referenced by llvm::LoopVectorizationPlanner::computeBestVF().

◆ hasUnsupportedHeaderPhiRecipe()

bool hasUnsupportedHeaderPhiRecipe ( VPlan & Plan )

static

Returns true if the VPlan contains header phi recipes that are not currently supported for epilogue vectorization.

Definition at line 3374 of file LoopVectorize.cpp.

References llvm::any_of(), llvm::VPBlockBase::getEntryBasicBlock(), llvm::VPlan::getVectorLoopRegion(), and llvm::VPBasicBlock::phis().

◆ hasVectorLibraryVariantFor()

bool hasVectorLibraryVariantFor	(	const CallInst &	CI,
		ElementCount	VF,
		bool	MaskRequired,
		const TargetLibraryInfo *	TLI )

static

Returns true iff CI has a library vector variant usable at VF: a mapping with matching VF, masked if required, whose vector function is declared in the module.

Such variants are priced by VPWidenCallRecipe::computeCost rather than by scalarization.

Definition at line 2105 of file LoopVectorize.cpp.

References llvm::any_of(), llvm::Module::getFunction(), llvm::VFDatabase::getMappings(), llvm::Instruction::getModule(), and llvm::CallBase::isNoBuiltin().

Referenced by llvm::LoopVectorizationCostModel::getVectorCallCost(), and llvm::LoopVectorizationCostModel::isScalarWithPredication().

◆ isExplicitVecOuterLoop()

bool isExplicitVecOuterLoop	(	Loop *	OuterLp,
		OptimizationRemarkEmitter *	ORE )

static

Definition at line 1877 of file LoopVectorize.cpp.

References llvm::LoopVectorizeHints::allowVectorization(), assert(), llvm::dbgs(), llvm::LoopVectorizeHints::emitRemarkWithHints(), llvm::LoopVectorizeHints::FK_Undefined, llvm::LoopVectorizeHints::getForce(), llvm::LoopBase< BlockT, LoopT >::getHeader(), llvm::LoopVectorizeHints::getInterleave(), llvm::BasicBlock::getParent(), llvm::LoopBase< BlockT, LoopT >::isInnermost(), and LLVM_DEBUG.

Referenced by collectSupportedLoops().

◆ isIndvarOverflowCheckKnownFalse()

bool isIndvarOverflowCheckKnownFalse	(	const LoopVectorizationCostModel *	Cost,
		ElementCount	VF,
		std::optional< unsigned >	UF = std::nullopt )

static

For the given VF and UF and maximum trip count computed for the loop, determine whether the induction variable might overflow in the vectorized loop.

If not, then we know a runtime overflow check always evaluates to false and can be removed.

Definition at line 1939 of file LoopVectorize.cpp.

References llvm::details::FixedOrScalableQuantity< LeafTy, ValueTy >::getKnownMinValue(), llvm::IntegerType::getMask(), llvm::getMaxVScale(), and llvm::details::FixedOrScalableQuantity< LeafTy, ValueTy >::isScalable().

◆ isOutsideLoopWorkProfitable()

bool isOutsideLoopWorkProfitable	(	GeneratedRTChecks &	Checks,
		VectorizationFactor &	VF,
		Loop *	L,
		PredicatedScalarEvolution &	PSE,
		VPCostContext &	CostCtx,
		VPlan &	Plan,
		EpilogueLowering	SEL,
		std::optional< unsigned >	VScale )

static

This function determines whether or not it's still profitable to vectorize the loop given the extra work we have to do outside of the loop:

1. Perform the runtime checks before entering the loop to ensure it's safe to vectorize.
2. In the case of loops with uncountable early exits, we may have to do extra work when exiting the loop early, such as calculating the final exit values of variables used outside the loop.
3. The middle block.

Definition at line 7278 of file LoopVectorize.cpp.

References llvm::alignTo(), calculateEarlyExitCost(), llvm::CM_EpilogueAllowed, llvm::VectorizationFactor::Cost, llvm::VPBasicBlock::cost(), llvm::dbgs(), llvm::divideCeil(), estimateElementCount(), llvm::ElementCount::getFixed(), llvm::VPlan::getMiddleBlock(), getSmallBestKnownTC(), llvm::InstructionCost::getValue(), llvm::details::FixedOrScalableQuantity< ElementCount, unsigned >::isKnownLT(), llvm::ElementCount::isScalar(), llvm::InstructionCost::isValid(), LLVM_DEBUG, llvm::VectorizationFactor::MinProfitableTripCount, llvm::VectorizationFactor::ScalarCost, VectorizeMemoryCheckThreshold, and llvm::VectorizationFactor::Width.

Referenced by llvm::LoopVectorizePass::processLoop().

◆ legacyCSE()

void legacyCSE ( BasicBlock * BB )

static

FIXME: This legacy common-subexpression-elimination routine is scheduled for removal, in favor of the VPlan-based one.

Definition at line 2068 of file LoopVectorize.cpp.

References llvm::DenseMapBase< DerivedT, KeyT, ValueT, KeyInfoT, BucketT >::lookup(), and llvm::make_early_inc_range().

Referenced by llvm::InnerLoopVectorizer::fixVectorizedLoop().

◆ maybeVectorizeType()

Type * maybeVectorizeType	(	Type *	Ty,
		ElementCount	VF )

static

Definition at line 2147 of file LoopVectorize.cpp.

References llvm::canVectorizeTy(), llvm::ElementCount::isScalar(), and llvm::toVectorizedTy().

Referenced by llvm::LoopVectorizationCostModel::getVectorIntrinsicCost().

◆ preparePlanForEpilogueVectorLoop()

SmallVector< Instruction * > preparePlanForEpilogueVectorLoop	(	VPlan &	MainPlan,
		VPlan &	Plan,
		Loop *	L,
		const SCEV2ValueTy &	ExpandedSCEVs,
		EpilogueLoopVectorizationInfo &	EPI,
		LoopVectorizationCostModel &	CM,
		VFSelectionContext &	Config,
		ScalarEvolution &	SE )

static

Prepare Plan for vectorizing the epilogue loop.

That is, re-use expanded SCEVs from ExpandedSCEVs and set resume values for header recipes. Some reductions require creating new instructions to compute the resume values. They are collected in a vector and returned. They must be moved to the preheader of the vector epilogue loop, after being created by the execution of Plan.

Definition at line 7481 of file LoopVectorize.cpp.

References llvm::Add, llvm::VPlanTransforms::addMinimumVectorEpilogueIterationCheck(), llvm::all_of(), llvm::any_of(), assert(), llvm::cast(), llvm::VPInstruction::ComputeReductionResult, llvm::VPBuilder::createAdd(), llvm::dyn_cast(), llvm::EpilogueLoopVectorizationInfo::EpilogueUF, llvm::EpilogueLoopVectorizationInfo::EpilogueVF, estimateElementCount(), llvm::vputils::findCanonicalIVIncrement(), llvm::vputils::findRecipe(), llvm::VPRegionBlock::getCanonicalIV(), llvm::VPlan::getEntry(), llvm::VPBlockBase::getEntryBasicBlock(), llvm::VPIRFlags::getFastMathFlagsOrNone(), llvm::VPBasicBlock::getFirstNonPhi(), llvm::PHINode::getIncomingValueForBlock(), llvm::VPValue::getLiveInIRValue(), llvm::VPInstruction::getOpcode(), llvm::VPUser::getOperand(), llvm::VPlan::getOrAddLiveIn(), llvm::getRecurrenceIdentity(), llvm::VPReductionPHIRecipe::getRecurrenceKind(), llvm::VPlan::getScalarPreheader(), llvm::VPHeaderPHIRecipe::getStartValue(), llvm::VPBuilder::getToInsertAfter(), llvm::VPlan::getTripCount(), llvm::Value::getType(), llvm::VPValue::getUnderlyingValue(), llvm::VPlan::getVectorLoopRegion(), llvm::VPReductionPHIRecipe::getVFScaleFactor(), llvm::VFSelectionContext::getVScaleForTuning(), I, llvm::Increment, llvm::isa(), llvm::RecurrenceDescriptor::isAnyOfRecurrenceKind(), llvm::RecurrenceDescriptor::isSubRecurrenceKind(), llvm::ElementCount::isVector(), IV, llvm::DenseMapBase< DerivedT, KeyT, ValueT, KeyInfoT, BucketT >::lookup(), llvm::PatternMatch::m_SpecificInt(), llvm::EpilogueLoopVectorizationInfo::MainLoopIterationCountCheck, llvm::EpilogueLoopVectorizationInfo::MainLoopUF, llvm::EpilogueLoopVectorizationInfo::MainLoopVF, llvm::make_early_inc_range(), llvm::PatternMatch::match(), llvm::PatternMatch::match_fn(), llvm::SmallVectorTemplateBase< T, bool >::push_back(), llvm::VPInstruction::ReductionStartVector, llvm::VPValue::replaceAllUsesWith(), llvm::LoopVectorizationCostModel::requiresScalarEpilogue(), llvm::VPlan::resetTripCount(), RUN_VPLAN_PASS, 
llvm::VPBlockBase::setName(), llvm::VPUser::setOperand(), llvm::Sub, and llvm::EpilogueLoopVectorizationInfo::VectorTripCount.

Referenced by llvm::LoopVectorizePass::processLoop().

◆ preparePlanForMainVectorLoop()

SmallVector< VPInstruction * > preparePlanForMainVectorLoop	(	VPlan &	MainPlan,
		VPlan &	EpiPlan )

static

Prepare MainPlan for vectorizing the main vector loop during epilogue vectorization.

Definition at line 7395 of file LoopVectorize.cpp.

References assert(), llvm::VPBasicBlock::begin(), llvm::cast(), llvm::VPBuilder::createNaryOp(), llvm::VPBuilder::createScalarPhi(), llvm::dyn_cast(), llvm::iterator_range< IteratorT >::end(), llvm::find_if(), llvm::VPBasicBlock::front(), llvm::VPRegionBlock::getCanonicalIV(), llvm::VPlan::getEntry(), llvm::VPBlockBase::getExitingBasicBlock(), llvm::VPValue::getLiveInIRValue(), llvm::VPlan::getMiddleBlock(), llvm::VPlan::getScalarHeader(), llvm::VPlan::getScalarPreheader(), llvm::VPValue::getScalarType(), llvm::VPBasicBlock::getTerminator(), llvm::VPlan::getVectorLoopRegion(), llvm::VPlan::getZero(), llvm::isa(), llvm::isGuaranteedNotToBeUndefOrPoison(), llvm::VPlanPatternMatch::m_BranchOnCount(), llvm::VPlanPatternMatch::m_VPValue(), llvm::map_range(), llvm::PatternMatch::match(), llvm::VPlanPatternMatch::matchFindIVResult(), llvm::VPRecipeBase::moveBefore(), llvm::VPBasicBlock::phis(), llvm::VPValue::replaceUsesWithIf(), llvm::VPInstruction::ResumeForEpilogue, llvm::VPInstruction::setName(), llvm::VPUser::setOperand(), and llvm::to_vector().

Referenced by llvm::LoopVectorizePass::processLoop().

◆ printOptimizedVPlan()

void printOptimizedVPlan ( VPlan & )

static

Definition at line 6480 of file LoopVectorize.cpp.

◆ replaceVPBBWithIRVPBB()

VPIRBasicBlock * replaceVPBBWithIRVPBB	(	VPBasicBlock *	VPBB,
		BasicBlock *	IRBB,
		VPlan *	Plan = nullptr )

static

Replace VPBB with a VPIRBasicBlock wrapping IRBB.

All recipes from VPBB are moved to the end of the newly created VPIRBasicBlock. All predecessors and successors of VPBB, if any, are rewired to the new VPIRBasicBlock. If VPBB may be unreachable, Plan must be passed.

Definition at line 1982 of file LoopVectorize.cpp.

References llvm::VPBasicBlock::begin(), llvm::VPBasicBlock::end(), llvm::VPBasicBlock::getFirstNonPhi(), llvm::VPBlockBase::getPlan(), llvm::make_early_inc_range(), llvm::make_range(), llvm::VPBasicBlock::phis(), and llvm::VPBlockUtils::reassociateBlocks().

Referenced by llvm::LoopVectorizationPlanner::executePlan().

◆ STATISTIC() [1/5]

STATISTIC	(	LoopsAnalyzed	,
		"Number of loops analyzed for vectorization"	)

◆ STATISTIC() [2/5]

STATISTIC	(	LoopsEarlyExitVectorized	,
		"Number of early exit loops vectorized"	)

◆ STATISTIC() [3/5]

STATISTIC	(	LoopsEpilogueVectorized	,
		"Number of epilogues vectorized"	)

◆ STATISTIC() [4/5]

STATISTIC	(	LoopsPartialAliasVectorized	,
		"Number of partial aliasing loops vectorized"	)

◆ STATISTIC() [5/5]

STATISTIC	(	LoopsVectorized	,
		"Number of loops vectorized"	)

◆ useActiveLaneMask()

bool useActiveLaneMask ( TailFoldingStyle Style )

static

Definition at line 1854 of file LoopVectorize.cpp.

References llvm::Data, and llvm::DataAndControlFlow.

◆ useActiveLaneMaskForControlFlow()

bool useActiveLaneMaskForControlFlow ( TailFoldingStyle Style )

static

Definition at line 1859 of file LoopVectorize.cpp.

References llvm::DataAndControlFlow.

◆ useMaskedInterleavedAccesses()

bool useMaskedInterleavedAccesses ( const TargetTransformInfo & TTI )

static

Definition at line 1970 of file LoopVectorize.cpp.

References EnableMaskedInterleavedMemAccesses.

Referenced by llvm::LoopVectorizationCostModel::computeMaxVF(), llvm::LoopVectorizationCostModel::interleavedAccessCanBeWidened(), llvm::LoopVectorizationPlanner::plan(), and llvm::LoopVectorizePass::processLoop().

◆ willGenerateVectors()

bool willGenerateVectors	(	VPlan &	Plan,
		ElementCount	VF,
		const TargetTransformInfo &	TTI )

static

Check if any recipe of Plan will generate a vector value, which will be assigned a vector register.

Definition at line 3260 of file LoopVectorize.cpp.

References llvm::any_of(), assert(), llvm::VPBlockUtils::blocksOnly(), llvm::collectEphemeralRecipesForVPlan(), llvm::detail::DenseSetImpl< ValueT, MapTy, ValueInfoT >::contains(), llvm::getContainedTypes(), llvm::VPRegionBlock::getEntry(), llvm::details::FixedOrScalableQuantity< LeafTy, ValueTy >::getFixedValue(), llvm::details::FixedOrScalableQuantity< LeafTy, ValueTy >::getKnownMinValue(), llvm::VPValue::getScalarType(), llvm::VPlan::getVectorLoopRegion(), llvm::detail::DenseSetImpl< ValueT, MapTy, ValueInfoT >::insert(), llvm::isa(), llvm::details::FixedOrScalableQuantity< LeafTy, ValueTy >::isScalable(), llvm::ElementCount::isVector(), llvm_unreachable, llvm::toVectorizedTy(), and llvm::vp_depth_first_shallow().

Referenced by llvm::LoopVectorizationPlanner::computeBestVF().

Variable Documentation

◆ EnableEarlyExitVectorization

cl::opt< bool > EnableEarlyExitVectorization("enable-early-exit-vectorization", cl::init(true), cl::Hidden, cl::desc( "Enable vectorization of early exit loops with uncountable exits."))	(	"enable-early-exit-vectorization"	,
		cl::init(true)	,
		cl::Hidden	,
		cl::desc( "Enable vectorization of early exit loops with uncountable exits.")	)

static

Referenced by llvm::LoopVectorizePass::processLoop(), and llvm::LoopVectorizationCostModel::requiresScalarEpilogue().

◆ EnableEarlyExitVectorizationWithSideEffects

cl::opt< bool > EnableEarlyExitVectorizationWithSideEffects("enable-early-exit-vectorization-with-side-effects", cl::init(false), cl::Hidden, cl::desc("Enable vectorization of early exit loops with uncountable exits " "and side effects"))	(	"enable-early-exit-vectorization-with-side-effects"	,
		cl::init(false)	,
		cl::Hidden	,
		cl::desc("Enable vectorization of early exit loops with uncountable exits " "and side effects")	)

static

Referenced by llvm::LoopVectorizePass::processLoop().

◆ EnableEpilogueVectorization

cl::opt< bool > EnableEpilogueVectorization("enable-epilogue-vectorization", cl::init(true), cl::Hidden, cl::desc("Enable vectorization of epilogue loops."))	(	"enable-epilogue-vectorization"	,
		cl::init(true)	,
		cl::Hidden	,
		cl::desc("Enable vectorization of epilogue loops.")	)

static

Referenced by getEpilogueTailLowering(), and llvm::LoopVectorizationPlanner::selectBestEpiloguePlan().

◆ EnableIndVarRegisterHeur

cl::opt< bool > EnableIndVarRegisterHeur("enable-ind-var-reg-heur", cl::init(true), cl::Hidden, cl::desc("Count the induction variable only once when interleaving"))	(	"enable-ind-var-reg-heur"	,
		cl::init(true)	,
		cl::Hidden	,
		cl::desc("Count the induction variable only once when interleaving")	)

static

Referenced by llvm::LoopVectorizationPlanner::selectInterleaveCount().

◆ EnableInterleavedMemAccesses

cl::opt< bool > EnableInterleavedMemAccesses("enable-interleaved-mem-accesses", cl::init(false), cl::Hidden, cl::desc("Enable vectorization on interleaved memory accesses in a loop"))	(	"enable-interleaved-mem-accesses"	,
		cl::init(false)	,
		cl::Hidden	,
		cl::desc("Enable vectorization on interleaved memory accesses in a loop")	)

static

Referenced by llvm::LoopVectorizePass::processLoop().

◆ EnableLoadStoreRuntimeInterleave

cl::opt< bool > EnableLoadStoreRuntimeInterleave("enable-loadstore-runtime-interleave", cl::init(true), cl::Hidden, cl::desc( "Enable runtime interleaving until load/store ports are saturated"))	(	"enable-loadstore-runtime-interleave"	,
		cl::init(true)	,
		cl::Hidden	,
		cl::desc( "Enable runtime interleaving until load/store ports are saturated")	)

static

Referenced by llvm::LoopVectorizationPlanner::selectInterleaveCount().

◆ EnableMaskedInterleavedMemAccesses

cl::opt< bool > EnableMaskedInterleavedMemAccesses("enable-masked-interleaved-mem-accesses", cl::init(false), cl::Hidden, cl::desc("Enable vectorization on masked interleaved memory accesses in a loop"))	(	"enable-masked-interleaved-mem-accesses"	,
		cl::init(false)	,
		cl::Hidden	,
		cl::desc("Enable vectorization on masked interleaved memory accesses in a loop")	)

static

An interleave-group may need masking if it resides in a block that needs predication, or in order to mask away gaps.

Referenced by useMaskedInterleavedAccesses().

◆ EpilogueTailFoldingPolicy

cl::opt< TailFoldingPolicyTy > EpilogueTailFoldingPolicy("epilogue-tail-folding-policy", cl::Hidden, cl::desc( "Epilogue-tail-folding preferences over creating an epilogue loop."), cl::values( clEnumValN(TailFoldingPolicyTy::None, "dont-fold-tail", "Don't tail-fold loops."), clEnumValN(TailFoldingPolicyTy::PreferFoldTail, "prefer-fold-tail", "prefer tail-folding, otherwise create an epilogue when " "appropriate.")))	(	"epilogue-tail-folding-policy"	,
		cl::Hidden	,
		cl::desc( "Epilogue-tail-folding preferences over creating an epilogue loop.")	,
		cl::values( clEnumValN(TailFoldingPolicyTy::None, "dont-fold-tail", "Don't tail-fold loops."), clEnumValN(TailFoldingPolicyTy::PreferFoldTail, "prefer-fold-tail", "prefer tail-folding, otherwise create an epilogue when " "appropriate."))	)

static

Referenced by getEpilogueTailLowering().

◆ EpilogueVectorizationForceVF

cl::opt< unsigned > EpilogueVectorizationForceVF("epilogue-vectorization-force-VF", cl::init(1), cl::Hidden, cl::desc("When epilogue vectorization is enabled, and a value greater than " "1 is specified, forces the given VF for all applicable epilogue " "loops."))	(	"epilogue-vectorization-force-VF"	,
		cl::init(1)	,
		cl::Hidden	,
		cl::desc("When epilogue vectorization is enabled, and a value greater than " "1 is specified, forces the given VF for all applicable epilogue " "loops.")	)

static

Referenced by llvm::LoopVectorizationPlanner::computeBestVF(), llvm::LoopVectorizationPlanner::plan(), and llvm::LoopVectorizationPlanner::selectBestEpiloguePlan().

◆ EpilogueVectorizationMinVF

cl::opt< unsigned > EpilogueVectorizationMinVF("epilogue-vectorization-minimum-VF", cl::Hidden, cl::desc("Only loops with vectorization factor equal to or larger than " "the specified value are considered for epilogue vectorization."))	(	"epilogue-vectorization-minimum-VF"	,
		cl::Hidden	,
		cl::desc("Only loops with vectorization factor equal to or larger than " "the specified value are considered for epilogue vectorization.")	)

static

Referenced by llvm::LoopVectorizationCostModel::isEpilogueVectorizationProfitable().

◆ ForceMaskedDivRem

cl::opt< cl::boolOrDefault > ForceMaskedDivRem("force-widen-divrem-via-masked-intrinsic", cl::Hidden, cl::desc("Override cost based masked intrinsic widening " "for div/rem instructions"))	(	"force-widen-divrem-via-masked-intrinsic"	,
		cl::Hidden	,
		cl::desc("Override cost based masked intrinsic widening " "for div/rem instructions")	)

static

Referenced by llvm::LoopVectorizationCostModel::isDivRemScalarWithPredication().

◆ ForceOrderedReductions

cl::opt< bool > ForceOrderedReductions("force-ordered-reductions", cl::init(false), cl::Hidden, cl::desc("Enable the vectorisation of loops with in-order (strict) " "FP reductions"))	(	"force-ordered-reductions"	,
		cl::init(false)	,
		cl::Hidden	,
		cl::desc("Enable the vectorisation of loops with in-order (strict) " "FP reductions")	)

static

Referenced by llvm::LoopVectorizePass::processLoop().

◆ ForcePartialAliasingVectorization

cl::opt< bool > ForcePartialAliasingVectorization("force-partial-aliasing-vectorization", cl::init(false), cl::Hidden, cl::desc("Replace pointer diff checks with alias masks."))	(	"force-partial-aliasing-vectorization"	,
		cl::init(false)	,
		cl::Hidden	,
		cl::desc("Replace pointer diff checks with alias masks.")	)

static

Referenced by llvm::LoopVectorizationCostModel::tryToEnablePartialAliasMasking().

◆ ForceTailFoldingStyle

cl::opt< TailFoldingStyle > ForceTailFoldingStyle("force-tail-folding-style", cl::desc("Force the tail folding style"), cl::init(TailFoldingStyle::None), cl::values( clEnumValN(TailFoldingStyle::None, "none", "Disable tail folding"), clEnumValN( TailFoldingStyle::Data, "data", "Create lane mask for data only, using active.lane.mask intrinsic"), clEnumValN(TailFoldingStyle::DataWithoutLaneMask, "data-without-lane-mask", "Create lane mask with compare/stepvector"), clEnumValN(TailFoldingStyle::DataAndControlFlow, "data-and-control", "Create lane mask using active.lane.mask intrinsic, and use " "it for both data and control flow"), clEnumValN(TailFoldingStyle::DataWithEVL, "data-with-evl", "Use predicated EVL instructions for tail folding. If EVL " "is unsupported, fallback to data-without-lane-mask.")))	(	"force-tail-folding-style"	,
		cl::desc("Force the tail folding style")	,
		cl::init(TailFoldingStyle::None)	,
		cl::values( clEnumValN(TailFoldingStyle::None, "none", "Disable tail folding"), clEnumValN( TailFoldingStyle::Data, "data", "Create lane mask for data only, using active.lane.mask intrinsic"), clEnumValN(TailFoldingStyle::DataWithoutLaneMask, "data-without-lane-mask", "Create lane mask with compare/stepvector"), clEnumValN(TailFoldingStyle::DataAndControlFlow, "data-and-control", "Create lane mask using active.lane.mask intrinsic, and use " "it for both data and control flow"), clEnumValN(TailFoldingStyle::DataWithEVL, "data-with-evl", "Use predicated EVL instructions for tail folding. If EVL " "is unsupported, fallback to data-without-lane-mask."))	)

static

Referenced by llvm::LoopVectorizationCostModel::setTailFoldingStyle().

◆ ForceTargetMaxScalarInterleaveFactor

cl::opt< unsigned > ForceTargetMaxScalarInterleaveFactor("force-target-max-scalar-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for " "scalar loops."))	(	"force-target-max-scalar-interleave"	,
		cl::init(0)	,
		cl::Hidden	,
		cl::desc("A flag that overrides the target's max interleave factor for " "scalar loops.")	)

static

Referenced by llvm::LoopVectorizationPlanner::selectInterleaveCount().

◆ ForceTargetMaxVectorInterleaveFactor

cl::opt< unsigned > ForceTargetMaxVectorInterleaveFactor("force-target-max-vector-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for " "vectorized loops."))	(	"force-target-max-vector-interleave"	,
		cl::init(0)	,
		cl::Hidden	,
		cl::desc("A flag that overrides the target's max interleave factor for " "vectorized loops.")	)

static

Referenced by llvm::LoopVectorizationPlanner::selectInterleaveCount().

◆ ForceTargetNumScalarRegs

cl::opt< unsigned > ForceTargetNumScalarRegs("force-target-num-scalar-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of scalar registers."))	(	"force-target-num-scalar-regs"	,
		cl::init(0)	,
		cl::Hidden	,
		cl::desc("A flag that overrides the target's number of scalar registers.")	)

static

Referenced by llvm::LoopVectorizationPlanner::selectInterleaveCount().

◆ ForceTargetNumVectorRegs

cl::opt< unsigned > ForceTargetNumVectorRegs("force-target-num-vector-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of vector registers."))	(	"force-target-num-vector-regs"	,
		cl::init(0)	,
		cl::Hidden	,
		cl::desc("A flag that overrides the target's number of vector registers.")	)

static

Referenced by llvm::LoopVectorizationPlanner::selectInterleaveCount().

◆ LoopVectorizeWithBlockFrequency

cl::opt< bool > LoopVectorizeWithBlockFrequency("loop-vectorize-with-block-frequency", cl::init(true), cl::Hidden, cl::desc("Enable the use of the block frequency analysis to access PGO " "heuristics minimizing code growth in cold regions and being more " "aggressive in hot regions."))	(	"loop-vectorize-with-block-frequency"	,
		cl::init(true)	,
		cl::Hidden	,
		cl::desc("Enable the use of the block frequency analysis to access PGO " "heuristics minimizing code growth in cold regions and being more " "aggressive in hot regions.")	)

static

Referenced by getSmallBestKnownTC().

◆ MaxNestedScalarReductionIC

cl::opt< unsigned > MaxNestedScalarReductionIC("max-nested-scalar-reduction-interleave", cl::init(2), cl::Hidden, cl::desc("The maximum interleave count to use when interleaving a scalar " "reduction in a nested loop."))	(	"max-nested-scalar-reduction-interleave"	,
		cl::init(2)	,
		cl::Hidden	,
		cl::desc("The maximum interleave count to use when interleaving a scalar " "reduction in a nested loop.")	)

static

Referenced by llvm::LoopVectorizationPlanner::selectInterleaveCount().

◆ MinItersBypassWeights

uint32_t MinItersBypassWeights[] = {1, 127}

static constexpr

Definition at line 414 of file LoopVectorize.cpp.

Referenced by llvm::VPlanTransforms::addIterationCountCheckBlock(), llvm::LoopVectorizationPlanner::addMinimumIterationCheck(), llvm::VPlanTransforms::addMinimumIterationCheck(), and llvm::LoopVectorizePass::processLoop().

◆ NumberOfStoresToPredicate

cl::opt< unsigned > NumberOfStoresToPredicate("vectorize-num-stores-pred", cl::init(1), cl::Hidden, cl::desc("Max number of stores to be predicated behind an if."))	(	"vectorize-num-stores-pred"	,
		cl::init(1)	,
		cl::Hidden	,
		cl::desc("Max number of stores to be predicated behind an if.")	)

The number of stores in a loop that are allowed to need predication.

Referenced by llvm::LoopVectorizationCostModel::useEmulatedMaskMemRefHack(), and llvm::VPCostContext::useEmulatedMaskMemRefHack().

◆ PragmaVectorizeSCEVCheckThreshold

cl::opt< unsigned > PragmaVectorizeSCEVCheckThreshold("pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed with a " "vectorize(enable) pragma"))	(	"pragma-vectorize-scev-check-threshold"	,
		cl::init(128)	,
		cl::Hidden	,
		cl::desc("The maximum number of SCEV checks allowed with a " "vectorize(enable) pragma")	)

static

◆ PreferPredicatedReductionSelect

cl::opt< bool > PreferPredicatedReductionSelect("prefer-predicated-reduction-select", cl::init(false), cl::Hidden, cl::desc( "Prefer predicating a reduction operation over an after loop select."))	(	"prefer-predicated-reduction-select"	,
		cl::init(false)	,
		cl::Hidden	,
		cl::desc( "Prefer predicating a reduction operation over an after loop select.")	)

static

Referenced by llvm::LoopVectorizationCostModel::usePredicatedReductionSelect().

◆ SmallLoopCost

cl::opt< unsigned > SmallLoopCost("small-loop-cost", cl::init(20), cl::Hidden, cl::desc( "The cost of a loop that is considered 'small' by the interleaver."))	(	"small-loop-cost"	,
		cl::init(20)	,
		cl::Hidden	,
		cl::desc( "The cost of a loop that is considered 'small' by the interleaver.")	)

static

Referenced by llvm::LoopVectorizationPlanner::selectInterleaveCount().

◆ TailFoldingPolicy

cl::opt< TailFoldingPolicyTy > TailFoldingPolicy("tail-folding-policy", cl::init(TailFoldingPolicyTy::None), cl::Hidden, cl::desc("Tail-folding preferences over creating an epilogue loop."), cl::values( clEnumValN(TailFoldingPolicyTy::None, "dont-fold-tail", "Don't tail-fold loops."), clEnumValN(TailFoldingPolicyTy::PreferFoldTail, "prefer-fold-tail", "prefer tail-folding, otherwise create an epilogue when " "appropriate."), clEnumValN(TailFoldingPolicyTy::MustFoldTail, "must-fold-tail", "always tail-fold, don't attempt vectorization if " "tail-folding fails.")))	(	"tail-folding-policy"	,
		cl::init(TailFoldingPolicyTy::None)	,
		cl::Hidden	,
		cl::desc("Tail-folding preferences over creating an epilogue loop.")	,
		cl::values( clEnumValN(TailFoldingPolicyTy::None, "dont-fold-tail", "Don't tail-fold loops."), clEnumValN(TailFoldingPolicyTy::PreferFoldTail, "prefer-fold-tail", "prefer tail-folding, otherwise create an epilogue when " "appropriate."), clEnumValN(TailFoldingPolicyTy::MustFoldTail, "must-fold-tail", "always tail-fold, don't attempt vectorization if " "tail-folding fails."))	)

static

Referenced by getEpilogueLowering().

◆ TinyTripCountVectorThreshold

cl::opt< unsigned > TinyTripCountVectorThreshold("vectorizer-min-trip-count", cl::init(16), cl::Hidden, cl::desc("Loops with a constant trip count that is smaller than this " "value are vectorized only if no scalar iteration overheads " "are incurred."))	(	"vectorizer-min-trip-count"	,
		cl::init(16)	,
		cl::Hidden	,
		cl::desc("Loops with a constant trip count that is smaller than this " "value are vectorized only if no scalar iteration overheads " "are incurred.")	)

static

Loops with a known constant trip count below this number are vectorized only if no scalar iteration overheads are incurred.

Referenced by llvm::LoopVectorizePass::processLoop().

◆ VectorizeMemoryCheckThreshold

cl::opt< unsigned > VectorizeMemoryCheckThreshold("vectorize-memory-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum allowed number of runtime memory checks"))	(	"vectorize-memory-check-threshold"	,
		cl::init(128)	,
		cl::Hidden	,
		cl::desc("The maximum allowed number of runtime memory checks")	)

static

Referenced by isOutsideLoopWorkProfitable().

◆ VectorizeSCEVCheckThreshold

cl::opt< unsigned > VectorizeSCEVCheckThreshold("vectorize-scev-check-threshold", cl::init(16), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed."))	(	"vectorize-scev-check-threshold"	,
		cl::init(16)	,
		cl::Hidden	,
		cl::desc("The maximum number of SCEV checks allowed.")	)

static

◆ VerboseDebug

const char VerboseDebug[] = DEBUG_TYPE "-verbose"

Definition at line 167 of file LoopVectorize.cpp.

◆ VPlanBuildOuterloopStressTest

cl::opt< bool > VPlanBuildOuterloopStressTest("vplan-build-outerloop-stress-test", cl::init(false), cl::Hidden, cl::desc( "Build VPlan for every supported loop nest in the function and bail " "out right after the build (stress test the VPlan H-CFG construction " "in the VPlan-native vectorization path)."))	(	"vplan-build-outerloop-stress-test"	,
		cl::init(false)	,
		cl::Hidden	,
		cl::desc( "Build VPlan for every supported loop nest in the function and bail " "out right after the build (stress test the VPlan H-CFG construction " "in the VPlan-native vectorization path).")	)

Classes

Namespaces

Macros

Enumerations

Functions

Variables

Macro Definition Documentation

◆ DEBUG_TYPE

◆ LV_NAME

Enumeration Type Documentation

◆ TailFoldingPolicyTy

Function Documentation

◆ addFullyUnrolledInstructionsToIgnore()

◆ calculateEarlyExitCost()

◆ checkMixedPrecision()

◆ collectSupportedLoops()

◆ connectEpilogueVectorLoop()

◆ estimateElementCount()

◆ fixScalarResumeValuesFromBypass()

◆ getAddressAccessSCEV()

◆ getDebugLocFromInstOrOperands()

◆ getEpilogueLowering()

◆ getEpilogueTailLowering()

◆ getMaskedDivRemIntrinsic()

◆ getMaxTCFromNonZeroRange()

◆ getSmallBestKnownTC()

◆ getSmallConstantTripCount()

◆ hasFindLastReductionPhi()

◆ hasIrregularType()

◆ hasReplicatorRegion()

◆ hasUnsupportedHeaderPhiRecipe()

◆ hasVectorLibraryVariantFor()

◆ isExplicitVecOuterLoop()

◆ isIndvarOverflowCheckKnownFalse()

◆ isOutsideLoopWorkProfitable()

◆ legacyCSE()

◆ maybeVectorizeType()

◆ preparePlanForEpilogueVectorLoop()

◆ preparePlanForMainVectorLoop()

◆ printOptimizedVPlan()

◆ replaceVPBBWithIRVPBB()

◆ STATISTIC() [1/5]

◆ STATISTIC() [2/5]

◆ STATISTIC() [3/5]

◆ STATISTIC() [4/5]

◆ STATISTIC() [5/5]

◆ useActiveLaneMask()

◆ useActiveLaneMaskForControlFlow()

◆ useMaskedInterleavedAccesses()

◆ willGenerateVectors()

Variable Documentation

◆ EnableEarlyExitVectorization

◆ EnableEarlyExitVectorizationWithSideEffects

◆ EnableEpilogueVectorization

◆ EnableIndVarRegisterHeur

◆ EnableInterleavedMemAccesses

◆ EnableLoadStoreRuntimeInterleave

◆ EnableMaskedInterleavedMemAccesses

◆ EpilogueTailFoldingPolicy

◆ EpilogueVectorizationForceVF

◆ EpilogueVectorizationMinVF

◆ ForceMaskedDivRem

◆ ForceOrderedReductions

◆ ForcePartialAliasingVectorization

◆ ForceTailFoldingStyle

◆ ForceTargetMaxScalarInterleaveFactor

◆ ForceTargetMaxVectorInterleaveFactor

◆ ForceTargetNumScalarRegs

◆ ForceTargetNumVectorRegs

◆ LoopVectorizeWithBlockFrequency

◆ MaxNestedScalarReductionIC

◆ MinItersBypassWeights

◆ NumberOfStoresToPredicate

◆ PragmaVectorizeSCEVCheckThreshold

◆ PreferPredicatedReductionSelect

◆ SmallLoopCost

◆ TailFoldingPolicy

◆ TinyTripCountVectorThreshold

◆ VectorizeMemoryCheckThreshold

◆ VectorizeSCEVCheckThreshold