#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/AssumeBundleQueries.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>
#include <variant>
#include <vector>

Macros
#define	DEBUG_TYPE "loop-accesses"

Functions
static const SCEV *	addSCEVNoOverflow (const SCEV A, const SCEV B, ScalarEvolution &SE)
	Returns `A` + `B`, if it is guaranteed not to unsigned wrap.
static const SCEV *	mulSCEVNoOverflow (const SCEV A, const SCEV B, ScalarEvolution &SE)
	Returns `A` * `B`, if it is guaranteed not to unsigned wrap.
static bool	evaluatePtrAddRecAtMaxBTCWillNotWrap (const SCEVAddRecExpr AR, const SCEV MaxBTC, const SCEV EltSize, ScalarEvolution &SE, const DataLayout &DL, DominatorTree DT, AssumptionCache *AC, std::optional< ScalarEvolution::LoopGuards > &LoopGuards)
	Return true, if evaluating `AR` at `MaxBTC` cannot wrap, because `AR` at `MaxBTC` is guaranteed inbounds of the accessed object.
static const SCEV *	getMinFromExprs (const SCEV I, const SCEV J, ScalarEvolution *SE)
	Compare `I` and `J` and return the minimum.
static DenseMap< const RuntimeCheckingPtrGroup *, unsigned >	getPtrToIdxMap (ArrayRef< RuntimeCheckingPtrGroup > CheckingGroups)
	Assign each RuntimeCheckingPtrGroup pointer an index for stable UTC output.
static std::optional< int64_t >	getStrideFromAddRec (const SCEVAddRecExpr AR, const Loop Lp, Type AccessTy, Value Ptr, PredicatedScalarEvolution &PSE)
	Try to compute a constant stride for `AR`.
static bool	isNoWrap (PredicatedScalarEvolution &PSE, const SCEVAddRecExpr AR, Value Ptr, Type AccessTy, const Loop L, bool Assume, const DominatorTree &DT, std::optional< int64_t > Stride=std::nullopt)
	Check whether `AR` is a non-wrapping AddRec.
static void	visitPointers (Value StartPtr, const Loop &InnermostLoop, function_ref< void(Value )> AddPointer)
static void	findForkedSCEVs (ScalarEvolution SE, const Loop L, Value Ptr, SmallVectorImpl< PointerIntPair< const SCEV , 1, bool > > &ScevList, unsigned Depth)
static bool	isSafeDependenceDistance (const DataLayout &DL, ScalarEvolution &SE, const SCEV &MaxBTC, const SCEV &Dist, uint64_t MaxStride)
	Given a dependence-distance `Dist` between two memory accesses, that have strides in the same direction whose absolute value of the maximum stride is given in `MaxStride`, in a loop whose maximum backedge taken count is `MaxBTC`, check if it is possible to prove statically that the dependence distance is larger than the range that the accesses will travel through the execution of the loop.
static bool	areStridedAccessesIndependent (uint64_t Distance, uint64_t Stride, uint64_t TypeByteSize)
	Check the dependence for two accesses with the same stride `Stride`.
static Value *	getLoopVariantGEPOperand (Value Ptr, ScalarEvolution SE, Loop *Lp)
	If `Ptr` is a GEP, which has a loop-variant operand, return that operand.
static const SCEV *	getStrideFromPointer (Value Ptr, ScalarEvolution SE, Loop *Lp)
	Get the stride of a pointer access in a loop.

Variables
static cl::opt< unsigned, true >	VectorizationFactor ("force-vector-width", cl::Hidden, cl::desc("Sets the SIMD width. Zero is autoselect."), cl::location(VectorizerParams::VectorizationFactor))
static cl::opt< unsigned, true >	VectorizationInterleave ("force-vector-interleave", cl::Hidden, cl::desc("Sets the vectorization interleave count. " "Zero is autoselect."), cl::location(VectorizerParams::VectorizationInterleave))
static cl::opt< unsigned, true >	RuntimeMemoryCheckThreshold ("runtime-memory-check-threshold", cl::Hidden, cl::desc("When performing memory disambiguation checks at runtime do not " "generate more than this number of comparisons (default = 8)."), cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8))
static cl::opt< unsigned >	MemoryCheckMergeThreshold ("memory-check-merge-threshold", cl::Hidden, cl::desc("Maximum number of comparisons done when trying to merge " "runtime memory checks. (default = 100)"), cl::init(100))
	The maximum iterations used to merge memory checks.
static cl::opt< unsigned >	MaxDependences ("max-dependences", cl::Hidden, cl::desc("Maximum number of dependences collected by " "loop-access analysis (default = 100)"), cl::init(100))
	We collect dependences up to this threshold.
static cl::opt< bool >	EnableMemAccessVersioning ("enable-mem-access-versioning", cl::init(true), cl::Hidden, cl::desc("Enable symbolic stride memory access versioning"))
	This enables versioning on the strides of symbolically striding memory accesses in code like the following.
static cl::opt< bool >	EnableForwardingConflictDetection ("store-to-load-forwarding-conflict-detection", cl::Hidden, cl::desc("Enable conflict detection in loop-access analysis"), cl::init(true))
	Enable store-to-load forwarding conflict detection.
static cl::opt< unsigned >	MaxForkedSCEVDepth ("max-forked-scev-depth", cl::Hidden, cl::desc("Maximum recursion depth when finding forked SCEVs (default = 5)"), cl::init(5))
static cl::opt< bool >	SpeculateUnitStride ("laa-speculate-unit-stride", cl::Hidden, cl::desc("Speculate that non-constant strides are unit in LAA"), cl::init(true))
static cl::opt< bool, true >	HoistRuntimeChecks ("hoist-runtime-checks", cl::Hidden, cl::desc("Hoist inner loop runtime memory checks to outer loop if possible"), cl::location(VectorizerParams::HoistRuntimeChecks), cl::init(true))

Macro Definition Documentation

◆ DEBUG_TYPE

#define DEBUG_TYPE "loop-accesses"

Definition at line 72 of file LoopAccessAnalysis.cpp.

Function Documentation

◆ addSCEVNoOverflow()

const SCEV * addSCEVNoOverflow	(	const SCEV *	A,
		const SCEV *	B,
		ScalarEvolution &	SE )

static

Returns A + B, if it is guaranteed not to unsigned wrap.

Otherwise return nullptr. A and B must have the same type.

Definition at line 195 of file LoopAccessAnalysis.cpp.

References A(), B(), llvm::ScalarEvolution::getAddExpr(), and llvm::ScalarEvolution::willNotOverflow().

Referenced by evaluatePtrAddRecAtMaxBTCWillNotWrap().

◆ areStridedAccessesIndependent()

bool areStridedAccessesIndependent	(	uint64_t	Distance,
		uint64_t	Stride,
		uint64_t	TypeByteSize )

static

Check the dependence for two accesses with the same stride Stride.

Distance is the positive distance in bytes, and TypeByteSize is type size in bytes.

Returns: true if they are independent.

Definition at line 2017 of file LoopAccessAnalysis.cpp.

References assert().

◆ evaluatePtrAddRecAtMaxBTCWillNotWrap()

bool evaluatePtrAddRecAtMaxBTCWillNotWrap	(	const SCEVAddRecExpr *	AR,
		const SCEV *	MaxBTC,
		const SCEV *	EltSize,
		ScalarEvolution &	SE,
		const DataLayout &	DL,
		DominatorTree *	DT,
		AssumptionCache *	AC,
		std::optional< ScalarEvolution::LoopGuards > &	LoopGuards )

static

Return true, if evaluating AR at MaxBTC cannot wrap, because AR at MaxBTC is guaranteed inbounds of the accessed object.

Definition at line 213 of file LoopAccessAnalysis.cpp.

Referenced by llvm::getStartAndEndForAccess().

◆ findForkedSCEVs()

void findForkedSCEVs	(	ScalarEvolution *	SE,
		const Loop *	L,
		Value *	Ptr,
		SmallVectorImpl< PointerIntPair< const SCEV *, 1, bool > > &	ScevList,
		unsigned	Depth )

static

◆ getLoopVariantGEPOperand()

Value * getLoopVariantGEPOperand	(	Value *	Ptr,
		ScalarEvolution *	SE,
		Loop *	Lp )

static

If Ptr is a GEP, which has a loop-variant operand, return that operand.

Otherwise, return Ptr.

Definition at line 2967 of file LoopAccessAnalysis.cpp.

References llvm::dyn_cast(), GEP, llvm::ScalarEvolution::getSCEV(), and llvm::ScalarEvolution::isLoopInvariant().

Referenced by getStrideFromPointer().

◆ getMinFromExprs()

const SCEV * getMinFromExprs	(	const SCEV *	I,
		const SCEV *	J,
		ScalarEvolution *	SE )

static

Compare I and J and return the minimum.

Return nullptr in case we couldn't find an answer.

Definition at line 572 of file LoopAccessAnalysis.cpp.

References llvm::ScalarEvolution::computeConstantDifference(), and I.

Referenced by llvm::RuntimeCheckingPtrGroup::addPointer().

◆ getPtrToIdxMap()

DenseMap< const RuntimeCheckingPtrGroup *, unsigned > getPtrToIdxMap ( ArrayRef< RuntimeCheckingPtrGroup > CheckingGroups )

static

Assign each RuntimeCheckingPtrGroup pointer an index for stable UTC output.

Definition at line 765 of file LoopAccessAnalysis.cpp.

References llvm::enumerate().

Referenced by llvm::RuntimePointerChecking::print(), and llvm::RuntimePointerChecking::printChecks().

◆ getStrideFromAddRec()

std::optional< int64_t > getStrideFromAddRec	(	const SCEVAddRecExpr *	AR,
		const Loop *	Lp,
		Type *	AccessTy,
		Value *	Ptr,
		PredicatedScalarEvolution &	PSE )

static

Try to compute a constant stride for AR.

Used by getPtrStride and isNoWrap.

Definition at line 983 of file LoopAccessAnalysis.cpp.

References llvm::dbgs(), DL, llvm::BasicBlock::getDataLayout(), llvm::details::FixedOrScalableQuantity< LeafTy, ValueTy >::getFixedValue(), llvm::LoopBase< BlockT, LoopT >::getHeader(), llvm::SCEVAddRecExpr::getLoop(), llvm::PredicatedScalarEvolution::getSE(), llvm::SCEVAddRecExpr::getStepRecurrence(), llvm::isa(), LLVM_DEBUG, llvm::SCEVPatternMatch::m_scev_APInt(), llvm::PatternMatch::match(), Size, and llvm::APInt::trySExtValue().

Referenced by llvm::getPtrStride(), and isNoWrap().

◆ getStrideFromPointer()

const SCEV * getStrideFromPointer	(	Value *	Ptr,
		ScalarEvolution *	SE,
		Loop *	Lp )

static

Get the stride of a pointer access in a loop.

Looks for symbolic strides "a[i*stride]". Returns the symbolic stride, or null otherwise.

Definition at line 2988 of file LoopAccessAnalysis.cpp.

References llvm::CallingConv::C, llvm::dyn_cast(), getLoopVariantGEPOperand(), llvm::ScalarEvolution::getSCEV(), llvm::Value::getType(), llvm::isa(), llvm::ScalarEvolution::isLoopInvariant(), llvm::SCEVPatternMatch::m_SCEV(), llvm::SCEVPatternMatch::m_scev_AffineAddRec(), llvm::SCEVPatternMatch::m_scev_Mul(), llvm::SCEVPatternMatch::m_SCEVConstant(), llvm::SCEVPatternMatch::m_SpecificLoop(), and llvm::PatternMatch::match().

◆ isNoWrap()

bool isNoWrap	(	PredicatedScalarEvolution &	PSE,
		const SCEVAddRecExpr *	AR,
		Value *	Ptr,
		Type *	AccessTy,
		const Loop *	L,
		bool	Assume,
		const DominatorTree &	DT,
		std::optional< int64_t >	Stride = std::nullopt )

static

Check whether AR is a non-wrapping AddRec.

If Ptr is not nullptr, use informating from the IR pointer value to determine no-wrap.

Definition at line 1033 of file LoopAccessAnalysis.cpp.

References llvm::any_of(), llvm::dbgs(), llvm::dyn_cast_if_present(), GEP, llvm::SCEVNAryExpr::getNoWrapFlags(), llvm::Type::getPointerAddressSpace(), getStrideFromAddRec(), llvm::SCEVAddRecExpr::getType(), llvm::PredicatedScalarEvolution::hasNoOverflow(), llvm::SCEVWrapPredicate::IncrementNUSW, LLVM_DEBUG, llvm::SCEV::NoWrapMask, llvm::NullPointerIsDefined(), and llvm::PredicatedScalarEvolution::setNoOverflow().

Referenced by llvm::getPtrStride().

◆ isSafeDependenceDistance()

bool isSafeDependenceDistance	(	const DataLayout &	DL,
		ScalarEvolution &	SE,
		const SCEV &	MaxBTC,
		const SCEV &	Dist,
		uint64_t	MaxStride )

static

Given a dependence-distance Dist between two memory accesses, that have strides in the same direction whose absolute value of the maximum stride is given in MaxStride, in a loop whose maximum backedge taken count is MaxBTC, check if it is possible to prove statically that the dependence distance is larger than the range that the accesses will travel through the execution of the loop.

If so, return true; false otherwise. This is useful for example in loops such as the following (PR31098):

for (i = 0; i < D; ++i) {
           = out[i];
  out[i+D] =
}

Definition at line 1962 of file LoopAccessAnalysis.cpp.

References DL, llvm::ScalarEvolution::getConstant(), llvm::ScalarEvolution::getMinusSCEV(), llvm::ScalarEvolution::getMulExpr(), llvm::ScalarEvolution::getNegativeSCEV(), llvm::ScalarEvolution::getNoopOrSignExtend(), llvm::SCEV::getType(), llvm::ScalarEvolution::getZeroExtendExpr(), llvm::ScalarEvolution::isKnownPositive(), and llvm::Minus.

◆ mulSCEVNoOverflow()

const SCEV * mulSCEVNoOverflow	(	const SCEV *	A,
		const SCEV *	B,
		ScalarEvolution &	SE )

static

Returns A * B, if it is guaranteed not to unsigned wrap.

Otherwise return nullptr. A and B must have the same type.

Definition at line 204 of file LoopAccessAnalysis.cpp.

References A(), B(), llvm::ScalarEvolution::getMulExpr(), and llvm::ScalarEvolution::willNotOverflow().

Referenced by evaluatePtrAddRecAtMaxBTCWillNotWrap().

◆ visitPointers()

void visitPointers	(	Value *	StartPtr,
		const Loop &	InnermostLoop,
		function_ref< void(Value *)>	AddPointer )

static

Definition at line 1089 of file LoopAccessAnalysis.cpp.

References llvm::append_range(), llvm::LoopBase< BlockT, LoopT >::contains(), llvm::dyn_cast(), llvm::SmallVectorTemplateCommon< T, typename >::empty(), llvm::LoopBase< BlockT, LoopT >::getHeader(), llvm::SmallPtrSetImpl< PtrType >::insert(), llvm::SmallVectorImpl< T >::pop_back_val(), and llvm::SmallVectorTemplateBase< T, bool >::push_back().

Referenced by llvm::MemoryDepChecker::addAccess(), and llvm::MemoryDepChecker::addAccess().

Variable Documentation

◆ EnableForwardingConflictDetection

cl::opt< bool > EnableForwardingConflictDetection("store-to-load-forwarding-conflict-detection", cl::Hidden, cl::desc("Enable conflict detection in loop-access analysis"), cl::init(true))	(	"store-to-load-forwarding-conflict-detection"	,
		cl::Hidden	,
		cl::desc("Enable conflict detection in loop-access analysis")	,
		cl::init(true)	)

static

Enable store-to-load forwarding conflict detection.

This option can be disabled for correctness testing.

◆ EnableMemAccessVersioning

cl::opt< bool > EnableMemAccessVersioning("enable-mem-access-versioning", cl::init(true), cl::Hidden, cl::desc("Enable symbolic stride memory access versioning"))	(	"enable-mem-access-versioning"	,
		cl::init(true)	,
		cl::Hidden	,
		cl::desc("Enable symbolic stride memory access versioning")	)

static

This enables versioning on the strides of symbolically striding memory accesses in code like the following.

for (i = 0; i < N; ++i) A[i * Stride1] += B[i * Stride2] ...

Will be roughly translated to if (Stride1 == 1 && Stride2 == 1) { for (i = 0; i < N; i+=4) A[i:i+3] += ... } else ...

◆ HoistRuntimeChecks

cl::opt< bool, true > HoistRuntimeChecks("hoist-runtime-checks", cl::Hidden, cl::desc( "Hoist inner loop runtime memory checks to outer loop if possible"), cl::location(VectorizerParams::HoistRuntimeChecks), cl::init(true))	(	"hoist-runtime-checks"	,
		cl::Hidden	,
		cl::desc( "Hoist inner loop runtime memory checks to outer loop if possible")	,
		cl::location(VectorizerParams::HoistRuntimeChecks)	,
		cl::init(true)	)

static

Referenced by llvm::addRuntimeChecks(), llvm::appendLoopsToWorklist< Loop & >(), expandBounds(), and expandBounds().

◆ MaxDependences

cl::opt< unsigned > MaxDependences("max-dependences", cl::Hidden, cl::desc("Maximum number of dependences collected by " "loop-access analysis (default = 100)"), cl::init(100))	(	"max-dependences"	,
		cl::Hidden	,
		cl::desc("Maximum number of dependences collected by " "loop-access analysis (default = 100)")	,
		cl::init(100)	)

static

We collect dependences up to this threshold.

Referenced by llvm::MemoryDepChecker::areDepsSafe().

◆ MaxForkedSCEVDepth

cl::opt< unsigned > MaxForkedSCEVDepth("max-forked-scev-depth", cl::Hidden, cl::desc("Maximum recursion depth when finding forked SCEVs (default = 5)"), cl::init(5))	(	"max-forked-scev-depth"	,
		cl::Hidden	,
		cl::desc("Maximum recursion depth when finding forked SCEVs (default = 5)")	,
		cl::init(5)	)

static

◆ MemoryCheckMergeThreshold

cl::opt< unsigned > MemoryCheckMergeThreshold("memory-check-merge-threshold", cl::Hidden, cl::desc("Maximum number of comparisons done when trying to merge " "runtime memory checks. (default = 100)"), cl::init(100))	(	"memory-check-merge-threshold"	,
		cl::Hidden	,
		cl::desc("Maximum number of comparisons done when trying to merge " "runtime memory checks. (default = 100)")	,
		cl::init(100)	)

static

The maximum iterations used to merge memory checks.

◆ RuntimeMemoryCheckThreshold

cl::opt< unsigned, true > RuntimeMemoryCheckThreshold("runtime-memory-check-threshold", cl::Hidden, cl::desc("When performing memory disambiguation checks at runtime do not " "generate more than this number of comparisons (default = 8)."), cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8))	(	"runtime-memory-check-threshold"	,
		cl::Hidden	,
		cl::desc("When performing memory disambiguation checks at runtime do not " "generate more than this number of comparisons (default = 8).")	,
		cl::location(VectorizerParams::RuntimeMemoryCheckThreshold)	,
		cl::init(8)	)

static

◆ SpeculateUnitStride

cl::opt< bool > SpeculateUnitStride("laa-speculate-unit-stride", cl::Hidden, cl::desc("Speculate that non-constant strides are unit in LAA"), cl::init(true))	(	"laa-speculate-unit-stride"	,
		cl::Hidden	,
		cl::desc("Speculate that non-constant strides are unit in LAA")	,
		cl::init(true)	)

static

◆ VectorizationFactor

cl::opt< unsigned, true > VectorizationFactor("force-vector-width", cl::Hidden, cl::desc("Sets the SIMD width. Zero is autoselect."), cl::location(VectorizerParams::VectorizationFactor))	(	"force-vector-width"	,
		cl::Hidden	,
		cl::desc("Sets the SIMD width. Zero is autoselect.")	,
		cl::location(VectorizerParams::VectorizationFactor)	)

static

◆ VectorizationInterleave

cl::opt< unsigned, true > VectorizationInterleave("force-vector-interleave", cl::Hidden, cl::desc("Sets the vectorization interleave count. " "Zero is autoselect."), cl::location( VectorizerParams::VectorizationInterleave))	(	"force-vector-interleave"	,
		cl::Hidden	,
		cl::desc("Sets the vectorization interleave count. " "Zero is autoselect.")	,
		cl::location( VectorizerParams::VectorizationInterleave)	)

static

Macros

Functions

Variables

Macro Definition Documentation

◆ DEBUG_TYPE

Function Documentation

◆ addSCEVNoOverflow()

◆ areStridedAccessesIndependent()

◆ evaluatePtrAddRecAtMaxBTCWillNotWrap()

◆ findForkedSCEVs()

◆ getLoopVariantGEPOperand()

◆ getMinFromExprs()

◆ getPtrToIdxMap()

◆ getStrideFromAddRec()

◆ getStrideFromPointer()

◆ isNoWrap()

◆ isSafeDependenceDistance()

◆ mulSCEVNoOverflow()

◆ visitPointers()

Variable Documentation

◆ EnableForwardingConflictDetection

◆ EnableMemAccessVersioning

◆ HoistRuntimeChecks

◆ MaxDependences

◆ MaxForkedSCEVDepth

◆ MemoryCheckMergeThreshold

◆ RuntimeMemoryCheckThreshold

◆ SpeculateUnitStride

◆ VectorizationFactor

◆ VectorizationInterleave