43#define DEBUG_TYPE "loop-cache-cost"
47 cl::desc(
"Use this to specify the default trip count of a loop"));
54 cl::desc(
"Use this to specify the max. distance between array elements "
55 "accessed in a loop so that the elements are classified to have "
63 assert(!
Loops.empty() &&
"Expecting a non-empy loop vector");
68 if (ParentLoop ==
nullptr) {
69 assert(
Loops.size() == 1 &&
"Expecting a single loop");
103 return StepRec == &ElemSize;
119 <<
" could not be computed, using DefaultTripCount\n");
131 OS << R.StoreOrLoadInst;
132 OS <<
", IsValid=false.";
136 OS << *R.BasePointer;
137 for (
const SCEV *Subscript : R.Subscripts)
138 OS <<
"[" << *Subscript <<
"]";
142 OS <<
"[" << *
Size <<
"]";
149 : StoreOrLoadInst(StoreOrLoadInst), SE(SE) {
151 "Expecting a load or store instruction");
153 IsValid = delinearize(LI);
162 assert(IsValid &&
"Expecting a valid reference");
164 if (BasePointer !=
Other.getBasePointer() && !isAliased(
Other,
AA)) {
166 <<
"No spacial reuse: different base pointers\n");
171 if (NumSubscripts !=
Other.getNumSubscripts()) {
173 <<
"No spacial reuse: different number of subscripts\n");
182 << *
Other.getSubscript(SubNum) <<
"\n");
190 const SCEV *OtherLastSubscript =
Other.getLastSubscript();
192 SE.getMinusSCEV(LastSubscript, OtherLastSubscript));
194 if (Diff ==
nullptr) {
196 <<
"No spacial reuse, difference between subscript:\n\t"
197 << *LastSubscript <<
"\n\t" << OtherLastSubscript
198 <<
"\nis not constant.\n");
202 bool InSameCacheLine = (Diff->
getValue()->getSExtValue() < CLS);
211 return InSameCacheLine;
216 unsigned MaxDistance,
const Loop &L,
218 assert(IsValid &&
"Expecting a valid reference");
220 if (BasePointer !=
Other.getBasePointer() && !isAliased(
Other,
AA)) {
222 <<
"No temporal reuse: different base pointer\n");
226 std::unique_ptr<Dependence>
D =
234 if (
D->isLoopIndependent()) {
242 int LoopDepth = L.getLoopDepth();
243 int Levels =
D->getLevels();
244 for (
int Level = 1; Level <= Levels; ++Level) {
245 const SCEV *Distance =
D->getDistance(Level);
248 if (SCEVConst ==
nullptr) {
254 if (Level != LoopDepth && !CI.
isZero()) {
256 <<
"No temporal reuse: distance is not zero at depth=" << Level
259 }
else if (Level == LoopDepth && CI.
getSExtValue() > MaxDistance) {
262 <<
"No temporal reuse: distance is greater than MaxDistance at depth="
273 unsigned CLS)
const {
274 assert(IsValid &&
"Expecting a valid reference");
276 dbgs().
indent(2) <<
"Computing cache cost for:\n";
281 if (isLoopInvariant(L)) {
287 assert(TripCount &&
"Expecting valid TripCount");
290 const SCEV *RefCost =
nullptr;
291 const SCEV *Stride =
nullptr;
292 if (isConsecutive(L, Stride, CLS)) {
295 assert(Stride !=
nullptr &&
296 "Stride should not be null for consecutive access!");
299 Stride = SE.getNoopOrAnyExtend(Stride, WiderType);
300 TripCount = SE.getNoopOrZeroExtend(TripCount, WiderType);
301 const SCEV *Numerator = SE.getMulExpr(Stride, TripCount);
310 <<
"Access is consecutive: RefCost=(TripCount*Stride)/CLS="
311 << *RefCost <<
"\n");
322 int Index = getSubscriptIndex(L);
323 assert(Index >= 0 &&
"Could not locate a valid Index");
328 const SCEV *TripCount =
333 RefCost = SE.getMulExpr(SE.getNoopOrZeroExtend(RefCost, WiderType),
334 SE.getNoopOrZeroExtend(TripCount, WiderType));
338 <<
"Access is not consecutive: RefCost=" << *RefCost <<
"\n");
340 assert(RefCost &&
"Expecting a valid RefCost");
347 return ConstantCost->getValue()->getLimitedValue(
348 std::numeric_limits<int64_t>::max());
351 <<
"RefCost is not a constant! Setting to RefCost=InvalidCost "
352 "(invalid value).\n");
357bool IndexedReference::tryDelinearizeFixedSize(
367 SE.
getConstant(Subscripts[Idx]->getType(), ArraySizes[Idx - 1]));
370 dbgs() <<
"Delinearized subscripts of fixed-size array\n"
377bool IndexedReference::delinearize(
const LoopInfo &LI) {
378 assert(Subscripts.empty() &&
"Subscripts should be empty");
379 assert(Sizes.empty() &&
"Sizes should be empty");
380 assert(!IsValid &&
"Should be called once from the constructor");
381 LLVM_DEBUG(
dbgs() <<
"Delinearizing: " << StoreOrLoadInst <<
"\n");
383 const SCEV *ElemSize = SE.getElementSize(&StoreOrLoadInst);
384 const BasicBlock *BB = StoreOrLoadInst.getParent();
387 const SCEV *AccessFn =
391 if (BasePointer ==
nullptr) {
394 <<
"ERROR: failed to delinearize, can't identify base pointer\n");
398 bool IsFixedSize =
false;
400 if (tryDelinearizeFixedSize(AccessFn, Subscripts)) {
403 Sizes.push_back(ElemSize);
405 <<
"', AccessFn: " << *AccessFn <<
"\n");
408 AccessFn = SE.getMinusSCEV(AccessFn, BasePointer);
413 <<
"', AccessFn: " << *AccessFn <<
"\n");
415 SE.getElementSize(&StoreOrLoadInst));
418 if (Subscripts.empty() || Sizes.empty() ||
419 Subscripts.size() != Sizes.size()) {
424 <<
"ERROR: failed to delinearize reference\n");
438 if (StepRec && SE.isKnownNegative(StepRec))
439 AccessFn = SE.getAddRecExpr(
440 AccessFnAR->
getStart(), SE.getNegativeSCEV(StepRec),
442 const SCEV *Div = SE.getUDivExactExpr(AccessFn, ElemSize);
443 Subscripts.push_back(Div);
444 Sizes.push_back(ElemSize);
447 return all_of(Subscripts, [&](
const SCEV *Subscript) {
448 return isSimpleAddRecurrence(*Subscript, *L);
455bool IndexedReference::isLoopInvariant(
const Loop &L)
const {
457 assert(Addr !=
nullptr &&
"Expecting either a load or a store instruction");
458 assert(SE.isSCEVable(Addr->
getType()) &&
"Addr should be SCEVable");
460 if (SE.isLoopInvariant(SE.getSCEV(Addr), &L))
465 bool allCoeffForLoopAreZero =
all_of(Subscripts, [&](
const SCEV *Subscript) {
466 return isCoeffForLoopZeroOrInvariant(*Subscript, L);
469 return allCoeffForLoopAreZero;
472bool IndexedReference::isConsecutive(
const Loop &L,
const SCEV *&Stride,
473 unsigned CLS)
const {
476 const SCEV *LastSubscript = Subscripts.back();
477 for (
const SCEV *Subscript : Subscripts) {
478 if (Subscript == LastSubscript)
480 if (!isCoeffForLoopZeroOrInvariant(*Subscript, L))
485 const SCEV *Coeff = getLastCoefficient();
486 const SCEV *ElemSize = Sizes.back();
498 Stride = SE.getMulExpr(SE.getNoopOrSignExtend(Coeff, WiderType),
499 SE.getNoopOrSignExtend(ElemSize, WiderType));
502 Stride = SE.isKnownNegative(Stride) ? SE.getNegativeSCEV(Stride) : Stride;
506int IndexedReference::getSubscriptIndex(
const Loop &L)
const {
509 if (AR && AR->
getLoop() == &L) {
516const SCEV *IndexedReference::getLastCoefficient()
const {
522bool IndexedReference::isCoeffForLoopZeroOrInvariant(
const SCEV &Subscript,
523 const Loop &L)
const {
525 return (AR !=
nullptr) ? AR->
getLoop() != &
L
526 : SE.isLoopInvariant(&Subscript, &L);
529bool IndexedReference::isSimpleAddRecurrence(
const SCEV &Subscript,
530 const Loop &L)
const {
543 if (!SE.isLoopInvariant(Start, &L) || !SE.isLoopInvariant(Step, &L))
560 for (
const auto &LC : CC.LoopCosts) {
561 const Loop *L = LC.first;
562 OS <<
"Loop '" << L->getName() <<
"' has cost = " << LC.second <<
"\n";
570 std::optional<unsigned> TRT)
572 TTI(TTI), AA(AA), DI(DI) {
573 assert(!Loops.empty() &&
"Expecting a non-empty loop vector.");
575 for (
const Loop *L : Loops) {
576 unsigned TripCount = SE.getSmallConstantTripCount(L);
578 TripCounts.push_back({L, TripCount});
581 calculateCacheFootprint();
584std::unique_ptr<CacheCost>
588 LLVM_DEBUG(
dbgs() <<
"Expecting the outermost loop in a loop nest\n");
596 LLVM_DEBUG(
dbgs() <<
"Cannot compute cache cost of loop nest with more "
597 "than one innermost loop\n");
601 return std::make_unique<CacheCost>(Loops, AR.
LI, AR.
SE, AR.
TTI, AR.
AA, DI, TRT);
604void CacheCost::calculateCacheFootprint() {
607 if (!populateReferenceGroups(RefGroups))
614 [L](
const LoopCacheCostTy &LCC) {
return LCC.first == L; }) &&
615 "Should not add duplicate element");
616 CacheCostTy LoopCost = computeLoopCacheCost(*L, RefGroups);
617 LoopCosts.
push_back(std::make_pair(L, LoopCost));
625 assert(RefGroups.
empty() &&
"Reference groups should be empty");
627 unsigned CLS = TTI.getCacheLineSize();
629 assert(InnerMostLoop !=
nullptr &&
"Expecting a valid innermost loop");
631 for (BasicBlock *BB : InnerMostLoop->
getBlocks()) {
632 for (Instruction &
I : *BB) {
636 std::unique_ptr<IndexedReference>
R(
new IndexedReference(
I, LI, SE));
642 const IndexedReference &Representative = *RefGroup.front();
644 dbgs() <<
"References:\n";
661 std::optional<bool> HasTemporalReuse =
662 R->hasTemporalReuse(Representative, *TRT, *InnerMostLoop, DI, AA);
663 std::optional<bool> HasSpacialReuse =
664 R->hasSpacialReuse(Representative, CLS, AA);
666 if ((HasTemporalReuse && *HasTemporalReuse) ||
667 (HasSpacialReuse && *HasSpacialReuse)) {
668 RefGroup.push_back(std::move(R));
677 RefGroups.push_back(std::move(RG));
682 if (RefGroups.empty())
686 dbgs() <<
"\nIDENTIFIED REFERENCE GROUPS:\n";
689 dbgs().
indent(2) <<
"RefGroup " << n <<
":\n";
690 for (
const auto &
IR : RG)
701CacheCost::computeLoopCacheCost(
const Loop &L,
703 if (!
L.isLoopSimplifyForm())
707 <<
"' as innermost loop.\n");
711 for (
const auto &TC : TripCounts) {
714 TripCountsProduct *= TC.second;
719 CacheCostTy RefGroupCost = computeRefGroupCacheCost(RG, L);
720 LoopCost += RefGroupCost * TripCountsProduct;
724 <<
"' has cost=" << LoopCost <<
"\n");
730 const Loop &L)
const {
731 assert(!RG.
empty() &&
"Reference group should have at least one member.");
733 const IndexedReference *Representative = RG.
front().get();
743 Function *
F = L.getHeader()->getParent();
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file builds on the ADT/GraphTraits.h file to build a generic breadth first graph iterator.
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Legalize the Machine IR a function s Machine IR
static bool isOneDimensionalArray(const SCEV &AccessFn, const SCEV &ElemSize, const Loop &L, ScalarEvolution &SE)
static cl::opt< unsigned > TemporalReuseThreshold("temporal-reuse-threshold", cl::init(2), cl::Hidden, cl::desc("Use this to specify the max. distance between array elements " "accessed in a loop so that the elements are classified to have " "temporal reuse"))
static const SCEV * computeTripCount(const Loop &L, const SCEV &ElemSize, ScalarEvolution &SE)
Compute the trip count for the given loop L or assume a default value if it is not a compile time con...
static Loop * getInnerMostLoop(const LoopVectorTy &Loops)
Retrieve the innermost loop in the given loop nest Loops.
static cl::opt< unsigned > DefaultTripCount("default-trip-count", cl::init(100), cl::Hidden, cl::desc("Use this to specify the default trip count of a loop"))
This file defines the interface for the loop cache analysis.
Provides some synthesis utilities to produce sequences of values.
This file defines the SmallVector class.
bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A trivial helper function to check to see if the specified pointers are must-alias.
CacheCost represents the estimated cost of a inner loop as the number of cache lines used by the memo...
CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, ScalarEvolution &SE, TargetTransformInfo &TTI, AAResults &AA, DependenceInfo &DI, std::optional< unsigned > TRT=std::nullopt)
Construct a CacheCost object for the loop nest described by Loops.
static std::unique_ptr< CacheCost > getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI, std::optional< unsigned > TRT=std::nullopt)
Create a CacheCost for the loop nest rooted by Root.
@ ICMP_ULT
unsigned less than
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
DependenceInfo - This class is the main dependence-analysis driver.
LLVM_ABI std::unique_ptr< Dependence > depends(Instruction *Src, Instruction *Dst, bool UnderRuntimeAssumptions=false)
depends - Tests for a dependence between the Src and Dst instructions.
Represents a memory reference as a base pointer and a set of indexing operations.
CacheCostTy computeRefCost(const Loop &L, unsigned CLS) const
Compute the cost of the reference w.r.t.
const SCEV * getSubscript(unsigned SubNum) const
std::optional< bool > hasSpacialReuse(const IndexedReference &Other, unsigned CLS, AAResults &AA) const
Return true/false if the current object and the indexed reference Other are/aren't in the same cache ...
std::optional< bool > hasTemporalReuse(const IndexedReference &Other, unsigned MaxDistance, const Loop &L, DependenceInfo &DI, AAResults &AA) const
Return true if the current object and the indexed reference Other have distance smaller than MaxDista...
IndexedReference(Instruction &StoreOrLoadInst, const LoopInfo &LI, ScalarEvolution &SE)
Construct an indexed reference given a StoreOrLoadInst instruction.
const SCEV * getLastSubscript() const
size_t getNumSubscripts() const
static InstructionCost getInvalid(CostType Val=0)
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
bool isOutermost() const
Return true if the loop does not have a parent (natural) loop.
unsigned getLoopDepth() const
Return the nesting level of this loop.
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Represents a single loop in the control flow graph.
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStart() const
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const Loop * getLoop() const
This class represents a constant integer value.
ConstantInt * getValue() const
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM_ABI bool isKnownNegative(const SCEV *S)
Test if the given expression is known to be negative.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
LLVM_ABI const SCEV * getConstant(ConstantInt *V)
LLVM_ABI const SCEV * getTripCountFromExitCount(const SCEV *ExitCount)
A version of getTripCountFromExitCount below which always picks an evaluation type which can not resu...
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM_ABI Type * getExtendedType() const
Given scalar/vector integer type, returns a type with elements twice as wide as in the original type.
Type * getType() const
All values are typed, get the type of this value.
This class implements an extremely fast bulk output stream that can only output to a stream.
raw_ostream & indent(unsigned NumSpaces)
indent - Insert 'NumSpaces' spaces.
Abstract Attribute helper functions.
@ BasicBlock
Various leaf nodes.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
InstructionCost CacheCostTy
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
SmallVector< std::unique_ptr< IndexedReference >, 8 > ReferenceGroupTy
A reference group represents a set of memory references that exhibit temporal or spacial reuse.
const Value * getPointerOperand(const Value *V)
A helper function that returns the pointer operand of a load, store or GEP instruction.
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
auto dyn_cast_or_null(const Y &Val)
SmallVector< Loop *, 8 > LoopVectorTy
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
bool is_sorted(R &&Range, Compare C)
Wrapper function around std::is_sorted to check if elements in a range R are sorted with respect to a...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
iterator_range< bf_iterator< T > > breadth_first(const T &G)
bool tryDelinearizeFixedSizeImpl(ScalarEvolution *SE, Instruction *Inst, const SCEV *AccessFn, SmallVectorImpl< const SCEV * > &Subscripts, SmallVectorImpl< int > &Sizes)
Implementation of fixed size array delinearization.
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
void delinearize(ScalarEvolution &SE, const SCEV *Expr, SmallVectorImpl< const SCEV * > &Subscripts, SmallVectorImpl< const SCEV * > &Sizes, const SCEV *ElementSize)
Split this SCEVAddRecExpr into two vectors of SCEVs representing the subscripts and sizes of an array...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
SmallVector< ReferenceGroupTy, 8 > ReferenceGroupsTy
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
TargetTransformInfo & TTI