43#define DEBUG_TYPE "loop-cache-cost"
47 cl::desc(
"Use this to specify the default trip count of a loop"));
54 cl::desc(
"Use this to specify the max. distance between array elements "
55 "accessed in a loop so that the elements are classified to have "
63 assert(!
Loops.empty() &&
"Expecting a non-empy loop vector");
68 if (ParentLoop ==
nullptr) {
69 assert(
Loops.size() == 1 &&
"Expecting a single loop");
92 if (isa<SCEVAddRecExpr>(Start) || isa<SCEVAddRecExpr>(Step))
103 return StepRec == &ElemSize;
112 const SCEV *TripCount = (!isa<SCEVCouldNotCompute>(BackedgeTakenCount) &&
113 isa<SCEVConstant>(BackedgeTakenCount))
119 <<
" could not be computed, using DefaultTripCount\n");
131 OS << R.StoreOrLoadInst;
132 OS <<
", IsValid=false.";
136 OS << *R.BasePointer;
137 for (
const SCEV *Subscript : R.Subscripts)
138 OS <<
"[" << *Subscript <<
"]";
142 OS <<
"[" << *
Size <<
"]";
149 : StoreOrLoadInst(StoreOrLoadInst), SE(SE) {
150 assert((isa<StoreInst>(StoreOrLoadInst) || isa<LoadInst>(StoreOrLoadInst)) &&
151 "Expecting a load or store instruction");
153 IsValid = delinearize(LI);
155 LLVM_DEBUG(
dbgs().indent(2) <<
"Succesfully delinearized: " << *
this
162 assert(IsValid &&
"Expecting a valid reference");
164 if (BasePointer !=
Other.getBasePointer() && !isAliased(
Other, AA)) {
166 <<
"No spacial reuse: different base pointers\n");
171 if (NumSubscripts !=
Other.getNumSubscripts()) {
173 <<
"No spacial reuse: different number of subscripts\n");
178 for (
auto SubNum : seq<unsigned>(0, NumSubscripts - 1)) {
180 LLVM_DEBUG(
dbgs().indent(2) <<
"No spacial reuse, different subscripts: "
182 << *
Other.getSubscript(SubNum) <<
"\n");
190 const SCEV *OtherLastSubscript =
Other.getLastSubscript();
194 if (Diff ==
nullptr) {
196 <<
"No spacial reuse, difference between subscript:\n\t"
197 << *LastSubscript <<
"\n\t" << OtherLastSubscript
198 <<
"\nis not constant.\n");
206 dbgs().indent(2) <<
"Found spacial reuse.\n";
211 return InSameCacheLine;
216 unsigned MaxDistance,
const Loop &L,
218 assert(IsValid &&
"Expecting a valid reference");
220 if (BasePointer !=
Other.getBasePointer() && !isAliased(
Other, AA)) {
222 <<
"No temporal reuse: different base pointer\n");
226 std::unique_ptr<Dependence>
D =
227 DI.
depends(&StoreOrLoadInst, &
Other.StoreOrLoadInst,
true);
230 LLVM_DEBUG(
dbgs().indent(2) <<
"No temporal reuse: no dependence\n");
234 if (
D->isLoopIndependent()) {
242 int LoopDepth = L.getLoopDepth();
243 int Levels =
D->getLevels();
244 for (
int Level = 1; Level <= Levels; ++Level) {
245 const SCEV *Distance =
D->getDistance(Level);
246 const SCEVConstant *SCEVConst = dyn_cast_or_null<SCEVConstant>(Distance);
248 if (SCEVConst ==
nullptr) {
249 LLVM_DEBUG(
dbgs().indent(2) <<
"No temporal reuse: distance unknown\n");
254 if (Level != LoopDepth && !CI.
isZero()) {
256 <<
"No temporal reuse: distance is not zero at depth=" << Level
259 }
else if (Level == LoopDepth && CI.
getSExtValue() > MaxDistance) {
262 <<
"No temporal reuse: distance is greater than MaxDistance at depth="
273 unsigned CLS)
const {
274 assert(IsValid &&
"Expecting a valid reference");
276 dbgs().
indent(2) <<
"Computing cache cost for:\n";
281 if (isLoopInvariant(L)) {
282 LLVM_DEBUG(
dbgs().indent(4) <<
"Reference is loop invariant: RefCost=1\n");
287 assert(TripCount &&
"Expecting valid TripCount");
290 const SCEV *RefCost =
nullptr;
291 const SCEV *Stride =
nullptr;
292 if (isConsecutive(L, Stride, CLS)) {
295 assert(Stride !=
nullptr &&
296 "Stride should not be null for consecutive access!");
310 <<
"Access is consecutive: RefCost=(TripCount*Stride)/CLS="
311 << *RefCost <<
"\n");
322 int Index = getSubscriptIndex(L);
323 assert(
Index >= 0 &&
"Could not locate a valid Index");
328 const SCEV *TripCount =
336 <<
"Access is not consecutive: RefCost=" << *RefCost <<
"\n");
338 assert(RefCost &&
"Expecting a valid RefCost");
341 if (
auto ConstantCost = dyn_cast<SCEVConstant>(RefCost))
342 return ConstantCost->getValue()->getZExtValue();
345 <<
"RefCost is not a constant! Setting to RefCost=InvalidCost "
346 "(invalid value).\n");
351bool IndexedReference::tryDelinearizeFixedSize(
359 for (
auto Idx : seq<unsigned>(1, Subscripts.
size()))
364 dbgs() <<
"Delinearized subscripts of fixed-size array\n"
371bool IndexedReference::delinearize(
const LoopInfo &LI) {
372 assert(Subscripts.
empty() &&
"Subscripts should be empty");
373 assert(Sizes.empty() &&
"Sizes should be empty");
374 assert(!IsValid &&
"Should be called once from the constructor");
375 LLVM_DEBUG(
dbgs() <<
"Delinearizing: " << StoreOrLoadInst <<
"\n");
381 const SCEV *AccessFn =
385 if (BasePointer ==
nullptr) {
388 <<
"ERROR: failed to delinearize, can't identify base pointer\n");
392 bool IsFixedSize =
false;
394 if (tryDelinearizeFixedSize(AccessFn, Subscripts)) {
397 Sizes.push_back(ElemSize);
399 <<
"', AccessFn: " << *AccessFn <<
"\n");
407 <<
"', AccessFn: " << *AccessFn <<
"\n");
412 if (Subscripts.
empty() || Sizes.empty() ||
413 Subscripts.
size() != Sizes.size()) {
418 <<
"ERROR: failed to delinearize reference\n");
429 const SCEVAddRecExpr *AccessFnAR = dyn_cast<SCEVAddRecExpr>(AccessFn);
439 Sizes.push_back(ElemSize);
442 return all_of(Subscripts, [&](
const SCEV *Subscript) {
443 return isSimpleAddRecurrence(*Subscript, *L);
450bool IndexedReference::isLoopInvariant(
const Loop &L)
const {
452 assert(
Addr !=
nullptr &&
"Expecting either a load or a store instruction");
460 bool allCoeffForLoopAreZero =
all_of(Subscripts, [&](
const SCEV *Subscript) {
461 return isCoeffForLoopZeroOrInvariant(*Subscript, L);
464 return allCoeffForLoopAreZero;
467bool IndexedReference::isConsecutive(
const Loop &L,
const SCEV *&Stride,
468 unsigned CLS)
const {
471 const SCEV *LastSubscript = Subscripts.
back();
472 for (
const SCEV *Subscript : Subscripts) {
473 if (Subscript == LastSubscript)
475 if (!isCoeffForLoopZeroOrInvariant(*Subscript, L))
480 const SCEV *Coeff = getLastCoefficient();
481 const SCEV *ElemSize = Sizes.back();
501int IndexedReference::getSubscriptIndex(
const Loop &L)
const {
504 if (AR && AR->
getLoop() == &L) {
511const SCEV *IndexedReference::getLastCoefficient()
const {
513 auto *AR = cast<SCEVAddRecExpr>(LastSubscript);
517bool IndexedReference::isCoeffForLoopZeroOrInvariant(
const SCEV &Subscript,
518 const Loop &L)
const {
520 return (AR !=
nullptr) ? AR->
getLoop() != &
L
524bool IndexedReference::isSimpleAddRecurrence(
const SCEV &Subscript,
525 const Loop &L)
const {
526 if (!isa<SCEVAddRecExpr>(Subscript))
555 for (
const auto &LC :
CC.LoopCosts) {
556 const Loop *L = LC.first;
557 OS <<
"Loop '" << L->getName() <<
"' has cost = " << LC.second <<
"\n";
565 std::optional<unsigned> TRT)
567 TTI(
TTI), AA(AA), DI(DI) {
568 assert(!
Loops.empty() &&
"Expecting a non-empty loop vector.");
576 calculateCacheFootprint();
579std::unique_ptr<CacheCost>
583 LLVM_DEBUG(
dbgs() <<
"Expecting the outermost loop in a loop nest\n");
591 LLVM_DEBUG(
dbgs() <<
"Cannot compute cache cost of loop nest with more "
592 "than one innermost loop\n");
596 return std::make_unique<CacheCost>(
Loops, AR.
LI, AR.
SE, AR.
TTI, AR.
AA, DI, TRT);
599void CacheCost::calculateCacheFootprint() {
602 if (!populateReferenceGroups(RefGroups))
606 for (
const Loop *L : Loops) {
609 [L](
const LoopCacheCostTy &LCC) {
return LCC.first == L; }) &&
610 "Should not add duplicate element");
611 CacheCostTy LoopCost = computeLoopCacheCost(*L, RefGroups);
612 LoopCosts.
push_back(std::make_pair(L, LoopCost));
620 assert(RefGroups.
empty() &&
"Reference groups should be empty");
624 assert(InnerMostLoop !=
nullptr &&
"Expecting a valid innermost loop");
628 if (!isa<StoreInst>(
I) && !isa<LoadInst>(
I))
639 dbgs() <<
"References:\n";
656 std::optional<bool> HasTemporalReuse =
657 R->hasTemporalReuse(Representative, *TRT, *InnerMostLoop, DI, AA);
658 std::optional<bool> HasSpacialReuse =
659 R->hasSpacialReuse(Representative, CLS, AA);
661 if ((HasTemporalReuse && *HasTemporalReuse) ||
662 (HasSpacialReuse && *HasSpacialReuse)) {
663 RefGroup.push_back(std::move(R));
672 RefGroups.push_back(std::move(RG));
677 if (RefGroups.empty())
681 dbgs() <<
"\nIDENTIFIED REFERENCE GROUPS:\n";
684 dbgs().
indent(2) <<
"RefGroup " << n <<
":\n";
685 for (
const auto &
IR : RG)
696CacheCost::computeLoopCacheCost(
const Loop &L,
698 if (!
L.isLoopSimplifyForm())
702 <<
"' as innermost loop.\n");
706 for (
const auto &TC : TripCounts) {
709 TripCountsProduct *= TC.second;
714 CacheCostTy RefGroupCost = computeRefGroupCacheCost(RG, L);
715 LoopCost += RefGroupCost * TripCountsProduct;
719 <<
"' has cost=" << LoopCost <<
"\n");
725 const Loop &L)
const {
726 assert(!RG.
empty() &&
"Reference group should have at least one member.");
738 Function *
F = L.getHeader()->getParent();
This file builds on the ADT/GraphTraits.h file to build a generic breadth first graph iterator.
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
Legalize the Machine IR a function s Machine IR
static bool isOneDimensionalArray(const SCEV &AccessFn, const SCEV &ElemSize, const Loop &L, ScalarEvolution &SE)
static cl::opt< unsigned > TemporalReuseThreshold("temporal-reuse-threshold", cl::init(2), cl::Hidden, cl::desc("Use this to specify the max. distance between array elements " "accessed in a loop so that the elements are classified to have " "temporal reuse"))
static const SCEV * computeTripCount(const Loop &L, const SCEV &ElemSize, ScalarEvolution &SE)
Compute the trip count for the given loop L or assume a default value if it is not a compile time con...
static Loop * getInnerMostLoop(const LoopVectorTy &Loops)
Retrieve the innermost loop in the given loop nest Loops.
static cl::opt< unsigned > DefaultTripCount("default-trip-count", cl::init(100), cl::Hidden, cl::desc("Use this to specify the default trip count of a loop"))
This file defines the interface for the loop cache analysis.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Provides some synthesis utilities to produce sequences of values.
This file defines the SmallVector class.
bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A trivial helper function to check to see if the specified pointers are must-alias.
A container for analyses that lazily runs them and caches their results.
LLVM Basic Block Representation.
CacheCost represents the estimated cost of a inner loop as the number of cache lines used by the memo...
CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, ScalarEvolution &SE, TargetTransformInfo &TTI, AAResults &AA, DependenceInfo &DI, std::optional< unsigned > TRT=std::nullopt)
Construct a CacheCost object for the loop nest described by Loops.
static std::unique_ptr< CacheCost > getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI, std::optional< unsigned > TRT=std::nullopt)
Create a CacheCost for the loop nest rooted by Root.
static CacheCostTy constexpr InvalidCost
@ ICMP_ULT
unsigned less than
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
DependenceInfo - This class is the main dependence-analysis driver.
std::unique_ptr< Dependence > depends(Instruction *Src, Instruction *Dst, bool PossiblyLoopIndependent)
depends - Tests for a dependence between the Src and Dst instructions.
Represents a memory reference as a base pointer and a set of indexing operations.
CacheCostTy computeRefCost(const Loop &L, unsigned CLS) const
Compute the cost of the reference w.r.t.
const SCEV * getSubscript(unsigned SubNum) const
std::optional< bool > hasSpacialReuse(const IndexedReference &Other, unsigned CLS, AAResults &AA) const
Return true/false if the current object and the indexed reference Other are/aren't in the same cache ...
std::optional< bool > hasTemporalReuse(const IndexedReference &Other, unsigned MaxDistance, const Loop &L, DependenceInfo &DI, AAResults &AA) const
Return true if the current object and the indexed reference Other have distance smaller than MaxDista...
IndexedReference(Instruction &StoreOrLoadInst, const LoopInfo &LI, ScalarEvolution &SE)
Construct an indexed reference given a StoreOrLoadInst instruction.
const SCEV * getLastSubscript() const
size_t getNumSubscripts() const
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
bool isOutermost() const
Return true if the loop does not have a parent (natural) loop.
unsigned getLoopDepth() const
Return the nesting level of this loop.
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Represents a single loop in the control flow graph.
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStart() const
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const Loop * getLoop() const
This class represents a constant integer value.
ConstantInt * getValue() const
NoWrapFlags getNoWrapFlags(NoWrapFlags Mask=NoWrapMask) const
This class represents an analyzed expression in the program.
Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
const SCEV * getUDivCeilSCEV(const SCEV *N, const SCEV *D)
Compute ceil(N / D).
Type * getWiderType(Type *Ty1, Type *Ty2) const
bool isKnownNegative(const SCEV *S)
Test if the given expression is known to be negative.
const SCEV * getSCEVAtScope(const SCEV *S, const Loop *L)
Return a SCEV expression for the specified value at the specified scope in the program.
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
const SCEV * getConstant(ConstantInt *V)
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
const SCEV * getNoopOrSignExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
const SCEV * getTripCountFromExitCount(const SCEV *ExitCount)
A version of getTripCountFromExitCount below which always picks an evaluation type which can not resu...
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
bool isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
const SCEV * getAddRecExpr(const SCEV *Start, const SCEV *Step, const Loop *L, SCEV::NoWrapFlags Flags)
Get an add recurrence expression for the specified loop.
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
const SCEV * getNoopOrAnyExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
const SCEV * getNoopOrZeroExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
unsigned getSmallConstantTripCount(const Loop *L)
Returns the exact trip count of the loop if we can compute it, and the result is a small constant.
const SCEV * getPointerBase(const SCEV *V)
Transitively follow the chain of pointer-type operands until reaching a SCEV that does not have a sin...
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
const SCEV * getElementSize(Instruction *Inst)
Return the size of an element read or written by Inst.
const SCEV * getUDivExactExpr(const SCEV *LHS, const SCEV *RHS)
Get a canonical unsigned division expression, or something simpler if possible.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM Value Representation.
const ParentTy * getParent() const
This class implements an extremely fast bulk output stream that can only output to a stream.
raw_ostream & indent(unsigned NumSpaces)
indent - Insert 'NumSpaces' spaces.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
const Value * getPointerOperand(const Value *V)
A helper function that returns the pointer operand of a load, store or GEP instruction.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
bool is_sorted(R &&Range, Compare C)
Wrapper function around std::is_sorted to check if elements in a range R are sorted with respect to a...
iterator_range< bf_iterator< T > > breadth_first(const T &G)
bool tryDelinearizeFixedSizeImpl(ScalarEvolution *SE, Instruction *Inst, const SCEV *AccessFn, SmallVectorImpl< const SCEV * > &Subscripts, SmallVectorImpl< int > &Sizes)
Implementation of fixed size array delinearization.
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
void delinearize(ScalarEvolution &SE, const SCEV *Expr, SmallVectorImpl< const SCEV * > &Subscripts, SmallVectorImpl< const SCEV * > &Sizes, const SCEV *ElementSize)
Split this SCEVAddRecExpr into two vectors of SCEVs representing the subscripts and sizes of an array...
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
TargetTransformInfo & TTI