#define DEBUG_TYPE "loop-cache-cost"

static cl::opt<unsigned> DefaultTripCount(
    "default-trip-count", cl::init(100), cl::Hidden,
    cl::desc("Use this to specify the default trip count of a loop"));

static cl::opt<unsigned> TemporalReuseThreshold(
    "temporal-reuse-threshold", cl::init(2), cl::Hidden,
    cl::desc("Use this to specify the max. distance between array elements "
             "accessed in a loop so that the elements are classified to have "
             "temporal reuse"));
static Loop *getInnerMostLoop(const LoopVectorTy &Loops) {
  assert(!Loops.empty() && "Expecting a non-empty loop vector");

  Loop *LastLoop = Loops.back();
  Loop *ParentLoop = LastLoop->getParentLoop();

  if (ParentLoop == nullptr) {
    assert(Loops.size() == 1 && "Expecting a single loop");
    return LastLoop;
  }
  // ...
}

static bool isOneDimensionalArray(const SCEV &AccessFn, const SCEV &ElemSize,
                                  const Loop &L, ScalarEvolution &SE) {
  // ...
  if (isa<SCEVAddRecExpr>(Start) || isa<SCEVAddRecExpr>(Step))
    return false;
  // ...
  return StepRec == &ElemSize;
}
static const SCEV *computeTripCount(const Loop &L, const SCEV &ElemSize,
                                    ScalarEvolution &SE) {
  const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(&L);
  const SCEV *TripCount = (!isa<SCEVCouldNotCompute>(BackedgeTakenCount) &&
                           isa<SCEVConstant>(BackedgeTakenCount))
                              ? SE.getTripCountFromExitCount(BackedgeTakenCount)
                              : nullptr;

  if (!TripCount) {
    LLVM_DEBUG(dbgs() << "Trip count of loop " << L.getName()
                      << " could not be computed, using DefaultTripCount\n");
    TripCount = SE.getConstant(ElemSize.getType(), DefaultTripCount);
  }
  // ...
}
raw_ostream &llvm::operator<<(raw_ostream &OS, const IndexedReference &R) {
  if (!R.IsValid) {
    OS << R.StoreOrLoadInst;
    OS << ", IsValid=false.";
    return OS;
  }

  OS << *R.BasePointer;
  for (const SCEV *Subscript : R.Subscripts)
    OS << "[" << *Subscript << "]";

  OS << ", Sizes: ";
  for (const SCEV *Size : R.Sizes)
    OS << "[" << *Size << "]";

  return OS;
}
IndexedReference::IndexedReference(Instruction &StoreOrLoadInst,
                                   const LoopInfo &LI, ScalarEvolution &SE)
    : StoreOrLoadInst(StoreOrLoadInst), SE(SE) {
  assert((isa<StoreInst>(StoreOrLoadInst) || isa<LoadInst>(StoreOrLoadInst)) &&
         "Expecting a load or store instruction");

  IsValid = delinearize(LI);
  if (IsValid)
    LLVM_DEBUG(dbgs().indent(2) << "Successfully delinearized: " << *this
                                << "\n");
}
std::optional<bool>
IndexedReference::hasSpacialReuse(const IndexedReference &Other, unsigned CLS,
                                  AAResults &AA) const {
  assert(IsValid && "Expecting a valid reference");

  if (BasePointer != Other.getBasePointer() && !isAliased(Other, AA)) {
    LLVM_DEBUG(dbgs().indent(2)
               << "No spacial reuse: different base pointers\n");
    return false;
  }

  unsigned NumSubscripts = getNumSubscripts();
  if (NumSubscripts != Other.getNumSubscripts()) {
    LLVM_DEBUG(dbgs().indent(2)
               << "No spacial reuse: different number of subscripts\n");
    return false;
  }

  // All subscripts other than the last one must be equal.
  for (auto SubNum : seq<unsigned>(0, NumSubscripts - 1)) {
    if (getSubscript(SubNum) != Other.getSubscript(SubNum)) {
      LLVM_DEBUG(dbgs().indent(2) << "No spacial reuse, different subscripts: "
                                  << "\n\t" << *getSubscript(SubNum) << "\n\t"
                                  << *Other.getSubscript(SubNum) << "\n");
      return false;
    }
  }

  // The difference between the last subscripts must be smaller than the cache
  // line size.
  const SCEV *LastSubscript = getLastSubscript();
  const SCEV *OtherLastSubscript = Other.getLastSubscript();
  const SCEVConstant *Diff = dyn_cast<SCEVConstant>(
      SE.getMinusSCEV(LastSubscript, OtherLastSubscript));

  if (Diff == nullptr) {
    LLVM_DEBUG(dbgs().indent(2)
               << "No spacial reuse, difference between subscript:\n\t"
               << *LastSubscript << "\n\t" << OtherLastSubscript
               << "\nis not constant.\n");
    return std::nullopt;
  }

  bool InSameCacheLine = (Diff->getValue()->getSExtValue() < CLS);
  LLVM_DEBUG({
    if (InSameCacheLine)
      dbgs().indent(2) << "Found spacial reuse.\n";
    else
      dbgs().indent(2) << "No spacial reuse.\n";
  });

  return InSameCacheLine;
}
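// Illustrative example (not from the original source): A[i][j] and A[i][j+4]
// differ only in the last subscript, and the constant difference between the
// last subscripts (4) is smaller than a cache line size of 64, so the pair is
// classified as having spacial reuse; A[i][j] and A[i+1][j] differ in an
// earlier subscript and are not.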
std::optional<bool>
IndexedReference::hasTemporalReuse(const IndexedReference &Other,
                                   unsigned MaxDistance, const Loop &L,
                                   DependenceInfo &DI, AAResults &AA) const {
  assert(IsValid && "Expecting a valid reference");

  if (BasePointer != Other.getBasePointer() && !isAliased(Other, AA)) {
    LLVM_DEBUG(dbgs().indent(2)
               << "No temporal reuse: different base pointer\n");
    return false;
  }

  std::unique_ptr<Dependence> D =
      DI.depends(&StoreOrLoadInst, &Other.StoreOrLoadInst, true);

  if (D == nullptr) {
    LLVM_DEBUG(dbgs().indent(2) << "No temporal reuse: no dependence\n");
    return false;
  }

  if (D->isLoopIndependent()) {
    LLVM_DEBUG(dbgs().indent(2) << "Found temporal reuse\n");
    return true;
  }

  // Check the dependence distance at every loop level. There is temporal
  // reuse if the distance at the given loop's depth is small enough and is
  // zero at every other level.
  int LoopDepth = L.getLoopDepth();
  int Levels = D->getLevels();
  for (int Level = 1; Level <= Levels; ++Level) {
    const SCEV *Distance = D->getDistance(Level);
    const SCEVConstant *SCEVConst = dyn_cast_or_null<SCEVConstant>(Distance);

    if (SCEVConst == nullptr) {
      LLVM_DEBUG(dbgs().indent(2) << "No temporal reuse: distance unknown\n");
      return std::nullopt;
    }

    const ConstantInt &CI = *SCEVConst->getValue();
    if (Level != LoopDepth && !CI.isZero()) {
      LLVM_DEBUG(dbgs().indent(2)
                 << "No temporal reuse: distance is not zero at depth=" << Level
                 << "\n");
      return false;
    } else if (Level == LoopDepth && CI.getSExtValue() > MaxDistance) {
      LLVM_DEBUG(
          dbgs().indent(2)
          << "No temporal reuse: distance is greater than MaxDistance at depth="
          << Level << "\n");
      return false;
    }
  }

  LLVM_DEBUG(dbgs().indent(2) << "Found temporal reuse\n");
  return true;
}
CacheCostTy IndexedReference::computeRefCost(const Loop &L,
                                             unsigned CLS) const {
  assert(IsValid && "Expecting a valid reference");
  LLVM_DEBUG({
    dbgs().indent(2) << "Computing cache cost for:\n";
    dbgs().indent(4) << *this << "\n";
  });

  // If the indexed reference is loop invariant the cost is one.
  if (isLoopInvariant(L)) {
    LLVM_DEBUG(dbgs().indent(4) << "Reference is loop invariant: RefCost=1\n");
    return 1;
  }

  const SCEV *TripCount = computeTripCount(L, *Sizes.back(), SE);
  assert(TripCount && "Expecting valid TripCount");

  const SCEV *RefCost = nullptr;
  const SCEV *Stride = nullptr;
  if (isConsecutive(L, Stride, CLS)) {
    // If the indexed reference is 'consecutive' the cost is
    // (TripCount*Stride)/CLS.
    assert(Stride != nullptr &&
           "Stride should not be null for consecutive access!");
    // ...
    LLVM_DEBUG(dbgs().indent(4)
               << "Access is consecutive: RefCost=(TripCount*Stride)/CLS="
               << *RefCost << "\n");
  } else {
    // The reference is not consecutive in L; its cost is derived from the
    // trip counts of the relevant subscript dimensions.
    int Index = getSubscriptIndex(L);
    assert(Index >= 0 && "Could not locate a valid Index");
    // ...
    LLVM_DEBUG(dbgs().indent(4)
               << "Access is not consecutive: RefCost=" << *RefCost << "\n");
  }
  assert(RefCost && "Expecting a valid RefCost");

  // Attempt to fold RefCost into a constant.
  if (auto ConstantCost = dyn_cast<SCEVConstant>(RefCost))
    return ConstantCost->getValue()->getSExtValue();

  LLVM_DEBUG(dbgs().indent(4)
             << "RefCost is not a constant! Setting to RefCost=InvalidCost "
                "(invalid value).\n");

  return CacheCost::InvalidCost;
}
bool IndexedReference::tryDelinearizeFixedSize(
    const SCEV *AccessFn, SmallVectorImpl<const SCEV *> &Subscripts) {
  SmallVector<int, 4> ArraySizes;
  if (!tryDelinearizeFixedSizeImpl(&SE, &StoreOrLoadInst, AccessFn, Subscripts,
                                   ArraySizes))
    return false;

  // Populate Sizes with SCEV expressions to be used in calculations later.
  for (auto Idx : seq<unsigned>(1, Subscripts.size()))
    Sizes.push_back(
        SE.getConstant(Subscripts[Idx]->getType(), ArraySizes[Idx - 1]));

  LLVM_DEBUG(dbgs() << "Delinearized subscripts of fixed-size array\n");
  return true;
}
bool IndexedReference::delinearize(const LoopInfo &LI) {
  assert(Subscripts.empty() && "Subscripts should be empty");
  assert(Sizes.empty() && "Sizes should be empty");
  assert(!IsValid && "Should be called once from the constructor");
  LLVM_DEBUG(dbgs() << "Delinearizing: " << StoreOrLoadInst << "\n");

  const SCEV *ElemSize = SE.getElementSize(&StoreOrLoadInst);
  const BasicBlock *BB = StoreOrLoadInst.getParent();

  if (Loop *L = LI.getLoopFor(BB)) {
    const SCEV *AccessFn =
        SE.getSCEVAtScope(getPointerOperand(&StoreOrLoadInst), L);

    BasePointer = dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFn));
    if (BasePointer == nullptr) {
      LLVM_DEBUG(
          dbgs().indent(2)
          << "ERROR: failed to delinearize, can't identify base pointer\n");
      return false;
    }

    bool IsFixedSize = false;
    // Try to delinearize fixed-size arrays.
    if (tryDelinearizeFixedSize(AccessFn, Subscripts)) {
      IsFixedSize = true;
      Sizes.push_back(ElemSize);
      LLVM_DEBUG(dbgs().indent(2) << "In Loop '" << L->getName()
                                  << "', AccessFn: " << *AccessFn << "\n");
    }

    AccessFn = SE.getMinusSCEV(AccessFn, BasePointer);

    // Try to delinearize parametric-size arrays.
    if (!IsFixedSize) {
      llvm::delinearize(SE, AccessFn, Subscripts, Sizes, ElemSize);
      LLVM_DEBUG(dbgs().indent(2) << "In Loop '" << L->getName()
                                  << "', AccessFn: " << *AccessFn << "\n");
    }

    if (Subscripts.empty() || Sizes.empty() ||
        Subscripts.size() != Sizes.size()) {
      // Attempt to determine whether we have a single-dimensional array
      // access before giving up.
      if (!isOneDimensionalArray(*AccessFn, *ElemSize, *L, SE)) {
        LLVM_DEBUG(dbgs().indent(2)
                   << "ERROR: failed to delinearize reference\n");
        return false;
      }

      // The access may be in reverse; use the add recurrence to reconstruct
      // a single subscript.
      const SCEVAddRecExpr *AccessFnAR = dyn_cast<SCEVAddRecExpr>(AccessFn);
      // ...
      const SCEV *Div = SE.getUDivExactExpr(AccessFn, ElemSize);
      Subscripts.push_back(Div);
      Sizes.push_back(ElemSize);
    }

    return all_of(Subscripts, [&](const SCEV *Subscript) {
      return isSimpleAddRecurrence(*Subscript, *L);
    });
  }

  return false;
}
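// Illustrative example (not from the original source): for a fixed-size array
// declared as [100 x [100 x i32]] accessed as A[i][j], delinearization is
// expected to produce Subscripts = {i, j} and Sizes = {100, 4}, where the
// trailing 4 is the element size in bytes.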
bool IndexedReference::isLoopInvariant(const Loop &L) const {
  Value *Addr = getPointerOperand(&StoreOrLoadInst);
  assert(Addr != nullptr && "Expecting either a load or a store instruction");
  assert(SE.isSCEVable(Addr->getType()) && "Addr should be SCEVable");

  if (SE.isLoopInvariant(SE.getSCEV(Addr), &L))
    return true;

  // The indexed reference is loop invariant if none of the coefficients use
  // the loop induction variable.
  bool allCoeffForLoopAreZero = all_of(Subscripts, [&](const SCEV *Subscript) {
    return isCoeffForLoopZeroOrInvariant(*Subscript, L);
  });

  return allCoeffForLoopAreZero;
}
bool IndexedReference::isConsecutive(const Loop &L, const SCEV *&Stride,
                                     unsigned CLS) const {
  // The indexed reference is 'consecutive' if the only coefficient that uses
  // the loop induction variable is the last one.
  const SCEV *LastSubscript = Subscripts.back();
  for (const SCEV *Subscript : Subscripts) {
    if (Subscript == LastSubscript)
      continue;
    if (!isCoeffForLoopZeroOrInvariant(*Subscript, L))
      return false;
  }

  // The stride is derived from the last coefficient and the element size.
  const SCEV *Coeff = getLastCoefficient();
  const SCEV *ElemSize = Sizes.back();
  // ...
}
int IndexedReference::getSubscriptIndex(const Loop &L) const {
  for (auto Idx : seq<int>(0, getNumSubscripts())) {
    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(getSubscript(Idx));
    if (AR && AR->getLoop() == &L)
      return Idx;
  }
  return -1;
}

const SCEV *IndexedReference::getLastCoefficient() const {
  const SCEV *LastSubscript = getLastSubscript();
  auto *AR = cast<SCEVAddRecExpr>(LastSubscript);
  return AR->getStepRecurrence(SE);
}

bool IndexedReference::isCoeffForLoopZeroOrInvariant(const SCEV &Subscript,
                                                     const Loop &L) const {
  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(&Subscript);
  return (AR != nullptr) ? AR->getLoop() != &L
                         : SE.isLoopInvariant(&Subscript, &L);
}

bool IndexedReference::isSimpleAddRecurrence(const SCEV &Subscript,
                                             const Loop &L) const {
  if (!isa<SCEVAddRecExpr>(Subscript))
    return false;

  // The subscript is a simple add recurrence if it is affine in L and its
  // start and step are invariant in L.
  // ...
}
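// Illustrative example (not from the original source): a subscript such as
// i + 2 inside loop i is the affine add recurrence {2,+,1}<%i.loop>; its start
// (2) and step (1) are invariant in the loop, so it counts as a simple add
// recurrence, whereas a subscript like i*i does not.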
raw_ostream &llvm::operator<<(raw_ostream &OS, const CacheCost &CC) {
  for (const auto &LC : CC.LoopCosts) {
    const Loop *L = LC.first;
    OS << "Loop '" << L->getName() << "' has cost = " << LC.second << "\n";
  }
  return OS;
}

CacheCost::CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI,
                     ScalarEvolution &SE, TargetTransformInfo &TTI,
                     AAResults &AA, DependenceInfo &DI,
                     std::optional<unsigned> TRT)
    : Loops(Loops),
      TRT(TRT == std::nullopt ? std::optional<unsigned>(TemporalReuseThreshold)
                              : TRT),
      LI(LI), SE(SE), TTI(TTI), AA(AA), DI(DI) {
  assert(!Loops.empty() && "Expecting a non-empty loop vector.");

  for (const Loop *L : Loops) {
    unsigned TripCount = SE.getSmallConstantTripCount(L);
    TripCount = (TripCount == 0) ? DefaultTripCount : TripCount;
    TripCounts.push_back({L, TripCount});
  }

  calculateCacheFootprint();
}
std::unique_ptr<CacheCost>
CacheCost::getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR,
                        DependenceInfo &DI, std::optional<unsigned> TRT) {
  if (!Root.isOutermost()) {
    LLVM_DEBUG(dbgs() << "Expecting the outermost loop in a loop nest\n");
    return nullptr;
  }

  LoopVectorTy Loops;
  append_range(Loops, breadth_first(&Root));

  if (!getInnerMostLoop(Loops)) {
    LLVM_DEBUG(dbgs() << "Cannot compute cache cost of loop nest with more "
                         "than one innermost loop\n");
    return nullptr;
  }

  return std::make_unique<CacheCost>(Loops, AR.LI, AR.SE, AR.TTI, AR.AA, DI,
                                     TRT);
}
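// Illustrative only (not from the original source): a minimal sketch of how a
// loop pass might query this analysis; `Root` and the
// LoopStandardAnalysisResults object `AR` are placeholders for whatever the
// caller already has in hand.
//
//   DependenceInfo DI(Root.getHeader()->getParent(), &AR.AA, &AR.SE, &AR.LI);
//   if (std::unique_ptr<CacheCost> CC = CacheCost::getCacheCost(Root, AR, DI))
//     for (const auto &LC : CC->getLoopCosts())
//       dbgs() << "Loop '" << LC.first->getName() << "' cost = " << LC.second
//              << "\n";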
void CacheCost::calculateCacheFootprint() {
  LLVM_DEBUG(dbgs() << "POPULATING REFERENCE GROUPS\n");
  ReferenceGroupsTy RefGroups;
  if (!populateReferenceGroups(RefGroups))
    return;

  LLVM_DEBUG(dbgs() << "COMPUTING LOOP CACHE COSTS\n");
  for (const Loop *L : Loops) {
    assert(llvm::none_of(
               LoopCosts,
               [L](const LoopCacheCostTy &LCC) { return LCC.first == L; }) &&
           "Should not add duplicate element");
    CacheCostTy LoopCost = computeLoopCacheCost(*L, RefGroups);
    LoopCosts.push_back(std::make_pair(L, LoopCost));
  }

  sortLoopCosts();
}
bool CacheCost::populateReferenceGroups(ReferenceGroupsTy &RefGroups) const {
  assert(RefGroups.empty() && "Reference groups should be empty");

  unsigned CLS = TTI.getCacheLineSize();
  Loop *InnerMostLoop = getInnerMostLoop(Loops);
  assert(InnerMostLoop != nullptr && "Expecting a valid innermost loop");

  for (BasicBlock *BB : InnerMostLoop->getBlocks()) {
    for (Instruction &I : *BB) {
      if (!isa<StoreInst>(I) && !isa<LoadInst>(I))
        continue;

      std::unique_ptr<IndexedReference> R(new IndexedReference(I, LI, SE));
      if (!R->isValid())
        continue;

      // Add R to an existing reference group if it has temporal or spacial
      // reuse with the group's representative; otherwise start a new group.
      bool Added = false;
      for (ReferenceGroupTy &RefGroup : RefGroups) {
        const IndexedReference &Representative = *RefGroup.front();
        LLVM_DEBUG(dbgs() << "References:\n");

        std::optional<bool> HasTemporalReuse =
            R->hasTemporalReuse(Representative, *TRT, *InnerMostLoop, DI, AA);
        std::optional<bool> HasSpacialReuse =
            R->hasSpacialReuse(Representative, CLS, AA);

        if ((HasTemporalReuse && *HasTemporalReuse) ||
            (HasSpacialReuse && *HasSpacialReuse)) {
          RefGroup.push_back(std::move(R));
          Added = true;
          break;
        }
      }

      if (!Added) {
        ReferenceGroupTy RG;
        RG.push_back(std::move(R));
        RefGroups.push_back(std::move(RG));
      }
    }
  }

  if (RefGroups.empty())
    return false;

  LLVM_DEBUG({
    dbgs() << "\nIDENTIFIED REFERENCE GROUPS:\n";
    for (const ReferenceGroupTy &RG : RefGroups)
      for (const auto &IR : RG)
        dbgs().indent(2) << *IR << "\n";
  });

  return true;
}
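// Illustrative example (not from the original source): in an (i, j) nest that
// reads A[i][j], A[i][j+1] and A[i+1][j], the first two references typically
// end up in one reference group (for a given i they touch the same cache
// line), while A[i+1][j] forms a group of its own.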
CacheCostTy
CacheCost::computeLoopCacheCost(const Loop &L,
                                const ReferenceGroupsTy &RefGroups) const {
  if (!L.isLoopSimplifyForm())
    return InvalidCost;

  LLVM_DEBUG(dbgs() << "Considering loop '" << L.getName()
                    << "' as innermost loop.\n");

  // Compute the product of the trip counts of every other loop in the nest.
  unsigned TripCountsProduct = 1;
  for (const auto &TC : TripCounts) {
    if (TC.first == &L)
      continue;
    TripCountsProduct *= TC.second;
  }

  CacheCostTy LoopCost = 0;
  for (const ReferenceGroupTy &RG : RefGroups) {
    CacheCostTy RefGroupCost = computeRefGroupCacheCost(RG, L);
    LoopCost += RefGroupCost * TripCountsProduct;
  }

  LLVM_DEBUG(dbgs().indent(2) << "Loop '" << L.getName()
                              << "' has cost=" << LoopCost << "\n");

  return LoopCost;
}
CacheCostTy CacheCost::computeRefGroupCacheCost(const ReferenceGroupTy &RG,
                                                const Loop &L) const {
  assert(!RG.empty() && "Reference group should have at least one member.");

  const IndexedReference *Representative = RG.front().get();
  return Representative->computeRefCost(L, TTI.getCacheLineSize());
}