Go to the documentation of this file.
43 #define DEBUG_TYPE "loop-cache-cost"
47 cl::desc(
"Use this to specify the default trip count of a loop"));
54 cl::desc(
"Use this to specify the max. distance between array elements "
55 "accessed in a loop so that the elements are classified to have "
63 assert(!
Loops.empty() &&
"Expecting a non-empty loop vector");
68 if (ParentLoop ==
nullptr) {
69 assert(
Loops.size() == 1 &&
"Expecting a single loop");
92 if (isa<SCEVAddRecExpr>(Start) || isa<SCEVAddRecExpr>(Step))
103 return StepRec == &ElemSize;
112 const SCEV *TripCount = (!isa<SCEVCouldNotCompute>(BackedgeTakenCount) &&
113 isa<SCEVConstant>(BackedgeTakenCount))
119 <<
" could not be computed, using DefaultTripCount\n");
131 OS << R.StoreOrLoadInst;
132 OS <<
", IsValid=false.";
136 OS << *R.BasePointer;
137 for (
const SCEV *Subscript : R.Subscripts)
138 OS <<
"[" << *Subscript <<
"]";
141 for (
const SCEV *Size : R.Sizes)
142 OS <<
"[" << *Size <<
"]";
149 : StoreOrLoadInst(StoreOrLoadInst), SE(SE) {
150 assert((isa<StoreInst>(StoreOrLoadInst) || isa<LoadInst>(StoreOrLoadInst)) &&
151 "Expecting a load or store instruction");
153 IsValid = delinearize(LI);
155 LLVM_DEBUG(
dbgs().indent(2) <<
"Successfully delinearized: " << *
this
162 assert(IsValid &&
"Expecting a valid reference");
164 if (BasePointer !=
Other.getBasePointer() && !isAliased(
Other,
AA)) {
166 <<
"No spacial reuse: different base pointers\n");
171 if (NumSubscripts !=
Other.getNumSubscripts()) {
173 <<
"No spacial reuse: different number of subscripts\n");
178 for (
auto SubNum : seq<unsigned>(0, NumSubscripts - 1)) {
180 LLVM_DEBUG(
dbgs().indent(2) <<
"No spacial reuse, different subscripts: "
182 << *
Other.getSubscript(SubNum) <<
"\n");
190 const SCEV *OtherLastSubscript =
Other.getLastSubscript();
194 if (Diff ==
nullptr) {
196 <<
"No spacial reuse, difference between subscript:\n\t"
197 << *LastSubscript <<
"\n\t" << OtherLastSubscript
198 <<
"\nis not constant.\n");
206 dbgs().indent(2) <<
"Found spacial reuse.\n";
211 return InSameCacheLine;
215 unsigned MaxDistance,
219 assert(IsValid &&
"Expecting a valid reference");
221 if (BasePointer !=
Other.getBasePointer() && !isAliased(
Other,
AA)) {
223 <<
"No temporal reuse: different base pointer\n");
227 std::unique_ptr<Dependence>
D =
228 DI.
depends(&StoreOrLoadInst, &
Other.StoreOrLoadInst,
true);
231 LLVM_DEBUG(
dbgs().indent(2) <<
"No temporal reuse: no dependence\n");
235 if (
D->isLoopIndependent()) {
244 int Levels =
D->getLevels();
247 const SCEVConstant *SCEVConst = dyn_cast_or_null<SCEVConstant>(Distance);
249 if (SCEVConst ==
nullptr) {
250 LLVM_DEBUG(
dbgs().indent(2) <<
"No temporal reuse: distance unknown\n");
257 <<
"No temporal reuse: distance is not zero at depth=" <<
Level
263 <<
"No temporal reuse: distance is greater than MaxDistance at depth="
274 unsigned CLS)
const {
275 assert(IsValid &&
"Expecting a valid reference");
277 dbgs().
indent(2) <<
"Computing cache cost for:\n";
282 if (isLoopInvariant(L)) {
283 LLVM_DEBUG(
dbgs().indent(4) <<
"Reference is loop invariant: RefCost=1\n");
288 assert(TripCount &&
"Expecting valid TripCount");
291 const SCEV *RefCost =
nullptr;
292 if (isConsecutive(L, CLS)) {
295 const SCEV *Coeff = getLastCoefficient();
296 const SCEV *ElemSize = Sizes.back();
298 "Expecting the same type");
310 <<
"Access is consecutive: RefCost=(TripCount*Stride)/CLS="
311 << *RefCost <<
"\n");
322 int Index = getSubscriptIndex(L);
323 assert(Index >= 0 &&
"Could not locate a valid Index");
328 const SCEV *TripCount =
336 <<
"Access is not consecutive: RefCost=" << *RefCost <<
"\n");
338 assert(RefCost &&
"Expecting a valid RefCost");
341 if (
auto ConstantCost = dyn_cast<SCEVConstant>(RefCost))
342 return ConstantCost->getValue()->getSExtValue();
345 <<
"RefCost is not a constant! Setting to RefCost=InvalidCost "
346 "(invalid value).\n");
351 bool IndexedReference::tryDelinearizeFixedSize(
359 auto *SrcGEP = dyn_cast<GetElementPtrInst>(SrcPtr);
368 if (SrcSizes.empty() || SrcSubscripts.size() <= 1) {
369 SrcSubscripts.
clear();
377 if (SrcBasePtr != SrcBase->
getValue()) {
378 SrcSubscripts.
clear();
382 assert(SrcSubscripts.size() == SrcSizes.size() + 1 &&
383 "Expected equal number of entries in the list of size and "
386 for (
auto Idx : seq<unsigned>(1, Subscripts.size()))
387 Sizes.push_back(SE->
getConstant(Subscripts[Idx]->getType(), SrcSizes[Idx - 1]));
390 dbgs() <<
"Delinearized subscripts of fixed-size array\n"
391 <<
"SrcGEP:" << *SrcGEP <<
"\n";
396 bool IndexedReference::delinearize(
const LoopInfo &LI) {
397 assert(Subscripts.empty() &&
"Subscripts should be empty");
398 assert(Sizes.empty() &&
"Sizes should be empty");
399 assert(!IsValid &&
"Should be called once from the constructor");
400 LLVM_DEBUG(
dbgs() <<
"Delinearizing: " << StoreOrLoadInst <<
"\n");
406 const SCEV *AccessFn =
410 if (BasePointer ==
nullptr) {
413 <<
"ERROR: failed to delinearize, can't identify base pointer\n");
417 bool IsFixedSize =
false;
419 if (tryDelinearizeFixedSize(&SE, &StoreOrLoadInst, AccessFn, Subscripts)) {
422 Sizes.push_back(ElemSize);
424 <<
"', AccessFn: " << *AccessFn <<
"\n");
432 <<
"', AccessFn: " << *AccessFn <<
"\n");
437 if (Subscripts.empty() || Sizes.empty() ||
438 Subscripts.size() != Sizes.size()) {
443 <<
"ERROR: failed to delinearize reference\n");
454 const SCEVAddRecExpr *AccessFnAR = dyn_cast<SCEVAddRecExpr>(AccessFn);
463 Subscripts.push_back(Div);
464 Sizes.push_back(ElemSize);
467 return all_of(Subscripts, [&](
const SCEV *Subscript) {
468 return isSimpleAddRecurrence(*Subscript, *L);
475 bool IndexedReference::isLoopInvariant(
const Loop &L)
const {
477 assert(
Addr !=
nullptr &&
"Expecting either a load or a store instruction");
485 bool allCoeffForLoopAreZero =
all_of(Subscripts, [&](
const SCEV *Subscript) {
486 return isCoeffForLoopZeroOrInvariant(*Subscript, L);
489 return allCoeffForLoopAreZero;
492 bool IndexedReference::isConsecutive(
const Loop &L,
unsigned CLS)
const {
495 const SCEV *LastSubscript = Subscripts.back();
496 for (
const SCEV *Subscript : Subscripts) {
497 if (Subscript == LastSubscript)
499 if (!isCoeffForLoopZeroOrInvariant(*Subscript, L))
504 const SCEV *Coeff = getLastCoefficient();
505 const SCEV *ElemSize = Sizes.back();
513 int IndexedReference::getSubscriptIndex(
const Loop &L)
const {
516 if (AR && AR->
getLoop() == &L) {
523 const SCEV *IndexedReference::getLastCoefficient()
const {
525 auto *AR = cast<SCEVAddRecExpr>(LastSubscript);
529 bool IndexedReference::isCoeffForLoopZeroOrInvariant(
const SCEV &Subscript,
530 const Loop &L)
const {
532 return (AR !=
nullptr) ? AR->
getLoop() != &L
536 bool IndexedReference::isSimpleAddRecurrence(
const SCEV &Subscript,
537 const Loop &L)
const {
538 if (!isa<SCEVAddRecExpr>(Subscript))
560 return AA.isMustAlias(Loc1, Loc2);
567 for (
const auto &LC : CC.LoopCosts) {
568 const Loop *L = LC.first;
569 OS <<
"Loop '" << L->
getName() <<
"' has cost = " << LC.second <<
"\n";
580 assert(!
Loops.empty() &&
"Expecting a non-empty loop vector.");
585 TripCounts.push_back({L, TripCount});
588 calculateCacheFootprint();
591 std::unique_ptr<CacheCost>
595 LLVM_DEBUG(
dbgs() <<
"Expecting the outermost loop in a loop nest\n");
603 LLVM_DEBUG(
dbgs() <<
"Cannot compute cache cost of loop nest with more "
604 "than one innermost loop\n");
608 return std::make_unique<CacheCost>(
Loops, AR.
LI, AR.
SE, AR.
TTI, AR.
AA, DI, TRT);
611 void CacheCost::calculateCacheFootprint() {
614 if (!populateReferenceGroups(RefGroups))
618 for (
const Loop *L : Loops) {
621 [L](
const LoopCacheCostTy &LCC) {
return LCC.first == L; }) &&
622 "Should not add duplicate element");
623 CacheCostTy LoopCost = computeLoopCacheCost(*L, RefGroups);
624 LoopCosts.push_back(std::make_pair(L, LoopCost));
632 assert(RefGroups.empty() &&
"Reference groups should be empty");
636 assert(InnerMostLoop !=
nullptr &&
"Expecting a valid innermost loop");
640 if (!isa<StoreInst>(
I) && !isa<LoadInst>(
I))
651 dbgs() <<
"References:\n";
669 R->hasTemporalReuse(Representative, *TRT, *InnerMostLoop, DI,
AA);
671 R->hasSpacialReuse(Representative, CLS,
AA);
673 if ((HasTemporalReuse.hasValue() && *HasTemporalReuse) ||
674 (HasSpacialReuse.
hasValue() && *HasSpacialReuse)) {
689 if (RefGroups.empty())
693 dbgs() <<
"\nIDENTIFIED REFERENCE GROUPS:\n";
697 for (
const auto &
IR : RG)
708 CacheCost::computeLoopCacheCost(
const Loop &L,
714 <<
"' as innermost loop.\n");
718 for (
const auto &TC : TripCounts) {
721 TripCountsProduct *= TC.second;
726 CacheCostTy RefGroupCost = computeRefGroupCacheCost(RG, L);
727 LoopCost += RefGroupCost * TripCountsProduct;
731 <<
"' has cost=" << LoopCost <<
"\n");
737 const Loop &L)
const {
738 assert(!
RG.empty() &&
"Reference group should have at least one member.");
A set of analyses that are preserved following a run of a transformation pass.
static cl::opt< unsigned > TemporalReuseThreshold("temporal-reuse-threshold", cl::init(2), cl::Hidden, cl::desc("Use this to specify the max. distance between array elements " "accessed in a loop so that the elements are classified to have " "temporal reuse"))
static bool isOneDimensionalArray(const SCEV &AccessFn, const SCEV &ElemSize, const Loop &L, ScalarEvolution &SE)
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
This is an optimization pass for GlobalISel generic memory operations.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Optional< bool > hasSpacialReuse(const IndexedReference &Other, unsigned CLS, AAResults &AA) const
Return true/false if the current object and the indexed reference Other are/aren't in the same cache ...
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const Function * getParent() const
Return the enclosing method, or null if none.
const SCEV * getStart() const
const SCEV * getSubscript(unsigned SubNum) const
Represents a single loop in the control flow graph.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
const SCEV * getAddRecExpr(const SCEV *Start, const SCEV *Step, const Loop *L, SCEV::NoWrapFlags Flags)
Get an add recurrence expression for the specified loop.
The main scalar evolution driver.
const SCEV * getNoopOrAnyExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
The instances of the Type class are immutable: once they are created, they are never changed.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
static Loop * getInnerMostLoop(const LoopVectorTy &Loops)
Retrieve the innermost loop in the given loop nest Loops.
const SCEV * getUDivExactExpr(const SCEV *LHS, const SCEV *RHS)
Get a canonical unsigned division expression, or something simpler if possible.
Type * getWiderType(Type *Ty1, Type *Ty2) const
ConstantInt * getValue() const
const SCEV * getPointerBase(const SCEV *V)
Transitively follow the chain of pointer-type operands until reaching a SCEV that does not have a sin...
LLVM Basic Block Representation.
constexpr bool hasValue() const
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
This is the shared class of boolean and integer constants.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const SCEV * getTripCountFromExitCount(const SCEV *ExitCount, bool Extend=true)
Convert from an "exit count" (i.e.
static std::unique_ptr< CacheCost > getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI, Optional< unsigned > TRT=None)
Create a CacheCost for the loop nest rooted by Root.
static cl::opt< unsigned > DefaultTripCount("default-trip-count", cl::init(100), cl::Hidden, cl::desc("Use this to specify the default trip count of a loop"))
const SCEV * getUDivExpr(const SCEV *LHS, const SCEV *RHS)
Get a canonical unsigned division expression, or something simpler if possible.
size_t getNumSubscripts() const
iterator_range< bf_iterator< T > > breadth_first(const T &G)
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
bool isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
CacheCostTy computeRefCost(const Loop &L, unsigned CLS) const
Compute the cost of the reference w.r.t.
This class implements an extremely fast bulk output stream that can only output to a stream.
Optional< bool > hasTemporalReuse(const IndexedReference &Other, unsigned MaxDistance, const Loop &L, DependenceInfo &DI, AAResults &AA) const
Return true if the current object and the indexed reference Other have distance smaller than MaxDista...
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Statically lint checks LLVM IR
IndexedReference(Instruction &StoreOrLoadInst, const LoopInfo &LI, ScalarEvolution &SE)
Construct an indexed reference given a StoreOrLoadInst instruction.
StringRef getName() const
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
unsigned getSmallConstantTripCount(const Loop *L)
Returns the exact trip count of the loop if we can compute it, and the result is a small constant.
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to,...
This class represents an analyzed expression in the program.
DependenceInfo - This class is the main dependence-analysis driver.
static const SCEV * computeTripCount(const Loop &L, const SCEV &ElemSize, ScalarEvolution &SE)
Compute the trip count for the given loop L or assume a default value if it is not a compile time con...
const Value * getPointerOperand(const Value *V)
A helper function that returns the pointer operand of a load, store or GEP instruction.
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Represents a memory reference as a base pointer and a set of indexing operations.
initializer< Ty > init(const Ty &Val)
This class represents a constant integer value.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
unsigned getLoopDepth() const
Return the nesting level of this loop.
const SCEV * getLastSubscript() const
NoWrapFlags getNoWrapFlags(NoWrapFlags Mask=NoWrapMask) const
const SCEV * getConstant(ConstantInt *V)
@ ICMP_ULT
unsigned less than
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
bool isOutermost() const
Return true if the loop does not have a parent (natural) loop.
const SCEV * getElementSize(Instruction *Inst)
Return the size of an element read or written by Inst.
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
bool getIndexExpressionsFromGEP(ScalarEvolution &SE, const GetElementPtrInst *GEP, SmallVectorImpl< const SCEV * > &Subscripts, SmallVectorImpl< int > &Sizes)
Gathers the individual index expressions from a GEP instruction.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
const Loop * getLoop() const
void delinearize(ScalarEvolution &SE, const SCEV *Expr, SmallVectorImpl< const SCEV * > &Subscripts, SmallVectorImpl< const SCEV * > &Sizes, const SCEV *ElementSize)
Split this SCEVAddRecExpr into two vectors of SCEVs representing the subscripts and sizes of an array...
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
TargetTransformInfo & TTI
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
This node represents a polynomial recurrence on the trip count of the specified loop.
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
BlockT * getHeader() const
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
bool is_sorted(R &&Range, Compare C)
Wrapper function around std::is_sorted to check if elements in a range R are sorted with respect to a...
std::unique_ptr< Dependence > depends(Instruction *Src, Instruction *Dst, bool PossiblyLoopIndependent)
depends - Tests for a dependence between the Src and Dst instructions.
raw_ostream & indent(unsigned NumSpaces)
indent - Insert 'NumSpaces' spaces.
const BasicBlock * getParent() const
CacheCost represents the estimated cost of an inner loop as the number of cache lines used by the memo...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Type * getType() const
Return the LLVM type of this SCEV expression.
A container for analyses that lazily runs them and caches their results.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
The same transformation can work with an even modulo with the addition of a and shrink the compare RHS by the same amount Unless the target supports that transformation probably isn t worthwhile The transformation can also easily be made to work with non zero equality for n
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
static constexpr CacheCostTy InvalidCost
bool isKnownNegative(const SCEV *S)
Test if the given expression is known to be negative.
const SCEV * getSCEVAtScope(const SCEV *S, const Loop *L)
Return a SCEV expression for the specified value at the specified scope in the program.
CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, ScalarEvolution &SE, TargetTransformInfo &TTI, AAResults &AA, DependenceInfo &DI, Optional< unsigned > TRT=None)
Construct a CacheCost object for the loop nest described by Loops.
LLVM Value Representation.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
Optional< std::vector< StOtherPiece > > Other