LLVM 22.0.0git
LoopCacheAnalysis.cpp
Go to the documentation of this file.
1//===- LoopCacheAnalysis.cpp - Loop Cache Analysis -------------------------==//
2//
3// The LLVM Compiler Infrastructure
4//
5// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6// See https://llvm.org/LICENSE.txt for license information.
7// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8//
9//===----------------------------------------------------------------------===//
10///
11/// \file
12/// This file defines the implementation for the loop cache analysis.
13/// The implementation is largely based on the following paper:
14///
15/// Compiler Optimizations for Improving Data Locality
16/// By: Steve Carr, Katherine S. McKinley, Chau-Wen Tseng
17/// http://www.cs.utexas.edu/users/mckinley/papers/asplos-1994.pdf
18///
19/// The general approach taken to estimate the number of cache lines used by the
20/// memory references in an inner loop is:
21/// 1. Partition memory references that exhibit temporal or spacial reuse
22/// into reference groups.
23/// 2. For each loop L in the a loop nest LN:
24/// a. Compute the cost of the reference group
25/// b. Compute the loop cost by summing up the reference groups costs
26//===----------------------------------------------------------------------===//
27
30#include "llvm/ADT/Sequence.h"
39#include "llvm/Support/Debug.h"
40
41using namespace llvm;
42
43#define DEBUG_TYPE "loop-cache-cost"
44
46 "default-trip-count", cl::init(100), cl::Hidden,
47 cl::desc("Use this to specify the default trip count of a loop"));
48
49// In this analysis two array references are considered to exhibit temporal
50// reuse if they access either the same memory location, or a memory location
51// with distance smaller than a configurable threshold.
53 "temporal-reuse-threshold", cl::init(2), cl::Hidden,
54 cl::desc("Use this to specify the max. distance between array elements "
55 "accessed in a loop so that the elements are classified to have "
56 "temporal reuse"));
57
58/// Retrieve the innermost loop in the given loop nest \p Loops. It returns a
59/// nullptr if any loops in the loop vector supplied has more than one sibling.
60/// The loop vector is expected to contain loops collected in breadth-first
61/// order.
63 assert(!Loops.empty() && "Expecting a non-empy loop vector");
64
65 Loop *LastLoop = Loops.back();
66 Loop *ParentLoop = LastLoop->getParentLoop();
67
68 if (ParentLoop == nullptr) {
69 assert(Loops.size() == 1 && "Expecting a single loop");
70 return LastLoop;
71 }
72
73 return (llvm::is_sorted(Loops,
74 [](const Loop *L1, const Loop *L2) {
75 return L1->getLoopDepth() < L2->getLoopDepth();
76 }))
77 ? LastLoop
78 : nullptr;
79}
80
81static bool isOneDimensionalArray(const SCEV &AccessFn, const SCEV &ElemSize,
82 const Loop &L, ScalarEvolution &SE) {
83 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(&AccessFn);
84 if (!AR || !AR->isAffine())
85 return false;
86
87 assert(AR->getLoop() && "AR should have a loop");
88
89 // Check that start and increment are not add recurrences.
90 const SCEV *Start = AR->getStart();
91 const SCEV *Step = AR->getStepRecurrence(SE);
92 if (isa<SCEVAddRecExpr>(Start) || isa<SCEVAddRecExpr>(Step))
93 return false;
94
95 // Check that start and increment are both invariant in the loop.
96 if (!SE.isLoopInvariant(Start, &L) || !SE.isLoopInvariant(Step, &L))
97 return false;
98
99 const SCEV *StepRec = AR->getStepRecurrence(SE);
100 if (StepRec && SE.isKnownNegative(StepRec))
101 StepRec = SE.getNegativeSCEV(StepRec);
102
103 return StepRec == &ElemSize;
104}
105
106/// Compute the trip count for the given loop \p L or assume a default value if
107/// it is not a compile time constant. Return the SCEV expression for the trip
108/// count.
109static const SCEV *computeTripCount(const Loop &L, const SCEV &ElemSize,
110 ScalarEvolution &SE) {
111 const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(&L);
112 const SCEV *TripCount = (!isa<SCEVCouldNotCompute>(BackedgeTakenCount) &&
113 isa<SCEVConstant>(BackedgeTakenCount))
114 ? SE.getTripCountFromExitCount(BackedgeTakenCount)
115 : nullptr;
116
117 if (!TripCount) {
118 LLVM_DEBUG(dbgs() << "Trip count of loop " << L.getName()
119 << " could not be computed, using DefaultTripCount\n");
120 TripCount = SE.getConstant(ElemSize.getType(), DefaultTripCount);
121 }
122
123 return TripCount;
124}
125
126//===----------------------------------------------------------------------===//
127// IndexedReference implementation
128//
130 if (!R.IsValid) {
131 OS << R.StoreOrLoadInst;
132 OS << ", IsValid=false.";
133 return OS;
134 }
135
136 OS << *R.BasePointer;
137 for (const SCEV *Subscript : R.Subscripts)
138 OS << "[" << *Subscript << "]";
139
140 OS << ", Sizes: ";
141 for (const SCEV *Size : R.Sizes)
142 OS << "[" << *Size << "]";
143
144 return OS;
145}
146
148 const LoopInfo &LI, ScalarEvolution &SE)
149 : StoreOrLoadInst(StoreOrLoadInst), SE(SE) {
150 assert((isa<StoreInst>(StoreOrLoadInst) || isa<LoadInst>(StoreOrLoadInst)) &&
151 "Expecting a load or store instruction");
152
153 IsValid = delinearize(LI);
154 if (IsValid)
155 LLVM_DEBUG(dbgs().indent(2) << "Succesfully delinearized: " << *this
156 << "\n");
157}
158
159std::optional<bool>
161 AAResults &AA) const {
162 assert(IsValid && "Expecting a valid reference");
163
164 if (BasePointer != Other.getBasePointer() && !isAliased(Other, AA)) {
166 << "No spacial reuse: different base pointers\n");
167 return false;
168 }
169
170 unsigned NumSubscripts = getNumSubscripts();
171 if (NumSubscripts != Other.getNumSubscripts()) {
173 << "No spacial reuse: different number of subscripts\n");
174 return false;
175 }
176
177 // all subscripts must be equal, except the leftmost one (the last one).
178 for (auto SubNum : seq<unsigned>(0, NumSubscripts - 1)) {
179 if (getSubscript(SubNum) != Other.getSubscript(SubNum)) {
180 LLVM_DEBUG(dbgs().indent(2) << "No spacial reuse, different subscripts: "
181 << "\n\t" << *getSubscript(SubNum) << "\n\t"
182 << *Other.getSubscript(SubNum) << "\n");
183 return false;
184 }
185 }
186
187 // the difference between the last subscripts must be less than the cache line
188 // size.
189 const SCEV *LastSubscript = getLastSubscript();
190 const SCEV *OtherLastSubscript = Other.getLastSubscript();
192 SE.getMinusSCEV(LastSubscript, OtherLastSubscript));
193
194 if (Diff == nullptr) {
196 << "No spacial reuse, difference between subscript:\n\t"
197 << *LastSubscript << "\n\t" << OtherLastSubscript
198 << "\nis not constant.\n");
199 return std::nullopt;
200 }
201
202 bool InSameCacheLine = (Diff->getValue()->getSExtValue() < CLS);
203
204 LLVM_DEBUG({
205 if (InSameCacheLine)
206 dbgs().indent(2) << "Found spacial reuse.\n";
207 else
208 dbgs().indent(2) << "No spacial reuse.\n";
209 });
210
211 return InSameCacheLine;
212}
213
214std::optional<bool>
216 unsigned MaxDistance, const Loop &L,
217 DependenceInfo &DI, AAResults &AA) const {
218 assert(IsValid && "Expecting a valid reference");
219
220 if (BasePointer != Other.getBasePointer() && !isAliased(Other, AA)) {
222 << "No temporal reuse: different base pointer\n");
223 return false;
224 }
225
226 std::unique_ptr<Dependence> D =
227 DI.depends(&StoreOrLoadInst, &Other.StoreOrLoadInst);
228
229 if (D == nullptr) {
230 LLVM_DEBUG(dbgs().indent(2) << "No temporal reuse: no dependence\n");
231 return false;
232 }
233
234 if (D->isLoopIndependent()) {
235 LLVM_DEBUG(dbgs().indent(2) << "Found temporal reuse\n");
236 return true;
237 }
238
239 // Check the dependence distance at every loop level. There is temporal reuse
240 // if the distance at the given loop's depth is small (|d| <= MaxDistance) and
241 // it is zero at every other loop level.
242 int LoopDepth = L.getLoopDepth();
243 int Levels = D->getLevels();
244 for (int Level = 1; Level <= Levels; ++Level) {
245 const SCEV *Distance = D->getDistance(Level);
246 const SCEVConstant *SCEVConst = dyn_cast_or_null<SCEVConstant>(Distance);
247
248 if (SCEVConst == nullptr) {
249 LLVM_DEBUG(dbgs().indent(2) << "No temporal reuse: distance unknown\n");
250 return std::nullopt;
251 }
252
253 const ConstantInt &CI = *SCEVConst->getValue();
254 if (Level != LoopDepth && !CI.isZero()) {
256 << "No temporal reuse: distance is not zero at depth=" << Level
257 << "\n");
258 return false;
259 } else if (Level == LoopDepth && CI.getSExtValue() > MaxDistance) {
261 dbgs().indent(2)
262 << "No temporal reuse: distance is greater than MaxDistance at depth="
263 << Level << "\n");
264 return false;
265 }
266 }
267
268 LLVM_DEBUG(dbgs().indent(2) << "Found temporal reuse\n");
269 return true;
270}
271
273 unsigned CLS) const {
274 assert(IsValid && "Expecting a valid reference");
275 LLVM_DEBUG({
276 dbgs().indent(2) << "Computing cache cost for:\n";
277 dbgs().indent(4) << *this << "\n";
278 });
279
280 // If the indexed reference is loop invariant the cost is one.
281 if (isLoopInvariant(L)) {
282 LLVM_DEBUG(dbgs().indent(4) << "Reference is loop invariant: RefCost=1\n");
283 return 1;
284 }
285
286 const SCEV *TripCount = computeTripCount(L, *Sizes.back(), SE);
287 assert(TripCount && "Expecting valid TripCount");
288 LLVM_DEBUG(dbgs() << "TripCount=" << *TripCount << "\n");
289
290 const SCEV *RefCost = nullptr;
291 const SCEV *Stride = nullptr;
292 if (isConsecutive(L, Stride, CLS)) {
293 // If the indexed reference is 'consecutive' the cost is
294 // (TripCount*Stride)/CLS.
295 assert(Stride != nullptr &&
296 "Stride should not be null for consecutive access!");
297 Type *WiderType = SE.getWiderType(Stride->getType(), TripCount->getType());
298 const SCEV *CacheLineSize = SE.getConstant(WiderType, CLS);
299 Stride = SE.getNoopOrAnyExtend(Stride, WiderType);
300 TripCount = SE.getNoopOrZeroExtend(TripCount, WiderType);
301 const SCEV *Numerator = SE.getMulExpr(Stride, TripCount);
302 // Round the fractional cost up to the nearest integer number.
303 // The impact is the most significant when cost is calculated
304 // to be a number less than one, because it makes more sense
305 // to say one cache line is used rather than zero cache line
306 // is used.
307 RefCost = SE.getUDivCeilSCEV(Numerator, CacheLineSize);
308
310 << "Access is consecutive: RefCost=(TripCount*Stride)/CLS="
311 << *RefCost << "\n");
312 } else {
313 // If the indexed reference is not 'consecutive' the cost is proportional to
314 // the trip count and the depth of the dimension which the subject loop
315 // subscript is accessing. We try to estimate this by multiplying the cost
316 // by the trip counts of loops corresponding to the inner dimensions. For
317 // example, given the indexed reference 'A[i][j][k]', and assuming the
318 // i-loop is in the innermost position, the cost would be equal to the
319 // iterations of the i-loop multiplied by iterations of the j-loop.
320 RefCost = TripCount;
321
322 int Index = getSubscriptIndex(L);
323 assert(Index >= 0 && "Could not locate a valid Index");
324
325 for (unsigned I = Index + 1; I < getNumSubscripts() - 1; ++I) {
327 assert(AR && AR->getLoop() && "Expecting valid loop");
328 const SCEV *TripCount =
329 computeTripCount(*AR->getLoop(), *Sizes.back(), SE);
330 Type *WiderType = SE.getWiderType(RefCost->getType(), TripCount->getType());
331 // For the multiplication result to fit, request a type twice as wide.
332 WiderType = WiderType->getExtendedType();
333 RefCost = SE.getMulExpr(SE.getNoopOrZeroExtend(RefCost, WiderType),
334 SE.getNoopOrZeroExtend(TripCount, WiderType));
335 }
336
338 << "Access is not consecutive: RefCost=" << *RefCost << "\n");
339 }
340 assert(RefCost && "Expecting a valid RefCost");
341
342 // Attempt to fold RefCost into a constant.
343 // CacheCostTy is a signed integer, but the tripcount value can be large
344 // and may not fit, so saturate/limit the value to the maximum signed
345 // integer value.
346 if (auto ConstantCost = dyn_cast<SCEVConstant>(RefCost))
347 return ConstantCost->getValue()->getLimitedValue(
348 std::numeric_limits<int64_t>::max());
349
351 << "RefCost is not a constant! Setting to RefCost=InvalidCost "
352 "(invalid value).\n");
353
355}
356
357bool IndexedReference::tryDelinearizeFixedSize(
358 const SCEV *AccessFn, SmallVectorImpl<const SCEV *> &Subscripts) {
359 SmallVector<int, 4> ArraySizes;
360 if (!tryDelinearizeFixedSizeImpl(&SE, &StoreOrLoadInst, AccessFn, Subscripts,
361 ArraySizes))
362 return false;
363
364 // Populate Sizes with scev expressions to be used in calculations later.
365 for (auto Idx : seq<unsigned>(1, Subscripts.size()))
366 Sizes.push_back(
367 SE.getConstant(Subscripts[Idx]->getType(), ArraySizes[Idx - 1]));
368
369 LLVM_DEBUG({
370 dbgs() << "Delinearized subscripts of fixed-size array\n"
371 << "GEP:" << *getLoadStorePointerOperand(&StoreOrLoadInst)
372 << "\n";
373 });
374 return true;
375}
376
377bool IndexedReference::delinearize(const LoopInfo &LI) {
378 assert(Subscripts.empty() && "Subscripts should be empty");
379 assert(Sizes.empty() && "Sizes should be empty");
380 assert(!IsValid && "Should be called once from the constructor");
381 LLVM_DEBUG(dbgs() << "Delinearizing: " << StoreOrLoadInst << "\n");
382
383 const SCEV *ElemSize = SE.getElementSize(&StoreOrLoadInst);
384 const BasicBlock *BB = StoreOrLoadInst.getParent();
385
386 if (Loop *L = LI.getLoopFor(BB)) {
387 const SCEV *AccessFn =
388 SE.getSCEVAtScope(getPointerOperand(&StoreOrLoadInst), L);
389
390 BasePointer = dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFn));
391 if (BasePointer == nullptr) {
393 dbgs().indent(2)
394 << "ERROR: failed to delinearize, can't identify base pointer\n");
395 return false;
396 }
397
398 bool IsFixedSize = false;
399 // Try to delinearize fixed-size arrays.
400 if (tryDelinearizeFixedSize(AccessFn, Subscripts)) {
401 IsFixedSize = true;
402 // The last element of Sizes is the element size.
403 Sizes.push_back(ElemSize);
404 LLVM_DEBUG(dbgs().indent(2) << "In Loop '" << L->getName()
405 << "', AccessFn: " << *AccessFn << "\n");
406 }
407
408 AccessFn = SE.getMinusSCEV(AccessFn, BasePointer);
409
410 // Try to delinearize parametric-size arrays.
411 if (!IsFixedSize) {
412 LLVM_DEBUG(dbgs().indent(2) << "In Loop '" << L->getName()
413 << "', AccessFn: " << *AccessFn << "\n");
414 llvm::delinearize(SE, AccessFn, Subscripts, Sizes,
415 SE.getElementSize(&StoreOrLoadInst));
416 }
417
418 if (Subscripts.empty() || Sizes.empty() ||
419 Subscripts.size() != Sizes.size()) {
420 // Attempt to determine whether we have a single dimensional array access.
421 // before giving up.
422 if (!isOneDimensionalArray(*AccessFn, *ElemSize, *L, SE)) {
423 LLVM_DEBUG(dbgs().indent(2)
424 << "ERROR: failed to delinearize reference\n");
425 Subscripts.clear();
426 Sizes.clear();
427 return false;
428 }
429
430 // The array may be accessed in reverse, for example:
431 // for (i = N; i > 0; i--)
432 // A[i] = 0;
433 // In this case, reconstruct the access function using the absolute value
434 // of the step recurrence.
435 const SCEVAddRecExpr *AccessFnAR = dyn_cast<SCEVAddRecExpr>(AccessFn);
436 const SCEV *StepRec = AccessFnAR ? AccessFnAR->getStepRecurrence(SE) : nullptr;
437
438 if (StepRec && SE.isKnownNegative(StepRec))
439 AccessFn = SE.getAddRecExpr(
440 AccessFnAR->getStart(), SE.getNegativeSCEV(StepRec),
442 const SCEV *Div = SE.getUDivExactExpr(AccessFn, ElemSize);
443 Subscripts.push_back(Div);
444 Sizes.push_back(ElemSize);
445 }
446
447 return all_of(Subscripts, [&](const SCEV *Subscript) {
448 return isSimpleAddRecurrence(*Subscript, *L);
449 });
450 }
451
452 return false;
453}
454
455bool IndexedReference::isLoopInvariant(const Loop &L) const {
456 Value *Addr = getPointerOperand(&StoreOrLoadInst);
457 assert(Addr != nullptr && "Expecting either a load or a store instruction");
458 assert(SE.isSCEVable(Addr->getType()) && "Addr should be SCEVable");
459
460 if (SE.isLoopInvariant(SE.getSCEV(Addr), &L))
461 return true;
462
463 // The indexed reference is loop invariant if none of the coefficients use
464 // the loop induction variable.
465 bool allCoeffForLoopAreZero = all_of(Subscripts, [&](const SCEV *Subscript) {
466 return isCoeffForLoopZeroOrInvariant(*Subscript, L);
467 });
468
469 return allCoeffForLoopAreZero;
470}
471
472bool IndexedReference::isConsecutive(const Loop &L, const SCEV *&Stride,
473 unsigned CLS) const {
474 // The indexed reference is 'consecutive' if the only coefficient that uses
475 // the loop induction variable is the last one...
476 const SCEV *LastSubscript = Subscripts.back();
477 for (const SCEV *Subscript : Subscripts) {
478 if (Subscript == LastSubscript)
479 continue;
480 if (!isCoeffForLoopZeroOrInvariant(*Subscript, L))
481 return false;
482 }
483
484 // ...and the access stride is less than the cache line size.
485 const SCEV *Coeff = getLastCoefficient();
486 const SCEV *ElemSize = Sizes.back();
487 Type *WiderType = SE.getWiderType(Coeff->getType(), ElemSize->getType());
488 // FIXME: This assumes that all values are signed integers which may
489 // be incorrect in unusual codes and incorrectly use sext instead of zext.
490 // for (uint32_t i = 0; i < 512; ++i) {
491 // uint8_t trunc = i;
492 // A[trunc] = 42;
493 // }
494 // This consecutively iterates twice over A. If `trunc` is sign-extended,
495 // we would conclude that this may iterate backwards over the array.
496 // However, LoopCacheAnalysis is heuristic anyway and transformations must
497 // not result in wrong optimizations if the heuristic was incorrect.
498 Stride = SE.getMulExpr(SE.getNoopOrSignExtend(Coeff, WiderType),
499 SE.getNoopOrSignExtend(ElemSize, WiderType));
500 const SCEV *CacheLineSize = SE.getConstant(Stride->getType(), CLS);
501
502 Stride = SE.isKnownNegative(Stride) ? SE.getNegativeSCEV(Stride) : Stride;
503 return SE.isKnownPredicate(ICmpInst::ICMP_ULT, Stride, CacheLineSize);
504}
505
506int IndexedReference::getSubscriptIndex(const Loop &L) const {
507 for (auto Idx : seq<int>(0, getNumSubscripts())) {
508 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(getSubscript(Idx));
509 if (AR && AR->getLoop() == &L) {
510 return Idx;
511 }
512 }
513 return -1;
514}
515
516const SCEV *IndexedReference::getLastCoefficient() const {
517 const SCEV *LastSubscript = getLastSubscript();
518 auto *AR = cast<SCEVAddRecExpr>(LastSubscript);
519 return AR->getStepRecurrence(SE);
520}
521
522bool IndexedReference::isCoeffForLoopZeroOrInvariant(const SCEV &Subscript,
523 const Loop &L) const {
524 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(&Subscript);
525 return (AR != nullptr) ? AR->getLoop() != &L
526 : SE.isLoopInvariant(&Subscript, &L);
527}
528
529bool IndexedReference::isSimpleAddRecurrence(const SCEV &Subscript,
530 const Loop &L) const {
531 if (!isa<SCEVAddRecExpr>(Subscript))
532 return false;
533
534 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(&Subscript);
535 assert(AR->getLoop() && "AR should have a loop");
536
537 if (!AR->isAffine())
538 return false;
539
540 const SCEV *Start = AR->getStart();
541 const SCEV *Step = AR->getStepRecurrence(SE);
542
543 if (!SE.isLoopInvariant(Start, &L) || !SE.isLoopInvariant(Step, &L))
544 return false;
545
546 return true;
547}
548
549bool IndexedReference::isAliased(const IndexedReference &Other,
550 AAResults &AA) const {
551 const auto &Loc1 = MemoryLocation::get(&StoreOrLoadInst);
552 const auto &Loc2 = MemoryLocation::get(&Other.StoreOrLoadInst);
553 return AA.isMustAlias(Loc1, Loc2);
554}
555
556//===----------------------------------------------------------------------===//
557// CacheCost implementation
558//
560 for (const auto &LC : CC.LoopCosts) {
561 const Loop *L = LC.first;
562 OS << "Loop '" << L->getName() << "' has cost = " << LC.second << "\n";
563 }
564 return OS;
565}
566
569 AAResults &AA, DependenceInfo &DI,
570 std::optional<unsigned> TRT)
571 : Loops(Loops), TRT(TRT.value_or(TemporalReuseThreshold)), LI(LI), SE(SE),
572 TTI(TTI), AA(AA), DI(DI) {
573 assert(!Loops.empty() && "Expecting a non-empty loop vector.");
574
575 for (const Loop *L : Loops) {
576 unsigned TripCount = SE.getSmallConstantTripCount(L);
577 TripCount = (TripCount == 0) ? DefaultTripCount : TripCount;
578 TripCounts.push_back({L, TripCount});
579 }
580
581 calculateCacheFootprint();
582}
583
584std::unique_ptr<CacheCost>
586 DependenceInfo &DI, std::optional<unsigned> TRT) {
587 if (!Root.isOutermost()) {
588 LLVM_DEBUG(dbgs() << "Expecting the outermost loop in a loop nest\n");
589 return nullptr;
590 }
591
592 LoopVectorTy Loops;
593 append_range(Loops, breadth_first(&Root));
594
595 if (!getInnerMostLoop(Loops)) {
596 LLVM_DEBUG(dbgs() << "Cannot compute cache cost of loop nest with more "
597 "than one innermost loop\n");
598 return nullptr;
599 }
600
601 return std::make_unique<CacheCost>(Loops, AR.LI, AR.SE, AR.TTI, AR.AA, DI, TRT);
602}
603
604void CacheCost::calculateCacheFootprint() {
605 LLVM_DEBUG(dbgs() << "POPULATING REFERENCE GROUPS\n");
606 ReferenceGroupsTy RefGroups;
607 if (!populateReferenceGroups(RefGroups))
608 return;
609
610 LLVM_DEBUG(dbgs() << "COMPUTING LOOP CACHE COSTS\n");
611 for (const Loop *L : Loops) {
613 LoopCosts,
614 [L](const LoopCacheCostTy &LCC) { return LCC.first == L; }) &&
615 "Should not add duplicate element");
616 CacheCostTy LoopCost = computeLoopCacheCost(*L, RefGroups);
617 LoopCosts.push_back(std::make_pair(L, LoopCost));
618 }
619
620 sortLoopCosts();
621 RefGroups.clear();
622}
623
624bool CacheCost::populateReferenceGroups(ReferenceGroupsTy &RefGroups) const {
625 assert(RefGroups.empty() && "Reference groups should be empty");
626
627 unsigned CLS = TTI.getCacheLineSize();
628 Loop *InnerMostLoop = getInnerMostLoop(Loops);
629 assert(InnerMostLoop != nullptr && "Expecting a valid innermost loop");
630
631 for (BasicBlock *BB : InnerMostLoop->getBlocks()) {
632 for (Instruction &I : *BB) {
633 if (!isa<StoreInst>(I) && !isa<LoadInst>(I))
634 continue;
635
636 std::unique_ptr<IndexedReference> R(new IndexedReference(I, LI, SE));
637 if (!R->isValid())
638 continue;
639
640 bool Added = false;
641 for (ReferenceGroupTy &RefGroup : RefGroups) {
642 const IndexedReference &Representative = *RefGroup.front();
643 LLVM_DEBUG({
644 dbgs() << "References:\n";
645 dbgs().indent(2) << *R << "\n";
646 dbgs().indent(2) << Representative << "\n";
647 });
648
649
650 // FIXME: Both positive and negative access functions will be placed
651 // into the same reference group, resulting in a bi-directional array
652 // access such as:
653 // for (i = N; i > 0; i--)
654 // A[i] = A[N - i];
655 // having the same cost calculation as a single dimention access pattern
656 // for (i = 0; i < N; i++)
657 // A[i] = A[i];
658 // when in actuality, depending on the array size, the first example
659 // should have a cost closer to 2x the second due to the two cache
660 // access per iteration from opposite ends of the array
661 std::optional<bool> HasTemporalReuse =
662 R->hasTemporalReuse(Representative, *TRT, *InnerMostLoop, DI, AA);
663 std::optional<bool> HasSpacialReuse =
664 R->hasSpacialReuse(Representative, CLS, AA);
665
666 if ((HasTemporalReuse && *HasTemporalReuse) ||
667 (HasSpacialReuse && *HasSpacialReuse)) {
668 RefGroup.push_back(std::move(R));
669 Added = true;
670 break;
671 }
672 }
673
674 if (!Added) {
676 RG.push_back(std::move(R));
677 RefGroups.push_back(std::move(RG));
678 }
679 }
680 }
681
682 if (RefGroups.empty())
683 return false;
684
685 LLVM_DEBUG({
686 dbgs() << "\nIDENTIFIED REFERENCE GROUPS:\n";
687 int n = 1;
688 for (const ReferenceGroupTy &RG : RefGroups) {
689 dbgs().indent(2) << "RefGroup " << n << ":\n";
690 for (const auto &IR : RG)
691 dbgs().indent(4) << *IR << "\n";
692 n++;
693 }
694 dbgs() << "\n";
695 });
696
697 return true;
698}
699
701CacheCost::computeLoopCacheCost(const Loop &L,
702 const ReferenceGroupsTy &RefGroups) const {
703 if (!L.isLoopSimplifyForm())
705
706 LLVM_DEBUG(dbgs() << "Considering loop '" << L.getName()
707 << "' as innermost loop.\n");
708
709 // Compute the product of the trip counts of each other loop in the nest.
710 CacheCostTy TripCountsProduct = 1;
711 for (const auto &TC : TripCounts) {
712 if (TC.first == &L)
713 continue;
714 TripCountsProduct *= TC.second;
715 }
716
717 CacheCostTy LoopCost = 0;
718 for (const ReferenceGroupTy &RG : RefGroups) {
719 CacheCostTy RefGroupCost = computeRefGroupCacheCost(RG, L);
720 LoopCost += RefGroupCost * TripCountsProduct;
721 }
722
723 LLVM_DEBUG(dbgs().indent(2) << "Loop '" << L.getName()
724 << "' has cost=" << LoopCost << "\n");
725
726 return LoopCost;
727}
728
729CacheCostTy CacheCost::computeRefGroupCacheCost(const ReferenceGroupTy &RG,
730 const Loop &L) const {
731 assert(!RG.empty() && "Reference group should have at least one member.");
732
733 const IndexedReference *Representative = RG.front().get();
734 return Representative->computeRefCost(L, TTI.getCacheLineSize());
735}
736
737//===----------------------------------------------------------------------===//
738// LoopCachePrinterPass implementation
739//
742 LPMUpdater &U) {
743 Function *F = L.getHeader()->getParent();
744 DependenceInfo DI(F, &AR.AA, &AR.SE, &AR.LI);
745
746 if (auto CC = CacheCost::getCacheCost(L, AR, DI))
747 OS << *CC;
748
749 return PreservedAnalyses::all();
750}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file builds on the ADT/GraphTraits.h file to build a generic breadth first graph iterator.
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Hexagon Hardware Loops
Legalize the Machine IR a function s Machine IR
Definition Legalizer.cpp:80
static bool isOneDimensionalArray(const SCEV &AccessFn, const SCEV &ElemSize, const Loop &L, ScalarEvolution &SE)
static cl::opt< unsigned > TemporalReuseThreshold("temporal-reuse-threshold", cl::init(2), cl::Hidden, cl::desc("Use this to specify the max. distance between array elements " "accessed in a loop so that the elements are classified to have " "temporal reuse"))
static const SCEV * computeTripCount(const Loop &L, const SCEV &ElemSize, ScalarEvolution &SE)
Compute the trip count for the given loop L or assume a default value if it is not a compile time con...
static Loop * getInnerMostLoop(const LoopVectorTy &Loops)
Retrieve the innermost loop in the given loop nest Loops.
static cl::opt< unsigned > DefaultTripCount("default-trip-count", cl::init(100), cl::Hidden, cl::desc("Use this to specify the default trip count of a loop"))
This file defines the interface for the loop cache analysis.
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
Provides some synthesis utilities to produce sequences of values.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static cl::opt< unsigned > CacheLineSize("cache-line-size", cl::init(0), cl::Hidden, cl::desc("Use this to override the target cache line size when " "specified by the user."))
This pass exposes codegen information to IR-level passes.
bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A trivial helper function to check to see if the specified pointers are must-alias.
CacheCost represents the estimated cost of a inner loop as the number of cache lines used by the memo...
CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, ScalarEvolution &SE, TargetTransformInfo &TTI, AAResults &AA, DependenceInfo &DI, std::optional< unsigned > TRT=std::nullopt)
Construct a CacheCost object for the loop nest described by Loops.
static std::unique_ptr< CacheCost > getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI, std::optional< unsigned > TRT=std::nullopt)
Create a CacheCost for the loop nest rooted by Root.
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:214
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition Constants.h:169
DependenceInfo - This class is the main dependence-analysis driver.
LLVM_ABI std::unique_ptr< Dependence > depends(Instruction *Src, Instruction *Dst, bool UnderRuntimeAssumptions=false)
depends - Tests for a dependence between the Src and Dst instructions.
Represents a memory reference as a base pointer and a set of indexing operations.
CacheCostTy computeRefCost(const Loop &L, unsigned CLS) const
Compute the cost of the reference w.r.t.
const SCEV * getSubscript(unsigned SubNum) const
std::optional< bool > hasSpacialReuse(const IndexedReference &Other, unsigned CLS, AAResults &AA) const
Return true/false if the current object and the indexed reference Other are/aren't in the same cache ...
std::optional< bool > hasTemporalReuse(const IndexedReference &Other, unsigned MaxDistance, const Loop &L, DependenceInfo &DI, AAResults &AA) const
Return true if the current object and the indexed reference Other have distance smaller than MaxDista...
IndexedReference(Instruction &StoreOrLoadInst, const LoopInfo &LI, ScalarEvolution &SE)
Construct an indexed reference given a StoreOrLoadInst instruction.
const SCEV * getLastSubscript() const
size_t getNumSubscripts() const
static InstructionCost getInvalid(CostType Val=0)
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
bool isOutermost() const
Return true if the loop does not have a parent (natural) loop.
unsigned getLoopDepth() const
Return the nesting level of this loop.
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
This class represents a constant integer value.
ConstantInt * getValue() const
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM_ABI bool isKnownNegative(const SCEV *S)
Test if the given expression is known to be negative.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
LLVM_ABI const SCEV * getConstant(ConstantInt *V)
LLVM_ABI const SCEV * getTripCountFromExitCount(const SCEV *ExitCount)
A version of getTripCountFromExitCount below which always picks an evaluation type which can not resu...
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI Type * getExtendedType() const
Given scalar/vector integer type, returns a type with elements twice as wide as in the original type.
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
raw_ostream & indent(unsigned NumSpaces)
indent - Insert 'NumSpaces' spaces.
Abstract Attribute helper functions.
Definition Attributor.h:165
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
InstructionCost CacheCostTy
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2136
SmallVector< std::unique_ptr< IndexedReference >, 8 > ReferenceGroupTy
A reference group represents a set of memory references that exhibit temporal or spacial reuse.
const Value * getPointerOperand(const Value *V)
A helper function that returns the pointer operand of a load, store or GEP instruction.
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
SmallVector< Loop *, 8 > LoopVectorTy
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
bool is_sorted(R &&Range, Compare C)
Wrapper function around std::is_sorted to check if elements in a range R are sorted with respect to a...
Definition STLExtras.h:1920
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
iterator_range< bf_iterator< T > > breadth_first(const T &G)
@ Other
Any other memory.
Definition ModRef.h:68
bool tryDelinearizeFixedSizeImpl(ScalarEvolution *SE, Instruction *Inst, const SCEV *AccessFn, SmallVectorImpl< const SCEV * > &Subscripts, SmallVectorImpl< int > &Sizes)
Implementation of fixed size array delinearization.
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
void delinearize(ScalarEvolution &SE, const SCEV *Expr, SmallVectorImpl< const SCEV * > &Subscripts, SmallVectorImpl< const SCEV * > &Sizes, const SCEV *ElementSize)
Split this SCEVAddRecExpr into two vectors of SCEVs representing the subscripts and sizes of an array...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
SmallVector< ReferenceGroupTy, 8 > ReferenceGroupsTy
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...