LLVM  13.0.0git
LoopCacheAnalysis.cpp
Go to the documentation of this file.
1 //===- LoopCacheAnalysis.cpp - Loop Cache Analysis -------------------------==//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6 // See https://llvm.org/LICENSE.txt for license information.
7 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8 //
9 //===----------------------------------------------------------------------===//
10 ///
11 /// \file
12 /// This file defines the implementation for the loop cache analysis.
13 /// The implementation is largely based on the following paper:
14 ///
15 /// Compiler Optimizations for Improving Data Locality
16 /// By: Steve Carr, Katherine S. McKinley, Chau-Wen Tseng
17 /// http://www.cs.utexas.edu/users/mckinley/papers/asplos-1994.pdf
18 ///
19 /// The general approach taken to estimate the number of cache lines used by the
20 /// memory references in an inner loop is:
21 /// 1. Partition memory references that exhibit temporal or spacial reuse
22 /// into reference groups.
23 /// 2. For each loop L in the a loop nest LN:
24 /// a. Compute the cost of the reference group
25 /// b. Compute the loop cost by summing up the reference groups costs
26 //===----------------------------------------------------------------------===//
27 
30 #include "llvm/ADT/Sequence.h"
31 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/Analysis/LoopInfo.h"
38 #include "llvm/Support/Debug.h"
39 
40 using namespace llvm;
41 
42 #define DEBUG_TYPE "loop-cache-cost"
43 
45  "default-trip-count", cl::init(100), cl::Hidden,
46  cl::desc("Use this to specify the default trip count of a loop"));
47 
48 // In this analysis two array references are considered to exhibit temporal
49 // reuse if they access either the same memory location, or a memory location
50 // with distance smaller than a configurable threshold.
52  "temporal-reuse-threshold", cl::init(2), cl::Hidden,
53  cl::desc("Use this to specify the max. distance between array elements "
54  "accessed in a loop so that the elements are classified to have "
55  "temporal reuse"));
56 
57 /// Retrieve the innermost loop in the given loop nest \p Loops. It returns a
58 /// nullptr if any loops in the loop vector supplied has more than one sibling.
59 /// The loop vector is expected to contain loops collected in breadth-first
60 /// order.
62  assert(!Loops.empty() && "Expecting a non-empy loop vector");
63 
64  Loop *LastLoop = Loops.back();
65  Loop *ParentLoop = LastLoop->getParentLoop();
66 
67  if (ParentLoop == nullptr) {
68  assert(Loops.size() == 1 && "Expecting a single loop");
69  return LastLoop;
70  }
71 
72  return (llvm::is_sorted(Loops,
73  [](const Loop *L1, const Loop *L2) {
74  return L1->getLoopDepth() < L2->getLoopDepth();
75  }))
76  ? LastLoop
77  : nullptr;
78 }
79 
80 static bool isOneDimensionalArray(const SCEV &AccessFn, const SCEV &ElemSize,
81  const Loop &L, ScalarEvolution &SE) {
82  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(&AccessFn);
83  if (!AR || !AR->isAffine())
84  return false;
85 
86  assert(AR->getLoop() && "AR should have a loop");
87 
88  // Check that start and increment are not add recurrences.
89  const SCEV *Start = AR->getStart();
90  const SCEV *Step = AR->getStepRecurrence(SE);
91  if (isa<SCEVAddRecExpr>(Start) || isa<SCEVAddRecExpr>(Step))
92  return false;
93 
94  // Check that start and increment are both invariant in the loop.
95  if (!SE.isLoopInvariant(Start, &L) || !SE.isLoopInvariant(Step, &L))
96  return false;
97 
98  const SCEV *StepRec = AR->getStepRecurrence(SE);
99  if (StepRec && SE.isKnownNegative(StepRec))
100  StepRec = SE.getNegativeSCEV(StepRec);
101 
102  return StepRec == &ElemSize;
103 }
104 
105 /// Compute the trip count for the given loop \p L. Return the SCEV expression
106 /// for the trip count or nullptr if it cannot be computed.
107 static const SCEV *computeTripCount(const Loop &L, ScalarEvolution &SE) {
108  const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(&L);
109  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
110  !isa<SCEVConstant>(BackedgeTakenCount))
111  return nullptr;
112 
113  return SE.getAddExpr(BackedgeTakenCount,
114  SE.getOne(BackedgeTakenCount->getType()));
115 }
116 
117 //===----------------------------------------------------------------------===//
118 // IndexedReference implementation
119 //
121  if (!R.IsValid) {
122  OS << R.StoreOrLoadInst;
123  OS << ", IsValid=false.";
124  return OS;
125  }
126 
127  OS << *R.BasePointer;
128  for (const SCEV *Subscript : R.Subscripts)
129  OS << "[" << *Subscript << "]";
130 
131  OS << ", Sizes: ";
132  for (const SCEV *Size : R.Sizes)
133  OS << "[" << *Size << "]";
134 
135  return OS;
136 }
137 
139  const LoopInfo &LI, ScalarEvolution &SE)
140  : StoreOrLoadInst(StoreOrLoadInst), SE(SE) {
141  assert((isa<StoreInst>(StoreOrLoadInst) || isa<LoadInst>(StoreOrLoadInst)) &&
142  "Expecting a load or store instruction");
143 
144  IsValid = delinearize(LI);
145  if (IsValid)
146  LLVM_DEBUG(dbgs().indent(2) << "Succesfully delinearized: " << *this
147  << "\n");
148 }
149 
151  unsigned CLS,
152  AAResults &AA) const {
153  assert(IsValid && "Expecting a valid reference");
154 
155  if (BasePointer != Other.getBasePointer() && !isAliased(Other, AA)) {
156  LLVM_DEBUG(dbgs().indent(2)
157  << "No spacial reuse: different base pointers\n");
158  return false;
159  }
160 
161  unsigned NumSubscripts = getNumSubscripts();
162  if (NumSubscripts != Other.getNumSubscripts()) {
163  LLVM_DEBUG(dbgs().indent(2)
164  << "No spacial reuse: different number of subscripts\n");
165  return false;
166  }
167 
168  // all subscripts must be equal, except the leftmost one (the last one).
169  for (auto SubNum : seq<unsigned>(0, NumSubscripts - 1)) {
170  if (getSubscript(SubNum) != Other.getSubscript(SubNum)) {
171  LLVM_DEBUG(dbgs().indent(2) << "No spacial reuse, different subscripts: "
172  << "\n\t" << *getSubscript(SubNum) << "\n\t"
173  << *Other.getSubscript(SubNum) << "\n");
174  return false;
175  }
176  }
177 
178  // the difference between the last subscripts must be less than the cache line
179  // size.
180  const SCEV *LastSubscript = getLastSubscript();
181  const SCEV *OtherLastSubscript = Other.getLastSubscript();
182  const SCEVConstant *Diff = dyn_cast<SCEVConstant>(
183  SE.getMinusSCEV(LastSubscript, OtherLastSubscript));
184 
185  if (Diff == nullptr) {
186  LLVM_DEBUG(dbgs().indent(2)
187  << "No spacial reuse, difference between subscript:\n\t"
188  << *LastSubscript << "\n\t" << OtherLastSubscript
189  << "\nis not constant.\n");
190  return None;
191  }
192 
193  bool InSameCacheLine = (Diff->getValue()->getSExtValue() < CLS);
194 
195  LLVM_DEBUG({
196  if (InSameCacheLine)
197  dbgs().indent(2) << "Found spacial reuse.\n";
198  else
199  dbgs().indent(2) << "No spacial reuse.\n";
200  });
201 
202  return InSameCacheLine;
203 }
204 
206  unsigned MaxDistance,
207  const Loop &L,
208  DependenceInfo &DI,
209  AAResults &AA) const {
210  assert(IsValid && "Expecting a valid reference");
211 
212  if (BasePointer != Other.getBasePointer() && !isAliased(Other, AA)) {
213  LLVM_DEBUG(dbgs().indent(2)
214  << "No temporal reuse: different base pointer\n");
215  return false;
216  }
217 
218  std::unique_ptr<Dependence> D =
219  DI.depends(&StoreOrLoadInst, &Other.StoreOrLoadInst, true);
220 
221  if (D == nullptr) {
222  LLVM_DEBUG(dbgs().indent(2) << "No temporal reuse: no dependence\n");
223  return false;
224  }
225 
226  if (D->isLoopIndependent()) {
227  LLVM_DEBUG(dbgs().indent(2) << "Found temporal reuse\n");
228  return true;
229  }
230 
231  // Check the dependence distance at every loop level. There is temporal reuse
232  // if the distance at the given loop's depth is small (|d| <= MaxDistance) and
233  // it is zero at every other loop level.
234  int LoopDepth = L.getLoopDepth();
235  int Levels = D->getLevels();
236  for (int Level = 1; Level <= Levels; ++Level) {
237  const SCEV *Distance = D->getDistance(Level);
238  const SCEVConstant *SCEVConst = dyn_cast_or_null<SCEVConstant>(Distance);
239 
240  if (SCEVConst == nullptr) {
241  LLVM_DEBUG(dbgs().indent(2) << "No temporal reuse: distance unknown\n");
242  return None;
243  }
244 
245  const ConstantInt &CI = *SCEVConst->getValue();
246  if (Level != LoopDepth && !CI.isZero()) {
247  LLVM_DEBUG(dbgs().indent(2)
248  << "No temporal reuse: distance is not zero at depth=" << Level
249  << "\n");
250  return false;
251  } else if (Level == LoopDepth && CI.getSExtValue() > MaxDistance) {
252  LLVM_DEBUG(
253  dbgs().indent(2)
254  << "No temporal reuse: distance is greater than MaxDistance at depth="
255  << Level << "\n");
256  return false;
257  }
258  }
259 
260  LLVM_DEBUG(dbgs().indent(2) << "Found temporal reuse\n");
261  return true;
262 }
263 
265  unsigned CLS) const {
266  assert(IsValid && "Expecting a valid reference");
267  LLVM_DEBUG({
268  dbgs().indent(2) << "Computing cache cost for:\n";
269  dbgs().indent(4) << *this << "\n";
270  });
271 
272  // If the indexed reference is loop invariant the cost is one.
273  if (isLoopInvariant(L)) {
274  LLVM_DEBUG(dbgs().indent(4) << "Reference is loop invariant: RefCost=1\n");
275  return 1;
276  }
277 
278  const SCEV *TripCount = computeTripCount(L, SE);
279  if (!TripCount) {
280  LLVM_DEBUG(dbgs() << "Trip count of loop " << L.getName()
281  << " could not be computed, using DefaultTripCount\n");
282  const SCEV *ElemSize = Sizes.back();
283  TripCount = SE.getConstant(ElemSize->getType(), DefaultTripCount);
284  }
285  LLVM_DEBUG(dbgs() << "TripCount=" << *TripCount << "\n");
286 
287  // If the indexed reference is 'consecutive' the cost is
288  // (TripCount*Stride)/CLS, otherwise the cost is TripCount.
289  const SCEV *RefCost = TripCount;
290 
291  if (isConsecutive(L, CLS)) {
292  const SCEV *Coeff = getLastCoefficient();
293  const SCEV *ElemSize = Sizes.back();
294  const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize);
295  const SCEV *CacheLineSize = SE.getConstant(Stride->getType(), CLS);
296  Type *WiderType = SE.getWiderType(Stride->getType(), TripCount->getType());
297  if (SE.isKnownNegative(Stride))
298  Stride = SE.getNegativeSCEV(Stride);
299  Stride = SE.getNoopOrAnyExtend(Stride, WiderType);
300  TripCount = SE.getNoopOrAnyExtend(TripCount, WiderType);
301  const SCEV *Numerator = SE.getMulExpr(Stride, TripCount);
302  RefCost = SE.getUDivExpr(Numerator, CacheLineSize);
303 
304  LLVM_DEBUG(dbgs().indent(4)
305  << "Access is consecutive: RefCost=(TripCount*Stride)/CLS="
306  << *RefCost << "\n");
307  } else
308  LLVM_DEBUG(dbgs().indent(4)
309  << "Access is not consecutive: RefCost=TripCount=" << *RefCost
310  << "\n");
311 
312  // Attempt to fold RefCost into a constant.
313  if (auto ConstantCost = dyn_cast<SCEVConstant>(RefCost))
314  return ConstantCost->getValue()->getSExtValue();
315 
316  LLVM_DEBUG(dbgs().indent(4)
317  << "RefCost is not a constant! Setting to RefCost=InvalidCost "
318  "(invalid value).\n");
319 
320  return CacheCost::InvalidCost;
321 }
322 
323 bool IndexedReference::delinearize(const LoopInfo &LI) {
324  assert(Subscripts.empty() && "Subscripts should be empty");
325  assert(Sizes.empty() && "Sizes should be empty");
326  assert(!IsValid && "Should be called once from the constructor");
327  LLVM_DEBUG(dbgs() << "Delinearizing: " << StoreOrLoadInst << "\n");
328 
329  const SCEV *ElemSize = SE.getElementSize(&StoreOrLoadInst);
330  const BasicBlock *BB = StoreOrLoadInst.getParent();
331 
332  if (Loop *L = LI.getLoopFor(BB)) {
333  const SCEV *AccessFn =
334  SE.getSCEVAtScope(getPointerOperand(&StoreOrLoadInst), L);
335 
336  BasePointer = dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFn));
337  if (BasePointer == nullptr) {
338  LLVM_DEBUG(
339  dbgs().indent(2)
340  << "ERROR: failed to delinearize, can't identify base pointer\n");
341  return false;
342  }
343 
344  AccessFn = SE.getMinusSCEV(AccessFn, BasePointer);
345 
346  LLVM_DEBUG(dbgs().indent(2) << "In Loop '" << L->getName()
347  << "', AccessFn: " << *AccessFn << "\n");
348 
349  SE.delinearize(AccessFn, Subscripts, Sizes,
350  SE.getElementSize(&StoreOrLoadInst));
351 
352  if (Subscripts.empty() || Sizes.empty() ||
353  Subscripts.size() != Sizes.size()) {
354  // Attempt to determine whether we have a single dimensional array access.
355  // before giving up.
356  if (!isOneDimensionalArray(*AccessFn, *ElemSize, *L, SE)) {
357  LLVM_DEBUG(dbgs().indent(2)
358  << "ERROR: failed to delinearize reference\n");
359  Subscripts.clear();
360  Sizes.clear();
361  return false;
362  }
363 
364  // The array may be accessed in reverse, for example:
365  // for (i = N; i > 0; i--)
366  // A[i] = 0;
367  // In this case, reconstruct the access function using the absolute value
368  // of the step recurrence.
369  const SCEVAddRecExpr *AccessFnAR = dyn_cast<SCEVAddRecExpr>(AccessFn);
370  const SCEV *StepRec = AccessFnAR ? AccessFnAR->getStepRecurrence(SE) : nullptr;
371 
372  if (StepRec && SE.isKnownNegative(StepRec))
373  AccessFn = SE.getAddRecExpr(AccessFnAR->getStart(),
374  SE.getNegativeSCEV(StepRec),
375  AccessFnAR->getLoop(),
376  AccessFnAR->getNoWrapFlags());
377  const SCEV *Div = SE.getUDivExactExpr(AccessFn, ElemSize);
378  Subscripts.push_back(Div);
379  Sizes.push_back(ElemSize);
380  }
381 
382  return all_of(Subscripts, [&](const SCEV *Subscript) {
383  return isSimpleAddRecurrence(*Subscript, *L);
384  });
385  }
386 
387  return false;
388 }
389 
390 bool IndexedReference::isLoopInvariant(const Loop &L) const {
391  Value *Addr = getPointerOperand(&StoreOrLoadInst);
392  assert(Addr != nullptr && "Expecting either a load or a store instruction");
393  assert(SE.isSCEVable(Addr->getType()) && "Addr should be SCEVable");
394 
395  if (SE.isLoopInvariant(SE.getSCEV(Addr), &L))
396  return true;
397 
398  // The indexed reference is loop invariant if none of the coefficients use
399  // the loop induction variable.
400  bool allCoeffForLoopAreZero = all_of(Subscripts, [&](const SCEV *Subscript) {
401  return isCoeffForLoopZeroOrInvariant(*Subscript, L);
402  });
403 
404  return allCoeffForLoopAreZero;
405 }
406 
407 bool IndexedReference::isConsecutive(const Loop &L, unsigned CLS) const {
408  // The indexed reference is 'consecutive' if the only coefficient that uses
409  // the loop induction variable is the last one...
410  const SCEV *LastSubscript = Subscripts.back();
411  for (const SCEV *Subscript : Subscripts) {
412  if (Subscript == LastSubscript)
413  continue;
414  if (!isCoeffForLoopZeroOrInvariant(*Subscript, L))
415  return false;
416  }
417 
418  // ...and the access stride is less than the cache line size.
419  const SCEV *Coeff = getLastCoefficient();
420  const SCEV *ElemSize = Sizes.back();
421  const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize);
422  const SCEV *CacheLineSize = SE.getConstant(Stride->getType(), CLS);
423 
424  Stride = SE.isKnownNegative(Stride) ? SE.getNegativeSCEV(Stride) : Stride;
426 }
427 
428 const SCEV *IndexedReference::getLastCoefficient() const {
429  const SCEV *LastSubscript = getLastSubscript();
430  assert(isa<SCEVAddRecExpr>(LastSubscript) &&
431  "Expecting a SCEV add recurrence expression");
432  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LastSubscript);
433  return AR->getStepRecurrence(SE);
434 }
435 
436 bool IndexedReference::isCoeffForLoopZeroOrInvariant(const SCEV &Subscript,
437  const Loop &L) const {
438  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(&Subscript);
439  return (AR != nullptr) ? AR->getLoop() != &L
440  : SE.isLoopInvariant(&Subscript, &L);
441 }
442 
443 bool IndexedReference::isSimpleAddRecurrence(const SCEV &Subscript,
444  const Loop &L) const {
445  if (!isa<SCEVAddRecExpr>(Subscript))
446  return false;
447 
448  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(&Subscript);
449  assert(AR->getLoop() && "AR should have a loop");
450 
451  if (!AR->isAffine())
452  return false;
453 
454  const SCEV *Start = AR->getStart();
455  const SCEV *Step = AR->getStepRecurrence(SE);
456 
457  if (!SE.isLoopInvariant(Start, &L) || !SE.isLoopInvariant(Step, &L))
458  return false;
459 
460  return true;
461 }
462 
463 bool IndexedReference::isAliased(const IndexedReference &Other,
464  AAResults &AA) const {
465  const auto &Loc1 = MemoryLocation::get(&StoreOrLoadInst);
466  const auto &Loc2 = MemoryLocation::get(&Other.StoreOrLoadInst);
467  return AA.isMustAlias(Loc1, Loc2);
468 }
469 
470 //===----------------------------------------------------------------------===//
471 // CacheCost implementation
472 //
474  for (const auto &LC : CC.LoopCosts) {
475  const Loop *L = LC.first;
476  OS << "Loop '" << L->getName() << "' has cost = " << LC.second << "\n";
477  }
478  return OS;
479 }
480 
483  AAResults &AA, DependenceInfo &DI,
484  Optional<unsigned> TRT)
485  : Loops(Loops), TripCounts(), LoopCosts(),
486  TRT((TRT == None) ? Optional<unsigned>(TemporalReuseThreshold) : TRT),
487  LI(LI), SE(SE), TTI(TTI), AA(AA), DI(DI) {
488  assert(!Loops.empty() && "Expecting a non-empty loop vector.");
489 
490  for (const Loop *L : Loops) {
491  unsigned TripCount = SE.getSmallConstantTripCount(L);
492  TripCount = (TripCount == 0) ? DefaultTripCount : TripCount;
493  TripCounts.push_back({L, TripCount});
494  }
495 
496  calculateCacheFootprint();
497 }
498 
499 std::unique_ptr<CacheCost>
502  if (!Root.isOutermost()) {
503  LLVM_DEBUG(dbgs() << "Expecting the outermost loop in a loop nest\n");
504  return nullptr;
505  }
506 
507  LoopVectorTy Loops;
509 
510  if (!getInnerMostLoop(Loops)) {
511  LLVM_DEBUG(dbgs() << "Cannot compute cache cost of loop nest with more "
512  "than one innermost loop\n");
513  return nullptr;
514  }
515 
516  return std::make_unique<CacheCost>(Loops, AR.LI, AR.SE, AR.TTI, AR.AA, DI, TRT);
517 }
518 
519 void CacheCost::calculateCacheFootprint() {
520  LLVM_DEBUG(dbgs() << "POPULATING REFERENCE GROUPS\n");
521  ReferenceGroupsTy RefGroups;
522  if (!populateReferenceGroups(RefGroups))
523  return;
524 
525  LLVM_DEBUG(dbgs() << "COMPUTING LOOP CACHE COSTS\n");
526  for (const Loop *L : Loops) {
527  assert((std::find_if(LoopCosts.begin(), LoopCosts.end(),
528  [L](const LoopCacheCostTy &LCC) {
529  return LCC.first == L;
530  }) == LoopCosts.end()) &&
531  "Should not add duplicate element");
532  CacheCostTy LoopCost = computeLoopCacheCost(*L, RefGroups);
533  LoopCosts.push_back(std::make_pair(L, LoopCost));
534  }
535 
536  sortLoopCosts();
537  RefGroups.clear();
538 }
539 
540 bool CacheCost::populateReferenceGroups(ReferenceGroupsTy &RefGroups) const {
541  assert(RefGroups.empty() && "Reference groups should be empty");
542 
543  unsigned CLS = TTI.getCacheLineSize();
544  Loop *InnerMostLoop = getInnerMostLoop(Loops);
545  assert(InnerMostLoop != nullptr && "Expecting a valid innermost loop");
546 
547  for (BasicBlock *BB : InnerMostLoop->getBlocks()) {
548  for (Instruction &I : *BB) {
549  if (!isa<StoreInst>(I) && !isa<LoadInst>(I))
550  continue;
551 
552  std::unique_ptr<IndexedReference> R(new IndexedReference(I, LI, SE));
553  if (!R->isValid())
554  continue;
555 
556  bool Added = false;
557  for (ReferenceGroupTy &RefGroup : RefGroups) {
558  const IndexedReference &Representative = *RefGroup.front().get();
559  LLVM_DEBUG({
560  dbgs() << "References:\n";
561  dbgs().indent(2) << *R << "\n";
562  dbgs().indent(2) << Representative << "\n";
563  });
564 
565 
566  // FIXME: Both positive and negative access functions will be placed
567  // into the same reference group, resulting in a bi-directional array
568  // access such as:
569  // for (i = N; i > 0; i--)
570  // A[i] = A[N - i];
571  // having the same cost calculation as a single dimention access pattern
572  // for (i = 0; i < N; i++)
573  // A[i] = A[i];
574  // when in actuality, depending on the array size, the first example
575  // should have a cost closer to 2x the second due to the two cache
576  // access per iteration from opposite ends of the array
577  Optional<bool> HasTemporalReuse =
578  R->hasTemporalReuse(Representative, *TRT, *InnerMostLoop, DI, AA);
579  Optional<bool> HasSpacialReuse =
580  R->hasSpacialReuse(Representative, CLS, AA);
581 
582  if ((HasTemporalReuse.hasValue() && *HasTemporalReuse) ||
583  (HasSpacialReuse.hasValue() && *HasSpacialReuse)) {
584  RefGroup.push_back(std::move(R));
585  Added = true;
586  break;
587  }
588  }
589 
590  if (!Added) {
591  ReferenceGroupTy RG;
592  RG.push_back(std::move(R));
593  RefGroups.push_back(std::move(RG));
594  }
595  }
596  }
597 
598  if (RefGroups.empty())
599  return false;
600 
601  LLVM_DEBUG({
602  dbgs() << "\nIDENTIFIED REFERENCE GROUPS:\n";
603  int n = 1;
604  for (const ReferenceGroupTy &RG : RefGroups) {
605  dbgs().indent(2) << "RefGroup " << n << ":\n";
606  for (const auto &IR : RG)
607  dbgs().indent(4) << *IR << "\n";
608  n++;
609  }
610  dbgs() << "\n";
611  });
612 
613  return true;
614 }
615 
617 CacheCost::computeLoopCacheCost(const Loop &L,
618  const ReferenceGroupsTy &RefGroups) const {
619  if (!L.isLoopSimplifyForm())
620  return InvalidCost;
621 
622  LLVM_DEBUG(dbgs() << "Considering loop '" << L.getName()
623  << "' as innermost loop.\n");
624 
625  // Compute the product of the trip counts of each other loop in the nest.
626  CacheCostTy TripCountsProduct = 1;
627  for (const auto &TC : TripCounts) {
628  if (TC.first == &L)
629  continue;
630  TripCountsProduct *= TC.second;
631  }
632 
633  CacheCostTy LoopCost = 0;
634  for (const ReferenceGroupTy &RG : RefGroups) {
635  CacheCostTy RefGroupCost = computeRefGroupCacheCost(RG, L);
636  LoopCost += RefGroupCost * TripCountsProduct;
637  }
638 
639  LLVM_DEBUG(dbgs().indent(2) << "Loop '" << L.getName()
640  << "' has cost=" << LoopCost << "\n");
641 
642  return LoopCost;
643 }
644 
645 CacheCostTy CacheCost::computeRefGroupCacheCost(const ReferenceGroupTy &RG,
646  const Loop &L) const {
647  assert(!RG.empty() && "Reference group should have at least one member.");
648 
649  const IndexedReference *Representative = RG.front().get();
650  return Representative->computeRefCost(L, TTI.getCacheLineSize());
651 }
652 
653 //===----------------------------------------------------------------------===//
654 // LoopCachePrinterPass implementation
655 //
658  LPMUpdater &U) {
659  Function *F = L.getHeader()->getParent();
660  DependenceInfo DI(F, &AR.AA, &AR.SE, &AR.LI);
661 
662  if (auto CC = CacheCost::getCacheCost(L, AR, DI))
663  OS << *CC;
664 
665  return PreservedAnalyses::all();
666 }
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
TemporalReuseThreshold
static cl::opt< unsigned > TemporalReuseThreshold("temporal-reuse-threshold", cl::init(2), cl::Hidden, cl::desc("Use this to specify the max. distance between array elements " "accessed in a loop so that the elements are classified to have " "temporal reuse"))
isOneDimensionalArray
static bool isOneDimensionalArray(const SCEV &AccessFn, const SCEV &ElemSize, const Loop &L, ScalarEvolution &SE)
Definition: LoopCacheAnalysis.cpp:80
llvm::MemoryLocation::get
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
Definition: MemoryLocation.cpp:37
llvm::ScalarEvolution::getNegativeSCEV
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
Definition: ScalarEvolution.cpp:3924
llvm
This class represents lattice values for constants.
Definition: AllocatorList.h:23
BreadthFirstIterator.h
llvm::IndexedReference::hasSpacialReuse
Optional< bool > hasSpacialReuse(const IndexedReference &Other, unsigned CLS, AAResults &AA) const
Return true/false if the current object and the indexed reference Other are/aren't in the same cache ...
Definition: LoopCacheAnalysis.cpp:150
llvm::SCEVAddRecExpr::isAffine
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
Definition: ScalarEvolutionExpressions.h:378
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
llvm::SCEVAddRecExpr::getStart
const SCEV * getStart() const
Definition: ScalarEvolutionExpressions.h:362
llvm::IndexedReference::getSubscript
const SCEV * getSubscript(unsigned SubNum) const
Definition: LoopCacheAnalysis.h:56
llvm::Function
Definition: Function.h:61
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:529
Loops
Hexagon Hardware Loops
Definition: HexagonHardwareLoops.cpp:372
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::ScalarEvolution::getAddRecExpr
const SCEV * getAddRecExpr(const SCEV *Start, const SCEV *Step, const Loop *L, SCEV::NoWrapFlags Flags)
Get an add recurrence expression for the specified loop.
Definition: ScalarEvolution.cpp:3325
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:167
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:443
llvm::ScalarEvolution::getNoopOrAnyExtend
const SCEV * getNoopOrAnyExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
Definition: ScalarEvolution.cpp:4066
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:46
llvm::LoopStandardAnalysisResults
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Definition: LoopAnalysisManager.h:52
getInnerMostLoop
static Loop * getInnerMostLoop(const LoopVectorTy &Loops)
Retrieve the innermost loop in the given loop nest Loops.
Definition: LoopCacheAnalysis.cpp:61
llvm::Optional< bool >
llvm::ScalarEvolution::getUDivExactExpr
const SCEV * getUDivExactExpr(const SCEV *LHS, const SCEV *RHS)
Get a canonical unsigned division expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:3271
llvm::ScalarEvolution::getWiderType
Type * getWiderType(Type *Ty1, Type *Ty2) const
Definition: ScalarEvolution.cpp:3771
llvm::SCEVConstant::getValue
ConstantInt * getValue() const
Definition: ScalarEvolutionExpressions.h:56
Sequence.h
llvm::ScalarEvolution::getPointerBase
const SCEV * getPointerBase(const SCEV *V)
Transitively follow the chain of pointer-type operands until reaching a SCEV that does not have a sin...
Definition: ScalarEvolution.cpp:4133
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:129
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
AliasAnalysis.h
llvm::Optional::hasValue
constexpr bool hasValue() const
Definition: Optional.h:286
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
llvm::ScalarEvolution::getMulExpr
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:2808
CommandLine.h
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:77
llvm::LoopBase::getParentLoop
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Definition: LoopInfo.h:113
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1498
llvm::CacheCost::getCacheCost
static std::unique_ptr< CacheCost > getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI, Optional< unsigned > TRT=None)
Create a CacheCost for the loop nest rooted by Root.
Definition: LoopCacheAnalysis.cpp:500
L2
add sub stmia L5 ldr L2
Definition: README.txt:201
llvm::ScalarEvolution::getOne
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
Definition: ScalarEvolution.h:596
llvm::AAResults
Definition: AliasAnalysis.h:388
DefaultTripCount
static cl::opt< unsigned > DefaultTripCount("default-trip-count", cl::init(100), cl::Hidden, cl::desc("Use this to specify the default trip count of a loop"))
llvm::ScalarEvolution::getUDivExpr
const SCEV * getUDivExpr(const SCEV *LHS, const SCEV *RHS)
Get a canonical unsigned division expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:3101
llvm::IndexedReference::getNumSubscripts
size_t getNumSubscripts() const
Definition: LoopCacheAnalysis.h:55
llvm::breadth_first
iterator_range< bf_iterator< T > > breadth_first(const T &G)
Definition: BreadthFirstIterator.h:156
llvm::LoopCachePrinterPass::run
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition: LoopCacheAnalysis.cpp:656
llvm::LoopBase::getBlocks
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:171
llvm::ScalarEvolution::isKnownPredicate
bool isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
Definition: ScalarEvolution.cpp:9515
llvm::Instruction
Definition: Instruction.h:45
llvm::IndexedReference::computeRefCost
CacheCostTy computeRefCost(const Loop &L, unsigned CLS) const
Compute the cost of the reference w.r.t.
Definition: LoopCacheAnalysis.cpp:264
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:50
llvm::IndexedReference::hasTemporalReuse
Optional< bool > hasTemporalReuse(const IndexedReference &Other, unsigned MaxDistance, const Loop &L, DependenceInfo &DI, AAResults &AA) const
Return true if the current object and the indexed reference Other have distance smaller than MaxDista...
Definition: LoopCacheAnalysis.cpp:205
llvm::operator<<
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:230
IR
Statically lint checks LLVM IR
Definition: Lint.cpp:742
llvm::IndexedReference::IndexedReference
IndexedReference(Instruction &StoreOrLoadInst, const LoopInfo &LI, ScalarEvolution &SE)
Construct an indexed reference given a StoreOrLoadInst instruction.
Definition: LoopCacheAnalysis.cpp:138
llvm::Loop::getName
StringRef getName() const
Definition: LoopInfo.h:862
llvm::None
const NoneType None
Definition: None.h:23
LoopInfo.h
computeTripCount
static const SCEV * computeTripCount(const Loop &L, ScalarEvolution &SE)
Compute the trip count for the given loop L.
Definition: LoopCacheAnalysis.cpp:107
llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition: ScalarEvolution.cpp:3877
llvm::ScalarEvolution::getSmallConstantTripCount
unsigned getSmallConstantTripCount(const Loop *L)
Returns the maximum trip count of the loop if it is a single-exit loop and we can compute a small max...
Definition: ScalarEvolution.cpp:6819
CacheLineSize
static cl::opt< unsigned > CacheLineSize("ppc-loop-prefetch-cache-line", cl::Hidden, cl::init(64), cl::desc("The loop prefetch cache line size"))
llvm::Loop::isLoopSimplifyForm
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to,...
Definition: LoopInfo.cpp:471
llvm::cl::opt
Definition: CommandLine.h:1419
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:78
llvm::DependenceInfo
DependenceInfo - This class is the main dependence-analysis driver.
Definition: DependenceAnalysis.h:272
llvm::getPointerOperand
const Value * getPointerOperand(const Value *V)
A helper function that returns the pointer operand of a load, store or GEP instruction.
Definition: Instructions.h:5271
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
llvm::LPMUpdater
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
Definition: LoopPassManager.h:243
Addr
uint64_t Addr
Definition: ELFObjHandler.cpp:80
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::LoopInfoBase::getLoopFor
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:963
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::IndexedReference
Represents a memory reference as a base pointer and a set of indexing operations.
Definition: LoopCacheAnalysis.h:45
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:440
llvm::SCEVConstant
This class represents a constant integer value.
Definition: ScalarEvolutionExpressions.h:47
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::LoopBase::getLoopDepth
unsigned getLoopDepth() const
Return the nesting level of this loop.
Definition: LoopInfo.h:96
llvm::IndexedReference::getLastSubscript
const SCEV * getLastSubscript() const
Definition: LoopCacheAnalysis.h:64
llvm::ScalarEvolution::delinearize
void delinearize(const SCEV *Expr, SmallVectorImpl< const SCEV * > &Subscripts, SmallVectorImpl< const SCEV * > &Sizes, const SCEV *ElementSize)
Split this SCEVAddRecExpr into two vectors of SCEVs representing the subscripts and sizes of an array...
Definition: ScalarEvolution.cpp:11875
llvm::SCEVNAryExpr::getNoWrapFlags
NoWrapFlags getNoWrapFlags(NoWrapFlags Mask=NoWrapMask) const
Definition: ScalarEvolutionExpressions.h:215
llvm::LoopInfo
Definition: LoopInfo.h:1079
llvm::ScalarEvolution::getConstant
const SCEV * getConstant(ConstantInt *V)
Definition: ScalarEvolution.cpp:444
llvm::CmpInst::ICMP_ULT
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:747
llvm::ConstantInt::isZero
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:202
llvm::LoopBase::isOutermost
bool isOutermost() const
Return true if the loop does not have a parent (natural) loop.
Definition: LoopInfo.h:168
llvm::ScalarEvolution::getElementSize
const SCEV * getElementSize(Instruction *Inst)
Return the size of an element read or written by Inst.
Definition: ScalarEvolution.cpp:11693
llvm::append_range
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
Definition: STLExtras.h:1683
llvm::LoopStandardAnalysisResults::LI
LoopInfo & LI
Definition: LoopAnalysisManager.h:56
llvm::PICLevel::Level
Level
Definition: CodeGen.h:33
llvm::ConstantInt::getSExtValue
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:152
llvm::ScalarEvolution::isLoopInvariant
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
Definition: ScalarEvolution.cpp:12349
llvm::find_if
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1525
llvm::TargetTransformInfo::getCacheLineSize
unsigned getCacheLineSize() const
Definition: TargetTransformInfo.cpp:608
llvm::SCEVAddRecExpr::getLoop
const Loop * getLoop() const
Definition: ScalarEvolutionExpressions.h:363
llvm::ScalarEvolution::getMinusSCEV
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
Definition: ScalarEvolution.cpp:3978
llvm::LoopStandardAnalysisResults::TTI
TargetTransformInfo & TTI
Definition: LoopAnalysisManager.h:59
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::SCEVAddRecExpr
This node represents a polynomial recurrence on the trip count of the specified loop.
Definition: ScalarEvolutionExpressions.h:352
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::ScalarEvolution::isSCEVable
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
Definition: ScalarEvolution.cpp:3743
llvm::is_sorted
bool is_sorted(R &&Range, Compare C)
Wrapper function around std::is_sorted to check if elements in a range R are sorted with respect to a...
Definition: STLExtras.h:1569
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:585
llvm::LoopStandardAnalysisResults::SE
ScalarEvolution & SE
Definition: LoopAnalysisManager.h:57
llvm::LoopStandardAnalysisResults::AA
AAResults & AA
Definition: LoopAnalysisManager.h:53
ScalarEvolutionExpressions.h
llvm::CacheCostTy
int64_t CacheCostTy
Definition: LoopCacheAnalysis.h:31
llvm::DependenceInfo::depends
std::unique_ptr< Dependence > depends(Instruction *Src, Instruction *Dst, bool PossiblyLoopIndependent)
depends - Tests for a dependence between the Src and Dst instructions.
Definition: DependenceAnalysis.cpp:3481
SmallVector.h
llvm::raw_ostream::indent
raw_ostream & indent(unsigned NumSpaces)
indent - Insert 'NumSpaces' spaces.
Definition: raw_ostream.cpp:497
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::CacheCost
CacheCost represents the estimated cost of a inner loop as the number of cache lines used by the memo...
Definition: LoopCacheAnalysis.h:174
TargetTransformInfo.h
llvm::AAResults::isMustAlias
bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A trivial helper function to check to see if the specified pointers are must-alias.
Definition: AliasAnalysis.h:460
LoopCacheAnalysis.h
llvm::SCEV::getType
Type * getType() const
Return the LLVM type of this SCEV expression.
Definition: ScalarEvolution.cpp:379
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:43
llvm::ScalarEvolution::getAddExpr
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:2292
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
DependenceAnalysis.h
llvm::cl::desc
Definition: CommandLine.h:411
n
The same transformation can work with an even modulo with the addition of a and shrink the compare RHS by the same amount Unless the target supports that transformation probably isn t worthwhile The transformation can also easily be made to work with non zero equality for n
Definition: README.txt:685
llvm::ScalarEvolution::getBackedgeTakenCount
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
Definition: ScalarEvolution.cpp:6916
llvm::CacheCost::InvalidCost
static constexpr CacheCostTy InvalidCost
Definition: LoopCacheAnalysis.h:180
llvm::ScalarEvolution::isKnownNegative
bool isKnownNegative(const SCEV *S)
Test if the given expression is known to be negative.
Definition: ScalarEvolution.cpp:9432
llvm::ScalarEvolution::getSCEVAtScope
const SCEV * getSCEVAtScope(const SCEV *S, const Loop *L)
Return a SCEV expression for the specified value at the specified scope in the program.
Definition: ScalarEvolution.cpp:8341
llvm::CacheCost::CacheCost
CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, ScalarEvolution &SE, TargetTransformInfo &TTI, AAResults &AA, DependenceInfo &DI, Optional< unsigned > TRT=None)
Construct a CacheCost object for the loop nest described by Loops.
Definition: LoopCacheAnalysis.cpp:481
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
Debug.h
llvm::SCEVAddRecExpr::getStepRecurrence
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
Definition: ScalarEvolutionExpressions.h:369
Other
Optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1131