LLVM  14.0.0git
LoopCacheAnalysis.cpp
Go to the documentation of this file.
1 //===- LoopCacheAnalysis.cpp - Loop Cache Analysis -------------------------==//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6 // See https://llvm.org/LICENSE.txt for license information.
7 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8 //
9 //===----------------------------------------------------------------------===//
10 ///
11 /// \file
12 /// This file defines the implementation for the loop cache analysis.
13 /// The implementation is largely based on the following paper:
14 ///
15 /// Compiler Optimizations for Improving Data Locality
16 /// By: Steve Carr, Katherine S. McKinley, Chau-Wen Tseng
17 /// http://www.cs.utexas.edu/users/mckinley/papers/asplos-1994.pdf
18 ///
19 /// The general approach taken to estimate the number of cache lines used by the
20 /// memory references in an inner loop is:
21 /// 1. Partition memory references that exhibit temporal or spacial reuse
22 /// into reference groups.
23 /// 2. For each loop L in the a loop nest LN:
24 /// a. Compute the cost of the reference group
25 /// b. Compute the loop cost by summing up the reference groups costs
26 //===----------------------------------------------------------------------===//
27 
30 #include "llvm/ADT/Sequence.h"
31 #include "llvm/ADT/SmallVector.h"
35 #include "llvm/Analysis/LoopInfo.h"
39 #include "llvm/Support/Debug.h"
40 
41 using namespace llvm;
42 
43 #define DEBUG_TYPE "loop-cache-cost"
44 
46  "default-trip-count", cl::init(100), cl::Hidden,
47  cl::desc("Use this to specify the default trip count of a loop"));
48 
49 // In this analysis two array references are considered to exhibit temporal
50 // reuse if they access either the same memory location, or a memory location
51 // with distance smaller than a configurable threshold.
53  "temporal-reuse-threshold", cl::init(2), cl::Hidden,
54  cl::desc("Use this to specify the max. distance between array elements "
55  "accessed in a loop so that the elements are classified to have "
56  "temporal reuse"));
57 
58 /// Retrieve the innermost loop in the given loop nest \p Loops. It returns a
59 /// nullptr if any loops in the loop vector supplied has more than one sibling.
60 /// The loop vector is expected to contain loops collected in breadth-first
61 /// order.
63  assert(!Loops.empty() && "Expecting a non-empy loop vector");
64 
65  Loop *LastLoop = Loops.back();
66  Loop *ParentLoop = LastLoop->getParentLoop();
67 
68  if (ParentLoop == nullptr) {
69  assert(Loops.size() == 1 && "Expecting a single loop");
70  return LastLoop;
71  }
72 
73  return (llvm::is_sorted(Loops,
74  [](const Loop *L1, const Loop *L2) {
75  return L1->getLoopDepth() < L2->getLoopDepth();
76  }))
77  ? LastLoop
78  : nullptr;
79 }
80 
81 static bool isOneDimensionalArray(const SCEV &AccessFn, const SCEV &ElemSize,
82  const Loop &L, ScalarEvolution &SE) {
83  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(&AccessFn);
84  if (!AR || !AR->isAffine())
85  return false;
86 
87  assert(AR->getLoop() && "AR should have a loop");
88 
89  // Check that start and increment are not add recurrences.
90  const SCEV *Start = AR->getStart();
91  const SCEV *Step = AR->getStepRecurrence(SE);
92  if (isa<SCEVAddRecExpr>(Start) || isa<SCEVAddRecExpr>(Step))
93  return false;
94 
95  // Check that start and increment are both invariant in the loop.
96  if (!SE.isLoopInvariant(Start, &L) || !SE.isLoopInvariant(Step, &L))
97  return false;
98 
99  const SCEV *StepRec = AR->getStepRecurrence(SE);
100  if (StepRec && SE.isKnownNegative(StepRec))
101  StepRec = SE.getNegativeSCEV(StepRec);
102 
103  return StepRec == &ElemSize;
104 }
105 
106 /// Compute the trip count for the given loop \p L. Return the SCEV expression
107 /// for the trip count or nullptr if it cannot be computed.
108 static const SCEV *computeTripCount(const Loop &L, ScalarEvolution &SE) {
109  const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(&L);
110  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
111  !isa<SCEVConstant>(BackedgeTakenCount))
112  return nullptr;
113  return SE.getTripCountFromExitCount(BackedgeTakenCount);
114 }
115 
116 //===----------------------------------------------------------------------===//
117 // IndexedReference implementation
118 //
120  if (!R.IsValid) {
121  OS << R.StoreOrLoadInst;
122  OS << ", IsValid=false.";
123  return OS;
124  }
125 
126  OS << *R.BasePointer;
127  for (const SCEV *Subscript : R.Subscripts)
128  OS << "[" << *Subscript << "]";
129 
130  OS << ", Sizes: ";
131  for (const SCEV *Size : R.Sizes)
132  OS << "[" << *Size << "]";
133 
134  return OS;
135 }
136 
138  const LoopInfo &LI, ScalarEvolution &SE)
139  : StoreOrLoadInst(StoreOrLoadInst), SE(SE) {
140  assert((isa<StoreInst>(StoreOrLoadInst) || isa<LoadInst>(StoreOrLoadInst)) &&
141  "Expecting a load or store instruction");
142 
143  IsValid = delinearize(LI);
144  if (IsValid)
145  LLVM_DEBUG(dbgs().indent(2) << "Succesfully delinearized: " << *this
146  << "\n");
147 }
148 
150  unsigned CLS,
151  AAResults &AA) const {
152  assert(IsValid && "Expecting a valid reference");
153 
154  if (BasePointer != Other.getBasePointer() && !isAliased(Other, AA)) {
155  LLVM_DEBUG(dbgs().indent(2)
156  << "No spacial reuse: different base pointers\n");
157  return false;
158  }
159 
160  unsigned NumSubscripts = getNumSubscripts();
161  if (NumSubscripts != Other.getNumSubscripts()) {
162  LLVM_DEBUG(dbgs().indent(2)
163  << "No spacial reuse: different number of subscripts\n");
164  return false;
165  }
166 
167  // all subscripts must be equal, except the leftmost one (the last one).
168  for (auto SubNum : seq<unsigned>(0, NumSubscripts - 1)) {
169  if (getSubscript(SubNum) != Other.getSubscript(SubNum)) {
170  LLVM_DEBUG(dbgs().indent(2) << "No spacial reuse, different subscripts: "
171  << "\n\t" << *getSubscript(SubNum) << "\n\t"
172  << *Other.getSubscript(SubNum) << "\n");
173  return false;
174  }
175  }
176 
177  // the difference between the last subscripts must be less than the cache line
178  // size.
179  const SCEV *LastSubscript = getLastSubscript();
180  const SCEV *OtherLastSubscript = Other.getLastSubscript();
181  const SCEVConstant *Diff = dyn_cast<SCEVConstant>(
182  SE.getMinusSCEV(LastSubscript, OtherLastSubscript));
183 
184  if (Diff == nullptr) {
185  LLVM_DEBUG(dbgs().indent(2)
186  << "No spacial reuse, difference between subscript:\n\t"
187  << *LastSubscript << "\n\t" << OtherLastSubscript
188  << "\nis not constant.\n");
189  return None;
190  }
191 
192  bool InSameCacheLine = (Diff->getValue()->getSExtValue() < CLS);
193 
194  LLVM_DEBUG({
195  if (InSameCacheLine)
196  dbgs().indent(2) << "Found spacial reuse.\n";
197  else
198  dbgs().indent(2) << "No spacial reuse.\n";
199  });
200 
201  return InSameCacheLine;
202 }
203 
205  unsigned MaxDistance,
206  const Loop &L,
207  DependenceInfo &DI,
208  AAResults &AA) const {
209  assert(IsValid && "Expecting a valid reference");
210 
211  if (BasePointer != Other.getBasePointer() && !isAliased(Other, AA)) {
212  LLVM_DEBUG(dbgs().indent(2)
213  << "No temporal reuse: different base pointer\n");
214  return false;
215  }
216 
217  std::unique_ptr<Dependence> D =
218  DI.depends(&StoreOrLoadInst, &Other.StoreOrLoadInst, true);
219 
220  if (D == nullptr) {
221  LLVM_DEBUG(dbgs().indent(2) << "No temporal reuse: no dependence\n");
222  return false;
223  }
224 
225  if (D->isLoopIndependent()) {
226  LLVM_DEBUG(dbgs().indent(2) << "Found temporal reuse\n");
227  return true;
228  }
229 
230  // Check the dependence distance at every loop level. There is temporal reuse
231  // if the distance at the given loop's depth is small (|d| <= MaxDistance) and
232  // it is zero at every other loop level.
233  int LoopDepth = L.getLoopDepth();
234  int Levels = D->getLevels();
235  for (int Level = 1; Level <= Levels; ++Level) {
236  const SCEV *Distance = D->getDistance(Level);
237  const SCEVConstant *SCEVConst = dyn_cast_or_null<SCEVConstant>(Distance);
238 
239  if (SCEVConst == nullptr) {
240  LLVM_DEBUG(dbgs().indent(2) << "No temporal reuse: distance unknown\n");
241  return None;
242  }
243 
244  const ConstantInt &CI = *SCEVConst->getValue();
245  if (Level != LoopDepth && !CI.isZero()) {
246  LLVM_DEBUG(dbgs().indent(2)
247  << "No temporal reuse: distance is not zero at depth=" << Level
248  << "\n");
249  return false;
250  } else if (Level == LoopDepth && CI.getSExtValue() > MaxDistance) {
251  LLVM_DEBUG(
252  dbgs().indent(2)
253  << "No temporal reuse: distance is greater than MaxDistance at depth="
254  << Level << "\n");
255  return false;
256  }
257  }
258 
259  LLVM_DEBUG(dbgs().indent(2) << "Found temporal reuse\n");
260  return true;
261 }
262 
264  unsigned CLS) const {
265  assert(IsValid && "Expecting a valid reference");
266  LLVM_DEBUG({
267  dbgs().indent(2) << "Computing cache cost for:\n";
268  dbgs().indent(4) << *this << "\n";
269  });
270 
271  // If the indexed reference is loop invariant the cost is one.
272  if (isLoopInvariant(L)) {
273  LLVM_DEBUG(dbgs().indent(4) << "Reference is loop invariant: RefCost=1\n");
274  return 1;
275  }
276 
277  const SCEV *TripCount = computeTripCount(L, SE);
278  if (!TripCount) {
279  LLVM_DEBUG(dbgs() << "Trip count of loop " << L.getName()
280  << " could not be computed, using DefaultTripCount\n");
281  const SCEV *ElemSize = Sizes.back();
282  TripCount = SE.getConstant(ElemSize->getType(), DefaultTripCount);
283  }
284  LLVM_DEBUG(dbgs() << "TripCount=" << *TripCount << "\n");
285 
286  // If the indexed reference is 'consecutive' the cost is
287  // (TripCount*Stride)/CLS, otherwise the cost is TripCount.
288  const SCEV *RefCost = TripCount;
289 
290  if (isConsecutive(L, CLS)) {
291  const SCEV *Coeff = getLastCoefficient();
292  const SCEV *ElemSize = Sizes.back();
293  const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize);
294  Type *WiderType = SE.getWiderType(Stride->getType(), TripCount->getType());
295  const SCEV *CacheLineSize = SE.getConstant(WiderType, CLS);
296  if (SE.isKnownNegative(Stride))
297  Stride = SE.getNegativeSCEV(Stride);
298  Stride = SE.getNoopOrAnyExtend(Stride, WiderType);
299  TripCount = SE.getNoopOrAnyExtend(TripCount, WiderType);
300  const SCEV *Numerator = SE.getMulExpr(Stride, TripCount);
301  RefCost = SE.getUDivExpr(Numerator, CacheLineSize);
302 
303  LLVM_DEBUG(dbgs().indent(4)
304  << "Access is consecutive: RefCost=(TripCount*Stride)/CLS="
305  << *RefCost << "\n");
306  } else
307  LLVM_DEBUG(dbgs().indent(4)
308  << "Access is not consecutive: RefCost=TripCount=" << *RefCost
309  << "\n");
310 
311  // Attempt to fold RefCost into a constant.
312  if (auto ConstantCost = dyn_cast<SCEVConstant>(RefCost))
313  return ConstantCost->getValue()->getSExtValue();
314 
315  LLVM_DEBUG(dbgs().indent(4)
316  << "RefCost is not a constant! Setting to RefCost=InvalidCost "
317  "(invalid value).\n");
318 
319  return CacheCost::InvalidCost;
320 }
321 
322 bool IndexedReference::delinearize(const LoopInfo &LI) {
323  assert(Subscripts.empty() && "Subscripts should be empty");
324  assert(Sizes.empty() && "Sizes should be empty");
325  assert(!IsValid && "Should be called once from the constructor");
326  LLVM_DEBUG(dbgs() << "Delinearizing: " << StoreOrLoadInst << "\n");
327 
328  const SCEV *ElemSize = SE.getElementSize(&StoreOrLoadInst);
329  const BasicBlock *BB = StoreOrLoadInst.getParent();
330 
331  if (Loop *L = LI.getLoopFor(BB)) {
332  const SCEV *AccessFn =
333  SE.getSCEVAtScope(getPointerOperand(&StoreOrLoadInst), L);
334 
335  BasePointer = dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFn));
336  if (BasePointer == nullptr) {
337  LLVM_DEBUG(
338  dbgs().indent(2)
339  << "ERROR: failed to delinearize, can't identify base pointer\n");
340  return false;
341  }
342 
343  AccessFn = SE.getMinusSCEV(AccessFn, BasePointer);
344 
345  LLVM_DEBUG(dbgs().indent(2) << "In Loop '" << L->getName()
346  << "', AccessFn: " << *AccessFn << "\n");
347 
348  llvm::delinearize(SE, AccessFn, Subscripts, Sizes,
349  SE.getElementSize(&StoreOrLoadInst));
350 
351  if (Subscripts.empty() || Sizes.empty() ||
352  Subscripts.size() != Sizes.size()) {
353  // Attempt to determine whether we have a single dimensional array access.
354  // before giving up.
355  if (!isOneDimensionalArray(*AccessFn, *ElemSize, *L, SE)) {
356  LLVM_DEBUG(dbgs().indent(2)
357  << "ERROR: failed to delinearize reference\n");
358  Subscripts.clear();
359  Sizes.clear();
360  return false;
361  }
362 
363  // The array may be accessed in reverse, for example:
364  // for (i = N; i > 0; i--)
365  // A[i] = 0;
366  // In this case, reconstruct the access function using the absolute value
367  // of the step recurrence.
368  const SCEVAddRecExpr *AccessFnAR = dyn_cast<SCEVAddRecExpr>(AccessFn);
369  const SCEV *StepRec = AccessFnAR ? AccessFnAR->getStepRecurrence(SE) : nullptr;
370 
371  if (StepRec && SE.isKnownNegative(StepRec))
372  AccessFn = SE.getAddRecExpr(AccessFnAR->getStart(),
373  SE.getNegativeSCEV(StepRec),
374  AccessFnAR->getLoop(),
375  AccessFnAR->getNoWrapFlags());
376  const SCEV *Div = SE.getUDivExactExpr(AccessFn, ElemSize);
377  Subscripts.push_back(Div);
378  Sizes.push_back(ElemSize);
379  }
380 
381  return all_of(Subscripts, [&](const SCEV *Subscript) {
382  return isSimpleAddRecurrence(*Subscript, *L);
383  });
384  }
385 
386  return false;
387 }
388 
389 bool IndexedReference::isLoopInvariant(const Loop &L) const {
390  Value *Addr = getPointerOperand(&StoreOrLoadInst);
391  assert(Addr != nullptr && "Expecting either a load or a store instruction");
392  assert(SE.isSCEVable(Addr->getType()) && "Addr should be SCEVable");
393 
394  if (SE.isLoopInvariant(SE.getSCEV(Addr), &L))
395  return true;
396 
397  // The indexed reference is loop invariant if none of the coefficients use
398  // the loop induction variable.
399  bool allCoeffForLoopAreZero = all_of(Subscripts, [&](const SCEV *Subscript) {
400  return isCoeffForLoopZeroOrInvariant(*Subscript, L);
401  });
402 
403  return allCoeffForLoopAreZero;
404 }
405 
406 bool IndexedReference::isConsecutive(const Loop &L, unsigned CLS) const {
407  // The indexed reference is 'consecutive' if the only coefficient that uses
408  // the loop induction variable is the last one...
409  const SCEV *LastSubscript = Subscripts.back();
410  for (const SCEV *Subscript : Subscripts) {
411  if (Subscript == LastSubscript)
412  continue;
413  if (!isCoeffForLoopZeroOrInvariant(*Subscript, L))
414  return false;
415  }
416 
417  // ...and the access stride is less than the cache line size.
418  const SCEV *Coeff = getLastCoefficient();
419  const SCEV *ElemSize = Sizes.back();
420  const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize);
421  const SCEV *CacheLineSize = SE.getConstant(Stride->getType(), CLS);
422 
423  Stride = SE.isKnownNegative(Stride) ? SE.getNegativeSCEV(Stride) : Stride;
425 }
426 
427 const SCEV *IndexedReference::getLastCoefficient() const {
428  const SCEV *LastSubscript = getLastSubscript();
429  auto *AR = cast<SCEVAddRecExpr>(LastSubscript);
430  return AR->getStepRecurrence(SE);
431 }
432 
433 bool IndexedReference::isCoeffForLoopZeroOrInvariant(const SCEV &Subscript,
434  const Loop &L) const {
435  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(&Subscript);
436  return (AR != nullptr) ? AR->getLoop() != &L
437  : SE.isLoopInvariant(&Subscript, &L);
438 }
439 
440 bool IndexedReference::isSimpleAddRecurrence(const SCEV &Subscript,
441  const Loop &L) const {
442  if (!isa<SCEVAddRecExpr>(Subscript))
443  return false;
444 
445  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(&Subscript);
446  assert(AR->getLoop() && "AR should have a loop");
447 
448  if (!AR->isAffine())
449  return false;
450 
451  const SCEV *Start = AR->getStart();
452  const SCEV *Step = AR->getStepRecurrence(SE);
453 
454  if (!SE.isLoopInvariant(Start, &L) || !SE.isLoopInvariant(Step, &L))
455  return false;
456 
457  return true;
458 }
459 
460 bool IndexedReference::isAliased(const IndexedReference &Other,
461  AAResults &AA) const {
462  const auto &Loc1 = MemoryLocation::get(&StoreOrLoadInst);
463  const auto &Loc2 = MemoryLocation::get(&Other.StoreOrLoadInst);
464  return AA.isMustAlias(Loc1, Loc2);
465 }
466 
467 //===----------------------------------------------------------------------===//
468 // CacheCost implementation
469 //
471  for (const auto &LC : CC.LoopCosts) {
472  const Loop *L = LC.first;
473  OS << "Loop '" << L->getName() << "' has cost = " << LC.second << "\n";
474  }
475  return OS;
476 }
477 
480  AAResults &AA, DependenceInfo &DI,
481  Optional<unsigned> TRT)
482  : Loops(Loops), TripCounts(), LoopCosts(),
483  TRT((TRT == None) ? Optional<unsigned>(TemporalReuseThreshold) : TRT),
484  LI(LI), SE(SE), TTI(TTI), AA(AA), DI(DI) {
485  assert(!Loops.empty() && "Expecting a non-empty loop vector.");
486 
487  for (const Loop *L : Loops) {
488  unsigned TripCount = SE.getSmallConstantTripCount(L);
489  TripCount = (TripCount == 0) ? DefaultTripCount : TripCount;
490  TripCounts.push_back({L, TripCount});
491  }
492 
493  calculateCacheFootprint();
494 }
495 
496 std::unique_ptr<CacheCost>
499  if (!Root.isOutermost()) {
500  LLVM_DEBUG(dbgs() << "Expecting the outermost loop in a loop nest\n");
501  return nullptr;
502  }
503 
504  LoopVectorTy Loops;
506 
507  if (!getInnerMostLoop(Loops)) {
508  LLVM_DEBUG(dbgs() << "Cannot compute cache cost of loop nest with more "
509  "than one innermost loop\n");
510  return nullptr;
511  }
512 
513  return std::make_unique<CacheCost>(Loops, AR.LI, AR.SE, AR.TTI, AR.AA, DI, TRT);
514 }
515 
516 void CacheCost::calculateCacheFootprint() {
517  LLVM_DEBUG(dbgs() << "POPULATING REFERENCE GROUPS\n");
518  ReferenceGroupsTy RefGroups;
519  if (!populateReferenceGroups(RefGroups))
520  return;
521 
522  LLVM_DEBUG(dbgs() << "COMPUTING LOOP CACHE COSTS\n");
523  for (const Loop *L : Loops) {
525  LoopCosts,
526  [L](const LoopCacheCostTy &LCC) { return LCC.first == L; }) &&
527  "Should not add duplicate element");
528  CacheCostTy LoopCost = computeLoopCacheCost(*L, RefGroups);
529  LoopCosts.push_back(std::make_pair(L, LoopCost));
530  }
531 
532  sortLoopCosts();
533  RefGroups.clear();
534 }
535 
536 bool CacheCost::populateReferenceGroups(ReferenceGroupsTy &RefGroups) const {
537  assert(RefGroups.empty() && "Reference groups should be empty");
538 
539  unsigned CLS = TTI.getCacheLineSize();
540  Loop *InnerMostLoop = getInnerMostLoop(Loops);
541  assert(InnerMostLoop != nullptr && "Expecting a valid innermost loop");
542 
543  for (BasicBlock *BB : InnerMostLoop->getBlocks()) {
544  for (Instruction &I : *BB) {
545  if (!isa<StoreInst>(I) && !isa<LoadInst>(I))
546  continue;
547 
548  std::unique_ptr<IndexedReference> R(new IndexedReference(I, LI, SE));
549  if (!R->isValid())
550  continue;
551 
552  bool Added = false;
553  for (ReferenceGroupTy &RefGroup : RefGroups) {
554  const IndexedReference &Representative = *RefGroup.front().get();
555  LLVM_DEBUG({
556  dbgs() << "References:\n";
557  dbgs().indent(2) << *R << "\n";
558  dbgs().indent(2) << Representative << "\n";
559  });
560 
561 
562  // FIXME: Both positive and negative access functions will be placed
563  // into the same reference group, resulting in a bi-directional array
564  // access such as:
565  // for (i = N; i > 0; i--)
566  // A[i] = A[N - i];
567  // having the same cost calculation as a single dimention access pattern
568  // for (i = 0; i < N; i++)
569  // A[i] = A[i];
570  // when in actuality, depending on the array size, the first example
571  // should have a cost closer to 2x the second due to the two cache
572  // access per iteration from opposite ends of the array
573  Optional<bool> HasTemporalReuse =
574  R->hasTemporalReuse(Representative, *TRT, *InnerMostLoop, DI, AA);
575  Optional<bool> HasSpacialReuse =
576  R->hasSpacialReuse(Representative, CLS, AA);
577 
578  if ((HasTemporalReuse.hasValue() && *HasTemporalReuse) ||
579  (HasSpacialReuse.hasValue() && *HasSpacialReuse)) {
580  RefGroup.push_back(std::move(R));
581  Added = true;
582  break;
583  }
584  }
585 
586  if (!Added) {
587  ReferenceGroupTy RG;
588  RG.push_back(std::move(R));
589  RefGroups.push_back(std::move(RG));
590  }
591  }
592  }
593 
594  if (RefGroups.empty())
595  return false;
596 
597  LLVM_DEBUG({
598  dbgs() << "\nIDENTIFIED REFERENCE GROUPS:\n";
599  int n = 1;
600  for (const ReferenceGroupTy &RG : RefGroups) {
601  dbgs().indent(2) << "RefGroup " << n << ":\n";
602  for (const auto &IR : RG)
603  dbgs().indent(4) << *IR << "\n";
604  n++;
605  }
606  dbgs() << "\n";
607  });
608 
609  return true;
610 }
611 
613 CacheCost::computeLoopCacheCost(const Loop &L,
614  const ReferenceGroupsTy &RefGroups) const {
615  if (!L.isLoopSimplifyForm())
616  return InvalidCost;
617 
618  LLVM_DEBUG(dbgs() << "Considering loop '" << L.getName()
619  << "' as innermost loop.\n");
620 
621  // Compute the product of the trip counts of each other loop in the nest.
622  CacheCostTy TripCountsProduct = 1;
623  for (const auto &TC : TripCounts) {
624  if (TC.first == &L)
625  continue;
626  TripCountsProduct *= TC.second;
627  }
628 
629  CacheCostTy LoopCost = 0;
630  for (const ReferenceGroupTy &RG : RefGroups) {
631  CacheCostTy RefGroupCost = computeRefGroupCacheCost(RG, L);
632  LoopCost += RefGroupCost * TripCountsProduct;
633  }
634 
635  LLVM_DEBUG(dbgs().indent(2) << "Loop '" << L.getName()
636  << "' has cost=" << LoopCost << "\n");
637 
638  return LoopCost;
639 }
640 
641 CacheCostTy CacheCost::computeRefGroupCacheCost(const ReferenceGroupTy &RG,
642  const Loop &L) const {
643  assert(!RG.empty() && "Reference group should have at least one member.");
644 
645  const IndexedReference *Representative = RG.front().get();
646  return Representative->computeRefCost(L, TTI.getCacheLineSize());
647 }
648 
649 //===----------------------------------------------------------------------===//
650 // LoopCachePrinterPass implementation
651 //
654  LPMUpdater &U) {
655  Function *F = L.getHeader()->getParent();
656  DependenceInfo DI(F, &AR.AA, &AR.SE, &AR.LI);
657 
658  if (auto CC = CacheCost::getCacheCost(L, AR, DI))
659  OS << *CC;
660 
661  return PreservedAnalyses::all();
662 }
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
TemporalReuseThreshold
static cl::opt< unsigned > TemporalReuseThreshold("temporal-reuse-threshold", cl::init(2), cl::Hidden, cl::desc("Use this to specify the max. distance between array elements " "accessed in a loop so that the elements are classified to have " "temporal reuse"))
isOneDimensionalArray
static bool isOneDimensionalArray(const SCEV &AccessFn, const SCEV &ElemSize, const Loop &L, ScalarEvolution &SE)
Definition: LoopCacheAnalysis.cpp:81
llvm::MemoryLocation::get
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
Definition: MemoryLocation.cpp:37
llvm::ScalarEvolution::getNegativeSCEV
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
Definition: ScalarEvolution.cpp:4129
llvm
This file implements support for optimizing divisions by a constant.
Definition: AllocatorList.h:23
llvm::none_of
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1565
BreadthFirstIterator.h
llvm::IndexedReference::hasSpacialReuse
Optional< bool > hasSpacialReuse(const IndexedReference &Other, unsigned CLS, AAResults &AA) const
Return true/false if the current object and the indexed reference Other are/aren't in the same cache ...
Definition: LoopCacheAnalysis.cpp:149
llvm::SCEVAddRecExpr::isAffine
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
Definition: ScalarEvolutionExpressions.h:379
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
llvm::SCEVAddRecExpr::getStart
const SCEV * getStart() const
Definition: ScalarEvolutionExpressions.h:363
llvm::IndexedReference::getSubscript
const SCEV * getSubscript(unsigned SubNum) const
Definition: LoopCacheAnalysis.h:56
llvm::Function
Definition: Function.h:62
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
Loops
Hexagon Hardware Loops
Definition: HexagonHardwareLoops.cpp:372
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::ScalarEvolution::getAddRecExpr
const SCEV * getAddRecExpr(const SCEV *Start, const SCEV *Step, const Loop *L, SCEV::NoWrapFlags Flags)
Get an add recurrence expression for the specified loop.
Definition: ScalarEvolution.cpp:3535
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:460
llvm::ScalarEvolution::getNoopOrAnyExtend
const SCEV * getNoopOrAnyExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
Definition: ScalarEvolution.cpp:4314
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::LoopStandardAnalysisResults
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Definition: LoopAnalysisManager.h:52
getInnerMostLoop
static Loop * getInnerMostLoop(const LoopVectorTy &Loops)
Retrieve the innermost loop in the given loop nest Loops.
Definition: LoopCacheAnalysis.cpp:62
llvm::Optional< bool >
llvm::ScalarEvolution::getUDivExactExpr
const SCEV * getUDivExactExpr(const SCEV *LHS, const SCEV *RHS)
Get a canonical unsigned division expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:3481
llvm::ScalarEvolution::getWiderType
Type * getWiderType(Type *Ty1, Type *Ty2) const
Definition: ScalarEvolution.cpp:3994
llvm::SCEVConstant::getValue
ConstantInt * getValue() const
Definition: ScalarEvolutionExpressions.h:56
Sequence.h
llvm::ScalarEvolution::getPointerBase
const SCEV * getPointerBase(const SCEV *V)
Transitively follow the chain of pointer-type operands until reaching a SCEV that does not have a sin...
Definition: ScalarEvolution.cpp:4381
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:207
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
AliasAnalysis.h
llvm::Optional::hasValue
constexpr bool hasValue() const
Definition: Optional.h:288
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::ScalarEvolution::getMulExpr
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:3010
CommandLine.h
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::LoopBase::getParentLoop
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Definition: LoopInfo.h:113
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1551
llvm::ScalarEvolution::getTripCountFromExitCount
const SCEV * getTripCountFromExitCount(const SCEV *ExitCount, bool Extend=true)
Convert from an "exit count" (i.e.
Definition: ScalarEvolution.cpp:7213
llvm::CacheCost::getCacheCost
static std::unique_ptr< CacheCost > getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI, Optional< unsigned > TRT=None)
Create a CacheCost for the loop nest rooted by Root.
Definition: LoopCacheAnalysis.cpp:497
L2
add sub stmia L5 ldr L2
Definition: README.txt:201
llvm::AAResults
Definition: AliasAnalysis.h:508
DefaultTripCount
static cl::opt< unsigned > DefaultTripCount("default-trip-count", cl::init(100), cl::Hidden, cl::desc("Use this to specify the default trip count of a loop"))
llvm::ScalarEvolution::getUDivExpr
const SCEV * getUDivExpr(const SCEV *LHS, const SCEV *RHS)
Get a canonical unsigned division expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:3304
llvm::IndexedReference::getNumSubscripts
size_t getNumSubscripts() const
Definition: LoopCacheAnalysis.h:55
llvm::breadth_first
iterator_range< bf_iterator< T > > breadth_first(const T &G)
Definition: BreadthFirstIterator.h:157
llvm::LoopCachePrinterPass::run
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition: LoopCacheAnalysis.cpp:652
llvm::LoopBase::getBlocks
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:171
llvm::ScalarEvolution::isKnownPredicate
bool isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
Definition: ScalarEvolution.cpp:9885
llvm::Instruction
Definition: Instruction.h:45
llvm::IndexedReference::computeRefCost
CacheCostTy computeRefCost(const Loop &L, unsigned CLS) const
Compute the cost of the reference w.r.t.
Definition: LoopCacheAnalysis.cpp:263
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
llvm::IndexedReference::hasTemporalReuse
Optional< bool > hasTemporalReuse(const IndexedReference &Other, unsigned MaxDistance, const Loop &L, DependenceInfo &DI, AAResults &AA) const
Return true if the current object and the indexed reference Other have distance smaller than MaxDista...
Definition: LoopCacheAnalysis.cpp:204
llvm::operator<<
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:230
IR
Statically lint checks LLVM IR
Definition: Lint.cpp:744
llvm::IndexedReference::IndexedReference
IndexedReference(Instruction &StoreOrLoadInst, const LoopInfo &LI, ScalarEvolution &SE)
Construct an indexed reference given a StoreOrLoadInst instruction.
Definition: LoopCacheAnalysis.cpp:137
llvm::Loop::getName
StringRef getName() const
Definition: LoopInfo.h:866
llvm::None
const NoneType None
Definition: None.h:23
LoopInfo.h
computeTripCount
static const SCEV * computeTripCount(const Loop &L, ScalarEvolution &SE)
Compute the trip count for the given loop L.
Definition: LoopCacheAnalysis.cpp:108
llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition: ScalarEvolution.cpp:4082
llvm::ScalarEvolution::getSmallConstantTripCount
unsigned getSmallConstantTripCount(const Loop *L)
Returns the exact trip count of the loop if we can compute it, and the result is a small constant.
Definition: ScalarEvolution.cpp:7244
CacheLineSize
static cl::opt< unsigned > CacheLineSize("ppc-loop-prefetch-cache-line", cl::Hidden, cl::init(64), cl::desc("The loop prefetch cache line size"))
llvm::Loop::isLoopSimplifyForm
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to,...
Definition: LoopInfo.cpp:478
llvm::cl::opt
Definition: CommandLine.h:1432
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:77
llvm::DependenceInfo
DependenceInfo - This class is the main dependence-analysis driver.
Definition: DependenceAnalysis.h:272
llvm::getPointerOperand
const Value * getPointerOperand(const Value *V)
A helper function that returns the pointer operand of a load, store or GEP instruction.
Definition: Instructions.h:5313
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
llvm::LPMUpdater
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
Definition: LoopPassManager.h:263
Addr
uint64_t Addr
Definition: ELFObjHandler.cpp:80
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::LoopInfoBase::getLoopFor
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:967
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::IndexedReference
Represents a memory reference as a base pointer and a set of indexing operations.
Definition: LoopCacheAnalysis.h:45
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
llvm::SCEVConstant
This class represents a constant integer value.
Definition: ScalarEvolutionExpressions.h:47
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::LoopBase::getLoopDepth
unsigned getLoopDepth() const
Return the nesting level of this loop.
Definition: LoopInfo.h:96
llvm::IndexedReference::getLastSubscript
const SCEV * getLastSubscript() const
Definition: LoopCacheAnalysis.h:64
llvm::SCEVNAryExpr::getNoWrapFlags
NoWrapFlags getNoWrapFlags(NoWrapFlags Mask=NoWrapMask) const
Definition: ScalarEvolutionExpressions.h:213
llvm::LoopInfo
Definition: LoopInfo.h:1083
llvm::ScalarEvolution::getConstant
const SCEV * getConstant(ConstantInt *V)
Definition: ScalarEvolution.cpp:444
llvm::CmpInst::ICMP_ULT
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:745
llvm::ConstantInt::isZero
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:194
llvm::LoopBase::isOutermost
bool isOutermost() const
Return true if the loop does not have a parent (natural) loop.
Definition: LoopInfo.h:168
llvm::ScalarEvolution::getElementSize
const SCEV * getElementSize(Instruction *Inst)
Return the size of an element read or written by Inst.
Definition: ScalarEvolution.cpp:12231
llvm::append_range
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
Definition: STLExtras.h:1748
llvm::LoopStandardAnalysisResults::LI
LoopInfo & LI
Definition: LoopAnalysisManager.h:56
llvm::PICLevel::Level
Level
Definition: CodeGen.h:33
llvm::ConstantInt::getSExtValue
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:148
llvm::ScalarEvolution::isLoopInvariant
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
Definition: ScalarEvolution.cpp:12633
llvm::TargetTransformInfo::getCacheLineSize
unsigned getCacheLineSize() const
Definition: TargetTransformInfo.cpp:627
llvm::SCEVAddRecExpr::getLoop
const Loop * getLoop() const
Definition: ScalarEvolutionExpressions.h:364
llvm::delinearize
void delinearize(ScalarEvolution &SE, const SCEV *Expr, SmallVectorImpl< const SCEV * > &Subscripts, SmallVectorImpl< const SCEV * > &Sizes, const SCEV *ElementSize)
Split this SCEVAddRecExpr into two vectors of SCEVs representing the subscripts and sizes of an array...
Definition: Delinearization.cpp:452
llvm::ScalarEvolution::getMinusSCEV
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
Definition: ScalarEvolution.cpp:4215
llvm::LoopStandardAnalysisResults::TTI
TargetTransformInfo & TTI
Definition: LoopAnalysisManager.h:59
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::SCEVAddRecExpr
This node represents a polynomial recurrence on the trip count of the specified loop.
Definition: ScalarEvolutionExpressions.h:352
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::ScalarEvolution::isSCEVable
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
Definition: ScalarEvolution.cpp:3966
llvm::is_sorted
bool is_sorted(R &&Range, Compare C)
Wrapper function around std::is_sorted to check if elements in a range R are sorted with respect to a...
Definition: STLExtras.h:1622
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:585
llvm::LoopStandardAnalysisResults::SE
ScalarEvolution & SE
Definition: LoopAnalysisManager.h:57
llvm::LoopStandardAnalysisResults::AA
AAResults & AA
Definition: LoopAnalysisManager.h:53
ScalarEvolutionExpressions.h
llvm::CacheCostTy
int64_t CacheCostTy
Definition: LoopCacheAnalysis.h:31
llvm::DependenceInfo::depends
std::unique_ptr< Dependence > depends(Instruction *Src, Instruction *Dst, bool PossiblyLoopIndependent)
depends - Tests for a dependence between the Src and Dst instructions.
Definition: DependenceAnalysis.cpp:3525
SmallVector.h
llvm::raw_ostream::indent
raw_ostream & indent(unsigned NumSpaces)
indent - Insert 'NumSpaces' spaces.
Definition: raw_ostream.cpp:497
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::CacheCost
CacheCost represents the estimated cost of a inner loop as the number of cache lines used by the memo...
Definition: LoopCacheAnalysis.h:174
TargetTransformInfo.h
Delinearization.h
llvm::AAResults::isMustAlias
bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A trivial helper function to check to see if the specified pointers are must-alias.
Definition: AliasAnalysis.h:580
LoopCacheAnalysis.h
llvm::SCEV::getType
Type * getType() const
Return the LLVM type of this SCEV expression.
Definition: ScalarEvolution.cpp:377
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
DependenceAnalysis.h
llvm::cl::desc
Definition: CommandLine.h:412
n
The same transformation can work with an even modulo with the addition of a and shrink the compare RHS by the same amount Unless the target supports that transformation probably isn t worthwhile The transformation can also easily be made to work with non zero equality for n
Definition: README.txt:685
llvm::ScalarEvolution::getBackedgeTakenCount
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
Definition: ScalarEvolution.cpp:7349
llvm::CacheCost::InvalidCost
static constexpr CacheCostTy InvalidCost
Definition: LoopCacheAnalysis.h:180
llvm::ScalarEvolution::isKnownNegative
bool isKnownNegative(const SCEV *S)
Test if the given expression is known to be negative.
Definition: ScalarEvolution.cpp:9802
llvm::ScalarEvolution::getSCEVAtScope
const SCEV * getSCEVAtScope(const SCEV *S, const Loop *L)
Return a SCEV expression for the specified value at the specified scope in the program.
Definition: ScalarEvolution.cpp:8707
llvm::CacheCost::CacheCost
CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, ScalarEvolution &SE, TargetTransformInfo &TTI, AAResults &AA, DependenceInfo &DI, Optional< unsigned > TRT=None)
Construct a CacheCost object for the loop nest described by Loops.
Definition: LoopCacheAnalysis.cpp:478
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
llvm::SCEVAddRecExpr::getStepRecurrence
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
Definition: ScalarEvolutionExpressions.h:370
Other
Optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1184