LLVM  10.0.0svn
LoopCacheAnalysis.cpp
Go to the documentation of this file.
1 //===- LoopCacheAnalysis.cpp - Loop Cache Analysis -------------------------==//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6 // See https://llvm.org/LICENSE.txt for license information.
7 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8 //
9 //===----------------------------------------------------------------------===//
10 ///
11 /// \file
12 /// This file defines the implementation for the loop cache analysis.
13 /// The implementation is largely based on the following paper:
14 ///
15 /// Compiler Optimizations for Improving Data Locality
16 /// By: Steve Carr, Katherine S. McKinley, Chau-Wen Tseng
17 /// http://www.cs.utexas.edu/users/mckinley/papers/asplos-1994.pdf
18 ///
19 /// The general approach taken to estimate the number of cache lines used by the
20 /// memory references in an inner loop is:
21 /// 1. Partition memory references that exhibit temporal or spacial reuse
22 /// into reference groups.
23 /// 2. For each loop L in the a loop nest LN:
24 /// a. Compute the cost of the reference group
25 /// b. Compute the loop cost by summing up the reference groups costs
26 //===----------------------------------------------------------------------===//
27 
30 #include "llvm/ADT/Sequence.h"
31 #include "llvm/ADT/SmallVector.h"
32 #include "llvm/Support/Debug.h"
33 
34 using namespace llvm;
35 
36 #define DEBUG_TYPE "loop-cache-cost"
37 
39  "default-trip-count", cl::init(100), cl::Hidden,
40  cl::desc("Use this to specify the default trip count of a loop"));
41 
42 // In this analysis two array references are considered to exhibit temporal
43 // reuse if they access either the same memory location, or a memory location
44 // with distance smaller than a configurable threshold.
46  "temporal-reuse-threshold", cl::init(2), cl::Hidden,
47  cl::desc("Use this to specify the max. distance between array elements "
48  "accessed in a loop so that the elements are classified to have "
49  "temporal reuse"));
50 
51 /// Retrieve the innermost loop in the given loop nest \p Loops. It returns a
52 /// nullptr if any loops in the loop vector supplied has more than one sibling.
53 /// The loop vector is expected to contain loops collected in breadth-first
54 /// order.
56  assert(!Loops.empty() && "Expecting a non-empy loop vector");
57 
58  Loop *LastLoop = Loops.back();
59  Loop *ParentLoop = LastLoop->getParentLoop();
60 
61  if (ParentLoop == nullptr) {
62  assert(Loops.size() == 1 && "Expecting a single loop");
63  return LastLoop;
64  }
65 
66  return (std::is_sorted(Loops.begin(), Loops.end(),
67  [](const Loop *L1, const Loop *L2) {
68  return L1->getLoopDepth() < L2->getLoopDepth();
69  }))
70  ? LastLoop
71  : nullptr;
72 }
73 
74 static bool isOneDimensionalArray(const SCEV &AccessFn, const SCEV &ElemSize,
75  const Loop &L, ScalarEvolution &SE) {
76  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(&AccessFn);
77  if (!AR || !AR->isAffine())
78  return false;
79 
80  assert(AR->getLoop() && "AR should have a loop");
81 
82  // Check that start and increment are not add recurrences.
83  const SCEV *Start = AR->getStart();
84  const SCEV *Step = AR->getStepRecurrence(SE);
85  if (isa<SCEVAddRecExpr>(Start) || isa<SCEVAddRecExpr>(Step))
86  return false;
87 
88  // Check that start and increment are both invariant in the loop.
89  if (!SE.isLoopInvariant(Start, &L) || !SE.isLoopInvariant(Step, &L))
90  return false;
91 
92  return AR->getStepRecurrence(SE) == &ElemSize;
93 }
94 
95 /// Compute the trip count for the given loop \p L. Return the SCEV expression
96 /// for the trip count or nullptr if it cannot be computed.
97 static const SCEV *computeTripCount(const Loop &L, ScalarEvolution &SE) {
98  const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(&L);
99  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
100  !isa<SCEVConstant>(BackedgeTakenCount))
101  return nullptr;
102 
103  return SE.getAddExpr(BackedgeTakenCount,
104  SE.getOne(BackedgeTakenCount->getType()));
105 }
106 
107 //===----------------------------------------------------------------------===//
108 // IndexedReference implementation
109 //
111  if (!R.IsValid) {
112  OS << R.StoreOrLoadInst;
113  OS << ", IsValid=false.";
114  return OS;
115  }
116 
117  OS << *R.BasePointer;
118  for (const SCEV *Subscript : R.Subscripts)
119  OS << "[" << *Subscript << "]";
120 
121  OS << ", Sizes: ";
122  for (const SCEV *Size : R.Sizes)
123  OS << "[" << *Size << "]";
124 
125  return OS;
126 }
127 
129  const LoopInfo &LI, ScalarEvolution &SE)
130  : StoreOrLoadInst(StoreOrLoadInst), SE(SE) {
131  assert((isa<StoreInst>(StoreOrLoadInst) || isa<LoadInst>(StoreOrLoadInst)) &&
132  "Expecting a load or store instruction");
133 
134  IsValid = delinearize(LI);
135  if (IsValid)
136  LLVM_DEBUG(dbgs().indent(2) << "Succesfully delinearized: " << *this
137  << "\n");
138 }
139 
141  unsigned CLS,
142  AliasAnalysis &AA) const {
143  assert(IsValid && "Expecting a valid reference");
144 
145  if (BasePointer != Other.getBasePointer() && !isAliased(Other, AA)) {
146  LLVM_DEBUG(dbgs().indent(2)
147  << "No spacial reuse: different base pointers\n");
148  return false;
149  }
150 
151  unsigned NumSubscripts = getNumSubscripts();
152  if (NumSubscripts != Other.getNumSubscripts()) {
153  LLVM_DEBUG(dbgs().indent(2)
154  << "No spacial reuse: different number of subscripts\n");
155  return false;
156  }
157 
158  // all subscripts must be equal, except the leftmost one (the last one).
159  for (auto SubNum : seq<unsigned>(0, NumSubscripts - 1)) {
160  if (getSubscript(SubNum) != Other.getSubscript(SubNum)) {
161  LLVM_DEBUG(dbgs().indent(2) << "No spacial reuse, different subscripts: "
162  << "\n\t" << *getSubscript(SubNum) << "\n\t"
163  << *Other.getSubscript(SubNum) << "\n");
164  return false;
165  }
166  }
167 
168  // the difference between the last subscripts must be less than the cache line
169  // size.
170  const SCEV *LastSubscript = getLastSubscript();
171  const SCEV *OtherLastSubscript = Other.getLastSubscript();
172  const SCEVConstant *Diff = dyn_cast<SCEVConstant>(
173  SE.getMinusSCEV(LastSubscript, OtherLastSubscript));
174 
175  if (Diff == nullptr) {
176  LLVM_DEBUG(dbgs().indent(2)
177  << "No spacial reuse, difference between subscript:\n\t"
178  << *LastSubscript << "\n\t" << OtherLastSubscript
179  << "\nis not constant.\n");
180  return None;
181  }
182 
183  bool InSameCacheLine = (Diff->getValue()->getSExtValue() < CLS);
184 
185  LLVM_DEBUG({
186  if (InSameCacheLine)
187  dbgs().indent(2) << "Found spacial reuse.\n";
188  else
189  dbgs().indent(2) << "No spacial reuse.\n";
190  });
191 
192  return InSameCacheLine;
193 }
194 
196  unsigned MaxDistance,
197  const Loop &L,
198  DependenceInfo &DI,
199  AliasAnalysis &AA) const {
200  assert(IsValid && "Expecting a valid reference");
201 
202  if (BasePointer != Other.getBasePointer() && !isAliased(Other, AA)) {
203  LLVM_DEBUG(dbgs().indent(2)
204  << "No temporal reuse: different base pointer\n");
205  return false;
206  }
207 
208  std::unique_ptr<Dependence> D =
209  DI.depends(&StoreOrLoadInst, &Other.StoreOrLoadInst, true);
210 
211  if (D == nullptr) {
212  LLVM_DEBUG(dbgs().indent(2) << "No temporal reuse: no dependence\n");
213  return false;
214  }
215 
216  if (D->isLoopIndependent()) {
217  LLVM_DEBUG(dbgs().indent(2) << "Found temporal reuse\n");
218  return true;
219  }
220 
221  // Check the dependence distance at every loop level. There is temporal reuse
222  // if the distance at the given loop's depth is small (|d| <= MaxDistance) and
223  // it is zero at every other loop level.
224  int LoopDepth = L.getLoopDepth();
225  int Levels = D->getLevels();
226  for (int Level = 1; Level <= Levels; ++Level) {
227  const SCEV *Distance = D->getDistance(Level);
228  const SCEVConstant *SCEVConst = dyn_cast_or_null<SCEVConstant>(Distance);
229 
230  if (SCEVConst == nullptr) {
231  LLVM_DEBUG(dbgs().indent(2) << "No temporal reuse: distance unknown\n");
232  return None;
233  }
234 
235  const ConstantInt &CI = *SCEVConst->getValue();
236  if (Level != LoopDepth && !CI.isZero()) {
237  LLVM_DEBUG(dbgs().indent(2)
238  << "No temporal reuse: distance is not zero at depth=" << Level
239  << "\n");
240  return false;
241  } else if (Level == LoopDepth && CI.getSExtValue() > MaxDistance) {
242  LLVM_DEBUG(
243  dbgs().indent(2)
244  << "No temporal reuse: distance is greater than MaxDistance at depth="
245  << Level << "\n");
246  return false;
247  }
248  }
249 
250  LLVM_DEBUG(dbgs().indent(2) << "Found temporal reuse\n");
251  return true;
252 }
253 
255  unsigned CLS) const {
256  assert(IsValid && "Expecting a valid reference");
257  LLVM_DEBUG({
258  dbgs().indent(2) << "Computing cache cost for:\n";
259  dbgs().indent(4) << *this << "\n";
260  });
261 
262  // If the indexed reference is loop invariant the cost is one.
263  if (isLoopInvariant(L)) {
264  LLVM_DEBUG(dbgs().indent(4) << "Reference is loop invariant: RefCost=1\n");
265  return 1;
266  }
267 
268  const SCEV *TripCount = computeTripCount(L, SE);
269  if (!TripCount) {
270  LLVM_DEBUG(dbgs() << "Trip count of loop " << L.getName()
271  << " could not be computed, using DefaultTripCount\n");
272  const SCEV *ElemSize = Sizes.back();
273  TripCount = SE.getConstant(ElemSize->getType(), DefaultTripCount);
274  }
275  LLVM_DEBUG(dbgs() << "TripCount=" << *TripCount << "\n");
276 
277  // If the indexed reference is 'consecutive' the cost is
278  // (TripCount*Stride)/CLS, otherwise the cost is TripCount.
279  const SCEV *RefCost = TripCount;
280 
281  if (isConsecutive(L, CLS)) {
282  const SCEV *Coeff = getLastCoefficient();
283  const SCEV *ElemSize = Sizes.back();
284  const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize);
285  const SCEV *CacheLineSize = SE.getConstant(Stride->getType(), CLS);
286  const SCEV *Numerator = SE.getMulExpr(Stride, TripCount);
287  RefCost = SE.getUDivExpr(Numerator, CacheLineSize);
288  LLVM_DEBUG(dbgs().indent(4)
289  << "Access is consecutive: RefCost=(TripCount*Stride)/CLS="
290  << *RefCost << "\n");
291  } else
292  LLVM_DEBUG(dbgs().indent(4)
293  << "Access is not consecutive: RefCost=TripCount=" << *RefCost
294  << "\n");
295 
296  // Attempt to fold RefCost into a constant.
297  if (auto ConstantCost = dyn_cast<SCEVConstant>(RefCost))
298  return ConstantCost->getValue()->getSExtValue();
299 
300  LLVM_DEBUG(dbgs().indent(4)
301  << "RefCost is not a constant! Setting to RefCost=InvalidCost "
302  "(invalid value).\n");
303 
304  return CacheCost::InvalidCost;
305 }
306 
307 bool IndexedReference::delinearize(const LoopInfo &LI) {
308  assert(Subscripts.empty() && "Subscripts should be empty");
309  assert(Sizes.empty() && "Sizes should be empty");
310  assert(!IsValid && "Should be called once from the constructor");
311  LLVM_DEBUG(dbgs() << "Delinearizing: " << StoreOrLoadInst << "\n");
312 
313  const SCEV *ElemSize = SE.getElementSize(&StoreOrLoadInst);
314  const BasicBlock *BB = StoreOrLoadInst.getParent();
315 
316  for (Loop *L = LI.getLoopFor(BB); L != nullptr; L = L->getParentLoop()) {
317  const SCEV *AccessFn =
318  SE.getSCEVAtScope(getPointerOperand(&StoreOrLoadInst), L);
319 
320  BasePointer = dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFn));
321  if (BasePointer == nullptr) {
322  LLVM_DEBUG(
323  dbgs().indent(2)
324  << "ERROR: failed to delinearize, can't identify base pointer\n");
325  return false;
326  }
327 
328  AccessFn = SE.getMinusSCEV(AccessFn, BasePointer);
329 
330  LLVM_DEBUG(dbgs().indent(2) << "In Loop '" << L->getName()
331  << "', AccessFn: " << *AccessFn << "\n");
332 
333  SE.delinearize(AccessFn, Subscripts, Sizes,
334  SE.getElementSize(&StoreOrLoadInst));
335 
336  if (Subscripts.empty() || Sizes.empty() ||
337  Subscripts.size() != Sizes.size()) {
338  // Attempt to determine whether we have a single dimensional array access.
339  // before giving up.
340  if (!isOneDimensionalArray(*AccessFn, *ElemSize, *L, SE)) {
341  LLVM_DEBUG(dbgs().indent(2)
342  << "ERROR: failed to delinearize reference\n");
343  Subscripts.clear();
344  Sizes.clear();
345  break;
346  }
347 
348  const SCEV *Div = SE.getUDivExactExpr(AccessFn, ElemSize);
349  Subscripts.push_back(Div);
350  Sizes.push_back(ElemSize);
351  }
352 
353  return all_of(Subscripts, [&](const SCEV *Subscript) {
354  return isSimpleAddRecurrence(*Subscript, *L);
355  });
356  }
357 
358  return false;
359 }
360 
361 bool IndexedReference::isLoopInvariant(const Loop &L) const {
362  Value *Addr = getPointerOperand(&StoreOrLoadInst);
363  assert(Addr != nullptr && "Expecting either a load or a store instruction");
364  assert(SE.isSCEVable(Addr->getType()) && "Addr should be SCEVable");
365 
366  if (SE.isLoopInvariant(SE.getSCEV(Addr), &L))
367  return true;
368 
369  // The indexed reference is loop invariant if none of the coefficients use
370  // the loop induction variable.
371  bool allCoeffForLoopAreZero = all_of(Subscripts, [&](const SCEV *Subscript) {
372  return isCoeffForLoopZeroOrInvariant(*Subscript, L);
373  });
374 
375  return allCoeffForLoopAreZero;
376 }
377 
378 bool IndexedReference::isConsecutive(const Loop &L, unsigned CLS) const {
379  // The indexed reference is 'consecutive' if the only coefficient that uses
380  // the loop induction variable is the last one...
381  const SCEV *LastSubscript = Subscripts.back();
382  for (const SCEV *Subscript : Subscripts) {
383  if (Subscript == LastSubscript)
384  continue;
385  if (!isCoeffForLoopZeroOrInvariant(*Subscript, L))
386  return false;
387  }
388 
389  // ...and the access stride is less than the cache line size.
390  const SCEV *Coeff = getLastCoefficient();
391  const SCEV *ElemSize = Sizes.back();
392  const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize);
393  const SCEV *CacheLineSize = SE.getConstant(Stride->getType(), CLS);
394 
395  return SE.isKnownPredicate(ICmpInst::ICMP_ULT, Stride, CacheLineSize);
396 }
397 
398 const SCEV *IndexedReference::getLastCoefficient() const {
399  const SCEV *LastSubscript = getLastSubscript();
400  assert(isa<SCEVAddRecExpr>(LastSubscript) &&
401  "Expecting a SCEV add recurrence expression");
402  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LastSubscript);
403  return AR->getStepRecurrence(SE);
404 }
405 
406 bool IndexedReference::isCoeffForLoopZeroOrInvariant(const SCEV &Subscript,
407  const Loop &L) const {
408  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(&Subscript);
409  return (AR != nullptr) ? AR->getLoop() != &L
410  : SE.isLoopInvariant(&Subscript, &L);
411 }
412 
413 bool IndexedReference::isSimpleAddRecurrence(const SCEV &Subscript,
414  const Loop &L) const {
415  if (!isa<SCEVAddRecExpr>(Subscript))
416  return false;
417 
418  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(&Subscript);
419  assert(AR->getLoop() && "AR should have a loop");
420 
421  if (!AR->isAffine())
422  return false;
423 
424  const SCEV *Start = AR->getStart();
425  const SCEV *Step = AR->getStepRecurrence(SE);
426 
427  if (!SE.isLoopInvariant(Start, &L) || !SE.isLoopInvariant(Step, &L))
428  return false;
429 
430  return true;
431 }
432 
433 bool IndexedReference::isAliased(const IndexedReference &Other,
434  AliasAnalysis &AA) const {
435  const auto &Loc1 = MemoryLocation::get(&StoreOrLoadInst);
436  const auto &Loc2 = MemoryLocation::get(&Other.StoreOrLoadInst);
437  return AA.isMustAlias(Loc1, Loc2);
438 }
439 
440 //===----------------------------------------------------------------------===//
441 // CacheCost implementation
442 //
444  for (const auto &LC : CC.LoopCosts) {
445  const Loop *L = LC.first;
446  OS << "Loop '" << L->getName() << "' has cost = " << LC.second << "\n";
447  }
448  return OS;
449 }
450 
453  AliasAnalysis &AA, DependenceInfo &DI,
454  Optional<unsigned> TRT)
455  : Loops(Loops), TripCounts(), LoopCosts(),
456  TRT(TRT == None ? Optional<unsigned>(TemporalReuseThreshold) : TRT),
457  LI(LI), SE(SE), TTI(TTI), AA(AA), DI(DI) {
458  assert(!Loops.empty() && "Expecting a non-empty loop vector.");
459 
460  for (const Loop *L : Loops) {
461  unsigned TripCount = SE.getSmallConstantTripCount(L);
462  TripCount = (TripCount == 0) ? DefaultTripCount : TripCount;
463  TripCounts.push_back({L, TripCount});
464  }
465 
466  calculateCacheFootprint();
467 }
468 
469 std::unique_ptr<CacheCost>
472  if (Root.getParentLoop()) {
473  LLVM_DEBUG(dbgs() << "Expecting the outermost loop in a loop nest\n");
474  return nullptr;
475  }
476 
477  LoopVectorTy Loops;
478  for (Loop *L : breadth_first(&Root))
479  Loops.push_back(L);
480 
481  if (!getInnerMostLoop(Loops)) {
482  LLVM_DEBUG(dbgs() << "Cannot compute cache cost of loop nest with more "
483  "than one innermost loop\n");
484  return nullptr;
485  }
486 
487  return std::make_unique<CacheCost>(Loops, AR.LI, AR.SE, AR.TTI, AR.AA, DI, TRT);
488 }
489 
490 void CacheCost::calculateCacheFootprint() {
491  LLVM_DEBUG(dbgs() << "POPULATING REFERENCE GROUPS\n");
492  ReferenceGroupsTy RefGroups;
493  if (!populateReferenceGroups(RefGroups))
494  return;
495 
496  LLVM_DEBUG(dbgs() << "COMPUTING LOOP CACHE COSTS\n");
497  for (const Loop *L : Loops) {
498  assert((std::find_if(LoopCosts.begin(), LoopCosts.end(),
499  [L](const LoopCacheCostTy &LCC) {
500  return LCC.first == L;
501  }) == LoopCosts.end()) &&
502  "Should not add duplicate element");
503  CacheCostTy LoopCost = computeLoopCacheCost(*L, RefGroups);
504  LoopCosts.push_back(std::make_pair(L, LoopCost));
505  }
506 
507  sortLoopCosts();
508  RefGroups.clear();
509 }
510 
511 bool CacheCost::populateReferenceGroups(ReferenceGroupsTy &RefGroups) const {
512  assert(RefGroups.empty() && "Reference groups should be empty");
513 
514  unsigned CLS = TTI.getCacheLineSize();
515  Loop *InnerMostLoop = getInnerMostLoop(Loops);
516  assert(InnerMostLoop != nullptr && "Expecting a valid innermost loop");
517 
518  for (BasicBlock *BB : InnerMostLoop->getBlocks()) {
519  for (Instruction &I : *BB) {
520  if (!isa<StoreInst>(I) && !isa<LoadInst>(I))
521  continue;
522 
523  std::unique_ptr<IndexedReference> R(new IndexedReference(I, LI, SE));
524  if (!R->isValid())
525  continue;
526 
527  bool Added = false;
528  for (ReferenceGroupTy &RefGroup : RefGroups) {
529  const IndexedReference &Representative = *RefGroup.front().get();
530  LLVM_DEBUG({
531  dbgs() << "References:\n";
532  dbgs().indent(2) << *R << "\n";
533  dbgs().indent(2) << Representative << "\n";
534  });
535 
536  Optional<bool> HasTemporalReuse =
537  R->hasTemporalReuse(Representative, *TRT, *InnerMostLoop, DI, AA);
538  Optional<bool> HasSpacialReuse =
539  R->hasSpacialReuse(Representative, CLS, AA);
540 
541  if ((HasTemporalReuse.hasValue() && *HasTemporalReuse) ||
542  (HasSpacialReuse.hasValue() && *HasSpacialReuse)) {
543  RefGroup.push_back(std::move(R));
544  Added = true;
545  break;
546  }
547  }
548 
549  if (!Added) {
550  ReferenceGroupTy RG;
551  RG.push_back(std::move(R));
552  RefGroups.push_back(std::move(RG));
553  }
554  }
555  }
556 
557  if (RefGroups.empty())
558  return false;
559 
560  LLVM_DEBUG({
561  dbgs() << "\nIDENTIFIED REFERENCE GROUPS:\n";
562  int n = 1;
563  for (const ReferenceGroupTy &RG : RefGroups) {
564  dbgs().indent(2) << "RefGroup " << n << ":\n";
565  for (const auto &IR : RG)
566  dbgs().indent(4) << *IR << "\n";
567  n++;
568  }
569  dbgs() << "\n";
570  });
571 
572  return true;
573 }
574 
576 CacheCost::computeLoopCacheCost(const Loop &L,
577  const ReferenceGroupsTy &RefGroups) const {
578  if (!L.isLoopSimplifyForm())
579  return InvalidCost;
580 
581  LLVM_DEBUG(dbgs() << "Considering loop '" << L.getName()
582  << "' as innermost loop.\n");
583 
584  // Compute the product of the trip counts of each other loop in the nest.
585  CacheCostTy TripCountsProduct = 1;
586  for (const auto &TC : TripCounts) {
587  if (TC.first == &L)
588  continue;
589  TripCountsProduct *= TC.second;
590  }
591 
592  CacheCostTy LoopCost = 0;
593  for (const ReferenceGroupTy &RG : RefGroups) {
594  CacheCostTy RefGroupCost = computeRefGroupCacheCost(RG, L);
595  LoopCost += RefGroupCost * TripCountsProduct;
596  }
597 
598  LLVM_DEBUG(dbgs().indent(2) << "Loop '" << L.getName()
599  << "' has cost=" << LoopCost << "\n");
600 
601  return LoopCost;
602 }
603 
604 CacheCostTy CacheCost::computeRefGroupCacheCost(const ReferenceGroupTy &RG,
605  const Loop &L) const {
606  assert(!RG.empty() && "Reference group should have at least one member.");
607 
608  const IndexedReference *Representative = RG.front().get();
609  return Representative->computeRefCost(L, TTI.getCacheLineSize());
610 }
611 
612 //===----------------------------------------------------------------------===//
613 // LoopCachePrinterPass implementation
614 //
617  LPMUpdater &U) {
618  Function *F = L.getHeader()->getParent();
619  DependenceInfo DI(F, &AR.AA, &AR.SE, &AR.LI);
620 
621  if (auto CC = CacheCost::getCacheCost(L, AR, DI))
622  OS << *CC;
623 
624  return PreservedAnalyses::all();
625 }
unsigned getSmallConstantTripCount(const Loop *L)
Returns the maximum trip count of the loop if it is a single-exit loop and we can compute a small max...
This routine provides some synthesis utilities to produce sequences of values.
static std::unique_ptr< CacheCost > getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI, Optional< unsigned > TRT=None)
Create a CacheCost for the loop nest rooted by Root.
const SCEV * getConstant(ConstantInt *V)
This class represents lattice values for constants.
Definition: AllocatorList.h:23
void delinearize(const SCEV *Expr, SmallVectorImpl< const SCEV *> &Subscripts, SmallVectorImpl< const SCEV *> &Sizes, const SCEV *ElementSize)
Split this SCEVAddRecExpr into two vectors of SCEVs representing the subscripts and sizes of an array...
int64_t CacheCostTy
static cl::opt< unsigned > TemporalReuseThreshold("temporal-reuse-threshold", cl::init(2), cl::Hidden, cl::desc("Use this to specify the max. distance between array elements " "accessed in a loop so that the elements are classified to have " "temporal reuse"))
static bool isOneDimensionalArray(const SCEV &AccessFn, const SCEV &ElemSize, const Loop &L, ScalarEvolution &SE)
unsigned getLoopDepth() const
Return the nesting level of this loop.
Definition: LoopInfo.h:97
void push_back(const T &Elt)
Definition: SmallVector.h:211
const SCEV * getPointerBase(const SCEV *V)
Transitively follow the chain of pointer-type operands until reaching a SCEV that does not have a sin...
The main scalar evolution driver.
Optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:948
raw_ostream & indent(unsigned NumSpaces)
indent - Insert &#39;NumSpaces&#39; spaces.
unsigned less than
Definition: InstrTypes.h:757
iterator_range< bf_iterator< T > > breadth_first(const T &G)
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1165
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
F(f)
DependenceInfo - This class is the main dependence-analysis driver.
Hexagon Hardware Loops
static Loop * getInnerMostLoop(const LoopVectorTy &Loops)
Retrieve the innermost loop in the given loop nest Loops.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:928
BlockT * getHeader() const
Definition: LoopInfo.h:105
ConstantInt * getValue() const
bool isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS...
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
This node represents a polynomial recurrence on the trip count of the specified loop.
static cl::opt< unsigned > CacheLineSize("ppc-loop-prefetch-cache-line", cl::Hidden, cl::init(64), cl::desc("The loop prefetch cache line size"))
static cl::opt< unsigned > DefaultTripCount("default-trip-count", cl::init(100), cl::Hidden, cl::desc("Use this to specify the default trip count of a loop"))
CacheCost represents the estimated cost of a inner loop as the number of cache lines used by the memo...
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
std::unique_ptr< Dependence > depends(Instruction *Src, Instruction *Dst, bool PossiblyLoopIndependent)
depends - Tests for a dependence between the Src and Dst instructions.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
const SCEV * getAddExpr(SmallVectorImpl< const SCEV *> &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
const SCEV * getSCEVAtScope(const SCEV *S, const Loop *L)
Return a SCEV expression for the specified value at the specified scope in the program.
static const SCEV * computeTripCount(const Loop &L, ScalarEvolution &SE)
Compute the trip count for the given loop L.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values...
Optional< bool > hasSpacialReuse(const IndexedReference &Other, unsigned CLS, AliasAnalysis &AA) const
Return true/false if the current object and the indexed reference Other are/aren&#39;t in the same cache ...
auto find_if(R &&Range, UnaryPredicate P) -> decltype(adl_begin(Range))
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1193
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
size_t size() const
Definition: SmallVector.h:52
const SCEV * getMulExpr(SmallVectorImpl< const SCEV *> &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
const SCEV * getUDivExactExpr(const SCEV *LHS, const SCEV *RHS)
Get a canonical unsigned division expression, or something simpler if possible.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
size_t getNumSubscripts() const
const SCEV * getSubscript(unsigned SubNum) const
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
Type * getType() const
Return the LLVM type of this SCEV expression.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Optional< bool > hasTemporalReuse(const IndexedReference &Other, unsigned MaxDistance, const Loop &L, DependenceInfo &DI, AliasAnalysis &AA) const
Return true if the current object and the indexed reference Other have distance smaller than MaxDista...
Represents a memory reference as a base pointer and a set of indexing operations. ...
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
const SCEV * getLastSubscript() const
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
IndexedReference(Instruction &StoreOrLoadInst, const LoopInfo &LI, ScalarEvolution &SE)
Construct an indexed reference given a StoreOrLoadInst instruction.
static CacheCostTy constexpr InvalidCost
CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, ScalarEvolution &SE, TargetTransformInfo &TTI, AliasAnalysis &AA, DependenceInfo &DI, Optional< unsigned > TRT=None)
Construct a CacheCost object for the loop nest described by Loops.
bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A trivial helper function to check to see if the specified pointers are must-alias.
LoopT * getParentLoop() const
Definition: LoopInfo.h:106
bool hasValue() const
Definition: Optional.h:259
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
Definition: LoopInfo.cpp:460
This class represents an analyzed expression in the program.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
StringRef getName() const
Definition: LoopInfo.h:827
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:509
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:154
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
#define I(x, y, z)
Definition: MD5.cpp:58
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:192
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
uint32_t Size
Definition: Profile.cpp:46
This file defines the interface for the loop cache analysis.
raw_ostream & operator<<(raw_ostream &OS, const APInt &I)
Definition: APInt.h:2045
const Value * getPointerOperand(const Value *V)
A helper function that returns the pointer operand of a load, store or GEP instruction.
const SCEV * getBackedgeTakenCount(const Loop *L)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:73
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
CacheCostTy computeRefCost(const Loop &L, unsigned CLS) const
Compute the cost of the reference w.r.t.
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
const SCEV * getBasePointer() const
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:45
const SCEV * getUDivExpr(const SCEV *LHS, const SCEV *RHS)
Get a canonical unsigned division expression, or something simpler if possible.
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
A container for analyses that lazily runs them and caches their results.
const SCEV * getElementSize(Instruction *Inst)
Return the size of an element read or written by Inst.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:156
#define LLVM_DEBUG(X)
Definition: Debug.h:122
Statically lint checks LLVM IR
Definition: Lint.cpp:192
const BasicBlock * getParent() const
Definition: Instruction.h:66
This class represents a constant integer value.