LLVM  14.0.0git
LoopCacheAnalysis.h
Go to the documentation of this file.
1 //===- llvm/Analysis/LoopCacheAnalysis.h ------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file defines the interface for the loop cache analysis.
11 ///
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_ANALYSIS_LOOPCACHEANALYSIS_H
15 #define LLVM_ANALYSIS_LOOPCACHEANALYSIS_H
16 
18 #include "llvm/IR/Instructions.h"
19 #include "llvm/IR/PassManager.h"
21 
22 namespace llvm {
23 
24 class AAResults;
25 class DependenceInfo;
26 class LPMUpdater;
27 class ScalarEvolution;
28 class SCEV;
29 class TargetTransformInfo;
30 
31 using CacheCostTy = int64_t;
33 
34 /// Represents a memory reference as a base pointer and a set of indexing
35 /// operations. For example given the array reference A[i][2j+1][3k+2] in a
36 /// 3-dim loop nest:
37 /// for(i=0;i<n;++i)
38 /// for(j=0;j<m;++j)
39 /// for(k=0;k<o;++k)
40 /// ... A[i][2j+1][3k+2] ...
41 /// We expect:
42 /// BasePointer -> A
43 /// Subscripts -> [{0,+,1}<%for.i>][{1,+,2}<%for.j>][{2,+,3}<%for.k>]
44 /// Sizes -> [m][o][4]
46  friend raw_ostream &operator<<(raw_ostream &OS, const IndexedReference &R);
47 
48 public:
49  /// Construct an indexed reference given a \p StoreOrLoadInst instruction.
50  IndexedReference(Instruction &StoreOrLoadInst, const LoopInfo &LI,
51  ScalarEvolution &SE);
52 
53  bool isValid() const { return IsValid; }
54  const SCEV *getBasePointer() const { return BasePointer; }
55  size_t getNumSubscripts() const { return Subscripts.size(); }
56  const SCEV *getSubscript(unsigned SubNum) const {
57  assert(SubNum < getNumSubscripts() && "Invalid subscript number");
58  return Subscripts[SubNum];
59  }
60  const SCEV *getFirstSubscript() const {
61  assert(!Subscripts.empty() && "Expecting non-empty container");
62  return Subscripts.front();
63  }
64  const SCEV *getLastSubscript() const {
65  assert(!Subscripts.empty() && "Expecting non-empty container");
66  return Subscripts.back();
67  }
68 
69  /// Return true/false if the current object and the indexed reference \p Other
70  /// are/aren't in the same cache line of size \p CLS. Two references are in
71  /// the same chace line iff the distance between them in the innermost
72  /// dimension is less than the cache line size. Return None if unsure.
73  Optional<bool> hasSpacialReuse(const IndexedReference &Other, unsigned CLS,
74  AAResults &AA) const;
75 
76  /// Return true if the current object and the indexed reference \p Other
77  /// have distance smaller than \p MaxDistance in the dimension associated with
78  /// the given loop \p L. Return false if the distance is not smaller than \p
79  /// MaxDistance and None if unsure.
81  unsigned MaxDistance, const Loop &L,
82  DependenceInfo &DI, AAResults &AA) const;
83 
84  /// Compute the cost of the reference w.r.t. the given loop \p L when it is
85  /// considered in the innermost position in the loop nest.
86  /// The cost is defined as:
87  /// - equal to one if the reference is loop invariant, or
88  /// - equal to '(TripCount * stride) / cache_line_size' if:
89  /// + the reference stride is less than the cache line size, and
90  /// + the coefficient of this loop's index variable used in all other
91  /// subscripts is zero
92  /// - or otherwise equal to 'TripCount'.
93  CacheCostTy computeRefCost(const Loop &L, unsigned CLS) const;
94 
95 private:
96  /// Attempt to delinearize the indexed reference.
97  bool delinearize(const LoopInfo &LI);
98 
99  /// Return true if the index reference is invariant with respect to loop \p L.
100  bool isLoopInvariant(const Loop &L) const;
101 
102  /// Return true if the indexed reference is 'consecutive' in loop \p L.
103  /// An indexed reference is 'consecutive' if the only coefficient that uses
104  /// the loop induction variable is the rightmost one, and the access stride is
105  /// smaller than the cache line size \p CLS.
106  bool isConsecutive(const Loop &L, unsigned CLS) const;
107 
108  /// Return the coefficient used in the rightmost dimension.
109  const SCEV *getLastCoefficient() const;
110 
111  /// Return true if the coefficient corresponding to induction variable of
112  /// loop \p L in the given \p Subscript is zero or is loop invariant in \p L.
113  bool isCoeffForLoopZeroOrInvariant(const SCEV &Subscript,
114  const Loop &L) const;
115 
116  /// Verify that the given \p Subscript is 'well formed' (must be a simple add
117  /// recurrence).
118  bool isSimpleAddRecurrence(const SCEV &Subscript, const Loop &L) const;
119 
120  /// Return true if the given reference \p Other is definetely aliased with
121  /// the indexed reference represented by this class.
122  bool isAliased(const IndexedReference &Other, AAResults &AA) const;
123 
124 private:
125  /// True if the reference can be delinearized, false otherwise.
126  bool IsValid = false;
127 
128  /// Represent the memory reference instruction.
129  Instruction &StoreOrLoadInst;
130 
131  /// The base pointer of the memory reference.
132  const SCEV *BasePointer = nullptr;
133 
134  /// The subscript (indexes) of the memory reference.
135  SmallVector<const SCEV *, 3> Subscripts;
136 
137  /// The dimensions of the memory reference.
139 
140  ScalarEvolution &SE;
141 };
142 
143 /// A reference group represents a set of memory references that exhibit
144 /// temporal or spacial reuse. Two references belong to the same
145 /// reference group with respect to a inner loop L iff:
146 /// 1. they have a loop independent dependency, or
147 /// 2. they have a loop carried dependence with a small dependence distance
148 /// (e.g. less than 2) carried by the inner loop, or
149 /// 3. they refer to the same array, and the subscript in their innermost
150 /// dimension is less than or equal to 'd' (where 'd' is less than the cache
151 /// line size)
152 ///
153 /// Intuitively a reference group represents memory references that access
154 /// the same cache line. Conditions 1,2 above account for temporal reuse, while
155 /// contition 3 accounts for spacial reuse.
158 
159 /// \c CacheCost represents the estimated cost of a inner loop as the number of
160 /// cache lines used by the memory references it contains.
161 /// The 'cache cost' of a loop 'L' in a loop nest 'LN' is computed as the sum of
162 /// the cache costs of all of its reference groups when the loop is considered
163 /// to be in the innermost position in the nest.
164 /// A reference group represents memory references that fall into the same cache
165 /// line. Each reference group is analysed with respect to the innermost loop in
166 /// a loop nest. The cost of a reference is defined as follow:
167 /// - one if it is loop invariant w.r.t the innermost loop,
168 /// - equal to the loop trip count divided by the cache line times the
169 /// reference stride if the reference stride is less than the cache line
170 /// size (CLS), and the coefficient of this loop's index variable used in all
171 /// other subscripts is zero (e.g. RefCost = TripCount/(CLS/RefStride))
172 /// - equal to the innermost loop trip count if the reference stride is greater
173 /// or equal to the cache line size CLS.
174 class CacheCost {
175  friend raw_ostream &operator<<(raw_ostream &OS, const CacheCost &CC);
176  using LoopTripCountTy = std::pair<const Loop *, unsigned>;
177  using LoopCacheCostTy = std::pair<const Loop *, CacheCostTy>;
178 
179 public:
180  static CacheCostTy constexpr InvalidCost = -1;
181 
182  /// Construct a CacheCost object for the loop nest described by \p Loops.
183  /// The optional parameter \p TRT can be used to specify the max. distance
184  /// between array elements accessed in a loop so that the elements are
185  /// classified to have temporal reuse.
186  CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, ScalarEvolution &SE,
188  Optional<unsigned> TRT = None);
189 
190  /// Create a CacheCost for the loop nest rooted by \p Root.
191  /// The optional parameter \p TRT can be used to specify the max. distance
192  /// between array elements accessed in a loop so that the elements are
193  /// classified to have temporal reuse.
194  static std::unique_ptr<CacheCost>
196  Optional<unsigned> TRT = None);
197 
198  /// Return the estimated cost of loop \p L if the given loop is part of the
199  /// loop nest associated with this object. Return -1 otherwise.
200  CacheCostTy getLoopCost(const Loop &L) const {
201  auto IT = llvm::find_if(LoopCosts, [&L](const LoopCacheCostTy &LCC) {
202  return LCC.first == &L;
203  });
204  return (IT != LoopCosts.end()) ? (*IT).second : -1;
205  }
206 
207  /// Return the estimated ordered loop costs.
208  ArrayRef<LoopCacheCostTy> getLoopCosts() const { return LoopCosts; }
209 
210 private:
211  /// Calculate the cache footprint of each loop in the nest (when it is
212  /// considered to be in the innermost position).
213  void calculateCacheFootprint();
214 
215  /// Partition store/load instructions in the loop nest into reference groups.
216  /// Two or more memory accesses belong in the same reference group if they
217  /// share the same cache line.
218  bool populateReferenceGroups(ReferenceGroupsTy &RefGroups) const;
219 
220  /// Calculate the cost of the given loop \p L assuming it is the innermost
221  /// loop in nest.
222  CacheCostTy computeLoopCacheCost(const Loop &L,
223  const ReferenceGroupsTy &RefGroups) const;
224 
225  /// Compute the cost of a representative reference in reference group \p RG
226  /// when the given loop \p L is considered as the innermost loop in the nest.
227  /// The computed cost is an estimate for the number of cache lines used by the
228  /// reference group. The representative reference cost is defined as:
229  /// - equal to one if the reference is loop invariant, or
230  /// - equal to '(TripCount * stride) / cache_line_size' if (a) loop \p L's
231  /// induction variable is used only in the reference subscript associated
232  /// with loop \p L, and (b) the reference stride is less than the cache
233  /// line size, or
234  /// - TripCount otherwise
235  CacheCostTy computeRefGroupCacheCost(const ReferenceGroupTy &RG,
236  const Loop &L) const;
237 
238  /// Sort the LoopCosts vector by decreasing cache cost.
239  void sortLoopCosts() {
240  sort(LoopCosts, [](const LoopCacheCostTy &A, const LoopCacheCostTy &B) {
241  return A.second > B.second;
242  });
243  }
244 
245 private:
246  /// Loops in the loop nest associated with this object.
247  LoopVectorTy Loops;
248 
249  /// Trip counts for the loops in the loop nest associated with this object.
250  SmallVector<LoopTripCountTy, 3> TripCounts;
251 
252  /// Cache costs for the loops in the loop nest associated with this object.
253  SmallVector<LoopCacheCostTy, 3> LoopCosts;
254 
255  /// The max. distance between array elements accessed in a loop so that the
256  /// elements are classified to have temporal reuse.
257  Optional<unsigned> TRT;
258 
259  const LoopInfo &LI;
260  ScalarEvolution &SE;
261  TargetTransformInfo &TTI;
262  AAResults &AA;
263  DependenceInfo &DI;
264 };
265 
266 raw_ostream &operator<<(raw_ostream &OS, const IndexedReference &R);
267 raw_ostream &operator<<(raw_ostream &OS, const CacheCost &CC);
268 
269 /// Printer pass for the \c CacheCost results.
270 class LoopCachePrinterPass : public PassInfoMixin<LoopCachePrinterPass> {
271  raw_ostream &OS;
272 
273 public:
274  explicit LoopCachePrinterPass(raw_ostream &OS) : OS(OS) {}
275 
278 };
279 
280 } // namespace llvm
281 
282 #endif // LLVM_ANALYSIS_LOOPCACHEANALYSIS_H
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:23
llvm::IndexedReference::hasSpacialReuse
Optional< bool > hasSpacialReuse(const IndexedReference &Other, unsigned CLS, AAResults &AA) const
Return true/false if the current object and the indexed reference Other are/aren't in the same cache ...
Definition: LoopCacheAnalysis.cpp:149
llvm::PassInfoMixin
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition: PassManager.h:374
llvm::IndexedReference::getSubscript
const SCEV * getSubscript(unsigned SubNum) const
Definition: LoopCacheAnalysis.h:56
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::CacheCost::getLoopCosts
ArrayRef< LoopCacheCostTy > getLoopCosts() const
Return the estimated ordered loop costs.
Definition: LoopCacheAnalysis.h:208
llvm::SmallVector< Loop *, 8 >
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:169
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:460
llvm::CacheCost::operator<<
friend raw_ostream & operator<<(raw_ostream &OS, const CacheCost &CC)
llvm::LoopStandardAnalysisResults
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Definition: LoopAnalysisManager.h:52
llvm::Optional< bool >
llvm::CacheCost::getLoopCost
CacheCostTy getLoopCost(const Loop &L) const
Return the estimated cost of loop L if the given loop is part of the loop nest associated with this o...
Definition: LoopCacheAnalysis.h:200
llvm::IndexedReference::isValid
bool isValid() const
Definition: LoopCacheAnalysis.h:53
llvm::IndexedReference::getFirstSubscript
const SCEV * getFirstSubscript() const
Definition: LoopCacheAnalysis.h:60
LoopAnalysisManager.h
llvm::LoopCachePrinterPass
Printer pass for the CacheCost results.
Definition: LoopCacheAnalysis.h:270
llvm::CacheCost::getCacheCost
static std::unique_ptr< CacheCost > getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI, Optional< unsigned > TRT=None)
Create a CacheCost for the loop nest rooted by Root.
Definition: LoopCacheAnalysis.cpp:497
llvm::AAResults
Definition: AliasAnalysis.h:508
llvm::LoopVectorTy
SmallVector< Loop *, 8 > LoopVectorTy
Definition: LoopCacheAnalysis.h:32
llvm::IndexedReference::getNumSubscripts
size_t getNumSubscripts() const
Definition: LoopCacheAnalysis.h:55
llvm::LoopCachePrinterPass::run
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition: LoopCacheAnalysis.cpp:652
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::Instruction
Definition: Instruction.h:45
llvm::IndexedReference::computeRefCost
CacheCostTy computeRefCost(const Loop &L, unsigned CLS) const
Compute the cost of the reference w.r.t.
Definition: LoopCacheAnalysis.cpp:263
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
llvm::IndexedReference::hasTemporalReuse
Optional< bool > hasTemporalReuse(const IndexedReference &Other, unsigned MaxDistance, const Loop &L, DependenceInfo &DI, AAResults &AA) const
Return true if the current object and the indexed reference Other have distance smaller than MaxDista...
Definition: LoopCacheAnalysis.cpp:204
llvm::operator<<
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:230
llvm::IndexedReference::IndexedReference
IndexedReference(Instruction &StoreOrLoadInst, const LoopInfo &LI, ScalarEvolution &SE)
Construct an indexed reference given a StoreOrLoadInst instruction.
Definition: LoopCacheAnalysis.cpp:137
llvm::None
const NoneType None
Definition: None.h:23
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:77
llvm::DependenceInfo
DependenceInfo - This class is the main dependence-analysis driver.
Definition: DependenceAnalysis.h:272
llvm::LPMUpdater
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
Definition: LoopPassManager.h:263
llvm::IndexedReference
Represents a memory reference as a base pointer and a set of indexing operations.
Definition: LoopCacheAnalysis.h:45
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::IndexedReference::getLastSubscript
const SCEV * getLastSubscript() const
Definition: LoopCacheAnalysis.h:64
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1083
IT
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::ZeroOrMore, cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate IT block based on arch"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow deprecated IT based on ARMv8"), clEnumValN(NoRestrictedIT, "arm-no-restrict-it", "Allow IT blocks based on ARMv7")))
llvm::IndexedReference::operator<<
friend raw_ostream & operator<<(raw_ostream &OS, const IndexedReference &R)
llvm::find_if
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1627
llvm::LoopCachePrinterPass::LoopCachePrinterPass
LoopCachePrinterPass(raw_ostream &OS)
Definition: LoopCacheAnalysis.h:274
llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1541
PassManager.h
llvm::IndexedReference::getBasePointer
const SCEV * getBasePointer() const
Definition: LoopCacheAnalysis.h:54
llvm::CacheCostTy
int64_t CacheCostTy
Definition: LoopCacheAnalysis.h:31
Instructions.h
llvm::CacheCost
CacheCost represents the estimated cost of a inner loop as the number of cache lines used by the memo...
Definition: LoopCacheAnalysis.h:174
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
raw_ostream.h
llvm::CacheCost::InvalidCost
static constexpr CacheCostTy InvalidCost
Definition: LoopCacheAnalysis.h:180
llvm::CacheCost::CacheCost
CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, ScalarEvolution &SE, TargetTransformInfo &TTI, AAResults &AA, DependenceInfo &DI, Optional< unsigned > TRT=None)
Construct a CacheCost object for the loop nest described by Loops.
Definition: LoopCacheAnalysis.cpp:478