LLVM  15.0.0git
LoopCacheAnalysis.h
Go to the documentation of this file.
1 //===- llvm/Analysis/LoopCacheAnalysis.h ------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file defines the interface for the loop cache analysis.
11 ///
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_ANALYSIS_LOOPCACHEANALYSIS_H
15 #define LLVM_ANALYSIS_LOOPCACHEANALYSIS_H
16 
18 #include "llvm/IR/PassManager.h"
19 
20 namespace llvm {
21 
22 class AAResults;
23 class DependenceInfo;
24 class Instruction;
25 class LPMUpdater;
26 class raw_ostream;
27 class LoopInfo;
28 class Loop;
29 class ScalarEvolution;
30 class SCEV;
31 class TargetTransformInfo;
32 
33 using CacheCostTy = int64_t;
35 
36 /// Represents a memory reference as a base pointer and a set of indexing
37 /// operations. For example given the array reference A[i][2j+1][3k+2] in a
38 /// 3-dim loop nest:
39 /// for(i=0;i<n;++i)
40 /// for(j=0;j<m;++j)
41 /// for(k=0;k<o;++k)
42 /// ... A[i][2j+1][3k+2] ...
43 /// We expect:
44 /// BasePointer -> A
45 /// Subscripts -> [{0,+,1}<%for.i>][{1,+,2}<%for.j>][{2,+,3}<%for.k>]
46 /// Sizes -> [m][o][4]
48  friend raw_ostream &operator<<(raw_ostream &OS, const IndexedReference &R);
49 
50 public:
51  /// Construct an indexed reference given a \p StoreOrLoadInst instruction.
52  IndexedReference(Instruction &StoreOrLoadInst, const LoopInfo &LI,
53  ScalarEvolution &SE);
54 
55  bool isValid() const { return IsValid; }
56  const SCEV *getBasePointer() const { return BasePointer; }
57  size_t getNumSubscripts() const { return Subscripts.size(); }
58  const SCEV *getSubscript(unsigned SubNum) const {
59  assert(SubNum < getNumSubscripts() && "Invalid subscript number");
60  return Subscripts[SubNum];
61  }
62  const SCEV *getFirstSubscript() const {
63  assert(!Subscripts.empty() && "Expecting non-empty container");
64  return Subscripts.front();
65  }
66  const SCEV *getLastSubscript() const {
67  assert(!Subscripts.empty() && "Expecting non-empty container");
68  return Subscripts.back();
69  }
70 
71  /// Return true/false if the current object and the indexed reference \p Other
72  /// are/aren't in the same cache line of size \p CLS. Two references are in
73  /// the same chace line iff the distance between them in the innermost
74  /// dimension is less than the cache line size. Return None if unsure.
75  Optional<bool> hasSpacialReuse(const IndexedReference &Other, unsigned CLS,
76  AAResults &AA) const;
77 
78  /// Return true if the current object and the indexed reference \p Other
79  /// have distance smaller than \p MaxDistance in the dimension associated with
80  /// the given loop \p L. Return false if the distance is not smaller than \p
81  /// MaxDistance and None if unsure.
83  unsigned MaxDistance, const Loop &L,
84  DependenceInfo &DI, AAResults &AA) const;
85 
86  /// Compute the cost of the reference w.r.t. the given loop \p L when it is
87  /// considered in the innermost position in the loop nest.
88  /// The cost is defined as:
89  /// - equal to one if the reference is loop invariant, or
90  /// - equal to '(TripCount * stride) / cache_line_size' if:
91  /// + the reference stride is less than the cache line size, and
92  /// + the coefficient of this loop's index variable used in all other
93  /// subscripts is zero
94  /// - or otherwise equal to 'TripCount'.
95  CacheCostTy computeRefCost(const Loop &L, unsigned CLS) const;
96 
97 private:
98  /// Attempt to delinearize the indexed reference.
99  bool delinearize(const LoopInfo &LI);
100 
101  bool tryDelinearizeFixedSize(ScalarEvolution *SE, Instruction *Src,
102  const SCEV *SrcAccessFn,
103  SmallVectorImpl<const SCEV *> &SrcSubscripts);
104 
105  /// Return true if the index reference is invariant with respect to loop \p L.
106  bool isLoopInvariant(const Loop &L) const;
107 
108  /// Return true if the indexed reference is 'consecutive' in loop \p L.
109  /// An indexed reference is 'consecutive' if the only coefficient that uses
110  /// the loop induction variable is the rightmost one, and the access stride is
111  /// smaller than the cache line size \p CLS.
112  bool isConsecutive(const Loop &L, unsigned CLS) const;
113 
114  /// Retrieve the index of the subscript corresponding to the given loop \p
115  /// L. Return a zero-based positive index if the subscript index is
116  /// succesfully located and a negative value otherwise. For example given the
117  /// indexed reference 'A[i][2j+1][3k+2]', the call
118  /// 'getSubscriptIndex(loop-k)' would return value 2.
119  int getSubscriptIndex(const Loop &L) const;
120 
121  /// Return the coefficient used in the rightmost dimension.
122  const SCEV *getLastCoefficient() const;
123 
124  /// Return true if the coefficient corresponding to induction variable of
125  /// loop \p L in the given \p Subscript is zero or is loop invariant in \p L.
126  bool isCoeffForLoopZeroOrInvariant(const SCEV &Subscript,
127  const Loop &L) const;
128 
129  /// Verify that the given \p Subscript is 'well formed' (must be a simple add
130  /// recurrence).
131  bool isSimpleAddRecurrence(const SCEV &Subscript, const Loop &L) const;
132 
133  /// Return true if the given reference \p Other is definetely aliased with
134  /// the indexed reference represented by this class.
135  bool isAliased(const IndexedReference &Other, AAResults &AA) const;
136 
137 private:
138  /// True if the reference can be delinearized, false otherwise.
139  bool IsValid = false;
140 
141  /// Represent the memory reference instruction.
142  Instruction &StoreOrLoadInst;
143 
144  /// The base pointer of the memory reference.
145  const SCEV *BasePointer = nullptr;
146 
147  /// The subscript (indexes) of the memory reference.
148  SmallVector<const SCEV *, 3> Subscripts;
149 
150  /// The dimensions of the memory reference.
152 
153  ScalarEvolution &SE;
154 };
155 
156 /// A reference group represents a set of memory references that exhibit
157 /// temporal or spacial reuse. Two references belong to the same
158 /// reference group with respect to a inner loop L iff:
159 /// 1. they have a loop independent dependency, or
160 /// 2. they have a loop carried dependence with a small dependence distance
161 /// (e.g. less than 2) carried by the inner loop, or
162 /// 3. they refer to the same array, and the subscript in their innermost
163 /// dimension is less than or equal to 'd' (where 'd' is less than the cache
164 /// line size)
165 ///
166 /// Intuitively a reference group represents memory references that access
167 /// the same cache line. Conditions 1,2 above account for temporal reuse, while
168 /// contition 3 accounts for spacial reuse.
171 
172 /// \c CacheCost represents the estimated cost of a inner loop as the number of
173 /// cache lines used by the memory references it contains.
174 /// The 'cache cost' of a loop 'L' in a loop nest 'LN' is computed as the sum of
175 /// the cache costs of all of its reference groups when the loop is considered
176 /// to be in the innermost position in the nest.
177 /// A reference group represents memory references that fall into the same cache
178 /// line. Each reference group is analysed with respect to the innermost loop in
179 /// a loop nest. The cost of a reference is defined as follow:
180 /// - one if it is loop invariant w.r.t the innermost loop,
181 /// - equal to the loop trip count divided by the cache line times the
182 /// reference stride if the reference stride is less than the cache line
183 /// size (CLS), and the coefficient of this loop's index variable used in all
184 /// other subscripts is zero (e.g. RefCost = TripCount/(CLS/RefStride))
185 /// - equal to the innermost loop trip count if the reference stride is greater
186 /// or equal to the cache line size CLS.
187 class CacheCost {
188  friend raw_ostream &operator<<(raw_ostream &OS, const CacheCost &CC);
189  using LoopTripCountTy = std::pair<const Loop *, unsigned>;
190  using LoopCacheCostTy = std::pair<const Loop *, CacheCostTy>;
191 
192 public:
193  static CacheCostTy constexpr InvalidCost = -1;
194 
195  /// Construct a CacheCost object for the loop nest described by \p Loops.
196  /// The optional parameter \p TRT can be used to specify the max. distance
197  /// between array elements accessed in a loop so that the elements are
198  /// classified to have temporal reuse.
199  CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, ScalarEvolution &SE,
201  Optional<unsigned> TRT = None);
202 
203  /// Create a CacheCost for the loop nest rooted by \p Root.
204  /// The optional parameter \p TRT can be used to specify the max. distance
205  /// between array elements accessed in a loop so that the elements are
206  /// classified to have temporal reuse.
207  static std::unique_ptr<CacheCost>
209  Optional<unsigned> TRT = None);
210 
211  /// Return the estimated cost of loop \p L if the given loop is part of the
212  /// loop nest associated with this object. Return -1 otherwise.
213  CacheCostTy getLoopCost(const Loop &L) const {
214  auto IT = llvm::find_if(LoopCosts, [&L](const LoopCacheCostTy &LCC) {
215  return LCC.first == &L;
216  });
217  return (IT != LoopCosts.end()) ? (*IT).second : -1;
218  }
219 
220  /// Return the estimated ordered loop costs.
221  ArrayRef<LoopCacheCostTy> getLoopCosts() const { return LoopCosts; }
222 
223 private:
224  /// Calculate the cache footprint of each loop in the nest (when it is
225  /// considered to be in the innermost position).
226  void calculateCacheFootprint();
227 
228  /// Partition store/load instructions in the loop nest into reference groups.
229  /// Two or more memory accesses belong in the same reference group if they
230  /// share the same cache line.
231  bool populateReferenceGroups(ReferenceGroupsTy &RefGroups) const;
232 
233  /// Calculate the cost of the given loop \p L assuming it is the innermost
234  /// loop in nest.
235  CacheCostTy computeLoopCacheCost(const Loop &L,
236  const ReferenceGroupsTy &RefGroups) const;
237 
238  /// Compute the cost of a representative reference in reference group \p RG
239  /// when the given loop \p L is considered as the innermost loop in the nest.
240  /// The computed cost is an estimate for the number of cache lines used by the
241  /// reference group. The representative reference cost is defined as:
242  /// - equal to one if the reference is loop invariant, or
243  /// - equal to '(TripCount * stride) / cache_line_size' if (a) loop \p L's
244  /// induction variable is used only in the reference subscript associated
245  /// with loop \p L, and (b) the reference stride is less than the cache
246  /// line size, or
247  /// - TripCount otherwise
248  CacheCostTy computeRefGroupCacheCost(const ReferenceGroupTy &RG,
249  const Loop &L) const;
250 
251  /// Sort the LoopCosts vector by decreasing cache cost.
252  void sortLoopCosts() {
253  stable_sort(LoopCosts,
254  [](const LoopCacheCostTy &A, const LoopCacheCostTy &B) {
255  return A.second > B.second;
256  });
257  }
258 
259 private:
260  /// Loops in the loop nest associated with this object.
261  LoopVectorTy Loops;
262 
263  /// Trip counts for the loops in the loop nest associated with this object.
264  SmallVector<LoopTripCountTy, 3> TripCounts;
265 
266  /// Cache costs for the loops in the loop nest associated with this object.
267  SmallVector<LoopCacheCostTy, 3> LoopCosts;
268 
269  /// The max. distance between array elements accessed in a loop so that the
270  /// elements are classified to have temporal reuse.
271  Optional<unsigned> TRT;
272 
273  const LoopInfo &LI;
274  ScalarEvolution &SE;
275  TargetTransformInfo &TTI;
276  AAResults &AA;
277  DependenceInfo &DI;
278 };
279 
280 raw_ostream &operator<<(raw_ostream &OS, const IndexedReference &R);
281 raw_ostream &operator<<(raw_ostream &OS, const CacheCost &CC);
282 
283 /// Printer pass for the \c CacheCost results.
284 class LoopCachePrinterPass : public PassInfoMixin<LoopCachePrinterPass> {
285  raw_ostream &OS;
286 
287 public:
288  explicit LoopCachePrinterPass(raw_ostream &OS) : OS(OS) {}
289 
292 };
293 
294 } // namespace llvm
295 
296 #endif // LLVM_ANALYSIS_LOOPCACHEANALYSIS_H
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::IndexedReference::hasSpacialReuse
Optional< bool > hasSpacialReuse(const IndexedReference &Other, unsigned CLS, AAResults &AA) const
Return true/false if the current object and the indexed reference Other are/aren't in the same cache ...
Definition: LoopCacheAnalysis.cpp:159
llvm::PassInfoMixin
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition: PassManager.h:371
llvm::IndexedReference::getSubscript
const SCEV * getSubscript(unsigned SubNum) const
Definition: LoopCacheAnalysis.h:58
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::CacheCost::getLoopCosts
ArrayRef< LoopCacheCostTy > getLoopCosts() const
Return the estimated ordered loop costs.
Definition: LoopCacheAnalysis.h:221
llvm::SmallVector< Loop *, 8 >
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:167
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:449
llvm::CacheCost::operator<<
friend raw_ostream & operator<<(raw_ostream &OS, const CacheCost &CC)
llvm::LoopStandardAnalysisResults
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Definition: LoopAnalysisManager.h:51
llvm::Optional< bool >
llvm::CacheCost::getLoopCost
CacheCostTy getLoopCost(const Loop &L) const
Return the estimated cost of loop L if the given loop is part of the loop nest associated with this o...
Definition: LoopCacheAnalysis.h:213
llvm::IndexedReference::isValid
bool isValid() const
Definition: LoopCacheAnalysis.h:55
llvm::IndexedReference::getFirstSubscript
const SCEV * getFirstSubscript() const
Definition: LoopCacheAnalysis.h:62
LoopAnalysisManager.h
llvm::LoopCachePrinterPass
Printer pass for the CacheCost results.
Definition: LoopCacheAnalysis.h:284
llvm::CacheCost::getCacheCost
static std::unique_ptr< CacheCost > getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI, Optional< unsigned > TRT=None)
Create a CacheCost for the loop nest rooted by Root.
Definition: LoopCacheAnalysis.cpp:592
llvm::AAResults
Definition: AliasAnalysis.h:511
llvm::LoopVectorTy
SmallVector< Loop *, 8 > LoopVectorTy
Definition: LoopCacheAnalysis.h:34
llvm::IndexedReference::getNumSubscripts
size_t getNumSubscripts() const
Definition: LoopCacheAnalysis.h:57
llvm::LoopCachePrinterPass::run
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition: LoopCacheAnalysis.cpp:747
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::Instruction
Definition: Instruction.h:42
llvm::IndexedReference::computeRefCost
CacheCostTy computeRefCost(const Loop &L, unsigned CLS) const
Compute the cost of the reference w.r.t.
Definition: LoopCacheAnalysis.cpp:273
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:54
llvm::IndexedReference::hasTemporalReuse
Optional< bool > hasTemporalReuse(const IndexedReference &Other, unsigned MaxDistance, const Loop &L, DependenceInfo &DI, AAResults &AA) const
Return true if the current object and the indexed reference Other have distance smaller than MaxDista...
Definition: LoopCacheAnalysis.cpp:214
llvm::operator<<
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:230
llvm::IndexedReference::IndexedReference
IndexedReference(Instruction &StoreOrLoadInst, const LoopInfo &LI, ScalarEvolution &SE)
Construct an indexed reference given a StoreOrLoadInst instruction.
Definition: LoopCacheAnalysis.cpp:147
llvm::None
const NoneType None
Definition: None.h:24
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:75
llvm::DependenceInfo
DependenceInfo - This class is the main dependence-analysis driver.
Definition: DependenceAnalysis.h:269
llvm::LPMUpdater
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
Definition: LoopPassManager.h:262
llvm::IndexedReference
Represents a memory reference as a base pointer and a set of indexing operations.
Definition: LoopCacheAnalysis.h:47
IT
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::ZeroOrMore, cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::IndexedReference::getLastSubscript
const SCEV * getLastSubscript() const
Definition: LoopCacheAnalysis.h:66
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1086
llvm::IndexedReference::operator<<
friend raw_ostream & operator<<(raw_ostream &OS, const IndexedReference &R)
llvm::find_if
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1634
llvm::stable_sort
void stable_sort(R &&Range)
Definition: STLExtras.h:1751
llvm::LoopCachePrinterPass::LoopCachePrinterPass
LoopCachePrinterPass(raw_ostream &OS)
Definition: LoopCacheAnalysis.h:288
PassManager.h
AA
llvm::IndexedReference::getBasePointer
const SCEV * getBasePointer() const
Definition: LoopCacheAnalysis.h:56
llvm::CacheCostTy
int64_t CacheCostTy
Definition: LoopCacheAnalysis.h:33
llvm::CacheCost
CacheCost represents the estimated cost of a inner loop as the number of cache lines used by the memo...
Definition: LoopCacheAnalysis.h:187
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
llvm::CacheCost::InvalidCost
static constexpr CacheCostTy InvalidCost
Definition: LoopCacheAnalysis.h:193
llvm::CacheCost::CacheCost
CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, ScalarEvolution &SE, TargetTransformInfo &TTI, AAResults &AA, DependenceInfo &DI, Optional< unsigned > TRT=None)
Construct a CacheCost object for the loop nest described by Loops.
Definition: LoopCacheAnalysis.cpp:574