LLVM 22.0.0git
LoopCacheAnalysis.h
Go to the documentation of this file.
1//===- llvm/Analysis/LoopCacheAnalysis.h ------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file defines the interface for the loop cache analysis.
11///
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_ANALYSIS_LOOPCACHEANALYSIS_H
15#define LLVM_ANALYSIS_LOOPCACHEANALYSIS_H
16
18#include "llvm/IR/PassManager.h"
20#include <optional>
21
22namespace llvm {
23
24class AAResults;
25class DependenceInfo;
26class Instruction;
27class LPMUpdater;
28class raw_ostream;
29class LoopInfo;
30class Loop;
31class ScalarEvolution;
32class SCEV;
34
37
38/// Represents a memory reference as a base pointer and a set of indexing
39/// operations. For example given the array reference A[i][2j+1][3k+2] in a
40/// 3-dim loop nest:
41/// for(i=0;i<n;++i)
42/// for(j=0;j<m;++j)
43/// for(k=0;k<o;++k)
44/// ... A[i][2j+1][3k+2] ...
45/// We expect:
46/// BasePointer -> A
47/// Subscripts -> [{0,+,1}<%for.i>][{1,+,2}<%for.j>][{2,+,3}<%for.k>]
48/// Sizes -> [m][o][4]
51
52public:
53 /// Construct an indexed reference given a \p StoreOrLoadInst instruction.
54 IndexedReference(Instruction &StoreOrLoadInst, const LoopInfo &LI,
55 ScalarEvolution &SE);
56
57 bool isValid() const { return IsValid; }
58 const SCEV *getBasePointer() const { return BasePointer; }
59 size_t getNumSubscripts() const { return Subscripts.size(); }
60 const SCEV *getSubscript(unsigned SubNum) const {
61 assert(SubNum < getNumSubscripts() && "Invalid subscript number");
62 return Subscripts[SubNum];
63 }
64 const SCEV *getFirstSubscript() const {
65 assert(!Subscripts.empty() && "Expecting non-empty container");
66 return Subscripts.front();
67 }
68 const SCEV *getLastSubscript() const {
69 assert(!Subscripts.empty() && "Expecting non-empty container");
70 return Subscripts.back();
71 }
72
73 /// Return true/false if the current object and the indexed reference \p Other
74 /// are/aren't in the same cache line of size \p CLS. Two references are in
75 /// the same chace line iff the distance between them in the innermost
76 /// dimension is less than the cache line size. Return std::nullopt if unsure.
77 std::optional<bool> hasSpacialReuse(const IndexedReference &Other,
78 unsigned CLS, AAResults &AA) const;
79
80 /// Return true if the current object and the indexed reference \p Other
81 /// have distance smaller than \p MaxDistance in the dimension associated with
82 /// the given loop \p L. Return false if the distance is not smaller than \p
83 /// MaxDistance and std::nullopt if unsure.
84 std::optional<bool> hasTemporalReuse(const IndexedReference &Other,
85 unsigned MaxDistance, const Loop &L,
86 DependenceInfo &DI, AAResults &AA) const;
87
88 /// Compute the cost of the reference w.r.t. the given loop \p L when it is
89 /// considered in the innermost position in the loop nest.
90 /// The cost is defined as:
91 /// - equal to one if the reference is loop invariant, or
92 /// - equal to '(TripCount * stride) / cache_line_size' if:
93 /// + the reference stride is less than the cache line size, and
94 /// + the coefficient of this loop's index variable used in all other
95 /// subscripts is zero
96 /// - or otherwise equal to 'TripCount'.
97 CacheCostTy computeRefCost(const Loop &L, unsigned CLS) const;
98
99private:
100 /// Attempt to delinearize the indexed reference.
101 bool delinearize(const LoopInfo &LI);
102
103 /// Attempt to delinearize \p AccessFn for fixed-size arrays.
104 bool tryDelinearizeFixedSize(const SCEV *AccessFn,
106 const SCEV *ElementSize);
107
108 /// Return true if the index reference is invariant with respect to loop \p L.
109 bool isLoopInvariant(const Loop &L) const;
110
111 /// Return true if the indexed reference is 'consecutive' in loop \p L.
112 /// An indexed reference is 'consecutive' if the only coefficient that uses
113 /// the loop induction variable is the rightmost one, and the access stride is
114 /// smaller than the cache line size \p CLS. Provide a valid \p Stride value
115 /// if the indexed reference is 'consecutive'.
116 bool isConsecutive(const Loop &L, const SCEV *&Stride, unsigned CLS) const;
117
118 /// Retrieve the index of the subscript corresponding to the given loop \p
119 /// L. Return a zero-based positive index if the subscript index is
120 /// succesfully located and a negative value otherwise. For example given the
121 /// indexed reference 'A[i][2j+1][3k+2]', the call
122 /// 'getSubscriptIndex(loop-k)' would return value 2.
123 int getSubscriptIndex(const Loop &L) const;
124
125 /// Return the coefficient used in the rightmost dimension.
126 const SCEV *getLastCoefficient() const;
127
128 /// Return true if the coefficient corresponding to induction variable of
129 /// loop \p L in the given \p Subscript is zero or is loop invariant in \p L.
130 bool isCoeffForLoopZeroOrInvariant(const SCEV &Subscript,
131 const Loop &L) const;
132
133 /// Verify that the given \p Subscript is 'well formed' (must be a simple add
134 /// recurrence).
135 bool isSimpleAddRecurrence(const SCEV &Subscript, const Loop &L) const;
136
137 /// Return true if the given reference \p Other is definetely aliased with
138 /// the indexed reference represented by this class.
139 bool isAliased(const IndexedReference &Other, AAResults &AA) const;
140
141private:
142 /// True if the reference can be delinearized, false otherwise.
143 bool IsValid = false;
144
145 /// Represent the memory reference instruction.
146 Instruction &StoreOrLoadInst;
147
148 /// The base pointer of the memory reference.
149 const SCEV *BasePointer = nullptr;
150
151 /// The subscript (indexes) of the memory reference.
153
154 /// The dimensions of the memory reference.
156
157 ScalarEvolution &SE;
158};
159
160/// A reference group represents a set of memory references that exhibit
161/// temporal or spacial reuse. Two references belong to the same
162/// reference group with respect to a inner loop L iff:
163/// 1. they have a loop independent dependency, or
164/// 2. they have a loop carried dependence with a small dependence distance
165/// (e.g. less than 2) carried by the inner loop, or
166/// 3. they refer to the same array, and the subscript in their innermost
167/// dimension is less than or equal to 'd' (where 'd' is less than the cache
168/// line size)
169///
170/// Intuitively a reference group represents memory references that access
171/// the same cache line. Conditions 1,2 above account for temporal reuse, while
172/// contition 3 accounts for spacial reuse.
175
176/// \c CacheCost represents the estimated cost of a inner loop as the number of
177/// cache lines used by the memory references it contains.
178/// The 'cache cost' of a loop 'L' in a loop nest 'LN' is computed as the sum of
179/// the cache costs of all of its reference groups when the loop is considered
180/// to be in the innermost position in the nest.
181/// A reference group represents memory references that fall into the same cache
182/// line. Each reference group is analysed with respect to the innermost loop in
183/// a loop nest. The cost of a reference is defined as follow:
184/// - one if it is loop invariant w.r.t the innermost loop,
185/// - equal to the loop trip count divided by the cache line times the
186/// reference stride if the reference stride is less than the cache line
187/// size (CLS), and the coefficient of this loop's index variable used in all
188/// other subscripts is zero (e.g. RefCost = TripCount/(CLS/RefStride))
189/// - equal to the innermost loop trip count if the reference stride is greater
190/// or equal to the cache line size CLS.
193 using LoopTripCountTy = std::pair<const Loop *, unsigned>;
194 using LoopCacheCostTy = std::pair<const Loop *, CacheCostTy>;
195
196public:
197 /// Construct a CacheCost object for the loop nest described by \p Loops.
198 /// The optional parameter \p TRT can be used to specify the max. distance
199 /// between array elements accessed in a loop so that the elements are
200 /// classified to have temporal reuse.
201 CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, ScalarEvolution &SE,
203 std::optional<unsigned> TRT = std::nullopt);
204
205 /// Create a CacheCost for the loop nest rooted by \p Root.
206 /// The optional parameter \p TRT can be used to specify the max. distance
207 /// between array elements accessed in a loop so that the elements are
208 /// classified to have temporal reuse.
209 static std::unique_ptr<CacheCost>
211 std::optional<unsigned> TRT = std::nullopt);
212
213 /// Return the estimated cost of loop \p L if the given loop is part of the
214 /// loop nest associated with this object. Return -1 otherwise.
215 CacheCostTy getLoopCost(const Loop &L) const {
216 auto IT = llvm::find_if(LoopCosts, [&L](const LoopCacheCostTy &LCC) {
217 return LCC.first == &L;
218 });
219 return (IT != LoopCosts.end()) ? (*IT).second : -1;
220 }
221
222 /// Return the estimated ordered loop costs.
223 ArrayRef<LoopCacheCostTy> getLoopCosts() const { return LoopCosts; }
224
225private:
226 /// Calculate the cache footprint of each loop in the nest (when it is
227 /// considered to be in the innermost position).
228 void calculateCacheFootprint();
229
230 /// Partition store/load instructions in the loop nest into reference groups.
231 /// Two or more memory accesses belong in the same reference group if they
232 /// share the same cache line.
233 bool populateReferenceGroups(ReferenceGroupsTy &RefGroups) const;
234
235 /// Calculate the cost of the given loop \p L assuming it is the innermost
236 /// loop in nest.
237 CacheCostTy computeLoopCacheCost(const Loop &L,
238 const ReferenceGroupsTy &RefGroups) const;
239
240 /// Compute the cost of a representative reference in reference group \p RG
241 /// when the given loop \p L is considered as the innermost loop in the nest.
242 /// The computed cost is an estimate for the number of cache lines used by the
243 /// reference group. The representative reference cost is defined as:
244 /// - equal to one if the reference is loop invariant, or
245 /// - equal to '(TripCount * stride) / cache_line_size' if (a) loop \p L's
246 /// induction variable is used only in the reference subscript associated
247 /// with loop \p L, and (b) the reference stride is less than the cache
248 /// line size, or
249 /// - TripCount otherwise
250 CacheCostTy computeRefGroupCacheCost(const ReferenceGroupTy &RG,
251 const Loop &L) const;
252
253 /// Sort the LoopCosts vector by decreasing cache cost.
254 void sortLoopCosts() {
255 stable_sort(LoopCosts,
256 [](const LoopCacheCostTy &A, const LoopCacheCostTy &B) {
257 return A.second > B.second;
258 });
259 }
260
261private:
262 /// Loops in the loop nest associated with this object.
263 LoopVectorTy Loops;
264
265 /// Trip counts for the loops in the loop nest associated with this object.
266 SmallVector<LoopTripCountTy, 3> TripCounts;
267
268 /// Cache costs for the loops in the loop nest associated with this object.
269 SmallVector<LoopCacheCostTy, 3> LoopCosts;
270
271 /// The max. distance between array elements accessed in a loop so that the
272 /// elements are classified to have temporal reuse.
273 std::optional<unsigned> TRT;
274
275 const LoopInfo &LI;
276 ScalarEvolution &SE;
277 TargetTransformInfo &TTI;
278 AAResults &AA;
279 DependenceInfo &DI;
280};
281
284
285/// Printer pass for the \c CacheCost results.
286class LoopCachePrinterPass : public PassInfoMixin<LoopCachePrinterPass> {
287 raw_ostream &OS;
288
289public:
290 explicit LoopCachePrinterPass(raw_ostream &OS) : OS(OS) {}
291
294
295 static bool isRequired() { return true; }
296};
297
298} // namespace llvm
299
300#endif // LLVM_ANALYSIS_LOOPCACHEANALYSIS_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This header defines various interfaces for pass management in LLVM.
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
This header provides classes for managing per-loop analyses.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
CacheCost represents the estimated cost of a inner loop as the number of cache lines used by the memo...
CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, ScalarEvolution &SE, TargetTransformInfo &TTI, AAResults &AA, DependenceInfo &DI, std::optional< unsigned > TRT=std::nullopt)
Construct a CacheCost object for the loop nest described by Loops.
ArrayRef< LoopCacheCostTy > getLoopCosts() const
Return the estimated ordered loop costs.
static std::unique_ptr< CacheCost > getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI, std::optional< unsigned > TRT=std::nullopt)
Create a CacheCost for the loop nest rooted by Root.
friend raw_ostream & operator<<(raw_ostream &OS, const CacheCost &CC)
CacheCostTy getLoopCost(const Loop &L) const
Return the estimated cost of loop L if the given loop is part of the loop nest associated with this o...
DependenceInfo - This class is the main dependence-analysis driver.
Represents a memory reference as a base pointer and a set of indexing operations.
const SCEV * getBasePointer() const
CacheCostTy computeRefCost(const Loop &L, unsigned CLS) const
Compute the cost of the reference w.r.t.
const SCEV * getSubscript(unsigned SubNum) const
const SCEV * getFirstSubscript() const
friend raw_ostream & operator<<(raw_ostream &OS, const IndexedReference &R)
std::optional< bool > hasSpacialReuse(const IndexedReference &Other, unsigned CLS, AAResults &AA) const
Return true/false if the current object and the indexed reference Other are/aren't in the same cache ...
std::optional< bool > hasTemporalReuse(const IndexedReference &Other, unsigned MaxDistance, const Loop &L, DependenceInfo &DI, AAResults &AA) const
Return true if the current object and the indexed reference Other have distance smaller than MaxDista...
IndexedReference(Instruction &StoreOrLoadInst, const LoopInfo &LI, ScalarEvolution &SE)
Construct an indexed reference given a StoreOrLoadInst instruction.
const SCEV * getLastSubscript() const
size_t getNumSubscripts() const
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
LoopCachePrinterPass(raw_ostream &OS)
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
This class represents an analyzed expression in the program.
The main scalar evolution driver.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
Abstract Attribute helper functions.
Definition Attributor.h:165
This is an optimization pass for GlobalISel generic memory operations.
void stable_sort(R &&Range)
Definition STLExtras.h:2058
InstructionCost CacheCostTy
SmallVector< std::unique_ptr< IndexedReference >, 8 > ReferenceGroupTy
A reference group represents a set of memory references that exhibit temporal or spacial reuse.
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
SmallVector< Loop *, 8 > LoopVectorTy
@ Other
Any other memory.
Definition ModRef.h:68
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
void delinearize(ScalarEvolution &SE, const SCEV *Expr, SmallVectorImpl< const SCEV * > &Subscripts, SmallVectorImpl< const SCEV * > &Sizes, const SCEV *ElementSize)
Split this SCEVAddRecExpr into two vectors of SCEVs representing the subscripts and sizes of an array...
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1758
SmallVector< ReferenceGroupTy, 8 > ReferenceGroupsTy
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition PassManager.h:69