LLVM 20.0.0git
LoopCacheAnalysis.h
Go to the documentation of this file.
1//===- llvm/Analysis/LoopCacheAnalysis.h ------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file defines the interface for the loop cache analysis.
11///
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_ANALYSIS_LOOPCACHEANALYSIS_H
15#define LLVM_ANALYSIS_LOOPCACHEANALYSIS_H
16
18#include "llvm/IR/PassManager.h"
20#include <optional>
21
22namespace llvm {
23
24class AAResults;
25class DependenceInfo;
26class Instruction;
27class LPMUpdater;
28class raw_ostream;
29class LoopInfo;
30class Loop;
31class ScalarEvolution;
32class SCEV;
33class TargetTransformInfo;
34
37
38/// Represents a memory reference as a base pointer and a set of indexing
39/// operations. For example given the array reference A[i][2j+1][3k+2] in a
40/// 3-dim loop nest:
41/// for(i=0;i<n;++i)
42/// for(j=0;j<m;++j)
43/// for(k=0;k<o;++k)
44/// ... A[i][2j+1][3k+2] ...
45/// We expect:
46/// BasePointer -> A
47/// Subscripts -> [{0,+,1}<%for.i>][{1,+,2}<%for.j>][{2,+,3}<%for.k>]
48/// Sizes -> [m][o][4]
51
52public:
53 /// Construct an indexed reference given a \p StoreOrLoadInst instruction.
54 IndexedReference(Instruction &StoreOrLoadInst, const LoopInfo &LI,
55 ScalarEvolution &SE);
56
57 bool isValid() const { return IsValid; }
58 const SCEV *getBasePointer() const { return BasePointer; }
59 size_t getNumSubscripts() const { return Subscripts.size(); }
60 const SCEV *getSubscript(unsigned SubNum) const {
61 assert(SubNum < getNumSubscripts() && "Invalid subscript number");
62 return Subscripts[SubNum];
63 }
64 const SCEV *getFirstSubscript() const {
65 assert(!Subscripts.empty() && "Expecting non-empty container");
66 return Subscripts.front();
67 }
68 const SCEV *getLastSubscript() const {
69 assert(!Subscripts.empty() && "Expecting non-empty container");
70 return Subscripts.back();
71 }
72
73 /// Return true/false if the current object and the indexed reference \p Other
74 /// are/aren't in the same cache line of size \p CLS. Two references are in
75 /// the same chace line iff the distance between them in the innermost
76 /// dimension is less than the cache line size. Return std::nullopt if unsure.
77 std::optional<bool> hasSpacialReuse(const IndexedReference &Other,
78 unsigned CLS, AAResults &AA) const;
79
80 /// Return true if the current object and the indexed reference \p Other
81 /// have distance smaller than \p MaxDistance in the dimension associated with
82 /// the given loop \p L. Return false if the distance is not smaller than \p
83 /// MaxDistance and std::nullopt if unsure.
84 std::optional<bool> hasTemporalReuse(const IndexedReference &Other,
85 unsigned MaxDistance, const Loop &L,
86 DependenceInfo &DI, AAResults &AA) const;
87
88 /// Compute the cost of the reference w.r.t. the given loop \p L when it is
89 /// considered in the innermost position in the loop nest.
90 /// The cost is defined as:
91 /// - equal to one if the reference is loop invariant, or
92 /// - equal to '(TripCount * stride) / cache_line_size' if:
93 /// + the reference stride is less than the cache line size, and
94 /// + the coefficient of this loop's index variable used in all other
95 /// subscripts is zero
96 /// - or otherwise equal to 'TripCount'.
97 CacheCostTy computeRefCost(const Loop &L, unsigned CLS) const;
98
99private:
100 /// Attempt to delinearize the indexed reference.
101 bool delinearize(const LoopInfo &LI);
102
103 /// Attempt to delinearize \p AccessFn for fixed-size arrays.
104 bool tryDelinearizeFixedSize(const SCEV *AccessFn,
106
107 /// Return true if the index reference is invariant with respect to loop \p L.
108 bool isLoopInvariant(const Loop &L) const;
109
110 /// Return true if the indexed reference is 'consecutive' in loop \p L.
111 /// An indexed reference is 'consecutive' if the only coefficient that uses
112 /// the loop induction variable is the rightmost one, and the access stride is
113 /// smaller than the cache line size \p CLS. Provide a valid \p Stride value
114 /// if the indexed reference is 'consecutive'.
115 bool isConsecutive(const Loop &L, const SCEV *&Stride, unsigned CLS) const;
116
117 /// Retrieve the index of the subscript corresponding to the given loop \p
118 /// L. Return a zero-based positive index if the subscript index is
119 /// succesfully located and a negative value otherwise. For example given the
120 /// indexed reference 'A[i][2j+1][3k+2]', the call
121 /// 'getSubscriptIndex(loop-k)' would return value 2.
122 int getSubscriptIndex(const Loop &L) const;
123
124 /// Return the coefficient used in the rightmost dimension.
125 const SCEV *getLastCoefficient() const;
126
127 /// Return true if the coefficient corresponding to induction variable of
128 /// loop \p L in the given \p Subscript is zero or is loop invariant in \p L.
129 bool isCoeffForLoopZeroOrInvariant(const SCEV &Subscript,
130 const Loop &L) const;
131
132 /// Verify that the given \p Subscript is 'well formed' (must be a simple add
133 /// recurrence).
134 bool isSimpleAddRecurrence(const SCEV &Subscript, const Loop &L) const;
135
136 /// Return true if the given reference \p Other is definetely aliased with
137 /// the indexed reference represented by this class.
138 bool isAliased(const IndexedReference &Other, AAResults &AA) const;
139
140private:
141 /// True if the reference can be delinearized, false otherwise.
142 bool IsValid = false;
143
144 /// Represent the memory reference instruction.
145 Instruction &StoreOrLoadInst;
146
147 /// The base pointer of the memory reference.
148 const SCEV *BasePointer = nullptr;
149
150 /// The subscript (indexes) of the memory reference.
152
153 /// The dimensions of the memory reference.
155
156 ScalarEvolution &SE;
157};
158
159/// A reference group represents a set of memory references that exhibit
160/// temporal or spacial reuse. Two references belong to the same
161/// reference group with respect to a inner loop L iff:
162/// 1. they have a loop independent dependency, or
163/// 2. they have a loop carried dependence with a small dependence distance
164/// (e.g. less than 2) carried by the inner loop, or
165/// 3. they refer to the same array, and the subscript in their innermost
166/// dimension is less than or equal to 'd' (where 'd' is less than the cache
167/// line size)
168///
169/// Intuitively a reference group represents memory references that access
170/// the same cache line. Conditions 1,2 above account for temporal reuse, while
171/// contition 3 accounts for spacial reuse.
174
175/// \c CacheCost represents the estimated cost of a inner loop as the number of
176/// cache lines used by the memory references it contains.
177/// The 'cache cost' of a loop 'L' in a loop nest 'LN' is computed as the sum of
178/// the cache costs of all of its reference groups when the loop is considered
179/// to be in the innermost position in the nest.
180/// A reference group represents memory references that fall into the same cache
181/// line. Each reference group is analysed with respect to the innermost loop in
182/// a loop nest. The cost of a reference is defined as follow:
183/// - one if it is loop invariant w.r.t the innermost loop,
184/// - equal to the loop trip count divided by the cache line times the
185/// reference stride if the reference stride is less than the cache line
186/// size (CLS), and the coefficient of this loop's index variable used in all
187/// other subscripts is zero (e.g. RefCost = TripCount/(CLS/RefStride))
188/// - equal to the innermost loop trip count if the reference stride is greater
189/// or equal to the cache line size CLS.
192 using LoopTripCountTy = std::pair<const Loop *, unsigned>;
193 using LoopCacheCostTy = std::pair<const Loop *, CacheCostTy>;
194
195public:
196 /// Construct a CacheCost object for the loop nest described by \p Loops.
197 /// The optional parameter \p TRT can be used to specify the max. distance
198 /// between array elements accessed in a loop so that the elements are
199 /// classified to have temporal reuse.
200 CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, ScalarEvolution &SE,
202 std::optional<unsigned> TRT = std::nullopt);
203
204 /// Create a CacheCost for the loop nest rooted by \p Root.
205 /// The optional parameter \p TRT can be used to specify the max. distance
206 /// between array elements accessed in a loop so that the elements are
207 /// classified to have temporal reuse.
208 static std::unique_ptr<CacheCost>
210 std::optional<unsigned> TRT = std::nullopt);
211
212 /// Return the estimated cost of loop \p L if the given loop is part of the
213 /// loop nest associated with this object. Return -1 otherwise.
214 CacheCostTy getLoopCost(const Loop &L) const {
215 auto IT = llvm::find_if(LoopCosts, [&L](const LoopCacheCostTy &LCC) {
216 return LCC.first == &L;
217 });
218 return (IT != LoopCosts.end()) ? (*IT).second : -1;
219 }
220
221 /// Return the estimated ordered loop costs.
222 ArrayRef<LoopCacheCostTy> getLoopCosts() const { return LoopCosts; }
223
224private:
225 /// Calculate the cache footprint of each loop in the nest (when it is
226 /// considered to be in the innermost position).
227 void calculateCacheFootprint();
228
229 /// Partition store/load instructions in the loop nest into reference groups.
230 /// Two or more memory accesses belong in the same reference group if they
231 /// share the same cache line.
232 bool populateReferenceGroups(ReferenceGroupsTy &RefGroups) const;
233
234 /// Calculate the cost of the given loop \p L assuming it is the innermost
235 /// loop in nest.
236 CacheCostTy computeLoopCacheCost(const Loop &L,
237 const ReferenceGroupsTy &RefGroups) const;
238
239 /// Compute the cost of a representative reference in reference group \p RG
240 /// when the given loop \p L is considered as the innermost loop in the nest.
241 /// The computed cost is an estimate for the number of cache lines used by the
242 /// reference group. The representative reference cost is defined as:
243 /// - equal to one if the reference is loop invariant, or
244 /// - equal to '(TripCount * stride) / cache_line_size' if (a) loop \p L's
245 /// induction variable is used only in the reference subscript associated
246 /// with loop \p L, and (b) the reference stride is less than the cache
247 /// line size, or
248 /// - TripCount otherwise
249 CacheCostTy computeRefGroupCacheCost(const ReferenceGroupTy &RG,
250 const Loop &L) const;
251
252 /// Sort the LoopCosts vector by decreasing cache cost.
253 void sortLoopCosts() {
254 stable_sort(LoopCosts,
255 [](const LoopCacheCostTy &A, const LoopCacheCostTy &B) {
256 return A.second > B.second;
257 });
258 }
259
260private:
261 /// Loops in the loop nest associated with this object.
262 LoopVectorTy Loops;
263
264 /// Trip counts for the loops in the loop nest associated with this object.
265 SmallVector<LoopTripCountTy, 3> TripCounts;
266
267 /// Cache costs for the loops in the loop nest associated with this object.
268 SmallVector<LoopCacheCostTy, 3> LoopCosts;
269
270 /// The max. distance between array elements accessed in a loop so that the
271 /// elements are classified to have temporal reuse.
272 std::optional<unsigned> TRT;
273
274 const LoopInfo &LI;
275 ScalarEvolution &SE;
276 TargetTransformInfo &TTI;
277 AAResults &AA;
278 DependenceInfo &DI;
279};
280
281raw_ostream &operator<<(raw_ostream &OS, const IndexedReference &R);
282raw_ostream &operator<<(raw_ostream &OS, const CacheCost &CC);
283
284/// Printer pass for the \c CacheCost results.
285class LoopCachePrinterPass : public PassInfoMixin<LoopCachePrinterPass> {
286 raw_ostream &OS;
287
288public:
290
293
294 static bool isRequired() { return true; }
295};
296
297} // namespace llvm
298
299#endif // LLVM_ANALYSIS_LOOPCACHEANALYSIS_H
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
This header defines various interfaces for pass management in LLVM.
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
This header provides classes for managing per-loop analyses.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
CacheCost represents the estimated cost of a inner loop as the number of cache lines used by the memo...
ArrayRef< LoopCacheCostTy > getLoopCosts() const
Return the estimated ordered loop costs.
static std::unique_ptr< CacheCost > getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI, std::optional< unsigned > TRT=std::nullopt)
Create a CacheCost for the loop nest rooted by Root.
friend raw_ostream & operator<<(raw_ostream &OS, const CacheCost &CC)
CacheCostTy getLoopCost(const Loop &L) const
Return the estimated cost of loop L if the given loop is part of the loop nest associated with this o...
DependenceInfo - This class is the main dependence-analysis driver.
Represents a memory reference as a base pointer and a set of indexing operations.
const SCEV * getBasePointer() const
CacheCostTy computeRefCost(const Loop &L, unsigned CLS) const
Compute the cost of the reference w.r.t.
const SCEV * getSubscript(unsigned SubNum) const
const SCEV * getFirstSubscript() const
friend raw_ostream & operator<<(raw_ostream &OS, const IndexedReference &R)
std::optional< bool > hasSpacialReuse(const IndexedReference &Other, unsigned CLS, AAResults &AA) const
Return true/false if the current object and the indexed reference Other are/aren't in the same cache ...
std::optional< bool > hasTemporalReuse(const IndexedReference &Other, unsigned MaxDistance, const Loop &L, DependenceInfo &DI, AAResults &AA) const
Return true if the current object and the indexed reference Other have distance smaller than MaxDista...
const SCEV * getLastSubscript() const
size_t getNumSubscripts() const
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
Printer pass for the CacheCost results.
LoopCachePrinterPass(raw_ostream &OS)
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
This class represents an analyzed expression in the program.
The main scalar evolution driver.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void stable_sort(R &&Range)
Definition: STLExtras.h:2037
@ Other
Any other memory.
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:303
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1766
SmallVector< Loop *, 8 > LoopVectorTy
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition: PassManager.h:69