LLVM 20.0.0git
LoopCacheAnalysis.h
Go to the documentation of this file.
1//===- llvm/Analysis/LoopCacheAnalysis.h ------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file defines the interface for the loop cache analysis.
11///
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_ANALYSIS_LOOPCACHEANALYSIS_H
15#define LLVM_ANALYSIS_LOOPCACHEANALYSIS_H
16
18#include "llvm/IR/PassManager.h"
19#include <optional>
20
21namespace llvm {
22
23class AAResults;
24class DependenceInfo;
25class Instruction;
26class LPMUpdater;
27class raw_ostream;
28class LoopInfo;
29class Loop;
30class ScalarEvolution;
31class SCEV;
32class TargetTransformInfo;
33
34using CacheCostTy = int64_t;
36
37/// Represents a memory reference as a base pointer and a set of indexing
38/// operations. For example given the array reference A[i][2j+1][3k+2] in a
39/// 3-dim loop nest:
40/// for(i=0;i<n;++i)
41/// for(j=0;j<m;++j)
42/// for(k=0;k<o;++k)
43/// ... A[i][2j+1][3k+2] ...
44/// We expect:
45/// BasePointer -> A
46/// Subscripts -> [{0,+,1}<%for.i>][{1,+,2}<%for.j>][{2,+,3}<%for.k>]
47/// Sizes -> [m][o][4]
50
51public:
52 /// Construct an indexed reference given a \p StoreOrLoadInst instruction.
53 IndexedReference(Instruction &StoreOrLoadInst, const LoopInfo &LI,
54 ScalarEvolution &SE);
55
56 bool isValid() const { return IsValid; }
57 const SCEV *getBasePointer() const { return BasePointer; }
58 size_t getNumSubscripts() const { return Subscripts.size(); }
59 const SCEV *getSubscript(unsigned SubNum) const {
60 assert(SubNum < getNumSubscripts() && "Invalid subscript number");
61 return Subscripts[SubNum];
62 }
63 const SCEV *getFirstSubscript() const {
64 assert(!Subscripts.empty() && "Expecting non-empty container");
65 return Subscripts.front();
66 }
67 const SCEV *getLastSubscript() const {
68 assert(!Subscripts.empty() && "Expecting non-empty container");
69 return Subscripts.back();
70 }
71
72 /// Return true/false if the current object and the indexed reference \p Other
73 /// are/aren't in the same cache line of size \p CLS. Two references are in
74 /// the same chace line iff the distance between them in the innermost
75 /// dimension is less than the cache line size. Return std::nullopt if unsure.
76 std::optional<bool> hasSpacialReuse(const IndexedReference &Other,
77 unsigned CLS, AAResults &AA) const;
78
79 /// Return true if the current object and the indexed reference \p Other
80 /// have distance smaller than \p MaxDistance in the dimension associated with
81 /// the given loop \p L. Return false if the distance is not smaller than \p
82 /// MaxDistance and std::nullopt if unsure.
83 std::optional<bool> hasTemporalReuse(const IndexedReference &Other,
84 unsigned MaxDistance, const Loop &L,
85 DependenceInfo &DI, AAResults &AA) const;
86
87 /// Compute the cost of the reference w.r.t. the given loop \p L when it is
88 /// considered in the innermost position in the loop nest.
89 /// The cost is defined as:
90 /// - equal to one if the reference is loop invariant, or
91 /// - equal to '(TripCount * stride) / cache_line_size' if:
92 /// + the reference stride is less than the cache line size, and
93 /// + the coefficient of this loop's index variable used in all other
94 /// subscripts is zero
95 /// - or otherwise equal to 'TripCount'.
96 CacheCostTy computeRefCost(const Loop &L, unsigned CLS) const;
97
98private:
99 /// Attempt to delinearize the indexed reference.
100 bool delinearize(const LoopInfo &LI);
101
102 /// Attempt to delinearize \p AccessFn for fixed-size arrays.
103 bool tryDelinearizeFixedSize(const SCEV *AccessFn,
105
106 /// Return true if the index reference is invariant with respect to loop \p L.
107 bool isLoopInvariant(const Loop &L) const;
108
109 /// Return true if the indexed reference is 'consecutive' in loop \p L.
110 /// An indexed reference is 'consecutive' if the only coefficient that uses
111 /// the loop induction variable is the rightmost one, and the access stride is
112 /// smaller than the cache line size \p CLS. Provide a valid \p Stride value
113 /// if the indexed reference is 'consecutive'.
114 bool isConsecutive(const Loop &L, const SCEV *&Stride, unsigned CLS) const;
115
116 /// Retrieve the index of the subscript corresponding to the given loop \p
117 /// L. Return a zero-based positive index if the subscript index is
118 /// succesfully located and a negative value otherwise. For example given the
119 /// indexed reference 'A[i][2j+1][3k+2]', the call
120 /// 'getSubscriptIndex(loop-k)' would return value 2.
121 int getSubscriptIndex(const Loop &L) const;
122
123 /// Return the coefficient used in the rightmost dimension.
124 const SCEV *getLastCoefficient() const;
125
126 /// Return true if the coefficient corresponding to induction variable of
127 /// loop \p L in the given \p Subscript is zero or is loop invariant in \p L.
128 bool isCoeffForLoopZeroOrInvariant(const SCEV &Subscript,
129 const Loop &L) const;
130
131 /// Verify that the given \p Subscript is 'well formed' (must be a simple add
132 /// recurrence).
133 bool isSimpleAddRecurrence(const SCEV &Subscript, const Loop &L) const;
134
135 /// Return true if the given reference \p Other is definetely aliased with
136 /// the indexed reference represented by this class.
137 bool isAliased(const IndexedReference &Other, AAResults &AA) const;
138
139private:
140 /// True if the reference can be delinearized, false otherwise.
141 bool IsValid = false;
142
143 /// Represent the memory reference instruction.
144 Instruction &StoreOrLoadInst;
145
146 /// The base pointer of the memory reference.
147 const SCEV *BasePointer = nullptr;
148
149 /// The subscript (indexes) of the memory reference.
151
152 /// The dimensions of the memory reference.
154
155 ScalarEvolution &SE;
156};
157
158/// A reference group represents a set of memory references that exhibit
159/// temporal or spacial reuse. Two references belong to the same
160/// reference group with respect to a inner loop L iff:
161/// 1. they have a loop independent dependency, or
162/// 2. they have a loop carried dependence with a small dependence distance
163/// (e.g. less than 2) carried by the inner loop, or
164/// 3. they refer to the same array, and the subscript in their innermost
165/// dimension is less than or equal to 'd' (where 'd' is less than the cache
166/// line size)
167///
168/// Intuitively a reference group represents memory references that access
169/// the same cache line. Conditions 1,2 above account for temporal reuse, while
170/// contition 3 accounts for spacial reuse.
173
174/// \c CacheCost represents the estimated cost of a inner loop as the number of
175/// cache lines used by the memory references it contains.
176/// The 'cache cost' of a loop 'L' in a loop nest 'LN' is computed as the sum of
177/// the cache costs of all of its reference groups when the loop is considered
178/// to be in the innermost position in the nest.
179/// A reference group represents memory references that fall into the same cache
180/// line. Each reference group is analysed with respect to the innermost loop in
181/// a loop nest. The cost of a reference is defined as follow:
182/// - one if it is loop invariant w.r.t the innermost loop,
183/// - equal to the loop trip count divided by the cache line times the
184/// reference stride if the reference stride is less than the cache line
185/// size (CLS), and the coefficient of this loop's index variable used in all
186/// other subscripts is zero (e.g. RefCost = TripCount/(CLS/RefStride))
187/// - equal to the innermost loop trip count if the reference stride is greater
188/// or equal to the cache line size CLS.
191 using LoopTripCountTy = std::pair<const Loop *, unsigned>;
192 using LoopCacheCostTy = std::pair<const Loop *, CacheCostTy>;
193
194public:
195 static CacheCostTy constexpr InvalidCost = -1;
196
197 /// Construct a CacheCost object for the loop nest described by \p Loops.
198 /// The optional parameter \p TRT can be used to specify the max. distance
199 /// between array elements accessed in a loop so that the elements are
200 /// classified to have temporal reuse.
201 CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, ScalarEvolution &SE,
203 std::optional<unsigned> TRT = std::nullopt);
204
205 /// Create a CacheCost for the loop nest rooted by \p Root.
206 /// The optional parameter \p TRT can be used to specify the max. distance
207 /// between array elements accessed in a loop so that the elements are
208 /// classified to have temporal reuse.
209 static std::unique_ptr<CacheCost>
211 std::optional<unsigned> TRT = std::nullopt);
212
213 /// Return the estimated cost of loop \p L if the given loop is part of the
214 /// loop nest associated with this object. Return -1 otherwise.
215 CacheCostTy getLoopCost(const Loop &L) const {
216 auto IT = llvm::find_if(LoopCosts, [&L](const LoopCacheCostTy &LCC) {
217 return LCC.first == &L;
218 });
219 return (IT != LoopCosts.end()) ? (*IT).second : -1;
220 }
221
222 /// Return the estimated ordered loop costs.
223 ArrayRef<LoopCacheCostTy> getLoopCosts() const { return LoopCosts; }
224
225private:
226 /// Calculate the cache footprint of each loop in the nest (when it is
227 /// considered to be in the innermost position).
228 void calculateCacheFootprint();
229
230 /// Partition store/load instructions in the loop nest into reference groups.
231 /// Two or more memory accesses belong in the same reference group if they
232 /// share the same cache line.
233 bool populateReferenceGroups(ReferenceGroupsTy &RefGroups) const;
234
235 /// Calculate the cost of the given loop \p L assuming it is the innermost
236 /// loop in nest.
237 CacheCostTy computeLoopCacheCost(const Loop &L,
238 const ReferenceGroupsTy &RefGroups) const;
239
240 /// Compute the cost of a representative reference in reference group \p RG
241 /// when the given loop \p L is considered as the innermost loop in the nest.
242 /// The computed cost is an estimate for the number of cache lines used by the
243 /// reference group. The representative reference cost is defined as:
244 /// - equal to one if the reference is loop invariant, or
245 /// - equal to '(TripCount * stride) / cache_line_size' if (a) loop \p L's
246 /// induction variable is used only in the reference subscript associated
247 /// with loop \p L, and (b) the reference stride is less than the cache
248 /// line size, or
249 /// - TripCount otherwise
250 CacheCostTy computeRefGroupCacheCost(const ReferenceGroupTy &RG,
251 const Loop &L) const;
252
253 /// Sort the LoopCosts vector by decreasing cache cost.
254 void sortLoopCosts() {
255 stable_sort(LoopCosts,
256 [](const LoopCacheCostTy &A, const LoopCacheCostTy &B) {
257 return A.second > B.second;
258 });
259 }
260
261private:
262 /// Loops in the loop nest associated with this object.
263 LoopVectorTy Loops;
264
265 /// Trip counts for the loops in the loop nest associated with this object.
266 SmallVector<LoopTripCountTy, 3> TripCounts;
267
268 /// Cache costs for the loops in the loop nest associated with this object.
269 SmallVector<LoopCacheCostTy, 3> LoopCosts;
270
271 /// The max. distance between array elements accessed in a loop so that the
272 /// elements are classified to have temporal reuse.
273 std::optional<unsigned> TRT;
274
275 const LoopInfo &LI;
276 ScalarEvolution &SE;
277 TargetTransformInfo &TTI;
278 AAResults &AA;
279 DependenceInfo &DI;
280};
281
282raw_ostream &operator<<(raw_ostream &OS, const IndexedReference &R);
283raw_ostream &operator<<(raw_ostream &OS, const CacheCost &CC);
284
285/// Printer pass for the \c CacheCost results.
286class LoopCachePrinterPass : public PassInfoMixin<LoopCachePrinterPass> {
287 raw_ostream &OS;
288
289public:
291
294
295 static bool isRequired() { return true; }
296};
297
298} // namespace llvm
299
300#endif // LLVM_ANALYSIS_LOOPCACHEANALYSIS_H
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
This header provides classes for managing per-loop analyses.
This header defines various interfaces for pass management in LLVM.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
CacheCost represents the estimated cost of a inner loop as the number of cache lines used by the memo...
ArrayRef< LoopCacheCostTy > getLoopCosts() const
Return the estimated ordered loop costs.
static std::unique_ptr< CacheCost > getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI, std::optional< unsigned > TRT=std::nullopt)
Create a CacheCost for the loop nest rooted by Root.
friend raw_ostream & operator<<(raw_ostream &OS, const CacheCost &CC)
static CacheCostTy constexpr InvalidCost
CacheCostTy getLoopCost(const Loop &L) const
Return the estimated cost of loop L if the given loop is part of the loop nest associated with this o...
DependenceInfo - This class is the main dependence-analysis driver.
Represents a memory reference as a base pointer and a set of indexing operations.
const SCEV * getBasePointer() const
CacheCostTy computeRefCost(const Loop &L, unsigned CLS) const
Compute the cost of the reference w.r.t.
const SCEV * getSubscript(unsigned SubNum) const
const SCEV * getFirstSubscript() const
friend raw_ostream & operator<<(raw_ostream &OS, const IndexedReference &R)
std::optional< bool > hasSpacialReuse(const IndexedReference &Other, unsigned CLS, AAResults &AA) const
Return true/false if the current object and the indexed reference Other are/aren't in the same cache ...
std::optional< bool > hasTemporalReuse(const IndexedReference &Other, unsigned MaxDistance, const Loop &L, DependenceInfo &DI, AAResults &AA) const
Return true if the current object and the indexed reference Other have distance smaller than MaxDista...
const SCEV * getLastSubscript() const
size_t getNumSubscripts() const
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
Printer pass for the CacheCost results.
LoopCachePrinterPass(raw_ostream &OS)
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
This class represents an analyzed expression in the program.
The main scalar evolution driver.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void stable_sort(R &&Range)
Definition: STLExtras.h:2020
@ Other
Any other memory.
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:292
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1749
SmallVector< Loop *, 8 > LoopVectorTy
int64_t CacheCostTy
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition: PassManager.h:69