LLVM 23.0.0git
LoopCacheAnalysis.h
Go to the documentation of this file.
1//===- llvm/Analysis/LoopCacheAnalysis.h ------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file defines the interface for the loop cache analysis.
11///
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_ANALYSIS_LOOPCACHEANALYSIS_H
15#define LLVM_ANALYSIS_LOOPCACHEANALYSIS_H
16
18#include "llvm/IR/PassManager.h"
20#include <optional>
21
22namespace llvm {
23
24class AAResults;
25class DependenceInfo;
26class Instruction;
27class LPMUpdater;
28class raw_ostream;
29class LoopInfo;
30class Loop;
31class ScalarEvolution;
32class SCEV;
34
37
38/// Represents a memory reference as a base pointer and a set of indexing
39/// operations. For example given the array reference A[i][2j+1][3k+2] in a
40/// 3-dim loop nest:
41/// for(i=0;i<n;++i)
42/// for(j=0;j<m;++j)
43/// for(k=0;k<o;++k)
44/// ... A[i][2j+1][3k+2] ...
45/// We expect:
46/// BasePointer -> A
47/// Subscripts -> [{0,+,1}<%for.i>][{1,+,2}<%for.j>][{2,+,3}<%for.k>]
48/// Sizes -> [m][o][4]
51
52public:
53 /// Construct an indexed reference given a \p StoreOrLoadInst instruction.
54 IndexedReference(Instruction &StoreOrLoadInst, const LoopInfo &LI,
55 ScalarEvolution &SE);
56
57 bool isValid() const { return IsValid; }
58 const SCEV *getBasePointer() const { return BasePointer; }
59 size_t getNumSubscripts() const { return Subscripts.size(); }
60 const SCEV *getSubscript(unsigned SubNum) const {
61 assert(SubNum < getNumSubscripts() && "Invalid subscript number");
62 return Subscripts[SubNum];
63 }
64 const SCEV *getFirstSubscript() const {
65 assert(!Subscripts.empty() && "Expecting non-empty container");
66 return Subscripts.front();
67 }
68 const SCEV *getLastSubscript() const {
69 assert(!Subscripts.empty() && "Expecting non-empty container");
70 return Subscripts.back();
71 }
72
73 /// Return true/false if the current object and the indexed reference \p Other
74 /// are/aren't in the same cache line of size \p CLS. Two references are in
75 /// the same chace line iff the distance between them in the innermost
76 /// dimension is less than the cache line size. Return std::nullopt if unsure.
77 std::optional<bool> hasSpacialReuse(const IndexedReference &Other,
78 unsigned CLS, AAResults &AA) const;
79
80 /// Return true if the current object and the indexed reference \p Other
81 /// have distance smaller than \p MaxDistance in the dimension associated with
82 /// the given loop \p L. Return false if the distance is not smaller than \p
83 /// MaxDistance and std::nullopt if unsure.
84 std::optional<bool> hasTemporalReuse(const IndexedReference &Other,
85 unsigned MaxDistance, const Loop &L,
86 DependenceInfo &DI, AAResults &AA) const;
87
88 /// Compute the cost of the reference w.r.t. the given loop \p L when it is
89 /// considered in the innermost position in the loop nest.
90 /// The cost is defined as:
91 /// - equal to one if the reference is loop invariant, or
92 /// - equal to '(TripCount * stride) / cache_line_size' if:
93 /// + the reference stride is less than the cache line size, and
94 /// + the coefficient of this loop's index variable used in all other
95 /// subscripts is zero
96 /// - or otherwise equal to 'TripCount'.
97 CacheCostTy computeRefCost(const Loop &L, unsigned CLS) const;
98
99private:
100 /// Attempt to delinearize the indexed reference.
101 bool delinearize(const LoopInfo &LI);
102
103 /// Return true if the index reference is invariant with respect to loop \p L.
104 bool isLoopInvariant(const Loop &L) const;
105
106 /// Return true if the indexed reference is 'consecutive' in loop \p L.
107 /// An indexed reference is 'consecutive' if the only coefficient that uses
108 /// the loop induction variable is the rightmost one, and the access stride is
109 /// smaller than the cache line size \p CLS. Provide a valid \p Stride value
110 /// if the indexed reference is 'consecutive'.
111 bool isConsecutive(const Loop &L, const SCEV *&Stride, unsigned CLS) const;
112
113 /// Retrieve the index of the subscript corresponding to the given loop \p
114 /// L. Return a zero-based positive index if the subscript index is
115 /// succesfully located and a negative value otherwise. For example given the
116 /// indexed reference 'A[i][2j+1][3k+2]', the call
117 /// 'getSubscriptIndex(loop-k)' would return value 2.
118 int getSubscriptIndex(const Loop &L) const;
119
120 /// Return the coefficient used in the rightmost dimension.
121 const SCEV *getLastCoefficient() const;
122
123 /// Return true if the coefficient corresponding to induction variable of
124 /// loop \p L in the given \p Subscript is zero or is loop invariant in \p L.
125 bool isCoeffForLoopZeroOrInvariant(const SCEV &Subscript,
126 const Loop &L) const;
127
128 /// Verify that the given \p Subscript is 'well formed' (must be a simple add
129 /// recurrence).
130 bool isSimpleAddRecurrence(const SCEV &Subscript, const Loop &L) const;
131
132 /// Return true if the given reference \p Other is definetely aliased with
133 /// the indexed reference represented by this class.
134 bool isAliased(const IndexedReference &Other, AAResults &AA) const;
135
136private:
137 /// True if the reference can be delinearized, false otherwise.
138 bool IsValid = false;
139
140 /// Represent the memory reference instruction.
141 Instruction &StoreOrLoadInst;
142
143 /// The base pointer of the memory reference.
144 const SCEV *BasePointer = nullptr;
145
146 /// The subscript (indexes) of the memory reference.
148
149 /// The dimensions of the memory reference.
151
152 ScalarEvolution &SE;
153};
154
155/// A reference group represents a set of memory references that exhibit
156/// temporal or spacial reuse. Two references belong to the same
157/// reference group with respect to a inner loop L iff:
158/// 1. they have a loop independent dependency, or
159/// 2. they have a loop carried dependence with a small dependence distance
160/// (e.g. less than 2) carried by the inner loop, or
161/// 3. they refer to the same array, and the subscript in their innermost
162/// dimension is less than or equal to 'd' (where 'd' is less than the cache
163/// line size)
164///
165/// Intuitively a reference group represents memory references that access
166/// the same cache line. Conditions 1,2 above account for temporal reuse, while
167/// contition 3 accounts for spacial reuse.
170
171/// \c CacheCost represents the estimated cost of a inner loop as the number of
172/// cache lines used by the memory references it contains.
173/// The 'cache cost' of a loop 'L' in a loop nest 'LN' is computed as the sum of
174/// the cache costs of all of its reference groups when the loop is considered
175/// to be in the innermost position in the nest.
176/// A reference group represents memory references that fall into the same cache
177/// line. Each reference group is analysed with respect to the innermost loop in
178/// a loop nest. The cost of a reference is defined as follow:
179/// - one if it is loop invariant w.r.t the innermost loop,
180/// - equal to the loop trip count divided by the cache line times the
181/// reference stride if the reference stride is less than the cache line
182/// size (CLS), and the coefficient of this loop's index variable used in all
183/// other subscripts is zero (e.g. RefCost = TripCount/(CLS/RefStride))
184/// - equal to the innermost loop trip count if the reference stride is greater
185/// or equal to the cache line size CLS.
188 using LoopTripCountTy = std::pair<const Loop *, unsigned>;
189 using LoopCacheCostTy = std::pair<const Loop *, CacheCostTy>;
190
191public:
192 /// Construct a CacheCost object for the loop nest described by \p Loops.
193 /// The optional parameter \p TRT can be used to specify the max. distance
194 /// between array elements accessed in a loop so that the elements are
195 /// classified to have temporal reuse.
196 CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, ScalarEvolution &SE,
198 std::optional<unsigned> TRT = std::nullopt);
199
200 /// Create a CacheCost for the loop nest rooted by \p Root.
201 /// The optional parameter \p TRT can be used to specify the max. distance
202 /// between array elements accessed in a loop so that the elements are
203 /// classified to have temporal reuse.
204 static std::unique_ptr<CacheCost>
206 std::optional<unsigned> TRT = std::nullopt);
207
208 /// Return the estimated cost of loop \p L if the given loop is part of the
209 /// loop nest associated with this object. Return -1 otherwise.
210 CacheCostTy getLoopCost(const Loop &L) const {
211 auto IT = llvm::find_if(LoopCosts, [&L](const LoopCacheCostTy &LCC) {
212 return LCC.first == &L;
213 });
214 return (IT != LoopCosts.end()) ? (*IT).second : -1;
215 }
216
217 /// Return the estimated ordered loop costs.
218 ArrayRef<LoopCacheCostTy> getLoopCosts() const { return LoopCosts; }
219
220private:
221 /// Calculate the cache footprint of each loop in the nest (when it is
222 /// considered to be in the innermost position).
223 void calculateCacheFootprint();
224
225 /// Partition store/load instructions in the loop nest into reference groups.
226 /// Two or more memory accesses belong in the same reference group if they
227 /// share the same cache line.
228 bool populateReferenceGroups(ReferenceGroupsTy &RefGroups) const;
229
230 /// Calculate the cost of the given loop \p L assuming it is the innermost
231 /// loop in nest.
232 CacheCostTy computeLoopCacheCost(const Loop &L,
233 const ReferenceGroupsTy &RefGroups) const;
234
235 /// Compute the cost of a representative reference in reference group \p RG
236 /// when the given loop \p L is considered as the innermost loop in the nest.
237 /// The computed cost is an estimate for the number of cache lines used by the
238 /// reference group. The representative reference cost is defined as:
239 /// - equal to one if the reference is loop invariant, or
240 /// - equal to '(TripCount * stride) / cache_line_size' if (a) loop \p L's
241 /// induction variable is used only in the reference subscript associated
242 /// with loop \p L, and (b) the reference stride is less than the cache
243 /// line size, or
244 /// - TripCount otherwise
245 CacheCostTy computeRefGroupCacheCost(const ReferenceGroupTy &RG,
246 const Loop &L) const;
247
248 /// Sort the LoopCosts vector by decreasing cache cost.
249 void sortLoopCosts() {
250 stable_sort(LoopCosts,
251 [](const LoopCacheCostTy &A, const LoopCacheCostTy &B) {
252 return A.second > B.second;
253 });
254 }
255
256private:
257 /// Loops in the loop nest associated with this object.
258 LoopVectorTy Loops;
259
260 /// Trip counts for the loops in the loop nest associated with this object.
261 SmallVector<LoopTripCountTy, 3> TripCounts;
262
263 /// Cache costs for the loops in the loop nest associated with this object.
264 SmallVector<LoopCacheCostTy, 3> LoopCosts;
265
266 /// The max. distance between array elements accessed in a loop so that the
267 /// elements are classified to have temporal reuse.
268 std::optional<unsigned> TRT;
269
270 const LoopInfo &LI;
271 ScalarEvolution &SE;
272 TargetTransformInfo &TTI;
273 AAResults &AA;
274 DependenceInfo &DI;
275};
276
279
280/// Printer pass for the \c CacheCost results.
281class LoopCachePrinterPass : public PassInfoMixin<LoopCachePrinterPass> {
282 raw_ostream &OS;
283
284public:
285 explicit LoopCachePrinterPass(raw_ostream &OS) : OS(OS) {}
286
289
290 static bool isRequired() { return true; }
291};
292
293} // namespace llvm
294
295#endif // LLVM_ANALYSIS_LOOPCACHEANALYSIS_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This header defines various interfaces for pass management in LLVM.
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
This header provides classes for managing per-loop analyses.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
CacheCost represents the estimated cost of a inner loop as the number of cache lines used by the memo...
CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, ScalarEvolution &SE, TargetTransformInfo &TTI, AAResults &AA, DependenceInfo &DI, std::optional< unsigned > TRT=std::nullopt)
Construct a CacheCost object for the loop nest described by Loops.
ArrayRef< LoopCacheCostTy > getLoopCosts() const
Return the estimated ordered loop costs.
static std::unique_ptr< CacheCost > getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI, std::optional< unsigned > TRT=std::nullopt)
Create a CacheCost for the loop nest rooted by Root.
friend raw_ostream & operator<<(raw_ostream &OS, const CacheCost &CC)
CacheCostTy getLoopCost(const Loop &L) const
Return the estimated cost of loop L if the given loop is part of the loop nest associated with this o...
DependenceInfo - This class is the main dependence-analysis driver.
Represents a memory reference as a base pointer and a set of indexing operations.
const SCEV * getBasePointer() const
CacheCostTy computeRefCost(const Loop &L, unsigned CLS) const
Compute the cost of the reference w.r.t.
const SCEV * getSubscript(unsigned SubNum) const
const SCEV * getFirstSubscript() const
friend raw_ostream & operator<<(raw_ostream &OS, const IndexedReference &R)
std::optional< bool > hasSpacialReuse(const IndexedReference &Other, unsigned CLS, AAResults &AA) const
Return true/false if the current object and the indexed reference Other are/aren't in the same cache ...
std::optional< bool > hasTemporalReuse(const IndexedReference &Other, unsigned MaxDistance, const Loop &L, DependenceInfo &DI, AAResults &AA) const
Return true if the current object and the indexed reference Other have distance smaller than MaxDista...
IndexedReference(Instruction &StoreOrLoadInst, const LoopInfo &LI, ScalarEvolution &SE)
Construct an indexed reference given a StoreOrLoadInst instruction.
const SCEV * getLastSubscript() const
size_t getNumSubscripts() const
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
LoopCachePrinterPass(raw_ostream &OS)
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
This class represents an analyzed expression in the program.
The main scalar evolution driver.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
Abstract Attribute helper functions.
Definition Attributor.h:165
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
void stable_sort(R &&Range)
Definition STLExtras.h:2116
InstructionCost CacheCostTy
SmallVector< std::unique_ptr< IndexedReference >, 8 > ReferenceGroupTy
A reference group represents a set of memory references that exhibit temporal or spacial reuse.
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
SmallVector< Loop *, 8 > LoopVectorTy
@ Other
Any other memory.
Definition ModRef.h:68
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
void delinearize(ScalarEvolution &SE, const SCEV *Expr, SmallVectorImpl< const SCEV * > &Subscripts, SmallVectorImpl< const SCEV * > &Sizes, const SCEV *ElementSize)
Split this SCEVAddRecExpr into two vectors of SCEVs representing the subscripts and sizes of an array...
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772
SmallVector< ReferenceGroupTy, 8 > ReferenceGroupsTy
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition PassManager.h:70