LLVM 23.0.0git
LoopCacheAnalysis.h
Go to the documentation of this file.
1//===- llvm/Analysis/LoopCacheAnalysis.h ------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file defines the interface for the loop cache analysis.
11///
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_ANALYSIS_LOOPCACHEANALYSIS_H
15#define LLVM_ANALYSIS_LOOPCACHEANALYSIS_H
16
18#include "llvm/IR/PassManager.h"
20#include <optional>
21
22namespace llvm {
23
24class AAResults;
25class DependenceInfo;
26class Instruction;
27class LPMUpdater;
28class raw_ostream;
29class LoopInfo;
30class Loop;
31class ScalarEvolution;
32class SCEV;
34
37
38/// Represents a memory reference as a base pointer and a set of indexing
39/// operations. For example given the array reference A[i][2j+1][3k+2] in a
40/// 3-dim loop nest:
41/// for(i=0;i<n;++i)
42/// for(j=0;j<m;++j)
43/// for(k=0;k<o;++k)
44/// ... A[i][2j+1][3k+2] ...
45/// We expect:
46/// BasePointer -> A
47/// Subscripts -> [{0,+,1}<%for.i>][{1,+,2}<%for.j>][{2,+,3}<%for.k>]
48/// Sizes -> [m][o][4]
51 const IndexedReference &R);
52
53public:
54 /// Construct an indexed reference given a \p StoreOrLoadInst instruction.
55 LLVM_ABI IndexedReference(Instruction &StoreOrLoadInst, const LoopInfo &LI,
56 ScalarEvolution &SE);
57
58 bool isValid() const { return IsValid; }
59 const SCEV *getBasePointer() const { return BasePointer; }
60 size_t getNumSubscripts() const { return Subscripts.size(); }
61 const SCEV *getSubscript(unsigned SubNum) const {
62 assert(SubNum < getNumSubscripts() && "Invalid subscript number");
63 return Subscripts[SubNum];
64 }
65 const SCEV *getFirstSubscript() const {
66 assert(!Subscripts.empty() && "Expecting non-empty container");
67 return Subscripts.front();
68 }
69 const SCEV *getLastSubscript() const {
70 assert(!Subscripts.empty() && "Expecting non-empty container");
71 return Subscripts.back();
72 }
73
74 /// Return true/false if the current object and the indexed reference \p Other
75 /// are/aren't in the same cache line of size \p CLS. Two references are in
76 /// the same chace line iff the distance between them in the innermost
77 /// dimension is less than the cache line size. Return std::nullopt if unsure.
78 LLVM_ABI std::optional<bool> hasSpacialReuse(const IndexedReference &Other,
79 unsigned CLS,
80 AAResults &AA) const;
81
82 /// Return true if the current object and the indexed reference \p Other
83 /// have distance smaller than \p MaxDistance in the dimension associated with
84 /// the given loop \p L. Return false if the distance is not smaller than \p
85 /// MaxDistance and std::nullopt if unsure.
86 LLVM_ABI std::optional<bool>
87 hasTemporalReuse(const IndexedReference &Other, unsigned MaxDistance,
88 const Loop &L, DependenceInfo &DI, AAResults &AA) const;
89
90 /// Compute the cost of the reference w.r.t. the given loop \p L when it is
91 /// considered in the innermost position in the loop nest.
92 /// The cost is defined as:
93 /// - equal to one if the reference is loop invariant, or
94 /// - equal to '(TripCount * stride) / cache_line_size' if:
95 /// + the reference stride is less than the cache line size, and
96 /// + the coefficient of this loop's index variable used in all other
97 /// subscripts is zero
98 /// - or otherwise equal to 'TripCount'.
99 LLVM_ABI CacheCostTy computeRefCost(const Loop &L, unsigned CLS) const;
100
101private:
102 /// Attempt to delinearize the indexed reference.
103 bool delinearize(const LoopInfo &LI);
104
105 /// Return true if the index reference is invariant with respect to loop \p L.
106 bool isLoopInvariant(const Loop &L) const;
107
108 /// Return true if the indexed reference is 'consecutive' in loop \p L.
109 /// An indexed reference is 'consecutive' if the only coefficient that uses
110 /// the loop induction variable is the rightmost one, and the access stride is
111 /// smaller than the cache line size \p CLS. Provide a valid \p Stride value
112 /// if the indexed reference is 'consecutive'.
113 bool isConsecutive(const Loop &L, const SCEV *&Stride, unsigned CLS) const;
114
115 /// Retrieve the index of the subscript corresponding to the given loop \p
116 /// L. Return a zero-based positive index if the subscript index is
117 /// succesfully located and a negative value otherwise. For example given the
118 /// indexed reference 'A[i][2j+1][3k+2]', the call
119 /// 'getSubscriptIndex(loop-k)' would return value 2.
120 int getSubscriptIndex(const Loop &L) const;
121
122 /// Return the coefficient used in the rightmost dimension.
123 const SCEV *getLastCoefficient() const;
124
125 /// Return true if the coefficient corresponding to induction variable of
126 /// loop \p L in the given \p Subscript is zero or is loop invariant in \p L.
127 bool isCoeffForLoopZeroOrInvariant(const SCEV &Subscript,
128 const Loop &L) const;
129
130 /// Verify that the given \p Subscript is 'well formed' (must be a simple add
131 /// recurrence).
132 bool isSimpleAddRecurrence(const SCEV &Subscript, const Loop &L) const;
133
134 /// Return true if the given reference \p Other is definetely aliased with
135 /// the indexed reference represented by this class.
136 bool isAliased(const IndexedReference &Other, AAResults &AA) const;
137
138private:
139 /// True if the reference can be delinearized, false otherwise.
140 bool IsValid = false;
141
142 /// Represent the memory reference instruction.
143 Instruction &StoreOrLoadInst;
144
145 /// The base pointer of the memory reference.
146 const SCEV *BasePointer = nullptr;
147
148 /// The subscript (indexes) of the memory reference.
150
151 /// The dimensions of the memory reference.
153
154 ScalarEvolution &SE;
155};
156
157/// A reference group represents a set of memory references that exhibit
158/// temporal or spacial reuse. Two references belong to the same
159/// reference group with respect to a inner loop L iff:
160/// 1. they have a loop independent dependency, or
161/// 2. they have a loop carried dependence with a small dependence distance
162/// (e.g. less than 2) carried by the inner loop, or
163/// 3. they refer to the same array, and the subscript in their innermost
164/// dimension is less than or equal to 'd' (where 'd' is less than the cache
165/// line size)
166///
167/// Intuitively a reference group represents memory references that access
168/// the same cache line. Conditions 1,2 above account for temporal reuse, while
169/// contition 3 accounts for spacial reuse.
172
173/// \c CacheCost represents the estimated cost of a inner loop as the number of
174/// cache lines used by the memory references it contains.
175/// The 'cache cost' of a loop 'L' in a loop nest 'LN' is computed as the sum of
176/// the cache costs of all of its reference groups when the loop is considered
177/// to be in the innermost position in the nest.
178/// A reference group represents memory references that fall into the same cache
179/// line. Each reference group is analysed with respect to the innermost loop in
180/// a loop nest. The cost of a reference is defined as follow:
181/// - one if it is loop invariant w.r.t the innermost loop,
182/// - equal to the loop trip count divided by the cache line times the
183/// reference stride if the reference stride is less than the cache line
184/// size (CLS), and the coefficient of this loop's index variable used in all
185/// other subscripts is zero (e.g. RefCost = TripCount/(CLS/RefStride))
186/// - equal to the innermost loop trip count if the reference stride is greater
187/// or equal to the cache line size CLS.
190 using LoopTripCountTy = std::pair<const Loop *, unsigned>;
191 using LoopCacheCostTy = std::pair<const Loop *, CacheCostTy>;
192
193public:
194 /// Construct a CacheCost object for the loop nest described by \p Loops.
195 /// The optional parameter \p TRT can be used to specify the max. distance
196 /// between array elements accessed in a loop so that the elements are
197 /// classified to have temporal reuse.
198 LLVM_ABI CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI,
200 AAResults &AA, DependenceInfo &DI,
201 std::optional<unsigned> TRT = std::nullopt);
202
203 /// Create a CacheCost for the loop nest rooted by \p Root.
204 /// The optional parameter \p TRT can be used to specify the max. distance
205 /// between array elements accessed in a loop so that the elements are
206 /// classified to have temporal reuse.
207 LLVM_ABI static std::unique_ptr<CacheCost>
209 std::optional<unsigned> TRT = std::nullopt);
210
211 /// Return the estimated cost of loop \p L if the given loop is part of the
212 /// loop nest associated with this object. Return -1 otherwise.
213 CacheCostTy getLoopCost(const Loop &L) const {
214 auto IT = llvm::find_if(LoopCosts, [&L](const LoopCacheCostTy &LCC) {
215 return LCC.first == &L;
216 });
217 return (IT != LoopCosts.end()) ? (*IT).second : -1;
218 }
219
220 /// Return the estimated ordered loop costs.
221 ArrayRef<LoopCacheCostTy> getLoopCosts() const { return LoopCosts; }
222
223private:
224 /// Calculate the cache footprint of each loop in the nest (when it is
225 /// considered to be in the innermost position).
226 void calculateCacheFootprint();
227
228 /// Partition store/load instructions in the loop nest into reference groups.
229 /// Two or more memory accesses belong in the same reference group if they
230 /// share the same cache line.
231 bool populateReferenceGroups(ReferenceGroupsTy &RefGroups) const;
232
233 /// Calculate the cost of the given loop \p L assuming it is the innermost
234 /// loop in nest.
235 CacheCostTy computeLoopCacheCost(const Loop &L,
236 const ReferenceGroupsTy &RefGroups) const;
237
238 /// Compute the cost of a representative reference in reference group \p RG
239 /// when the given loop \p L is considered as the innermost loop in the nest.
240 /// The computed cost is an estimate for the number of cache lines used by the
241 /// reference group. The representative reference cost is defined as:
242 /// - equal to one if the reference is loop invariant, or
243 /// - equal to '(TripCount * stride) / cache_line_size' if (a) loop \p L's
244 /// induction variable is used only in the reference subscript associated
245 /// with loop \p L, and (b) the reference stride is less than the cache
246 /// line size, or
247 /// - TripCount otherwise
248 CacheCostTy computeRefGroupCacheCost(const ReferenceGroupTy &RG,
249 const Loop &L) const;
250
251 /// Sort the LoopCosts vector by decreasing cache cost.
252 void sortLoopCosts() {
253 stable_sort(LoopCosts,
254 [](const LoopCacheCostTy &A, const LoopCacheCostTy &B) {
255 return A.second > B.second;
256 });
257 }
258
259private:
260 /// Loops in the loop nest associated with this object.
261 LoopVectorTy Loops;
262
263 /// Trip counts for the loops in the loop nest associated with this object.
264 SmallVector<LoopTripCountTy, 3> TripCounts;
265
266 /// Cache costs for the loops in the loop nest associated with this object.
267 SmallVector<LoopCacheCostTy, 3> LoopCosts;
268
269 /// The max. distance between array elements accessed in a loop so that the
270 /// elements are classified to have temporal reuse.
271 std::optional<unsigned> TRT;
272
273 const LoopInfo &LI;
274 ScalarEvolution &SE;
275 TargetTransformInfo &TTI;
276 AAResults &AA;
277 DependenceInfo &DI;
278};
279
282
283/// Printer pass for the \c CacheCost results.
285 : public RequiredPassInfoMixin<LoopCachePrinterPass> {
286 raw_ostream &OS;
287
288public:
289 explicit LoopCachePrinterPass(raw_ostream &OS) : OS(OS) {}
290
293 LPMUpdater &U);
294};
295
296} // namespace llvm
297
298#endif // LLVM_ANALYSIS_LOOPCACHEANALYSIS_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_ABI
Definition Compiler.h:213
This header defines various interfaces for pass management in LLVM.
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
This header provides classes for managing per-loop analyses.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
CacheCost represents the estimated cost of a inner loop as the number of cache lines used by the memo...
LLVM_ABI CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, ScalarEvolution &SE, TargetTransformInfo &TTI, AAResults &AA, DependenceInfo &DI, std::optional< unsigned > TRT=std::nullopt)
Construct a CacheCost object for the loop nest described by Loops.
friend LLVM_ABI raw_ostream & operator<<(raw_ostream &OS, const CacheCost &CC)
ArrayRef< LoopCacheCostTy > getLoopCosts() const
Return the estimated ordered loop costs.
static LLVM_ABI std::unique_ptr< CacheCost > getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI, std::optional< unsigned > TRT=std::nullopt)
Create a CacheCost for the loop nest rooted by Root.
CacheCostTy getLoopCost(const Loop &L) const
Return the estimated cost of loop L if the given loop is part of the loop nest associated with this o...
DependenceInfo - This class is the main dependence-analysis driver.
Represents a memory reference as a base pointer and a set of indexing operations.
const SCEV * getBasePointer() const
LLVM_ABI CacheCostTy computeRefCost(const Loop &L, unsigned CLS) const
Compute the cost of the reference w.r.t.
const SCEV * getSubscript(unsigned SubNum) const
const SCEV * getFirstSubscript() const
LLVM_ABI std::optional< bool > hasSpacialReuse(const IndexedReference &Other, unsigned CLS, AAResults &AA) const
Return true/false if the current object and the indexed reference Other are/aren't in the same cache ...
LLVM_ABI std::optional< bool > hasTemporalReuse(const IndexedReference &Other, unsigned MaxDistance, const Loop &L, DependenceInfo &DI, AAResults &AA) const
Return true if the current object and the indexed reference Other have distance smaller than MaxDista...
LLVM_ABI IndexedReference(Instruction &StoreOrLoadInst, const LoopInfo &LI, ScalarEvolution &SE)
Construct an indexed reference given a StoreOrLoadInst instruction.
const SCEV * getLastSubscript() const
friend LLVM_ABI raw_ostream & operator<<(raw_ostream &OS, const IndexedReference &R)
size_t getNumSubscripts() const
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
LoopCachePrinterPass(raw_ostream &OS)
LLVM_ABI PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
This class represents an analyzed expression in the program.
The main scalar evolution driver.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
Abstract Attribute helper functions.
Definition Attributor.h:165
This is an optimization pass for GlobalISel generic memory operations.
void stable_sort(R &&Range)
Definition STLExtras.h:2115
InstructionCost CacheCostTy
SmallVector< std::unique_ptr< IndexedReference >, 8 > ReferenceGroupTy
A reference group represents a set of memory references that exhibit temporal or spacial reuse.
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
SmallVector< Loop *, 8 > LoopVectorTy
@ Other
Any other memory.
Definition ModRef.h:68
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
LLVM_ABI void delinearize(ScalarEvolution &SE, const SCEV *Expr, SmallVectorImpl< const SCEV * > &Subscripts, SmallVectorImpl< const SCEV * > &Sizes, const SCEV *ElementSize)
Split this SCEVAddRecExpr into two vectors of SCEVs representing the subscripts and sizes of an array...
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
SmallVector< ReferenceGroupTy, 8 > ReferenceGroupsTy
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
A CRTP mix-in for passes that should not be skipped.