LLVM 22.0.0git
CachePruning.cpp
Go to the documentation of this file.
1//===-CachePruning.cpp - LLVM Cache Directory Pruning ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the pruning of a directory based on least recently used.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/StringRef.h"
15#include "llvm/Support/Debug.h"
16#include "llvm/Support/Errc.h"
17#include "llvm/Support/Error.h"
19#include "llvm/Support/Path.h"
22
23#define DEBUG_TYPE "cache-pruning"
24
25#include <set>
26#include <system_error>
27
28using namespace llvm;
29
30namespace {
/// One cache file that survived the expiration pass in pruneCache and is a
/// candidate for count-based or size-based pruning.
// NOTE(review): the declarations of the Time and Size members referenced by
// operator< are not present in this listing; pruneCache builds entries as
// {last access time, file size, path}.
31struct FileInfo {
// Path of the cache file; this is what gets passed to sys::fs::remove.
34 std::string Path;
35
36 /// Ordering used to decide which files to prune first. The same ordering
37 /// decides std::set membership, so it must take every field into account.
// Size is swapped between the two tuples, so entries sort by ascending Time,
// then by descending Size (of two files with the same access time the larger
// one comes first), then by ascending Path.
38 bool operator<(const FileInfo &Other) const {
39 return std::tie(Time, Other.Size, Path) <
40 std::tie(Other.Time, Size, Other.Path);
41 }
42};
43} // anonymous namespace
44
45/// Write a new timestamp file with the given path. This is used for the pruning
46/// interval option.
47static void writeTimestampFile(StringRef TimestampFile) {
48 std::error_code EC;
49 raw_fd_ostream Out(TimestampFile.str(), EC, sys::fs::OF_None);
50}
51
/// Body of parseDuration(StringRef Duration), whose result type is
/// Expected<std::chrono::seconds>. The input has the form "<integer><unit>",
/// where the unit is the final character and must be 's', 'm' or 'h'.
/// Any other input yields a StringError.
// NOTE(review): the signature line and the trailing argument of each
// make_error call are not present in this listing.
// Reject the empty string first so that size() - 1 and back() below are safe.
53 if (Duration.empty())
54 return make_error<StringError>("Duration must not be empty",
56
// Everything except the last character is the numeric part.
57 StringRef NumStr = Duration.slice(0, Duration.size()-1);
58 uint64_t Num;
// A true result from getAsInteger is treated as a parse failure.
59 if (NumStr.getAsInteger(0, Num))
60 return make_error<StringError>("'" + NumStr + "' not an integer",
62
// The last character selects the unit; minutes and hours are converted to
// seconds by the function's std::chrono::seconds result type.
63 switch (Duration.back()) {
64 case 's':
65 return std::chrono::seconds(Num);
66 case 'm':
67 return std::chrono::minutes(Num);
68 case 'h':
69 return std::chrono::hours(Num);
70 default:
// Unknown unit suffix: report the accepted ones.
72 "' must end with one of 's', 'm' or 'h'",
74 }
75}
76
79 CachePruningPolicy Policy;
80 std::pair<StringRef, StringRef> P = {"", PolicyStr};
81 while (!P.second.empty()) {
82 P = P.second.split(':');
83
85 std::tie(Key, Value) = P.first.split('=');
86 if (Key == "prune_interval") {
87 auto DurationOrErr = parseDuration(Value);
88 if (!DurationOrErr)
89 return DurationOrErr.takeError();
90 Policy.Interval = *DurationOrErr;
91 } else if (Key == "prune_after") {
92 auto DurationOrErr = parseDuration(Value);
93 if (!DurationOrErr)
94 return DurationOrErr.takeError();
95 Policy.Expiration = *DurationOrErr;
96 } else if (Key == "cache_size") {
97 if (Value.back() != '%')
98 return make_error<StringError>("'" + Value + "' must be a percentage",
100 StringRef SizeStr = Value.drop_back();
102 if (SizeStr.getAsInteger(0, Size))
103 return make_error<StringError>("'" + SizeStr + "' not an integer",
105 if (Size > 100)
106 return make_error<StringError>("'" + SizeStr +
107 "' must be between 0 and 100",
110 } else if (Key == "cache_size_bytes") {
111 uint64_t Mult = 1;
112 switch (tolower(Value.back())) {
113 case 'k':
114 Mult = 1024;
115 Value = Value.drop_back();
116 break;
117 case 'm':
118 Mult = 1024 * 1024;
119 Value = Value.drop_back();
120 break;
121 case 'g':
122 Mult = 1024 * 1024 * 1024;
123 Value = Value.drop_back();
124 break;
125 }
127 if (Value.getAsInteger(0, Size))
128 return make_error<StringError>("'" + Value + "' not an integer",
130 Policy.MaxSizeBytes = Size * Mult;
131 } else if (Key == "cache_size_files") {
132 if (Value.getAsInteger(0, Policy.MaxSizeFiles))
133 return make_error<StringError>("'" + Value + "' not an integer",
135 } else {
136 return make_error<StringError>("Unknown key: '" + Key + "'",
138 }
139 }
140
141 return Policy;
142}
143
144/// Prune the cache of files that haven't been accessed in a long time.
///
/// Returns false, without removing any cache file, when \p Path is empty or
/// is not a directory, when \p Policy sets no limit at all, when the
/// timestamp file cannot be examined, when a timestamp file exists but
/// Policy.Interval is unset, or when the timestamp file is not older than
/// Policy.Interval. Otherwise the timestamp file is rewritten, the directory
/// is walked, and true is returned.
///
/// Pruning runs in three passes over files whose name starts with
/// "llvmcache-" or "Thin-": files whose last access is older than
/// Policy.Expiration are removed during the walk; then the remaining files
/// are removed in FileInfo order until at most Policy.MaxSizeFiles are left;
/// then until the total size is within the size target.
///
/// \p Files is only read to warn when the buffers passed in already exceed
/// the configured file-count or size limit.
// NOTE(review): the first line of the signature and a few interior lines are
// not present in this listing.
146 const std::vector<std::unique_ptr<MemoryBuffer>> &Files) {
147 using namespace std::chrono;
148
// No cache directory was given.
149 if (Path.empty())
150 return false;
151
// A non-zero result from is_directory is treated as "cannot tell": give up.
152 bool isPathDir;
153 if (sys::fs::is_directory(Path, isPathDir))
154 return false;
155
156 if (!isPathDir)
157 return false;
158
// Caps the percentage at 100. NOTE(review): the left-hand side of this
// statement is not present in this listing; presumably the result is
// assigned back to Policy.MaxSizePercentageOfAvailableSpace.
160 std::min(Policy.MaxSizePercentageOfAvailableSpace, 100u);
161
162 if (Policy.Expiration == seconds(0) &&
164 Policy.MaxSizeBytes == 0 && Policy.MaxSizeFiles == 0) {
165 LLVM_DEBUG(dbgs() << "No pruning settings set, exit early\n");
166 // Nothing will be pruned, early exit
167 return false;
168 }
169
170 // Try to stat() the timestamp file.
171 SmallString<128> TimestampFile(Path);
172 sys::path::append(TimestampFile, "llvmcache.timestamp");
173 sys::fs::file_status FileStatus;
// Sampled once; both the timestamp age and every file age are measured
// against this same instant.
174 const auto CurrentTime = system_clock::now();
175 if (auto EC = sys::fs::status(TimestampFile, FileStatus)) {
177 // If the timestamp file wasn't there, create one now.
178 writeTimestampFile(TimestampFile);
179 } else {
180 // Unknown error?
181 return false;
182 }
183 } else {
// The timestamp file exists. Without an interval there is nothing to
// compare its age against, so no pruning happens.
184 if (!Policy.Interval)
185 return false;
// An interval of zero seconds skips the age check, so pruning always runs.
186 if (Policy.Interval != seconds(0)) {
187 // Check whether the time stamp is older than our pruning interval.
188 // If not, do nothing.
189 const auto TimeStampModTime = FileStatus.getLastModificationTime();
190 auto TimeStampAge = CurrentTime - TimeStampModTime;
191 if (TimeStampAge <= *Policy.Interval) {
192 LLVM_DEBUG(dbgs() << "Timestamp file too recent ("
193 << duration_cast<seconds>(TimeStampAge).count()
194 << "s old), do not prune.\n");
195 return false;
196 }
197 }
198 // Write a new timestamp file so that nobody else attempts to prune.
199 // There is a benign race condition here, if two processes happen to
200 // notice at the same time that the timestamp is out-of-date.
201 writeTimestampFile(TimestampFile);
202 }
203
204 // Keep track of files to delete to get below the size limit.
205 // Order by time of last use so that recently used files are preserved.
206 std::set<FileInfo> FileInfos;
// Sum of the sizes of all files recorded in FileInfos, in bytes.
207 uint64_t TotalSize = 0;
208
209 // Walk the entire directory cache, looking for unused files.
210 std::error_code EC;
211 SmallString<128> CachePathNative;
212 sys::path::native(Path, CachePathNative);
213 // Walk all of the files within this directory.
214 for (sys::fs::directory_iterator File(CachePathNative, EC), FileEnd;
215 File != FileEnd && !EC; File.increment(EC)) {
216 // Ignore filenames not beginning with "llvmcache-" or "Thin-". This
217 // includes the timestamp file as well as any files created by the user.
218 // This acts as a safeguard against data loss if the user specifies the
219 // wrong directory as their cache directory.
220 StringRef filename = sys::path::filename(File->path());
221 if (!filename.starts_with("llvmcache-") && !filename.starts_with("Thin-"))
222 continue;
223
224 // Look at this file. If we can't stat it, there's nothing interesting
225 // there.
226 ErrorOr<sys::fs::basic_file_status> StatusOrErr = File->status();
227 if (!StatusOrErr) {
228 LLVM_DEBUG(dbgs() << "Ignore " << File->path() << " (can't stat)\n");
229 continue;
230 }
231
232 // If the file hasn't been used recently enough, delete it
233 const auto FileAccessTime = StatusOrErr->getLastAccessedTime();
234 auto FileAge = CurrentTime - FileAccessTime;
// An expiration of zero seconds disables age-based removal.
235 if (Policy.Expiration != seconds(0) && FileAge > Policy.Expiration) {
236 LLVM_DEBUG(dbgs() << "Remove " << File->path() << " ("
237 << duration_cast<seconds>(FileAge).count()
238 << "s old)\n");
239 sys::fs::remove(File->path());
240 continue;
241 }
242
243 // Leave it here for now, but add it to the list of size-based pruning.
244 TotalSize += StatusOrErr->getSize();
245 FileInfos.insert({FileAccessTime, StatusOrErr->getSize(), File->path()});
246 }
247
// This local shadows the FileInfo struct name. It is the cursor into the
// sorted set, shared by the count-based and the size-based pass, so the
// second pass continues where the first one stopped.
248 auto FileInfo = FileInfos.begin();
249 size_t NumFiles = FileInfos.size();
250
// Removes the file under the cursor, updates the running totals and
// advances the cursor. Callers must make sure the cursor is not at end().
251 auto RemoveCacheFile = [&]() {
252 // Remove the file.
253 sys::fs::remove(FileInfo->Path);
254 // Update size
255 TotalSize -= FileInfo->Size;
256 NumFiles--;
// NOTE(review): TotalSize is a byte count, yet the message below labels it
// with '%'.
257 LLVM_DEBUG(dbgs() << " - Remove " << FileInfo->Path << " (size "
258 << FileInfo->Size << "), new occupancy is " << TotalSize
259 << "%\n");
260 ++FileInfo;
261 };
262
263 // Files.size() is greater than the number of inputs by one. However, a
264 // timestamp file is created and stored in the cache directory if the
265 // --thinlto-cache-policy option is used. Therefore, Files.size() is used as ActualNums.
266 const size_t ActualNums = Files.size();
// Diagnostic only: the buffers passed in already exceed the file limit.
267 if (Policy.MaxSizeFiles && ActualNums > Policy.MaxSizeFiles)
269 << "ThinLTO cache pruning happens since the number of created files ("
270 << ActualNums << ") exceeds the maximum number of files ("
271 << Policy.MaxSizeFiles
272 << "); consider adjusting --thinlto-cache-policy\n";
273
274 // Prune for number of files.
// NumFiles starts at FileInfos.size() and drops by one per removal, so the
// cursor cannot run past end() while MaxSizeFiles is non-zero.
275 if (Policy.MaxSizeFiles)
276 while (NumFiles > Policy.MaxSizeFiles)
277 RemoveCacheFile();
278
279 // Prune for size now if needed
280 if (Policy.MaxSizePercentageOfAvailableSpace > 0 || Policy.MaxSizeBytes > 0) {
281 auto ErrOrSpaceInfo = sys::fs::disk_space(Path);
// Failing to query the disk is treated as fatal rather than skipped.
282 if (!ErrOrSpaceInfo) {
283 report_fatal_error("Can't get available size");
284 }
285 sys::fs::space_info SpaceInfo = ErrOrSpaceInfo.get();
// Space the cache could occupy: what it uses now plus what is still free.
286 auto AvailableSpace = TotalSize + SpaceInfo.free;
287
// A limit left at zero means "not set". NOTE(review): the statement guarded
// by the next condition is not present in this listing.
288 if (Policy.MaxSizePercentageOfAvailableSpace == 0)
290 if (Policy.MaxSizeBytes == 0)
291 Policy.MaxSizeBytes = AvailableSpace;
// The effective target is the stricter of the percentage and byte limits.
292 auto TotalSizeTarget = std::min<uint64_t>(
293 AvailableSpace * Policy.MaxSizePercentageOfAvailableSpace / 100ull,
294 Policy.MaxSizeBytes);
295
// NOTE(review): this divides by AvailableSpace, which is zero when both
// TotalSize and SpaceInfo.free are zero.
296 LLVM_DEBUG(dbgs() << "Occupancy: " << ((100 * TotalSize) / AvailableSpace)
297 << "% target is: "
298 << Policy.MaxSizePercentageOfAvailableSpace << "%, "
299 << Policy.MaxSizeBytes << " bytes\n");
300
// Total size of the buffers passed in by the caller; null entries are skipped.
301 size_t ActualSizes = 0;
302 for (const auto &File : Files)
303 if (File)
304 ActualSizes += File->getBufferSize();
305
// Diagnostic only: the buffers passed in alone exceed the size target.
306 if (ActualSizes > TotalSizeTarget)
308 << "ThinLTO cache pruning happens since the total size of the cache "
309 "files consumed by the current link job ("
310 << ActualSizes << " bytes) exceeds maximum cache size ("
311 << TotalSizeTarget
312 << " bytes); consider adjusting --thinlto-cache-policy\n";
313
314 // Remove the oldest accessed files first, till we get below the threshold.
315 while (TotalSize > TotalSizeTarget && FileInfo != FileInfos.end())
316 RemoveCacheFile();
317 }
// Reaching this point means the directory was scanned, whether or not any
// file was actually removed.
318 return true;
319}
static Expected< std::chrono::seconds > parseDuration(StringRef Duration)
static void writeTimestampFile(StringRef TimestampFile)
Write a new timestamp file with the given path.
#define P(N)
#define LLVM_DEBUG(...)
Definition Debug.h:114
Represents either an error or a value T.
Definition ErrorOr.h:56
Tagged union holding either a T or a Error.
Definition Error.h:485
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition StringRef.h:480
std::string str() const
str - Get the contents as an std::string.
Definition StringRef.h:233
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:269
LLVM Value Representation.
Definition Value.h:75
static LLVM_ABI raw_ostream & warning()
Convenience method for printing "warning: " to stderr.
Definition WithColor.cpp:85
A raw_ostream that writes to a file descriptor.
LLVM_ABI TimePoint getLastModificationTime() const
The file modification time as reported from the underlying file system.
directory_iterator - Iterates through the entries in path.
Represents the result of a call to sys::fs::status().
Definition FileSystem.h:222
LLVM_ABI ErrorOr< space_info > disk_space(const Twine &Path)
Get disk space usage information.
LLVM_ABI std::error_code remove(const Twine &path, bool IgnoreNonExisting=true)
Remove path.
LLVM_ABI std::error_code status(const Twine &path, file_status &result, bool follow=true)
Get file status as if by POSIX stat().
LLVM_ABI bool is_directory(const basic_file_status &status)
Does status represent a directory?
Definition Path.cpp:1092
LLVM_ABI StringRef filename(StringRef path LLVM_LIFETIME_BOUND, Style style=Style::native)
Get filename.
Definition Path.cpp:577
LLVM_ABI void append(SmallVectorImpl< char > &path, const Twine &a, const Twine &b="", const Twine &c="", const Twine &d="")
Append to path.
Definition Path.cpp:456
std::chrono::time_point< std::chrono::system_clock, D > TimePoint
A time point on the system clock.
Definition Chrono.h:34
This is an optimization pass for GlobalISel generic memory operations.
bool operator<(int64_t V1, const APSInt &V2)
Definition APSInt.h:362
LLVM_ABI std::error_code inconvertibleErrorCode()
The value returned by this function can be returned from convertToErrorCode for Error values where no...
Definition Error.cpp:98
LLVM_ABI Expected< CachePruningPolicy > parseCachePruningPolicy(StringRef PolicyStr)
Parse the given string as a cache pruning policy.
@ no_such_file_or_directory
Definition Errc.h:65
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
Error make_error(ArgTs &&... Args)
Make a Error instance representing failure using the given error info type.
Definition Error.h:340
@ Other
Any other memory.
Definition ModRef.h:68
LLVM_ABI bool pruneCache(StringRef Path, CachePruningPolicy Policy, const std::vector< std::unique_ptr< MemoryBuffer > > &Files={})
Perform pruning using the supplied policy; returns true if pruning occurred, i.e. if the cache directory was actually scanned rather than skipped by an early exit.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:1934
Policy for the pruneCache() function.
uint64_t MaxSizeFiles
The maximum number of files in the cache directory.
std::optional< std::chrono::seconds > Interval
The pruning interval.
std::chrono::seconds Expiration
The expiration for a file.
uint64_t MaxSizeBytes
The maximum size for the cache directory in bytes.
unsigned MaxSizePercentageOfAvailableSpace
The maximum size for the cache directory, in terms of percentage of the available space on the disk.
space_info - Self explanatory.
Definition FileSystem.h:76