LLVM 23.0.0git
UnifiedOnDiskCache.h
Go to the documentation of this file.
1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_CAS_UNIFIEDONDISKCACHE_H
10#define LLVM_CAS_UNIFIEDONDISKCACHE_H
11
14#include <atomic>
15
16namespace llvm::cas::ondisk {
17
19
20/// A unified CAS node and key-value database, using on-disk storage for both.
21/// It manages storage growth and provides APIs for garbage collection.
22///
23/// High-level properties:
24/// * While \p UnifiedOnDiskCache is open on a directory, by any process, the
25/// storage size in that directory will keep growing unrestricted. For data to
26/// become eligible for garbage-collection there should be no open instances
27/// of \p UnifiedOnDiskCache for that directory, by any process.
28/// * Garbage-collection needs to be triggered explicitly by the client. It can
29/// be triggered on a directory concurrently, at any time and by any process,
30/// without affecting any active readers/writers, in the same process or other
31/// processes.
32///
33/// Usage patterns should be that an instance of \p UnifiedOnDiskCache is open
34/// for a limited period of time, e.g. for the duration of a build operation.
35/// For long-living processes that need periodic access to a
36/// \p UnifiedOnDiskCache, the client should devise a scheme where access is
37/// performed within some defined period. For example, if a service is designed
38/// to continuously wait for requests that access a \p UnifiedOnDiskCache, it
39/// could keep the instance alive while new requests are coming in but close it
40/// after a time period in which there are no new requests.
41class UnifiedOnDiskCache {
42public:
43 /// The \p OnDiskGraphDB instance for the open directory.
44 OnDiskGraphDB &getGraphDB() { return *PrimaryGraphDB; }
45
46 /// The \p OnDiskGraphDB instance for the open directory.
47 const OnDiskGraphDB &getGraphDB() const { return *PrimaryGraphDB; }
48
49 /// The \p OnDiskKeyValueDB instance for the open directory.
50 OnDiskKeyValueDB &getKeyValueDB() { return *PrimaryKVDB; }
51
52 /// The \p OnDiskKeyValueDB instance for the open directory.
53 const OnDiskKeyValueDB &getKeyValueDB() const { return *PrimaryKVDB; }
54
55 /// Open a \p UnifiedOnDiskCache instance for a directory.
56 ///
57 /// \param Path directory for the on-disk database. The directory will be
58 /// created if it doesn't exist.
59 /// \param SizeLimit Optional size for limiting growth. This takes effect
60 /// when the instance is closed.
61 /// \param HashName Identifier name for the hashing algorithm that is going to
62 /// be used.
63 /// \param HashByteSize Size for the object digest hash bytes.
64 /// \param FaultInPolicy Controls how nodes are copied to primary store. This
65 /// is recorded at creation time and subsequent opens need to pass the same
66 /// policy otherwise the \p open will fail.
68 open(StringRef Path, std::optional<uint64_t> SizeLimit, StringRef HashName,
69 unsigned HashByteSize,
70 OnDiskGraphDB::FaultInPolicy FaultInPolicy =
72
73 /// Validate the data in \p Path, if needed to ensure correctness.
74 ///
75 /// Note: if invalid data is detected and \p AllowRecovery is true, then
76 /// recovery requires exclusive access to the CAS and it is an error to
77 /// attempt recovery if there is concurrent use of the CAS.
78 ///
79 /// \param Path directory for the on-disk database.
80 /// \param HashName Identifier name for the hashing algorithm that is going to
81 /// be used.
82 /// \param HashByteSize Size for the object digest hash bytes.
83 /// \param CheckHash Whether to validate hashes match the data.
84 /// \param AllowRecovery Whether to automatically recover from invalid data by
85 /// marking the files for garbage collection.
86 /// \param ForceValidation Whether to force validation to occur even if it
87 /// should not be necessary.
88 /// \param LLVMCasBinary If provided, validation is performed out-of-process
89 /// using the given \c llvm-cas executable which protects against crashes
90 /// during validation. Otherwise validation is performed in-process.
91 ///
92 /// \returns \c Valid if the data is already valid, \c Recovered if data
93 /// was invalid but has been cleared, \c Skipped if validation is not needed,
94 /// or an \c Error if validation cannot be performed or if the data is left
95 /// in an invalid state because \p AllowRecovery is false.
97 validateIfNeeded(StringRef Path, StringRef HashName, unsigned HashByteSize,
98 bool CheckHash, OnDiskGraphDB::HashingFuncT HashFn,
99 bool AllowRecovery, bool ForceValidation,
100 std::optional<StringRef> LLVMCasBinary);
101
102 /// Validate the action cache only.
104
105 /// This is called implicitly at destruction time, so it is not required for a
106 /// client to call this. After calling \p close the only method that is valid
107 /// to call is \p needsGarbageCollection.
108 ///
109 /// \param CheckSizeLimit if true it will check whether the primary store has
110 /// exceeded its intended size limit. If false the check is skipped even if a
111 /// \p SizeLimit was passed to the \p open call.
112 LLVM_ABI_FOR_TEST Error close(bool CheckSizeLimit = true);
113
114 /// Set the size for limiting growth. This takes effect when the instance
115 /// is closed.
116 LLVM_ABI_FOR_TEST void setSizeLimit(std::optional<uint64_t> SizeLimit);
117
118 /// \returns the storage size of the cache data.
120
121 /// \returns whether the primary store has exceeded the intended size limit.
122 /// This can return false even if the overall size of the opened directory is
123 /// over the \p SizeLimit passed to \p open. To know whether garbage
124 /// collection needs to be triggered or not, call \p needsGarbageCollection.
126
127 /// \returns whether there are unused data that can be deleted using a
128 /// \p collectGarbage call.
129 bool needsGarbageCollection() const { return NeedsGarbageCollection; }
130
131 /// Remove any unused data from the directory at \p Path. If there are no such
132 /// data the operation is a no-op.
133 ///
134 /// This can be called concurrently, regardless of whether there is an open
135 /// \p UnifiedOnDiskCache instance or not; it has no effect on readers/writers
136 /// in the same process or other processes.
137 ///
138 /// It is recommended that garbage-collection is triggered concurrently in the
139 /// background, so that it has minimal effect on the workload of the process.
142
143 /// Remove unused data from the current UnifiedOnDiskCache.
145
146 /// Helper function to convert between the value stored in KeyValueDB and ObjectID.
148
149 using ValueBytes = std::array<char, sizeof(uint64_t)>;
151
153
154private:
155 friend class OnDiskGraphDB;
156 friend class OnDiskKeyValueDB;
157 UnifiedOnDiskCache();
158
160 faultInFromUpstreamKV(ArrayRef<uint8_t> Key);
161
162 /// \returns the storage size of the primary directory.
163 uint64_t getPrimaryStorageSize() const;
164
165 std::string RootPath;
166 std::atomic<uint64_t> SizeLimit;
167
168 int LockFD = -1;
169
170 std::atomic<bool> NeedsGarbageCollection;
171 std::string PrimaryDBDir;
172
173 std::unique_ptr<OnDiskGraphDB> UpstreamGraphDB;
174 std::unique_ptr<OnDiskGraphDB> PrimaryGraphDB;
175
176 std::unique_ptr<OnDiskKeyValueDB> UpstreamKVDB;
177 std::unique_ptr<OnDiskKeyValueDB> PrimaryKVDB;
178
179 std::shared_ptr<ondisk::OnDiskCASLogger> Logger = nullptr;
180};
181
182} // namespace llvm::cas::ondisk
183
184#endif // LLVM_CAS_UNIFIEDONDISKCACHE_H
#define LLVM_ABI_FOR_TEST
Definition Compiler.h:218
static cl::opt< unsigned > SizeLimit("eif-limit", cl::init(6), cl::Hidden, cl::desc("Size limit in Hexagon early if-conversion"))
This declares OnDiskGraphDB, an ondisk CAS database with a fixed length hash.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
Tagged union holding either a T or a Error.
Definition Error.h:485
Logging utility - given an ordered specification of features, and assuming a scalar reward,...
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
LLVM Value Representation.
Definition Value.h:75
Reference to a node.
Interface for logging low-level on-disk cas operations.
FaultInPolicy
How to fault-in nodes if an upstream database is used.
@ FullTree
Copy the entire graph of a node.
function_ref< void( ArrayRef< ArrayRef< uint8_t > >, ArrayRef< char >, SmallVectorImpl< uint8_t > &)> HashingFuncT
Hashing function type for validation.
An on-disk key-value data store with the following properties:
OnDiskGraphDB & getGraphDB()
The OnDiskGraphDB instance for the open directory.
LLVM_ABI_FOR_TEST uint64_t getStorageSize() const
static LLVM_ABI_FOR_TEST ValueBytes getValueFromObjectID(ObjectID ID)
static LLVM_ABI_FOR_TEST Expected< std::unique_ptr< UnifiedOnDiskCache > > open(StringRef Path, std::optional< uint64_t > SizeLimit, StringRef HashName, unsigned HashByteSize, OnDiskGraphDB::FaultInPolicy FaultInPolicy=OnDiskGraphDB::FaultInPolicy::FullTree)
Open a UnifiedOnDiskCache instance for a directory.
LLVM_ABI_FOR_TEST Error close(bool CheckSizeLimit=true)
This is called implicitly at destruction time, so it is not required for a client to call this.
LLVM_ABI_FOR_TEST Error validateActionCache() const
Validate the action cache only.
const OnDiskKeyValueDB & getKeyValueDB() const
The OnDiskKeyValueDB instance for the open directory.
static LLVM_ABI_FOR_TEST ObjectID getObjectIDFromValue(ArrayRef< char > Value)
Helper function to convert between the value stored in KeyValueDB and ObjectID.
const OnDiskGraphDB & getGraphDB() const
The OnDiskGraphDB instance for the open directory.
static Expected< ValidationResult > validateIfNeeded(StringRef Path, StringRef HashName, unsigned HashByteSize, bool CheckHash, OnDiskGraphDB::HashingFuncT HashFn, bool AllowRecovery, bool ForceValidation, std::optional< StringRef > LLVMCasBinary)
Validate the data in Path, if needed to ensure correctness.
LLVM_ABI_FOR_TEST bool hasExceededSizeLimit() const
OnDiskKeyValueDB & getKeyValueDB()
The OnDiskKeyValueDB instance for the open directory.
std::array< char, sizeof(uint64_t)> ValueBytes
Error collectGarbage()
Remove unused data from the current UnifiedOnDiskCache.
LLVM_ABI_FOR_TEST void setSizeLimit(std::optional< uint64_t > SizeLimit)
Set the size for limiting growth.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key