LLVM 23.0.0git
OnDiskCAS.cpp
Go to the documentation of this file.
1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "BuiltinCAS.h"
10#include "OnDiskCommon.h"
11#include "llvm/ADT/ScopeExit.h"
18#include "llvm/Support/Error.h"
20#include "llvm/Support/Path.h"
21
22using namespace llvm;
23using namespace llvm::cas;
24using namespace llvm::cas::builtin;
25
26namespace {
27
28class OnDiskCAS : public BuiltinCAS {
29public:
30 Expected<ObjectRef> storeImpl(ArrayRef<uint8_t> ComputedHash,
32 ArrayRef<char> Data) final;
33
34 Expected<std::optional<ObjectHandle>> loadIfExists(ObjectRef Ref) final;
35
36 CASID getID(ObjectRef Ref) const final;
37
38 std::optional<ObjectRef> getReference(const CASID &ID) const final;
39
40 Expected<bool> isMaterialized(ObjectRef Ref) const final;
41
42 ArrayRef<char> getDataConst(ObjectHandle Node) const final;
43
44 Expected<ObjectRef> storeFromFile(StringRef Path) final;
45
46 Error exportDataToFile(ObjectHandle Node, StringRef Path) const final;
47
48 void print(raw_ostream &OS) const final;
49 Error validate(bool CheckHash) const final;
50
51 static Expected<std::unique_ptr<OnDiskCAS>> open(StringRef Path);
52
53 OnDiskCAS(std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB)
54 : UnifiedDB(std::move(UniDB)), DB(&UnifiedDB->getGraphDB()) {}
55
56private:
57 ObjectHandle convertHandle(ondisk::ObjectHandle Node) const {
58 return makeObjectHandle(Node.getOpaqueData());
59 }
60
61 ondisk::ObjectHandle convertHandle(ObjectHandle Node) const {
62 return ondisk::ObjectHandle(Node.getInternalRef(*this));
63 }
64
65 ObjectRef convertRef(ondisk::ObjectID Ref) const {
66 return makeObjectRef(Ref.getOpaqueData());
67 }
68
69 ondisk::ObjectID convertRef(ObjectRef Ref) const {
70 return ondisk::ObjectID::fromOpaqueData(Ref.getInternalRef(*this));
71 }
72
73 size_t getNumRefs(ObjectHandle Node) const final {
74 auto RefsRange = DB->getObjectRefs(convertHandle(Node));
75 return llvm::size(RefsRange);
76 }
77
78 ObjectRef readRef(ObjectHandle Node, size_t I) const final {
79 auto RefsRange = DB->getObjectRefs(convertHandle(Node));
80 return convertRef(RefsRange.begin()[I]);
81 }
82
83 Error forEachRef(ObjectHandle Node,
84 function_ref<Error(ObjectRef)> Callback) const final;
85
86 Error setSizeLimit(std::optional<uint64_t> SizeLimit) final;
87 Expected<std::optional<uint64_t>> getStorageSize() const final;
88 Error pruneStorageData() final;
89
90 OnDiskCAS(std::unique_ptr<ondisk::OnDiskGraphDB> GraphDB)
91 : OwnedDB(std::move(GraphDB)), DB(OwnedDB.get()) {}
92
93 std::unique_ptr<ondisk::OnDiskGraphDB> OwnedDB;
94 std::shared_ptr<ondisk::UnifiedOnDiskCache> UnifiedDB;
95 ondisk::OnDiskGraphDB *DB;
96};
97
98} // end anonymous namespace
99
100void OnDiskCAS::print(raw_ostream &OS) const { DB->print(OS); }
101Error OnDiskCAS::validate(bool CheckHash) const {
102 if (auto E = DB->validate(CheckHash, builtin::hashingFunc))
103 return E;
104
105 return Error::success();
106}
107
108CASID OnDiskCAS::getID(ObjectRef Ref) const {
109 ArrayRef<uint8_t> Hash = DB->getDigest(convertRef(Ref));
110 return CASID::create(&getContext(), toStringRef(Hash));
111}
112
113std::optional<ObjectRef> OnDiskCAS::getReference(const CASID &ID) const {
114 std::optional<ondisk::ObjectID> ObjID =
115 DB->getExistingReference(ID.getHash());
116 if (!ObjID)
117 return std::nullopt;
118 return convertRef(*ObjID);
119}
120
121Expected<bool> OnDiskCAS::isMaterialized(ObjectRef ExternalRef) const {
122 return DB->isMaterialized(convertRef(ExternalRef));
123}
124
125ArrayRef<char> OnDiskCAS::getDataConst(ObjectHandle Node) const {
126 return DB->getObjectData(convertHandle(Node));
127}
128
129Expected<std::optional<ObjectHandle>>
130OnDiskCAS::loadIfExists(ObjectRef ExternalRef) {
131 Expected<std::optional<ondisk::ObjectHandle>> ObjHnd =
132 DB->load(convertRef(ExternalRef));
133 if (!ObjHnd)
134 return ObjHnd.takeError();
135 if (!*ObjHnd)
136 return std::nullopt;
137 return convertHandle(**ObjHnd);
138}
139
140Expected<ObjectRef> OnDiskCAS::storeImpl(ArrayRef<uint8_t> ComputedHash,
142 ArrayRef<char> Data) {
144 IDs.reserve(Refs.size());
145 for (ObjectRef Ref : Refs) {
146 IDs.push_back(convertRef(Ref));
147 }
148
149 auto StoredID = DB->getReference(ComputedHash);
150 if (LLVM_UNLIKELY(!StoredID))
151 return StoredID.takeError();
152 if (Error E = DB->store(*StoredID, IDs, Data))
153 return std::move(E);
154 return convertRef(*StoredID);
155}
156
157Expected<ObjectRef> OnDiskCAS::storeFromFile(StringRef Path) {
159 if (LLVM_UNLIKELY(!Hash))
160 return Hash.takeError();
161 auto StoredID = DB->getReference(*Hash);
162 if (LLVM_UNLIKELY(!StoredID))
163 return StoredID.takeError();
164 if (Error E = DB->storeFile(*StoredID, Path))
165 return E;
166 return convertRef(*StoredID);
167}
168
169Error OnDiskCAS::exportDataToFile(ObjectHandle Node, StringRef Path) const {
170 auto FBData = DB->getInternalFileBackedObjectData(convertHandle(Node));
171 if (!FBData.FileInfo.has_value())
172 return BuiltinCAS::exportDataToFile(Node, Path);
173
174 // Optimized version using the underlying database file.
175 assert(FBData.FileInfo.has_value());
176
177 auto BypassSandbox = sys::sandbox::scopedDisable();
178
179 ondisk::UniqueTempFile UniqueTmp;
180 auto ExpectedPath = UniqueTmp.createAndCopyFrom(sys::path::parent_path(Path),
181 FBData.FileInfo->FilePath);
182 if (!ExpectedPath)
183 return ExpectedPath.takeError();
184 StringRef TmpPath = *ExpectedPath;
185
186 if (FBData.FileInfo->IsFileNulTerminated) {
187 // Remove the nul terminator.
188 int FD;
189 if (std::error_code EC =
191 return createFileError(TmpPath, EC);
192 auto CloseFile = scope_exit([&FD] {
194 sys::fs::closeFile(File);
195 });
196 if (std::error_code EC = sys::fs::resize_file(FD, FBData.Data.size()))
197 return createFileError(TmpPath, EC);
198 }
199
200 if (Error E = UniqueTmp.renameTo(Path))
201 return E;
202
203 return Error::success();
204}
205
206Error OnDiskCAS::forEachRef(ObjectHandle Node,
207 function_ref<Error(ObjectRef)> Callback) const {
208 auto RefsRange = DB->getObjectRefs(convertHandle(Node));
209 for (ondisk::ObjectID Ref : RefsRange) {
210 if (Error E = Callback(convertRef(Ref)))
211 return E;
212 }
213 return Error::success();
214}
215
216Error OnDiskCAS::setSizeLimit(std::optional<uint64_t> SizeLimit) {
217 UnifiedDB->setSizeLimit(SizeLimit);
218 return Error::success();
219}
220
221Expected<std::optional<uint64_t>> OnDiskCAS::getStorageSize() const {
222 return UnifiedDB->getStorageSize();
223}
224
225Error OnDiskCAS::pruneStorageData() { return UnifiedDB->collectGarbage(); }
226
227Expected<std::unique_ptr<OnDiskCAS>> OnDiskCAS::open(StringRef AbsPath) {
228 std::shared_ptr<ondisk::OnDiskCASLogger> Logger;
229#ifndef _WIN32
230 if (Error E =
231 ondisk::OnDiskCASLogger::openIfEnabled(AbsPath).moveInto(Logger))
232 return std::move(E);
233#endif
234
235 Expected<std::unique_ptr<ondisk::OnDiskGraphDB>> DB =
237 sizeof(HashType), /*UpstreamDB=*/nullptr,
238 std::move(Logger));
239 if (!DB)
240 return DB.takeError();
241 return std::unique_ptr<OnDiskCAS>(new OnDiskCAS(std::move(*DB)));
242}
243
245#if LLVM_ENABLE_ONDISK_CAS
246 return true;
247#else
248 return false;
249#endif
250}
251
253#if LLVM_ENABLE_ONDISK_CAS
254 // FIXME: An absolute path isn't really good enough. Should open a directory
255 // and use openat() for files underneath.
256 SmallString<256> AbsPath;
257 Path.toVector(AbsPath);
258 sys::fs::make_absolute(AbsPath);
259
260 return OnDiskCAS::open(AbsPath);
261#else
262 return createStringError(inconvertibleErrorCode(), "OnDiskCAS is disabled");
263#endif /* LLVM_ENABLE_ONDISK_CAS */
264}
265
266std::unique_ptr<ObjectStore>
268 std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB) {
269 return std::make_unique<OnDiskCAS>(std::move(UniDB));
270}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_UNLIKELY(EXPR)
Definition Compiler.h:336
static cl::opt< unsigned > SizeLimit("eif-limit", cl::init(6), cl::Hidden, cl::desc("Size limit in Hexagon early if-conversion"))
#define I(x, y, z)
Definition MD5.cpp:57
This file declares interface for OnDiskCASLogger, an interface that can be used to log CAS events to ...
This declares OnDiskGraphDB, an ondisk CAS database with a fixed length hash.
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
static ErrorSuccess success()
Create a success value.
Definition Error.h:336
Tagged union holding either a T or a Error.
Definition Error.h:485
Error takeError()
Take ownership of the stored error.
Definition Error.h:612
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
void reserve(size_type N)
void push_back(const T &Elt)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static Expected< HashT > hashFile(StringRef FilePath)
static CASID create(const CASContext *Context, StringRef Hash)
Create CASID from CASContext and raw hash bytes.
Definition CASID.h:116
static StringRef getHashName()
Get the name of the hash for any table identifiers.
Common base class for builtin CAS implementations using the same CASContext.
Definition BuiltinCAS.h:24
static ObjectID fromOpaqueData(uint64_t Opaque)
static LLVM_ABI Expected< std::unique_ptr< OnDiskCASLogger > > openIfEnabled(const Twine &Path)
Create or append to a log file inside the given CAS directory Path if logging is enabled by the envir...
void print(raw_ostream &OS) const
LLVM_ABI_FOR_TEST Expected< std::optional< ObjectHandle > > load(ObjectID Ref)
Expected< bool > isMaterialized(ObjectID Ref)
Check whether the object associated with Ref is stored in the CAS.
Error validate(bool Deep, HashingFuncT Hasher) const
Validate the OnDiskGraphDB.
object_refs_range getObjectRefs(ObjectHandle Node) const
LLVM_ABI_FOR_TEST Error store(ObjectID ID, ArrayRef< ObjectID > Refs, ArrayRef< char > Data)
Associate data & references with a particular object ID.
ArrayRef< uint8_t > getDigest(ObjectID Ref) const
FileBackedData getInternalFileBackedObjectData(ObjectHandle Node) const
Provides access to the underlying file path, that represents an object leaf node, when available.
LLVM_ABI_FOR_TEST Error storeFile(ObjectID ID, StringRef FilePath)
Associates the data of a file with a particular object ID.
static LLVM_ABI_FOR_TEST Expected< std::unique_ptr< OnDiskGraphDB > > open(StringRef Path, StringRef HashName, unsigned HashByteSize, OnDiskGraphDB *UpstreamDB=nullptr, std::shared_ptr< OnDiskCASLogger > Logger=nullptr, FaultInPolicy Policy=FaultInPolicy::FullTree)
Open the on-disk store from a directory.
LLVM_ABI_FOR_TEST Expected< ObjectID > getReference(ArrayRef< uint8_t > Hash)
Form a reference for the provided hash.
LLVM_ABI_FOR_TEST ArrayRef< char > getObjectData(ObjectHandle Node) const
LLVM_ABI_FOR_TEST std::optional< ObjectID > getExistingReference(ArrayRef< uint8_t > Digest, bool CheckUpstream=true)
Get an existing reference to the object Digest.
Error renameTo(StringRef RenameToPath)
Rename the new unique file to RenameToPath.
Expected< StringRef > createAndCopyFrom(StringRef ParentPath, StringRef CopyFromPath)
Create a new unique file path under ParentPath and copy the contents of CopyFromPath into it.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
void validate(const Triple &TT, const FeatureBitset &FeatureBits)
std::unique_ptr< ObjectStore > createObjectStoreFromUnifiedOnDiskCache(std::shared_ptr< ondisk::UnifiedOnDiskCache > UniDB)
void hashingFunc(ArrayRef< ArrayRef< uint8_t > > Refs, ArrayRef< char > Data, SmallVectorImpl< uint8_t > &Result)
Convenience wrapper for BuiltinObjectHasher.
decltype(HasherT::hash(std::declval< ArrayRef< uint8_t > & >())) HashType
bool isOnDiskCASEnabled()
LLVM_ABI Expected< std::unique_ptr< ObjectStore > > createOnDiskCAS(const Twine &Path)
Create a persistent on-disk path at Path.
llvm::unique_function< void(llvm::Expected< T >)> Callback
A Callback<T> is a void function that accepts Expected<T>.
Definition Transport.h:139
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
Context & getContext() const
Definition BasicBlock.h:99
LLVM_ABI std::error_code closeFile(file_t &F)
Close the file object.
@ CD_OpenExisting
CD_OpenExisting - When opening a file:
Definition FileSystem.h:737
std::error_code openFileForWrite(const Twine &Name, int &ResultFD, CreationDisposition Disp=CD_CreateAlways, OpenFlags Flags=OF_None, unsigned Mode=0666)
Opens the file with the given name in a write-only or read-write mode, returning its open file descri...
LLVM_ABI std::error_code make_absolute(SmallVectorImpl< char > &path)
Make path an absolute path.
Definition Path.cpp:960
LLVM_ABI std::error_code resize_file(int FD, uint64_t Size)
Resize path to size.
LLVM_ABI file_t convertFDToNativeFile(int FD)
Converts from a Posix file descriptor number to a native file handle.
Definition FileSystem.h:991
LLVM_ABI StringRef parent_path(StringRef path LLVM_LIFETIME_BOUND, Style style=Style::native)
Get parent path.
Definition Path.cpp:468
ScopedSetting scopedDisable()
Definition IOSandbox.h:36
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
Error createFileError(const Twine &F, Error E)
Concatenate a source file path and/or name with an Error.
Definition Error.h:1399
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1669
scope_exit(Callable) -> scope_exit< Callable >
LLVM_ABI std::error_code inconvertibleErrorCode()
The value returned by this function can be returned from convertToErrorCode for Error values where no...
Definition Error.cpp:94
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition Error.h:1305
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
@ Ref
The access may reference the value stored in memory.
Definition ModRef.h:32
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:189
ArrayRef(const T &OneElt) -> ArrayRef< T >
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1917
StringRef toStringRef(bool B)
Construct a string ref from a boolean.