LLVM 22.0.0git
ObjectStore.h
Go to the documentation of this file.
1//===- llvm/CAS/ObjectStore.h -----------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_CAS_OBJECTSTORE_H
10#define LLVM_CAS_OBJECTSTORE_H
11
12#include "llvm/ADT/StringRef.h"
13#include "llvm/CAS/CASID.h"
15#include "llvm/Support/Error.h"
17#include <cstddef>
18
19namespace llvm {
20
21class MemoryBuffer;
22template <typename T> class unique_function;
23
24namespace cas {
25
26class ObjectStore;
27class ObjectProxy;
28
29/// Content-addressable storage for objects.
30///
31/// Conceptually, objects are stored in a "unique set".
32///
33/// - Objects are immutable ("value objects") that are defined by their
34/// content. They are implicitly deduplicated by content.
35/// - Each object has a unique identifier (UID) that's derived from its content,
36/// called a \a CASID.
37/// - This UID is a fixed-size (strong) hash of the transitive content of a
38/// CAS object.
39/// - It's comparable between any two CAS instances that have the same \a
40/// CASIDContext::getHashSchemaIdentifier().
41/// - The UID can be printed (e.g., \a CASID::toString()) and it can be parsed
42/// by the same or a different CAS instance with \a
43/// ObjectStore::parseID().
44/// - An object can be looked up by content or by UID.
45/// - \a store() is a "get-or-create" method, writing an object if it
46/// doesn't exist yet, and returning a ref to it in any case.
47/// - \a loadObject(const CASID&) looks up an object by its UID.
48/// - Objects can reference other objects, forming an arbitrary DAG.
49///
50/// The \a ObjectStore interface has a few ways of referencing objects:
51///
52/// - \a ObjectRef encapsulates a reference to something in the CAS. It is an
53/// opaque type that references an object inside a specific CAS. It is
54/// implementation defined if the underlying object exists or not for an
55/// ObjectRef, and it can be used to speed up CAS lookup as an implementation
56/// detail. However, you don't know anything about the underlying objects.
57/// "Loading" the object is a separate step that may not have happened
58/// yet, and which can fail (e.g. due to filesystem corruption) or introduce
59/// latency (if downloading from a remote store).
60/// - \a ObjectHandle encapsulates a *loaded* object in the CAS. You need one of
61/// these to inspect the content of an object: to look at its stored
62/// data and references. This is internal to the CAS implementation and not
63/// available from CAS public APIs.
64/// - \a CASID: the UID for an object in the CAS, obtained through \a
65/// ObjectStore::getID() or \a ObjectStore::parseID(). This is a valid CAS
66/// identifier, but may reference an object that is unknown to this CAS
67/// instance.
68/// - \a ObjectProxy pairs an ObjectHandle (subclass) with an ObjectStore, and
69/// wraps access APIs to avoid having to pass extra parameters. It is the
70/// object used for accessing underlying data and refs by CAS users.
71///
72/// Both ObjectRef and ObjectHandle are lightweight, wrapping a `uint64_t` and
73/// are only valid with the associated ObjectStore instance.
74///
75/// There are a few options for accessing content of objects, with different
76/// lifetime tradeoffs:
77///
78/// - \a getData() accesses data without exposing lifetime at all.
79/// - \a getMemoryBuffer() returns a \a MemoryBuffer whose lifetime
80/// is independent of the CAS (it can live longer).
81/// - \a getDataString() returns a StringRef whose lifetime is guaranteed to
82/// last as long as the \a ObjectStore.
83/// - \a readRef() and \a forEachRef() iterate through the references in an
84/// object. There is no lifetime assumption.
86 friend class ObjectProxy;
87 void anchor();
88
89public:
90 /// Get a \p CASID from a \p ID, which should have been generated by \a
91 /// CASID::print(). This succeeds as long as \a validateID() would pass. The
92 /// object may be unknown to this CAS instance.
93 ///
94 /// TODO: Remove, and update callers to use \a validateID() or \a
95 /// extractHashFromID().
97
98 /// Store object into ObjectStore.
100 ArrayRef<char> Data) = 0;
101 /// Get an ID for \p Ref.
102 virtual CASID getID(ObjectRef Ref) const = 0;
103
104 /// Get an existing reference to the object called \p ID.
105 ///
106 /// Returns \c std::nullopt if the object is not stored in this CAS.
107 virtual std::optional<ObjectRef> getReference(const CASID &ID) const = 0;
108
109 /// \returns true if the object is directly available from the local CAS, for
110 /// implementations that have this kind of distinction.
112
113 /// Validate the underlying object referred to by \p ID.
114 virtual Error validate(const CASID &ID) = 0;
115
116protected:
117 /// Load the object referenced by \p Ref.
118 ///
119 /// Errors if the object cannot be loaded.
120 /// \returns \c std::nullopt if the object is missing from the CAS.
122
123 /// Like \c loadIfExists but returns an error if the object is missing.
125
126 /// Get the size of some data.
128
129 /// Methods for handling objects. CAS implementations need to override to
130 /// provide functions to access stored CAS objects and references.
132 function_ref<Error(ObjectRef)> Callback) const = 0;
133 virtual ObjectRef readRef(ObjectHandle Node, size_t I) const = 0;
134 virtual size_t getNumRefs(ObjectHandle Node) const = 0;
136 bool RequiresNullTerminator = false) const = 0;
137
138 /// Get ObjectRef from open file.
139 virtual Expected<ObjectRef>
141 std::optional<sys::fs::file_status> Status);
142
143 /// Get a lifetime-extended StringRef pointing at the data of \p Node.
144 ///
145 /// Depending on the CAS implementation, this may involve in-memory storage
146 /// overhead.
148 return toStringRef(getData(Node));
149 }
150
151 /// Get a lifetime-extended MemoryBuffer pointing at the data of \p Node.
152 ///
153 /// Depending on the CAS implementation, this may involve in-memory storage
154 /// overhead.
155 std::unique_ptr<MemoryBuffer>
157 bool RequiresNullTerminator = true);
158
159 /// Read all the refs from object in a SmallVector.
160 virtual void readRefs(ObjectHandle Node,
161 SmallVectorImpl<ObjectRef> &Refs) const;
162
163 /// Allow ObjectStore implementations to create internal handles.
164#define MAKE_CAS_HANDLE_CONSTRUCTOR(HandleKind) \
165 HandleKind make##HandleKind(uint64_t InternalRef) const { \
166 return HandleKind(*this, InternalRef); \
167 }
168 MAKE_CAS_HANDLE_CONSTRUCTOR(ObjectHandle)
170#undef MAKE_CAS_HANDLE_CONSTRUCTOR
171
172public:
173 /// Helper function to store an object and return an ObjectProxy.
174 Expected<ObjectProxy> createProxy(ArrayRef<ObjectRef> Refs, StringRef Data);
175
176 /// Store object from StringRef.
179 return store(Refs, arrayRefFromStringRef<char>(String));
180 }
181
182 /// Default implementation reads \p FD and calls \a storeNode(). Does not
183 /// take ownership of \p FD; the caller is responsible for closing it.
184 ///
185 /// If \p Status is sent in it is to be treated as a hint. Implementations
186 /// must protect against the file size potentially growing after the status
187 /// was taken (i.e., they cannot assume that an mmap will be null-terminated
188 /// where \p Status implies).
189 ///
190 /// Returns an \a ObjectRef to the stored object.
193 std::optional<sys::fs::file_status> Status = std::nullopt) {
194 return storeFromOpenFileImpl(FD, Status);
195 }
196
197 static Error createUnknownObjectError(const CASID &ID);
198
199 /// Create ObjectProxy from CASID. If the object doesn't exist, get an error.
201 /// Create ObjectProxy from ObjectRef. If the object can't be loaded, get an
202 /// error.
204
205 /// \returns \c std::nullopt if the object is missing from the CAS.
207
208 /// Read the data from \p Node into \p OS.
210 uint64_t MaxBytes = -1ULL) const {
212 assert(Offset < Data.size() && "Expected valid offset");
213 Data = Data.drop_front(Offset).take_front(MaxBytes);
214 OS << toStringRef(Data);
215 return Data.size();
216 }
217
218 /// Validate the whole node tree.
220
221 /// Get CASContext
222 const CASContext &getContext() const { return Context; }
223
224 virtual ~ObjectStore() = default;
225
226protected:
227 ObjectStore(const CASContext &Context) : Context(Context) {}
228
229private:
230 const CASContext &Context;
231};
232
233/// Reference to an abstract hierarchical node, with data and references.
234/// Reference is passed by value and is expected to be valid as long as the \a
235/// ObjectStore is.
237public:
238 ObjectStore &getCAS() const { return *CAS; }
239 CASID getID() const { return CAS->getID(Ref); }
240 ObjectRef getRef() const { return Ref; }
241 size_t getNumReferences() const { return CAS->getNumRefs(H); }
242 ObjectRef getReference(size_t I) const { return CAS->readRef(H, I); }
243
244 operator CASID() const { return getID(); }
245 CASID getReferenceID(size_t I) const {
246 std::optional<CASID> ID = getCAS().getID(getReference(I));
247 assert(ID && "Expected reference to be first-class object");
248 return *ID;
249 }
250
251 /// Visit each reference in order, returning an error from \p Callback to
252 /// stop early.
254 return CAS->forEachRef(H, Callback);
255 }
256
257 std::unique_ptr<MemoryBuffer>
259 bool RequiresNullTerminator = true) const;
260
261 /// Get the content of the node. Valid as long as the CAS is valid.
262 StringRef getData() const { return CAS->getDataString(H); }
263
264 friend bool operator==(const ObjectProxy &Proxy, ObjectRef Ref) {
265 return Proxy.getRef() == Ref;
266 }
267 friend bool operator==(ObjectRef Ref, const ObjectProxy &Proxy) {
268 return Proxy.getRef() == Ref;
269 }
270 friend bool operator!=(const ObjectProxy &Proxy, ObjectRef Ref) {
271 return !(Proxy.getRef() == Ref);
272 }
273 friend bool operator!=(ObjectRef Ref, const ObjectProxy &Proxy) {
274 return !(Proxy.getRef() == Ref);
275 }
276
277public:
278 ObjectProxy() = delete;
279
281 return ObjectProxy(CAS, Ref, Node);
282 }
283
284private:
286 : CAS(&CAS), Ref(Ref), H(H) {}
287
288 ObjectStore *CAS;
289 ObjectRef Ref;
290 ObjectHandle H;
291};
292
293std::unique_ptr<ObjectStore> createInMemoryCAS();
294
295} // namespace cas
296} // namespace llvm
297
298#endif // LLVM_CAS_OBJECTSTORE_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Mark last scratch load
std::string Name
#define I(x, y, z)
Definition: MD5.cpp:58
#define H(x, y, z)
Definition: MD5.cpp:57
#define MAKE_CAS_HANDLE_CONSTRUCTOR(HandleKind)
Allow ObjectStore implementations to create internal handles.
Definition: ObjectStore.h:164
raw_pwrite_stream & OS
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
Lightweight error class with error context and mandatory checking.
Definition: Error.h:159
Tagged union holding either a T or a Error.
Definition: Error.h:485
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
Context for CAS identifiers.
Definition: CASID.h:28
Unique identifier for a CAS object.
Definition: CASID.h:58
Handle to a loaded object in a ObjectStore instance.
Definition: CASReference.h:150
Reference to an abstract hierarchical node, with data and references.
Definition: ObjectStore.h:236
static ObjectProxy load(ObjectStore &CAS, ObjectRef Ref, ObjectHandle Node)
Definition: ObjectStore.h:280
friend bool operator==(ObjectRef Ref, const ObjectProxy &Proxy)
Definition: ObjectStore.h:267
std::unique_ptr< MemoryBuffer > getMemoryBuffer(StringRef Name="", bool RequiresNullTerminator=true) const
size_t getNumReferences() const
Definition: ObjectStore.h:241
friend bool operator!=(const ObjectProxy &Proxy, ObjectRef Ref)
Definition: ObjectStore.h:270
CASID getID() const
Definition: ObjectStore.h:239
friend bool operator==(const ObjectProxy &Proxy, ObjectRef Ref)
Definition: ObjectStore.h:264
Error forEachReference(function_ref< Error(ObjectRef)> Callback) const
Visit each reference in order, returning an error from Callback to stop early.
Definition: ObjectStore.h:253
StringRef getData() const
Get the content of the node. Valid as long as the CAS is valid.
Definition: ObjectStore.h:262
friend bool operator!=(ObjectRef Ref, const ObjectProxy &Proxy)
Definition: ObjectStore.h:273
CASID getReferenceID(size_t I) const
Definition: ObjectStore.h:245
ObjectStore & getCAS() const
Definition: ObjectStore.h:238
ObjectRef getRef() const
Definition: ObjectStore.h:240
ObjectRef getReference(size_t I) const
Definition: ObjectStore.h:242
Reference to an object in an ObjectStore instance.
Definition: CASReference.h:108
Content-addressable storage for objects.
Definition: ObjectStore.h:85
Expected< ObjectProxy > createProxy(ArrayRef< ObjectRef > Refs, StringRef Data)
Helper functions to store object and returns a ObjectProxy.
Expected< ObjectRef > storeFromOpenFile(sys::fs::file_t FD, std::optional< sys::fs::file_status > Status=std::nullopt)
Default implementation reads FD and calls storeNode().
Definition: ObjectStore.h:192
Expected< std::optional< ObjectProxy > > getProxyIfExists(ObjectRef Ref)
Definition: ObjectStore.cpp:99
virtual Expected< bool > isMaterialized(ObjectRef Ref) const =0
virtual Expected< ObjectRef > store(ArrayRef< ObjectRef > Refs, ArrayRef< char > Data)=0
Store object into ObjectStore.
virtual ArrayRef< char > getData(ObjectHandle Node, bool RequiresNullTerminator=false) const =0
virtual CASID getID(ObjectRef Ref) const =0
Get an ID for Ref.
static Error createUnknownObjectError(const CASID &ID)
virtual Expected< std::optional< ObjectHandle > > loadIfExists(ObjectRef Ref)=0
Load the object referenced by Ref.
virtual Error validate(const CASID &ID)=0
Validate the underlying object referred by CASID.
const CASContext & getContext() const
Get CASContext.
Definition: ObjectStore.h:222
virtual ~ObjectStore()=default
Error validateTree(ObjectRef Ref)
Validate the whole node tree.
Expected< ObjectRef > storeFromString(ArrayRef< ObjectRef > Refs, StringRef String)
Store object from StringRef.
Definition: ObjectStore.h:177
uint64_t readData(ObjectHandle Node, raw_ostream &OS, uint64_t Offset=0, uint64_t MaxBytes=-1ULL) const
Read the data from Data into OS.
Definition: ObjectStore.h:209
virtual ObjectRef readRef(ObjectHandle Node, size_t I) const =0
ObjectStore(const CASContext &Context)
Definition: ObjectStore.h:227
virtual Expected< CASID > parseID(StringRef ID)=0
Get a CASID from a ID, which should have been generated by CASID::print().
virtual uint64_t getDataSize(ObjectHandle Node) const =0
Get the size of some data.
virtual Expected< ObjectRef > storeFromOpenFileImpl(sys::fs::file_t FD, std::optional< sys::fs::file_status > Status)
Get ObjectRef from open file.
StringRef getDataString(ObjectHandle Node)
Get a lifetime-extended StringRef pointing at Data.
Definition: ObjectStore.h:147
virtual void readRefs(ObjectHandle Node, SmallVectorImpl< ObjectRef > &Refs) const
Read all the refs from object in a SmallVector.
Definition: ObjectStore.cpp:74
virtual size_t getNumRefs(ObjectHandle Node) const =0
std::unique_ptr< MemoryBuffer > getMemoryBuffer(ObjectHandle Node, StringRef Name="", bool RequiresNullTerminator=true)
Get a lifetime-extended MemoryBuffer pointing at Data.
Definition: ObjectStore.cpp:67
virtual std::optional< ObjectRef > getReference(const CASID &ID) const =0
Get an existing reference to the object called ID.
Expected< ObjectProxy > getProxy(const CASID &ID)
Create ObjectProxy from CASID. If the object doesn't exist, get an error.
Definition: ObjectStore.cpp:82
virtual Error forEachRef(ObjectHandle Node, function_ref< Error(ObjectRef)> Callback) const =0
Methods for handling objects.
An efficient, type-erasing, non-owning reference to a callable.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
std::unique_ptr< ObjectStore > createInMemoryCAS()
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:477
@ Ref
The access may reference the value stored in memory.