LLVM 23.0.0git
ObjectStore.cpp
Go to the documentation of this file.
1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "llvm/ADT/DenseSet.h"
11#include "llvm/ADT/ScopeExit.h"
12#include "llvm/Support/Debug.h"
13#include "llvm/Support/Errc.h"
17#include "llvm/Support/Path.h"
18#include <deque>
19
20using namespace llvm;
21using namespace llvm::cas;
22
23void CASContext::anchor() {}
24void ObjectStore::anchor() {}
25
30
31std::string CASID::toString() const {
32 std::string S;
33 raw_string_ostream(S) << *this;
34 return S;
35}
36
38 uint64_t InternalRef, std::optional<CASID> ID) {
39 OS << Kind << "=" << InternalRef;
40 if (ID)
41 OS << "[" << *ID << "]";
42}
43
44void ReferenceBase::print(raw_ostream &OS, const ObjectHandle &This) const {
45 assert(this == &This);
46 printReferenceBase(OS, "object-handle", InternalRef, std::nullopt);
47}
48
49void ReferenceBase::print(raw_ostream &OS, const ObjectRef &This) const {
50 assert(this == &This);
51
52 std::optional<CASID> ID;
53#if LLVM_ENABLE_ABI_BREAKING_CHECKS
54 if (CAS)
55 ID = CAS->getID(This);
56#endif
57 printReferenceBase(OS, "object-ref", InternalRef, ID);
58}
59
61 std::optional<ObjectHandle> Handle;
62 if (Error E = loadIfExists(Ref).moveInto(Handle))
63 return std::move(E);
64 if (!Handle)
66 "missing object '" + getID(Ref).toString() + "'");
67 return *Handle;
68}
69
70std::unique_ptr<MemoryBuffer>
72 bool RequiresNullTerminator) {
74 toStringRef(getData(Node, RequiresNullTerminator)), Name,
75 RequiresNullTerminator);
76}
77
79 SmallVectorImpl<ObjectRef> &Refs) const {
81 Refs.push_back(Ref);
82 return Error::success();
83 }));
84}
85
87 std::optional<ObjectRef> Ref = getReference(ID);
88 if (!Ref)
90
91 return getProxy(*Ref);
92}
93
95 std::optional<ObjectHandle> H;
96 if (Error E = load(Ref).moveInto(H))
97 return std::move(E);
98
99 return ObjectProxy::load(*this, Ref, *H);
100}
101
104 std::optional<ObjectHandle> H;
105 if (Error E = loadIfExists(Ref).moveInto(H))
106 return std::move(E);
107 if (!H)
108 return std::nullopt;
109 return ObjectProxy::load(*this, Ref, *H);
110}
111
113 return createStringError(std::make_error_code(std::errc::invalid_argument),
114 "unknown object '" + ID.toString() + "'");
115}
116
124
127 std::optional<sys::fs::file_status> Status) {
128 // TODO: For the on-disk CAS implementation use cloning to store it as a
129 // standalone file if the file-system supports it and the file is large.
130 uint64_t Size = Status ? Status->getSize() : -1;
131 auto Buffer = MemoryBuffer::getOpenFile(FD, /*Filename=*/"", Size);
132 if (!Buffer)
133 return errorCodeToError(Buffer.getError());
134
135 return store({}, arrayRefFromStringRef<char>((*Buffer)->getBuffer()));
136}
137
139 auto BypassSandbox = sys::sandbox::scopedDisable();
140
142 if (Error E = sys::fs::openNativeFileForRead(Path).moveInto(FD))
143 return E;
144 auto CloseFile = scope_exit([&FD] { sys::fs::closeFile(FD); });
145 return storeFromOpenFile(FD);
146}
147
149 auto BypassSandbox = sys::sandbox::scopedDisable();
150
151 SmallString<256> TmpPath;
152 SmallString<256> Model;
153 Model += sys::path::parent_path(Path);
154 sys::path::append(Model, "%%%%%%%.tmp");
155 if (std::error_code EC = sys::fs::createUniqueFile(Model, TmpPath))
156 return createFileError(Model, EC);
157 auto RemoveTmpFile = scope_exit([&] {
158 if (!TmpPath.empty())
159 sys::fs::remove(TmpPath);
160 });
161
163 std::error_code EC;
164 raw_fd_ostream FS(TmpPath, EC);
165 if (EC)
166 return createFileError(TmpPath, EC);
167 FS.write(Data.begin(), Data.size());
168 FS.close();
169 if (FS.has_error())
170 return createFileError(TmpPath, FS.error());
171
172 if (std::error_code EC = sys::fs::rename(TmpPath, Path))
173 return createFileError(Path, EC);
174 TmpPath.clear();
175
176 return Error::success();
177}
178
180 SmallDenseSet<ObjectRef> ValidatedRefs;
181 SmallVector<ObjectRef, 16> RefsToValidate;
182 RefsToValidate.push_back(Root);
183
184 while (!RefsToValidate.empty()) {
185 ObjectRef Ref = RefsToValidate.pop_back_val();
186 auto [I, Inserted] = ValidatedRefs.insert(Ref);
187 if (!Inserted)
188 continue; // already validated.
189 if (Error E = validateObject(getID(Ref)))
190 return E;
192 if (!Obj)
193 return Obj.takeError();
194 if (Error E = forEachRef(*Obj, [&RefsToValidate](ObjectRef R) -> Error {
195 RefsToValidate.push_back(R);
196 return Error::success();
197 }))
198 return E;
199 }
200 return Error::success();
201}
202
205 // Copy the full CAS tree from upstream with depth-first ordering to ensure
206 // all the child nodes are available in downstream CAS before inserting
207 // current object. This uses a similar algorithm as
208 // `OnDiskGraphDB::importFullTree` but doesn't assume the upstream CAS schema
209// so it can be used to import from any other ObjectStore regardless of the
210 // CAS schema.
211
212 // There is no work to do if importing from self.
213 if (this == &Upstream)
214 return Other;
215
216 /// Keeps track of the state of visitation for current node and all of its
217 /// parents. Upstream Cursor holds information only from upstream CAS.
218 struct UpstreamCursor {
221 size_t RefsCount;
222 std::deque<ObjectRef> Refs;
223 };
225 /// PrimaryRefStack holds ObjectRefs of the current CAS, for nodes that were
226 /// either just stored in the current CAS or already exist in it.
227 SmallVector<ObjectRef, 128> PrimaryRefStack;
228 /// A map from upstream ObjectRef to current ObjectRef.
230
231 auto enqueueNode = [&](ObjectRef Ref, ObjectHandle Node) {
232 unsigned NumRefs = Upstream.getNumRefs(Node);
233 std::deque<ObjectRef> Refs;
234 for (unsigned I = 0; I < NumRefs; ++I)
235 Refs.push_back(Upstream.readRef(Node, I));
236
237 CursorStack.push_back({Ref, Node, NumRefs, std::move(Refs)});
238 };
239
240 auto UpstreamHandle = Upstream.load(Other);
241 if (!UpstreamHandle)
242 return UpstreamHandle.takeError();
243 enqueueNode(Other, *UpstreamHandle);
244
245 while (!CursorStack.empty()) {
246 UpstreamCursor &Cur = CursorStack.back();
247 if (Cur.Refs.empty()) {
248 // Copy the node data into the primary store.
249 // The last \p Cur.RefsCount entries of \p PrimaryRefStack are the
250 // current-CAS ObjectRefs for the references of the current node.
251 assert(PrimaryRefStack.size() >= Cur.RefsCount);
252 auto Refs = ArrayRef(PrimaryRefStack)
253 .slice(PrimaryRefStack.size() - Cur.RefsCount);
254 auto NewNode = store(Refs, Upstream.getData(Cur.Node));
255 if (!NewNode)
256 return NewNode.takeError();
257
258 // Remove the current node's references from the stack.
259 PrimaryRefStack.truncate(PrimaryRefStack.size() - Cur.RefsCount);
260
261 // Push new node into created objects.
262 PrimaryRefStack.push_back(*NewNode);
263 CreatedObjects.try_emplace(Cur.Ref, *NewNode);
264
265 // Pop the cursor in the end after all uses.
266 CursorStack.pop_back();
267 continue;
268 }
269
270 // Check if the node exists already.
271 auto CurrentID = Cur.Refs.front();
272 Cur.Refs.pop_front();
273 auto Ref = CreatedObjects.find(CurrentID);
274 if (Ref != CreatedObjects.end()) {
275 // If exists already, just need to enqueue the primary node.
276 PrimaryRefStack.push_back(Ref->second);
277 continue;
278 }
279
280 // Load child.
281 auto PrimaryID = Upstream.load(CurrentID);
282 if (LLVM_UNLIKELY(!PrimaryID))
283 return PrimaryID.takeError();
284
285 enqueueNode(CurrentID, *PrimaryID);
286 }
287
288 assert(PrimaryRefStack.size() == 1);
289 return PrimaryRefStack.front();
290}
291
292std::unique_ptr<MemoryBuffer>
294 bool RequiresNullTerminator) const {
295 return CAS->getMemoryBuffer(H, Name, RequiresNullTerminator);
296}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Mark last scratch load
#define LLVM_UNLIKELY(EXPR)
Definition Compiler.h:336
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
This file defines the DenseSet and SmallDenseSet classes.
#define I(x, y, z)
Definition MD5.cpp:57
#define H(x, y, z)
Definition MD5.cpp:56
static void printReferenceBase(raw_ostream &OS, StringRef Kind, uint64_t InternalRef, std::optional< CASID > ID)
This file contains the declaration of the ObjectStore class.
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:256
iterator end()
Definition DenseMap.h:81
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static ErrorSuccess success()
Create a success value.
Definition Error.h:336
Tagged union holding either a T or a Error.
Definition Error.h:485
static ErrorOr< std::unique_ptr< MemoryBuffer > > getOpenFile(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Given an already-open file descriptor, read the file and return a MemoryBuffer.
static std::unique_ptr< MemoryBuffer > getMemBuffer(StringRef InputData, StringRef BufferName="", bool RequiresNullTerminator=true)
Open the specified memory range as a MemoryBuffer.
Implements a dense probed hash-table based set with some number of buckets stored inline.
Definition DenseSet.h:291
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void truncate(size_type N)
Like resize, but requires that N is less than size().
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Unique identifier for a CAS object.
Definition CASID.h:58
void dump() const
void print(raw_ostream &OS) const
Print CASID.
Definition CASID.h:68
LLVM_ABI std::string toString() const
Return a printable string for CASID.
Handle to a loaded object in a ObjectStore instance.
void print(raw_ostream &OS) const
Print internal ref and/or CASID. Only suitable for debugging.
LLVM_DUMP_METHOD void dump() const
static ObjectProxy load(ObjectStore &CAS, ObjectRef Ref, ObjectHandle Node)
std::unique_ptr< MemoryBuffer > getMemoryBuffer(StringRef Name="", bool RequiresNullTerminator=true) const
Reference to an object in an ObjectStore instance.
void print(raw_ostream &OS) const
Print internal ref and/or CASID. Only suitable for debugging.
LLVM_DUMP_METHOD void dump() const
Expected< ObjectHandle > load(ObjectRef Ref)
Like loadIfExists but returns an error if the object is missing.
LLVM_ABI_FOR_TEST Expected< ObjectProxy > createProxy(ArrayRef< ObjectRef > Refs, StringRef Data)
Helper functions to store object and returns a ObjectProxy.
virtual void print(raw_ostream &) const
Print the ObjectStore internals for debugging purpose.
virtual Error validateObject(const CASID &ID)=0
Validate the underlying object referred by CASID.
LLVM_ABI Expected< ObjectRef > importObject(ObjectStore &Upstream, ObjectRef Other)
Import object from another CAS.
Expected< ObjectRef > storeFromOpenFile(sys::fs::file_t FD, std::optional< sys::fs::file_status > Status=std::nullopt)
Default implementation reads FD and calls storeNode().
Expected< std::optional< ObjectProxy > > getProxyIfExists(ObjectRef Ref)
virtual Expected< ObjectRef > store(ArrayRef< ObjectRef > Refs, ArrayRef< char > Data)=0
Store object into ObjectStore.
virtual ArrayRef< char > getData(ObjectHandle Node, bool RequiresNullTerminator=false) const =0
virtual Error exportDataToFile(ObjectHandle Node, StringRef Path) const
Exports the data of an object to a file path.
virtual CASID getID(ObjectRef Ref) const =0
Get an ID for Ref.
static Error createUnknownObjectError(const CASID &ID)
virtual Expected< std::optional< ObjectHandle > > loadIfExists(ObjectRef Ref)=0
Load the object referenced by Ref.
virtual Expected< ObjectRef > storeFromFile(StringRef Path)
Stores the data of a file into ObjectStore.
Error validateTree(ObjectRef Ref)
Validate the whole node tree.
virtual ObjectRef readRef(ObjectHandle Node, size_t I) const =0
ObjectStore(const CASContext &Context)
virtual Expected< ObjectRef > storeFromOpenFileImpl(sys::fs::file_t FD, std::optional< sys::fs::file_status > Status)
Get ObjectRef from open file.
virtual void readRefs(ObjectHandle Node, SmallVectorImpl< ObjectRef > &Refs) const
Read all the refs from object in a SmallVector.
virtual size_t getNumRefs(ObjectHandle Node) const =0
std::unique_ptr< MemoryBuffer > getMemoryBuffer(ObjectHandle Node, StringRef Name="", bool RequiresNullTerminator=true)
Get a lifetime-extended MemoryBuffer pointing at Data.
virtual std::optional< ObjectRef > getReference(const CASID &ID) const =0
Get an existing reference to the object called ID.
LLVM_ABI Expected< ObjectProxy > getProxy(const CASID &ID)
Create ObjectProxy from CASID. If the object doesn't exist, get an error.
virtual Error forEachRef(ObjectHandle Node, function_ref< Error(ObjectRef)> Callback) const =0
Methods for handling objects.
void print(raw_ostream &OS, const ObjectHandle &This) const
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
A raw_ostream that writes to a file descriptor.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an std::string.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
LLVM_ABI std::error_code closeFile(file_t &F)
Close the file object.
LLVM_ABI std::error_code rename(const Twine &from, const Twine &to)
Rename from to to.
LLVM_ABI std::error_code createUniqueFile(const Twine &Model, int &ResultFD, SmallVectorImpl< char > &ResultPath, OpenFlags Flags=OF_None, unsigned Mode=all_read|all_write)
Create a uniquely named file.
Definition Path.cpp:872
LLVM_ABI std::error_code remove(const Twine &path, bool IgnoreNonExisting=true)
Remove path.
LLVM_ABI Expected< file_t > openNativeFileForRead(const Twine &Name, OpenFlags Flags=OF_None, SmallVectorImpl< char > *RealPath=nullptr)
Opens the file with the given name in a read-only mode, returning its open file descriptor.
LLVM_ABI StringRef parent_path(StringRef path LLVM_LIFETIME_BOUND, Style style=Style::native)
Get parent path.
Definition Path.cpp:468
LLVM_ABI void append(SmallVectorImpl< char > &path, const Twine &a, const Twine &b="", const Twine &c="", const Twine &d="")
Append to path.
Definition Path.cpp:457
ScopedSetting scopedDisable()
Definition IOSandbox.h:36
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
Error createFileError(const Twine &F, Error E)
Concatenate a source file path and/or name with an Error.
Definition Error.h:1399
ArrayRef< CharT > arrayRefFromStringRef(StringRef Input)
Construct an array ref of CharT from a string ref.
scope_exit(Callable) -> scope_exit< Callable >
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition Error.h:1305
@ invalid_argument
Definition Errc.h:56
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
@ Ref
The access may reference the value stored in memory.
Definition ModRef.h:32
@ Other
Any other memory.
Definition ModRef.h:68
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:189
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
LLVM_ABI Error errorCodeToError(std::error_code EC)
Helper for converting an std::error_code to a Error.
Definition Error.cpp:107
void consumeError(Error Err)
Consume a Error without doing anything.
Definition Error.h:1083
StringRef toStringRef(bool B)
Construct a string ref from a boolean.