LLVM 22.0.0git
OnDiskGraphDB.cpp
Go to the documentation of this file.
1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file implements OnDiskGraphDB, an on-disk CAS nodes database,
11/// independent of a particular hashing algorithm. It only needs to be
12/// configured for the hash size and controls the schema of the storage.
13///
14/// OnDiskGraphDB defines:
15///
16/// - How the data is stored inside database, either as a standalone file, or
17/// allocated inside a datapool.
/// - How references to other objects inside the same database are stored. They
///   are stored as internal references, instead of full hash values, to save
///   space.
21/// - How to chain databases together and import objects from upstream
22/// databases.
23///
24/// Here's a top-level description of the current layout:
25///
26/// - db/index.<version>: a file for the "index" table, named by \a
27/// IndexTableName and managed by \a TrieRawHashMap. The contents are 8B
28/// that are accessed atomically, describing the object kind and where/how
29/// it's stored (including an optional file offset). See \a TrieRecord for
30/// more details.
31/// - db/data.<version>: a file for the "data" table, named by \a
32/// DataPoolTableName and managed by \a DataStore. New objects within
33/// TrieRecord::MaxEmbeddedSize are inserted here as \a
34/// TrieRecord::StorageKind::DataPool.
35/// - db/obj.<offset>.<version>: a file storing an object outside the main
36/// "data" table, named by its offset into the "index" table, with the
37/// format of \a TrieRecord::StorageKind::Standalone.
38/// - db/leaf.<offset>.<version>: a file storing a leaf node outside the
39/// main "data" table, named by its offset into the "index" table, with
40/// the format of \a TrieRecord::StorageKind::StandaloneLeaf.
41/// - db/leaf+0.<offset>.<version>: a file storing a null-terminated leaf object
42/// outside the main "data" table, named by its offset into the "index" table,
43/// with the format of \a TrieRecord::StorageKind::StandaloneLeaf0.
44//
45//===----------------------------------------------------------------------===//
46
48#include "OnDiskCommon.h"
49#include "llvm/ADT/DenseMap.h"
50#include "llvm/ADT/ScopeExit.h"
56#include "llvm/Support/Errc.h"
57#include "llvm/Support/Error.h"
61#include "llvm/Support/Path.h"
63#include <atomic>
64#include <mutex>
65#include <optional>
66
67#define DEBUG_TYPE "on-disk-cas"
68
69using namespace llvm;
70using namespace llvm::cas;
71using namespace llvm::cas::ondisk;
72
// Table identifiers recorded inside the index/data files.
static constexpr StringLiteral IndexTableName = "llvm.cas.index";
static constexpr StringLiteral DataPoolTableName = "llvm.cas.data";

// File-name prefixes for the two main tables (see file header comment:
// db/index.<version> and db/data.<version>).
static constexpr StringLiteral IndexFilePrefix = "index.";
static constexpr StringLiteral DataPoolFilePrefix = "data.";

// File-name prefixes for standalone objects, one per standalone
// TrieRecord::StorageKind (see TrieRecord::getStandaloneFilePrefix).
static constexpr StringLiteral FilePrefixObject = "obj.";
static constexpr StringLiteral FilePrefixLeaf = "leaf.";
static constexpr StringLiteral FilePrefixLeaf0 = "leaf+0.";
82
84 if (!ID)
85 return ID.takeError();
86
88 "corrupt object '" + toHex(*ID) + "'");
89}
90
91namespace {
92
/// Trie record data: 8 bytes, atomic<uint64_t>
/// - 1-byte: StorageKind
/// - 7-bytes: DataStoreOffset (offset into referenced file)
class TrieRecord {
public:
  enum class StorageKind : uint8_t {
    /// Unknown object.
    Unknown = 0,

    /// data.vX: main pool, full DataStore record.
    DataPool = 1,

    /// obj.<TrieRecordOffset>.vX: standalone, with a full DataStore record.
    Standalone = 10,

    /// leaf.<TrieRecordOffset>.vX: standalone, just the data. File contents
    /// exactly the data content and file size matches the data size. No refs.
    StandaloneLeaf = 11,

    /// leaf+0.<TrieRecordOffset>.vX: standalone, just the data plus an
    /// extra null character ('\0'). File size is 1 bigger than the data size.
    /// No refs.
    StandaloneLeaf0 = 12,
  };

  /// Map a standalone storage kind to its file-name prefix
  /// ("obj." / "leaf." / "leaf+0."). Must not be called with DataPool/Unknown.
  static StringRef getStandaloneFilePrefix(StorageKind SK) {
    switch (SK) {
    default:
      llvm_unreachable("Expected standalone storage kind");
    case TrieRecord::StorageKind::Standalone:
      return FilePrefixObject;
    case TrieRecord::StorageKind::StandaloneLeaf:
      return FilePrefixLeaf;
    case TrieRecord::StorageKind::StandaloneLeaf0:
      return FilePrefixLeaf0;
    }
  }

  enum Limits : int64_t {
    /// Saves files bigger than 64KB standalone instead of embedding them.
    MaxEmbeddedSize = 64LL * 1024LL - 1,
  };

  /// Unpacked view of a record: the storage kind plus an offset whose
  /// interpretation depends on the kind (see the file header comment).
  struct Data {
    StorageKind SK = StorageKind::Unknown;
    FileOffset Offset;
  };

  /// Pack StorageKind and Offset from Data into 8 byte TrieRecord.
  /// The offset must fit in the low 56 bits; the kind occupies the top byte.
  static uint64_t pack(Data D) {
    assert(D.Offset.get() < (int64_t)(1ULL << 56));
    uint64_t Packed = uint64_t(D.SK) << 56 | D.Offset.get();
    assert(D.SK != StorageKind::Unknown || Packed == 0);
#ifndef NDEBUG
    // Cheap self-check: packing must round-trip through unpack exactly.
    Data RoundTrip = unpack(Packed);
    assert(D.SK == RoundTrip.SK);
    assert(D.Offset.get() == RoundTrip.Offset.get());
#endif
    return Packed;
  }

  // Unpack TrieRecord into Data. A zero word decodes to (Unknown, 0).
  static Data unpack(uint64_t Packed) {
    Data D;
    if (!Packed)
      return D;
    D.SK = (StorageKind)(Packed >> 56);
    D.Offset = FileOffset(Packed & (UINT64_MAX >> 8));
    return D;
  }

  TrieRecord() : Storage(0) {}

  /// Atomically read and decode the current record value.
  Data load() const { return unpack(Storage); }
  /// CAS on the packed word; on failure, \p Existing is updated to the value
  /// actually observed in storage. Defined out-of-line below.
  bool compare_exchange_strong(Data &Existing, Data New);

private:
  std::atomic<uint64_t> Storage;
};
172
/// DataStore record data: 4B + size? + refs? + data + 0
/// - 4-bytes: Header
/// - {0,4,8}-bytes: DataSize (may be packed in Header)
/// - {0,4,8}-bytes: NumRefs (may be packed in Header)
/// - NumRefs*{4,8}-bytes: Refs[] (end-ptr is 8-byte aligned)
/// - <data>
/// - 1-byte: 0-term
struct DataRecordHandle {
  /// NumRefs storage: 4B, 2B, 1B, or 0B (no refs). Or, 8B, for alignment
  /// convenience to avoid computing padding later.
  enum class NumRefsFlags : uint8_t {
    Uses0B = 0U,
    Uses1B = 1U,
    Uses2B = 2U,
    Uses4B = 3U,
    Uses8B = 4U,
    Max = Uses8B,
  };

  /// DataSize storage: 8B, 4B, 2B, or 1B.
  enum class DataSizeFlags {
    Uses1B = 0U,
    Uses2B = 1U,
    Uses4B = 2U,
    Uses8B = 3U,
    Max = Uses8B,
  };

  /// Kind of ref stored in Refs[]: InternalRef or InternalRef4B.
  enum class RefKindFlags {
    InternalRef = 0U,
    InternalRef4B = 1U,
    Max = InternalRef4B,
  };

  // Bit layout of the LayoutFlags byte: bits [0,3) NumRefs encoding,
  // [3,5) DataSize encoding, [5,6) ref kind.
  enum Counts : int {
    NumRefsShift = 0,
    NumRefsBits = 3,
    DataSizeShift = NumRefsShift + NumRefsBits,
    DataSizeBits = 2,
    RefKindShift = DataSizeShift + DataSizeBits,
    RefKindBits = 1,
  };
  static_assert(((UINT32_MAX << NumRefsBits) & (uint32_t)NumRefsFlags::Max) ==
                    0,
                "Not enough bits");
  static_assert(((UINT32_MAX << DataSizeBits) & (uint32_t)DataSizeFlags::Max) ==
                    0,
                "Not enough bits");
  static_assert(((UINT32_MAX << RefKindBits) & (uint32_t)RefKindFlags::Max) ==
                    0,
                "Not enough bits");

  /// Layout of the DataRecordHandle and how to decode it.
  struct LayoutFlags {
    NumRefsFlags NumRefs;
    DataSizeFlags DataSize;
    RefKindFlags RefKind;

    /// Pack the three flag fields into a single byte-sized value.
    static uint64_t pack(LayoutFlags LF) {
      unsigned Packed = ((unsigned)LF.NumRefs << NumRefsShift) |
                        ((unsigned)LF.DataSize << DataSizeShift) |
                        ((unsigned)LF.RefKind << RefKindShift);
#ifndef NDEBUG
      // Self-check: packing must round-trip through unpack exactly.
      LayoutFlags RoundTrip = unpack(Packed);
      assert(LF.NumRefs == RoundTrip.NumRefs);
      assert(LF.DataSize == RoundTrip.DataSize);
      assert(LF.RefKind == RoundTrip.RefKind);
#endif
      return Packed;
    }
    static LayoutFlags unpack(uint64_t Storage) {
      assert(Storage <= UINT8_MAX && "Expect storage to fit in a byte");
      LayoutFlags LF;
      LF.NumRefs =
          (NumRefsFlags)((Storage >> NumRefsShift) & ((1U << NumRefsBits) - 1));
      LF.DataSize = (DataSizeFlags)((Storage >> DataSizeShift) &
                                    ((1U << DataSizeBits) - 1));
      LF.RefKind =
          (RefKindFlags)((Storage >> RefKindShift) & ((1U << RefKindBits) - 1));
      return LF;
    }
  };

  /// Header layout:
  /// - 1-byte: LayoutFlags
  /// - 1-byte: 1B size field
  /// - {0,2}-bytes: 2B size field
  struct Header {
    using PackTy = uint32_t;
    PackTy Packed;

    // LayoutFlags occupy the most-significant byte of Packed.
    static constexpr unsigned LayoutFlagsShift =
        (sizeof(PackTy) - 1) * CHAR_BIT;
  };

  /// Inputs to serialize: the refs and the raw data bytes.
  struct Input {
    InternalRefArrayRef Refs;
    ArrayRef<char> Data;
  };

  LayoutFlags getLayoutFlags() const {
    return LayoutFlags::unpack(H->Packed >> Header::LayoutFlagsShift);
  }

  uint64_t getDataSize() const;
  void skipDataSize(LayoutFlags LF, int64_t &RelOffset) const;
  uint32_t getNumRefs() const;
  void skipNumRefs(LayoutFlags LF, int64_t &RelOffset) const;
  int64_t getRefsRelOffset() const;
  int64_t getDataRelOffset() const;

  // Total size includes the 1-byte 0-terminator after the data.
  static uint64_t getTotalSize(uint64_t DataRelOffset, uint64_t DataSize) {
    return DataRelOffset + DataSize + 1;
  }
  uint64_t getTotalSize() const {
    return getDataRelOffset() + getDataSize() + 1;
  }

  /// Describe the layout of data stored and how to decode from
  /// DataRecordHandle.
  struct Layout {
    explicit Layout(const Input &I);

    LayoutFlags Flags;
    uint64_t DataSize = 0;
    uint32_t NumRefs = 0;
    int64_t RefsRelOffset = 0;
    int64_t DataRelOffset = 0;
    uint64_t getTotalSize() const {
      return DataRecordHandle::getTotalSize(DataRelOffset, DataSize);
    }
  };

  /// View of the ref list; element width (4B vs 8B) is chosen by RefKind.
  InternalRefArrayRef getRefs() const {
    assert(H && "Expected valid handle");
    auto *BeginByte = reinterpret_cast<const char *>(H) + getRefsRelOffset();
    size_t Size = getNumRefs();
    if (!Size)
      return InternalRefArrayRef();
    if (getLayoutFlags().RefKind == RefKindFlags::InternalRef4B)
      return ArrayRef(reinterpret_cast<const InternalRef4B *>(BeginByte), Size);
    return ArrayRef(reinterpret_cast<const InternalRef *>(BeginByte), Size);
  }

  /// View of the stored data bytes (excludes the trailing 0-terminator).
  ArrayRef<char> getData() const {
    assert(H && "Expected valid handle");
    return ArrayRef(reinterpret_cast<const char *>(H) + getDataRelOffset(),
                    getDataSize());
  }

  static DataRecordHandle create(function_ref<char *(size_t Size)> Alloc,
                                 const Input &I);
  static Expected<DataRecordHandle>
  createWithError(function_ref<Expected<char *>(size_t Size)> Alloc,
                  const Input &I);
  static DataRecordHandle construct(char *Mem, const Input &I);

  /// Wrap already-serialized record memory; does not validate or copy.
  static DataRecordHandle get(const char *Mem) {
    return DataRecordHandle(
        *reinterpret_cast<const DataRecordHandle::Header *>(Mem));
  }
  static Expected<DataRecordHandle>
  getFromDataPool(const OnDiskDataAllocator &Pool, FileOffset Offset);

  explicit operator bool() const { return H; }
  const Header &getHeader() const { return *H; }

  DataRecordHandle() = default;
  explicit DataRecordHandle(const Header &H) : H(&H) {}

private:
  static DataRecordHandle constructImpl(char *Mem, const Input &I,
                                        const Layout &L);
  // Non-owning pointer into the record's storage; the handle is just a view.
  const Header *H = nullptr;
};
349
/// Proxy for any on-disk object or raw data.
///
/// NOTE(review): presumably at most one of the members is populated per
/// object (structured record vs. raw leaf bytes) — confirm against the
/// getContent() implementations.
struct OnDiskContent {
  std::optional<DataRecordHandle> Record;
  std::optional<ArrayRef<char>> Bytes;
};
355
356/// Data loaded inside the memory from standalone file.
357class StandaloneDataInMemory {
358public:
359 OnDiskContent getContent() const;
360
361 StandaloneDataInMemory(std::unique_ptr<sys::fs::mapped_file_region> Region,
362 TrieRecord::StorageKind SK)
363 : Region(std::move(Region)), SK(SK) {
364#ifndef NDEBUG
365 bool IsStandalone = false;
366 switch (SK) {
367 case TrieRecord::StorageKind::Standalone:
368 case TrieRecord::StorageKind::StandaloneLeaf:
369 case TrieRecord::StorageKind::StandaloneLeaf0:
370 IsStandalone = true;
371 break;
372 default:
373 break;
374 }
375 assert(IsStandalone);
376#endif
377 }
378
379private:
380 std::unique_ptr<sys::fs::mapped_file_region> Region;
381 TrieRecord::StorageKind SK;
382};
383
384/// Container to lookup loaded standalone objects.
385template <size_t NumShards> class StandaloneDataMap {
386 static_assert(isPowerOf2_64(NumShards), "Expected power of 2");
387
388public:
389 uintptr_t insert(ArrayRef<uint8_t> Hash, TrieRecord::StorageKind SK,
390 std::unique_ptr<sys::fs::mapped_file_region> Region);
391
392 const StandaloneDataInMemory *lookup(ArrayRef<uint8_t> Hash) const;
393 bool count(ArrayRef<uint8_t> Hash) const { return bool(lookup(Hash)); }
394
395private:
396 struct Shard {
397 /// Needs to store a std::unique_ptr for a stable address identity.
398 DenseMap<const uint8_t *, std::unique_ptr<StandaloneDataInMemory>> Map;
399 mutable std::mutex Mutex;
400 };
401 Shard &getShard(ArrayRef<uint8_t> Hash) {
402 return const_cast<Shard &>(
403 const_cast<const StandaloneDataMap *>(this)->getShard(Hash));
404 }
405 const Shard &getShard(ArrayRef<uint8_t> Hash) const {
406 static_assert(NumShards <= 256, "Expected only 8 bits of shard");
407 return Shards[Hash[0] % NumShards];
408 }
409
410 Shard Shards[NumShards];
411};
412
413using StandaloneDataMapTy = StandaloneDataMap<16>;
414
/// A vector of internal node references.
///
/// Stores compact 4-byte refs while possible and transparently migrates to
/// full-width refs the first time a value that cannot be shrunk is appended.
///
/// NOTE(review): the SmallRefs/FullRefs member declarations are not visible
/// in this rendering of the file — confirm against the original source.
class InternalRefVector {
public:
  void push_back(InternalRef Ref) {
    if (NeedsFull)
      return FullRefs.push_back(Ref);
    if (std::optional<InternalRef4B> Small = InternalRef4B::tryToShrink(Ref))
      return SmallRefs.push_back(*Small);
    // First non-shrinkable ref: switch representations, copying everything
    // accumulated so far into the full-width vector.
    NeedsFull = true;
    assert(FullRefs.empty());
    FullRefs.reserve(SmallRefs.size() + 1);
    for (InternalRef4B Small : SmallRefs)
      FullRefs.push_back(Small);
    FullRefs.push_back(Ref);
    SmallRefs.clear();
  }

  /// Exactly one of the two vectors is in use at any time.
  operator InternalRefArrayRef() const {
    assert(SmallRefs.empty() || FullRefs.empty());
    return NeedsFull ? InternalRefArrayRef(FullRefs)
                     : InternalRefArrayRef(SmallRefs);
  }

private:
  bool NeedsFull = false;
};
443
444} // namespace
445
446Expected<DataRecordHandle> DataRecordHandle::createWithError(
447 function_ref<Expected<char *>(size_t Size)> Alloc, const Input &I) {
448 Layout L(I);
449 if (Expected<char *> Mem = Alloc(L.getTotalSize()))
450 return constructImpl(*Mem, I, L);
451 else
452 return Mem.takeError();
453}
454
455DataRecordHandle
456DataRecordHandle::create(function_ref<char *(size_t Size)> Alloc,
457 const Input &I) {
458 Layout L(I);
459 return constructImpl(Alloc(L.getTotalSize()), I, L);
460}
461
463 // Store the file offset as it is.
464 assert(!(Offset.get() & 0x1));
465 return ObjectHandle(Offset.get());
466}
467
469 // Store the pointer from memory with lowest bit set.
470 assert(!(Ptr & 0x1));
471 return ObjectHandle(Ptr | 1);
472}
473
474/// Proxy for an on-disk index record.
480
481template <size_t N>
482uintptr_t StandaloneDataMap<N>::insert(
483 ArrayRef<uint8_t> Hash, TrieRecord::StorageKind SK,
484 std::unique_ptr<sys::fs::mapped_file_region> Region) {
485 auto &S = getShard(Hash);
486 std::lock_guard<std::mutex> Lock(S.Mutex);
487 auto &V = S.Map[Hash.data()];
488 if (!V)
489 V = std::make_unique<StandaloneDataInMemory>(std::move(Region), SK);
490 return reinterpret_cast<uintptr_t>(V.get());
491}
492
493template <size_t N>
494const StandaloneDataInMemory *
495StandaloneDataMap<N>::lookup(ArrayRef<uint8_t> Hash) const {
496 auto &S = getShard(Hash);
497 std::lock_guard<std::mutex> Lock(S.Mutex);
498 auto I = S.Map.find(Hash.data());
499 if (I == S.Map.end())
500 return nullptr;
501 return &*I->second;
502}
503
504namespace {
505
506/// Copy of \a sys::fs::TempFile that skips RemoveOnSignal, which is too
507/// expensive to register/unregister at this rate.
508///
/// FIXME: Add a TempFileManager that maintains a thread-safe list of open temp
/// files and has a signal handler registered that removes them all.
class TempFile {
  // Set once keep() or discard() has run (or after being moved from), so the
  // destructor knows no cleanup is pending.
  bool Done = false;
  TempFile(StringRef Name, int FD) : TmpName(std::string(Name)), FD(FD) {}

public:
  /// This creates a temporary file with createUniqueFile.
  static Expected<TempFile> create(const Twine &Model);
  TempFile(TempFile &&Other) { *this = std::move(Other); }
  TempFile &operator=(TempFile &&Other) {
    // NOTE(review): Done is not copied from Other; this relies on *this being
    // freshly constructed (true when called from the move constructor) —
    // confirm before move-assigning onto a live TempFile.
    TmpName = std::move(Other.TmpName);
    FD = Other.FD;
    Other.Done = true;
    Other.FD = -1;
    return *this;
  }

  // Name of the temporary file.
  std::string TmpName;

  // The open file descriptor.
  int FD = -1;

  // Keep this with the given name.
  Error keep(const Twine &Name);
  Error discard();

  // This checks that keep or delete was called.
  ~TempFile() { consumeError(discard()); }
};
540
/// A TempFile paired with a memory mapping of its contents. Both keep() and
/// discard() unmap first, then delegate to the underlying TempFile.
class MappedTempFile {
public:
  char *data() const { return Map.data(); }
  size_t size() const { return Map.size(); }

  /// Unmap and remove the temporary file. May only be called once.
  Error discard() {
    assert(Map && "Map already destroyed");
    Map.unmap();
    return Temp.discard();
  }

  /// Unmap and rename the temporary file to \p Name. May only be called once.
  Error keep(const Twine &Name) {
    assert(Map && "Map already destroyed");
    Map.unmap();
    return Temp.keep(Name);
  }

  MappedTempFile(TempFile Temp, sys::fs::mapped_file_region Map)
      : Temp(std::move(Temp)), Map(std::move(Map)) {}

private:
  TempFile Temp;
  sys::fs::mapped_file_region Map;
};
565} // namespace
566
568 Done = true;
569 if (FD != -1) {
571 if (std::error_code EC = sys::fs::closeFile(File))
572 return errorCodeToError(EC);
573 }
574 FD = -1;
575
576 // Always try to close and remove.
577 std::error_code RemoveEC;
578 if (!TmpName.empty()) {
579 std::error_code EC = sys::fs::remove(TmpName);
580 if (EC)
581 return errorCodeToError(EC);
582 }
583 TmpName = "";
584
585 return Error::success();
586}
587
589 assert(!Done);
590 Done = true;
591 // Always try to close and rename.
592 std::error_code RenameEC = sys::fs::rename(TmpName, Name);
593
594 if (!RenameEC)
595 TmpName = "";
596
598 if (std::error_code EC = sys::fs::closeFile(File))
599 return errorCodeToError(EC);
600 FD = -1;
601
602 return errorCodeToError(RenameEC);
603}
604
606 int FD;
607 SmallString<128> ResultPath;
608 if (std::error_code EC = sys::fs::createUniqueFile(Model, FD, ResultPath))
609 return errorCodeToError(EC);
610
611 TempFile Ret(ResultPath, FD);
612 return std::move(Ret);
613}
614
615bool TrieRecord::compare_exchange_strong(Data &Existing, Data New) {
616 uint64_t ExistingPacked = pack(Existing);
617 uint64_t NewPacked = pack(New);
618 if (Storage.compare_exchange_strong(ExistingPacked, NewPacked))
619 return true;
620 Existing = unpack(ExistingPacked);
621 return false;
622}
623
624DataRecordHandle DataRecordHandle::construct(char *Mem, const Input &I) {
625 return constructImpl(Mem, I, Layout(I));
626}
627
629DataRecordHandle::getFromDataPool(const OnDiskDataAllocator &Pool,
631 auto HeaderData = Pool.get(Offset, sizeof(DataRecordHandle::Header));
632 if (!HeaderData)
633 return HeaderData.takeError();
634
635 auto Record = DataRecordHandle::get(HeaderData->data());
636 if (Record.getTotalSize() + Offset.get() > Pool.size())
637 return createStringError(
638 make_error_code(std::errc::illegal_byte_sequence),
639 "data record span passed the end of the data pool");
640
641 return Record;
642}
643
/// Serialize \p I into \p Mem following \p L: header word (filled in last),
/// then any out-of-line size fields, then refs, data, and a trailing null.
DataRecordHandle DataRecordHandle::constructImpl(char *Mem, const Input &I,
                                                 const Layout &L) {
  char *Next = Mem + sizeof(Header);

  // Fill in Packed and set other data, then come back to construct the header.
  Header::PackTy Packed = 0;
  Packed |= LayoutFlags::pack(L.Flags) << Header::LayoutFlagsShift;

  // Construct DataSize. The 1B/2B encodings go into the packed header word
  // (top-adjacent byte / low half-word); 4B/8B are written little-endian
  // after the header.
  switch (L.Flags.DataSize) {
  case DataSizeFlags::Uses1B:
    assert(I.Data.size() <= UINT8_MAX);
    Packed |= (Header::PackTy)I.Data.size()
              << ((sizeof(Packed) - 2) * CHAR_BIT);
    break;
  case DataSizeFlags::Uses2B:
    assert(I.Data.size() <= UINT16_MAX);
    Packed |= (Header::PackTy)I.Data.size()
              << ((sizeof(Packed) - 4) * CHAR_BIT);
    break;
  case DataSizeFlags::Uses4B:
    support::endian::write32le(Next, I.Data.size());
    Next += 4;
    break;
  case DataSizeFlags::Uses8B:
    support::endian::write64le(Next, I.Data.size());
    Next += 8;
    break;
  }

  // Construct NumRefs.
  //
  // NOTE: May be writing NumRefs even if there are zero refs in order to fix
  // alignment.
  switch (L.Flags.NumRefs) {
  case NumRefsFlags::Uses0B:
    break;
  case NumRefsFlags::Uses1B:
    assert(I.Refs.size() <= UINT8_MAX);
    Packed |= (Header::PackTy)I.Refs.size()
              << ((sizeof(Packed) - 2) * CHAR_BIT);
    break;
  case NumRefsFlags::Uses2B:
    assert(I.Refs.size() <= UINT16_MAX);
    Packed |= (Header::PackTy)I.Refs.size()
              << ((sizeof(Packed) - 4) * CHAR_BIT);
    break;
  case NumRefsFlags::Uses4B:
    support::endian::write32le(Next, I.Refs.size());
    Next += 4;
    break;
  case NumRefsFlags::Uses8B:
    support::endian::write64le(Next, I.Refs.size());
    Next += 8;
    break;
  }

  // Construct Refs[]. Copied raw: the on-disk byte layout matches the
  // in-memory buffer returned by getBuffer().
  if (!I.Refs.empty()) {
    assert((L.Flags.RefKind == RefKindFlags::InternalRef4B) == I.Refs.is4B());
    ArrayRef<uint8_t> RefsBuffer = I.Refs.getBuffer();
    llvm::copy(RefsBuffer, Next);
    Next += RefsBuffer.size();
  }

  // Construct Data and the trailing null.
  llvm::copy(I.Data, Next);
  Next[I.Data.size()] = 0;

  // Construct the header itself and return.
  Header *H = new (Mem) Header{Packed};
  DataRecordHandle Record(*H);
  // Round-trip sanity checks: the constructed record must decode back to
  // exactly the inputs and the computed layout.
  assert(Record.getData() == I.Data);
  assert(Record.getNumRefs() == I.Refs.size());
  assert(Record.getRefs() == I.Refs);
  assert(Record.getLayoutFlags().DataSize == L.Flags.DataSize);
  assert(Record.getLayoutFlags().NumRefs == L.Flags.NumRefs);
  assert(Record.getLayoutFlags().RefKind == L.Flags.RefKind);
  return Record;
}
725
/// Compute the record layout for \p I: choose encodings for DataSize and
/// NumRefs and derive the relative offsets of the ref list and the data.
DataRecordHandle::Layout::Layout(const Input &I) {
  // Start initial relative offsets right after the Header.
  uint64_t RelOffset = sizeof(Header);

  // Initialize the easy stuff.
  DataSize = I.Data.size();
  NumRefs = I.Refs.size();

  // Check refs size.
  Flags.RefKind =
      I.Refs.is4B() ? RefKindFlags::InternalRef4B : RefKindFlags::InternalRef;

  // Find the smallest slot available for DataSize.
  //
  // Has1B/Has2B track whether the in-header packed slots (a 1-byte and a
  // 2-byte field) are still free; DataSize and NumRefs share them,
  // first-come first-served.
  bool Has1B = true;
  bool Has2B = true;
  if (DataSize <= UINT8_MAX && Has1B) {
    Flags.DataSize = DataSizeFlags::Uses1B;
    Has1B = false;
  } else if (DataSize <= UINT16_MAX && Has2B) {
    Flags.DataSize = DataSizeFlags::Uses2B;
    Has2B = false;
  } else if (DataSize <= UINT32_MAX) {
    Flags.DataSize = DataSizeFlags::Uses4B;
    RelOffset += 4;
  } else {
    Flags.DataSize = DataSizeFlags::Uses8B;
    RelOffset += 8;
  }

  // Find the smallest slot available for NumRefs. Never sets NumRefs8B here.
  if (!NumRefs) {
    Flags.NumRefs = NumRefsFlags::Uses0B;
  } else if (NumRefs <= UINT8_MAX && Has1B) {
    Flags.NumRefs = NumRefsFlags::Uses1B;
    Has1B = false;
  } else if (NumRefs <= UINT16_MAX && Has2B) {
    Flags.NumRefs = NumRefsFlags::Uses2B;
    Has2B = false;
  } else {
    Flags.NumRefs = NumRefsFlags::Uses4B;
    RelOffset += 4;
  }

  // Helper to "upgrade" either DataSize or NumRefs by 4B to avoid complicated
  // padding rules when reading and writing. This also bumps RelOffset.
  //
  // The value for NumRefs is strictly limited to UINT32_MAX, but it can be
  // stored as 8B. This means we can *always* find a size to grow.
  //
  // NOTE: Only call this once.
  auto GrowSizeFieldsBy4B = [&]() {
    assert(isAligned(Align(4), RelOffset));
    RelOffset += 4;

    assert(Flags.NumRefs != NumRefsFlags::Uses8B &&
           "Expected to be able to grow NumRefs8B");

    // First try to grow DataSize. NumRefs will not (yet) be 8B, and if
    // DataSize is upgraded to 8B it'll already be aligned.
    //
    // Failing that, grow NumRefs.
    if (Flags.DataSize < DataSizeFlags::Uses4B)
      Flags.DataSize = DataSizeFlags::Uses4B; // DataSize: Packed => 4B.
    else if (Flags.DataSize < DataSizeFlags::Uses8B)
      Flags.DataSize = DataSizeFlags::Uses8B; // DataSize: 4B => 8B.
    else if (Flags.NumRefs < NumRefsFlags::Uses4B)
      Flags.NumRefs = NumRefsFlags::Uses4B; // NumRefs: Packed => 4B.
    else
      Flags.NumRefs = NumRefsFlags::Uses8B; // NumRefs: 4B => 8B.
  };

  assert(isAligned(Align(4), RelOffset));
  if (Flags.RefKind == RefKindFlags::InternalRef) {
    // List of 8B refs should be 8B-aligned. Grow one of the sizes to get this
    // without padding.
    if (!isAligned(Align(8), RelOffset))
      GrowSizeFieldsBy4B();

    assert(isAligned(Align(8), RelOffset));
    RefsRelOffset = RelOffset;
    RelOffset += 8 * NumRefs;
  } else {
    // The array of 4B refs doesn't need 8B alignment, but the data will need
    // to be 8B-aligned. Detect this now, and, if necessary, shift everything
    // by 4B by growing one of the sizes.
    // If we remove the need for 8B-alignment for data there is <1% savings in
    // disk storage for a clang build using MCCAS but the 8B-alignment may be
    // useful in the future so keep it for now.
    uint64_t RefListSize = 4 * NumRefs;
    if (!isAligned(Align(8), RelOffset + RefListSize))
      GrowSizeFieldsBy4B();
    RefsRelOffset = RelOffset;
    RelOffset += RefListSize;
  }

  // Data always starts 8B-aligned.
  assert(isAligned(Align(8), RelOffset));
  DataRelOffset = RelOffset;
}
824
825uint64_t DataRecordHandle::getDataSize() const {
826 int64_t RelOffset = sizeof(Header);
827 auto *DataSizePtr = reinterpret_cast<const char *>(H) + RelOffset;
828 switch (getLayoutFlags().DataSize) {
829 case DataSizeFlags::Uses1B:
830 return (H->Packed >> ((sizeof(Header::PackTy) - 2) * CHAR_BIT)) & UINT8_MAX;
831 case DataSizeFlags::Uses2B:
832 return (H->Packed >> ((sizeof(Header::PackTy) - 4) * CHAR_BIT)) &
833 UINT16_MAX;
834 case DataSizeFlags::Uses4B:
835 return support::endian::read32le(DataSizePtr);
836 case DataSizeFlags::Uses8B:
837 return support::endian::read64le(DataSizePtr);
838 }
839}
840
841void DataRecordHandle::skipDataSize(LayoutFlags LF, int64_t &RelOffset) const {
842 if (LF.DataSize >= DataSizeFlags::Uses4B)
843 RelOffset += 4;
844 if (LF.DataSize >= DataSizeFlags::Uses8B)
845 RelOffset += 4;
846}
847
848uint32_t DataRecordHandle::getNumRefs() const {
849 LayoutFlags LF = getLayoutFlags();
850 int64_t RelOffset = sizeof(Header);
851 skipDataSize(LF, RelOffset);
852 auto *NumRefsPtr = reinterpret_cast<const char *>(H) + RelOffset;
853 switch (LF.NumRefs) {
854 case NumRefsFlags::Uses0B:
855 return 0;
856 case NumRefsFlags::Uses1B:
857 return (H->Packed >> ((sizeof(Header::PackTy) - 2) * CHAR_BIT)) & UINT8_MAX;
858 case NumRefsFlags::Uses2B:
859 return (H->Packed >> ((sizeof(Header::PackTy) - 4) * CHAR_BIT)) &
860 UINT16_MAX;
861 case NumRefsFlags::Uses4B:
862 return support::endian::read32le(NumRefsPtr);
863 case NumRefsFlags::Uses8B:
864 return support::endian::read64le(NumRefsPtr);
865 }
866}
867
868void DataRecordHandle::skipNumRefs(LayoutFlags LF, int64_t &RelOffset) const {
869 if (LF.NumRefs >= NumRefsFlags::Uses4B)
870 RelOffset += 4;
871 if (LF.NumRefs >= NumRefsFlags::Uses8B)
872 RelOffset += 4;
873}
874
875int64_t DataRecordHandle::getRefsRelOffset() const {
876 LayoutFlags LF = getLayoutFlags();
877 int64_t RelOffset = sizeof(Header);
878 skipDataSize(LF, RelOffset);
879 skipNumRefs(LF, RelOffset);
880 return RelOffset;
881}
882
883int64_t DataRecordHandle::getDataRelOffset() const {
884 LayoutFlags LF = getLayoutFlags();
885 int64_t RelOffset = sizeof(Header);
886 skipDataSize(LF, RelOffset);
887 skipNumRefs(LF, RelOffset);
888 uint32_t RefSize = LF.RefKind == RefKindFlags::InternalRef4B ? 4 : 8;
889 RelOffset += RefSize * getNumRefs();
890 return RelOffset;
891}
892
894 return Index.validate([&](FileOffset Offset,
896 -> Error {
897 auto formatError = [&](Twine Msg) {
898 return createStringError(
900 "bad record at 0x" +
901 utohexstr((unsigned)Offset.get(), /*LowerCase=*/true) + ": " +
902 Msg.str());
903 };
904
905 if (Record.Data.size() != sizeof(TrieRecord))
906 return formatError("wrong data record size");
907 if (!isAligned(Align::Of<TrieRecord>(), Record.Data.size()))
908 return formatError("wrong data record alignment");
909
910 auto *R = reinterpret_cast<const TrieRecord *>(Record.Data.data());
911 TrieRecord::Data D = R->load();
912 std::unique_ptr<MemoryBuffer> FileBuffer;
913 if ((uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::Unknown &&
914 (uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::DataPool &&
915 (uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::Standalone &&
916 (uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::StandaloneLeaf &&
917 (uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::StandaloneLeaf0)
918 return formatError("invalid record kind value");
919
921 auto I = getIndexProxyFromRef(Ref);
922 if (!I)
923 return I.takeError();
924
925 switch (D.SK) {
926 case TrieRecord::StorageKind::Unknown:
927 // This could be an abandoned entry due to a termination before updating
928 // the record. It can be reused by later insertion so just skip this entry
929 // for now.
930 return Error::success();
931 case TrieRecord::StorageKind::DataPool:
932 // Check offset is a postive value, and large enough to hold the
933 // header for the data record.
934 if (D.Offset.get() <= 0 ||
935 (uint64_t)D.Offset.get() + sizeof(DataRecordHandle::Header) >=
936 DataPool.size())
937 return formatError("datapool record out of bound");
938 break;
939 case TrieRecord::StorageKind::Standalone:
940 case TrieRecord::StorageKind::StandaloneLeaf:
941 case TrieRecord::StorageKind::StandaloneLeaf0:
942 SmallString<256> Path;
943 getStandalonePath(TrieRecord::getStandaloneFilePrefix(D.SK), *I, Path);
944 // If need to validate the content of the file later, just load the
945 // buffer here. Otherwise, just check the existance of the file.
946 if (Deep) {
947 auto File = MemoryBuffer::getFile(Path, /*IsText=*/false,
948 /*RequiresNullTerminator=*/false);
949 if (!File || !*File)
950 return formatError("record file \'" + Path + "\' does not exist");
951
952 FileBuffer = std::move(*File);
953 } else if (!llvm::sys::fs::exists(Path))
954 return formatError("record file \'" + Path + "\' does not exist");
955 }
956
957 if (!Deep)
958 return Error::success();
959
960 auto dataError = [&](Twine Msg) {
962 "bad data for digest \'" + toHex(I->Hash) +
963 "\': " + Msg.str());
964 };
966 ArrayRef<char> StoredData;
967
968 switch (D.SK) {
969 case TrieRecord::StorageKind::Unknown:
970 llvm_unreachable("already handled");
971 case TrieRecord::StorageKind::DataPool: {
972 auto DataRecord = DataRecordHandle::getFromDataPool(DataPool, D.Offset);
973 if (!DataRecord)
974 return dataError(toString(DataRecord.takeError()));
975
976 for (auto InternRef : DataRecord->getRefs()) {
977 auto Index = getIndexProxyFromRef(InternRef);
978 if (!Index)
979 return Index.takeError();
980 Refs.push_back(Index->Hash);
981 }
982 StoredData = DataRecord->getData();
983 break;
984 }
985 case TrieRecord::StorageKind::Standalone: {
986 if (FileBuffer->getBufferSize() < sizeof(DataRecordHandle::Header))
987 return dataError("data record is not big enough to read the header");
988 auto DataRecord = DataRecordHandle::get(FileBuffer->getBufferStart());
989 if (DataRecord.getTotalSize() < FileBuffer->getBufferSize())
990 return dataError(
991 "data record span passed the end of the standalone file");
992 for (auto InternRef : DataRecord.getRefs()) {
993 auto Index = getIndexProxyFromRef(InternRef);
994 if (!Index)
995 return Index.takeError();
996 Refs.push_back(Index->Hash);
997 }
998 StoredData = DataRecord.getData();
999 break;
1000 }
1001 case TrieRecord::StorageKind::StandaloneLeaf:
1002 case TrieRecord::StorageKind::StandaloneLeaf0: {
1003 StoredData = arrayRefFromStringRef<char>(FileBuffer->getBuffer());
1004 if (D.SK == TrieRecord::StorageKind::StandaloneLeaf0) {
1005 if (!FileBuffer->getBuffer().ends_with('\0'))
1006 return dataError("standalone file is not zero terminated");
1007 StoredData = StoredData.drop_back(1);
1008 }
1009 break;
1010 }
1011 }
1012
1013 SmallVector<uint8_t> ComputedHash;
1014 Hasher(Refs, StoredData, ComputedHash);
1015 if (I->Hash != ArrayRef(ComputedHash))
1016 return dataError("hash mismatch, got \'" + toHex(ComputedHash) +
1017 "\' instead");
1018
1019 return Error::success();
1020 });
1021}
1022
1024 OS << "on-disk-root-path: " << RootPath << "\n";
1025
1026 struct PoolInfo {
1028 };
1030
1031 OS << "\n";
1032 OS << "index:\n";
1033 Index.print(OS, [&](ArrayRef<char> Data) {
1034 assert(Data.size() == sizeof(TrieRecord));
1036 auto *R = reinterpret_cast<const TrieRecord *>(Data.data());
1037 TrieRecord::Data D = R->load();
1038 OS << " SK=";
1039 switch (D.SK) {
1040 case TrieRecord::StorageKind::Unknown:
1041 OS << "unknown ";
1042 break;
1043 case TrieRecord::StorageKind::DataPool:
1044 OS << "datapool ";
1045 Pool.push_back({D.Offset.get()});
1046 break;
1047 case TrieRecord::StorageKind::Standalone:
1048 OS << "standalone-data ";
1049 break;
1050 case TrieRecord::StorageKind::StandaloneLeaf:
1051 OS << "standalone-leaf ";
1052 break;
1053 case TrieRecord::StorageKind::StandaloneLeaf0:
1054 OS << "standalone-leaf+0";
1055 break;
1056 }
1057 OS << " Offset=" << (void *)D.Offset.get();
1058 });
1059 if (Pool.empty())
1060 return;
1061
1062 OS << "\n";
1063 OS << "pool:\n";
1064 llvm::sort(
1065 Pool, [](PoolInfo LHS, PoolInfo RHS) { return LHS.Offset < RHS.Offset; });
1066 for (PoolInfo PI : Pool) {
1067 OS << "- addr=" << (void *)PI.Offset << " ";
1068 auto D = DataRecordHandle::getFromDataPool(DataPool, FileOffset(PI.Offset));
1069 if (!D) {
1070 OS << "error: " << toString(D.takeError());
1071 return;
1072 }
1073
1074 OS << "record refs=" << D->getNumRefs() << " data=" << D->getDataSize()
1075 << " size=" << D->getTotalSize()
1076 << " end=" << (void *)(PI.Offset + D->getTotalSize()) << "\n";
1077 }
1078}
1079
1081OnDiskGraphDB::indexHash(ArrayRef<uint8_t> Hash) {
1082 auto P = Index.insertLazy(
1083 Hash, [](FileOffset TentativeOffset,
1084 OnDiskTrieRawHashMap::ValueProxy TentativeValue) {
1085 assert(TentativeValue.Data.size() == sizeof(TrieRecord));
1086 assert(
1087 isAddrAligned(Align::Of<TrieRecord>(), TentativeValue.Data.data()));
1088 new (TentativeValue.Data.data()) TrieRecord();
1089 });
1090 if (LLVM_UNLIKELY(!P))
1091 return P.takeError();
1092
1093 assert(*P && "Expected insertion");
1094 return getIndexProxyFromPointer(*P);
1095}
1096
1097OnDiskGraphDB::IndexProxy OnDiskGraphDB::getIndexProxyFromPointer(
1099 assert(P);
1100 assert(P.getOffset());
1101 return IndexProxy{P.getOffset(), P->Hash,
1102 *const_cast<TrieRecord *>(
1103 reinterpret_cast<const TrieRecord *>(P->Data.data()))};
1104}
1105
1107 auto I = indexHash(Hash);
1108 if (LLVM_UNLIKELY(!I))
1109 return I.takeError();
1110 return getExternalReference(*I);
1111}
1112
1113ObjectID OnDiskGraphDB::getExternalReference(const IndexProxy &I) {
1114 return getExternalReference(makeInternalRef(I.Offset));
1115}
1116
1117std::optional<ObjectID>
1119 auto tryUpstream =
1120 [&](std::optional<IndexProxy> I) -> std::optional<ObjectID> {
1121 if (!UpstreamDB)
1122 return std::nullopt;
1123 std::optional<ObjectID> UpstreamID =
1124 UpstreamDB->getExistingReference(Digest);
1125 if (LLVM_UNLIKELY(!UpstreamID))
1126 return std::nullopt;
1127 auto Ref = expectedToOptional(indexHash(Digest));
1128 if (!Ref)
1129 return std::nullopt;
1130 if (!I)
1131 I.emplace(*Ref);
1132 return getExternalReference(*I);
1133 };
1134
1135 OnDiskTrieRawHashMap::ConstOnDiskPtr P = Index.find(Digest);
1136 if (!P)
1137 return tryUpstream(std::nullopt);
1138 IndexProxy I = getIndexProxyFromPointer(P);
1139 TrieRecord::Data Obj = I.Ref.load();
1140 if (Obj.SK == TrieRecord::StorageKind::Unknown)
1141 return tryUpstream(I);
1142 return getExternalReference(makeInternalRef(I.Offset));
1143}
1144
1146OnDiskGraphDB::getIndexProxyFromRef(InternalRef Ref) const {
1147 auto P = Index.recoverFromFileOffset(Ref.getFileOffset());
1148 if (LLVM_UNLIKELY(!P))
1149 return P.takeError();
1150 return getIndexProxyFromPointer(*P);
1151}
1152
1154 auto I = getIndexProxyFromRef(Ref);
1155 if (!I)
1156 return I.takeError();
1157 return I->Hash;
1158}
1159
1160ArrayRef<uint8_t> OnDiskGraphDB::getDigest(const IndexProxy &I) const {
1161 return I.Hash;
1162}
1163
1164static OnDiskContent getContentFromHandle(const OnDiskDataAllocator &DataPool,
1165 ObjectHandle OH) {
1166 // Decode ObjectHandle to locate the stored content.
1167 uint64_t Data = OH.getOpaqueData();
1168 if (Data & 1) {
1169 const auto *SDIM =
1170 reinterpret_cast<const StandaloneDataInMemory *>(Data & (-1ULL << 1));
1171 return SDIM->getContent();
1172 }
1173
1174 auto DataHandle =
1175 cantFail(DataRecordHandle::getFromDataPool(DataPool, FileOffset(Data)));
1176 assert(DataHandle.getData().end()[0] == 0 && "Null termination");
1177 return OnDiskContent{DataHandle, std::nullopt};
1178}
1179
1181 OnDiskContent Content = getContentFromHandle(DataPool, Node);
1182 if (Content.Bytes)
1183 return *Content.Bytes;
1184 assert(Content.Record && "Expected record or bytes");
1185 return Content.Record->getData();
1186}
1187
1188InternalRefArrayRef OnDiskGraphDB::getInternalRefs(ObjectHandle Node) const {
1189 if (std::optional<DataRecordHandle> Record =
1190 getContentFromHandle(DataPool, Node).Record)
1191 return Record->getRefs();
1192 return std::nullopt;
1193}
1194
1197 InternalRef Ref = getInternalRef(ExternalRef);
1198 auto I = getIndexProxyFromRef(Ref);
1199 if (!I)
1200 return I.takeError();
1201 TrieRecord::Data Object = I->Ref.load();
1202
1203 if (Object.SK == TrieRecord::StorageKind::Unknown) {
1204 if (!UpstreamDB)
1205 return std::nullopt;
1206 return faultInFromUpstream(ExternalRef);
1207 }
1208
1209 if (Object.SK == TrieRecord::StorageKind::DataPool)
1210 return ObjectHandle::fromFileOffset(Object.Offset);
1211
1212 // Only TrieRecord::StorageKind::Standalone (and variants) need to be
1213 // explicitly loaded.
1214 //
1215 // There's corruption if standalone objects have offsets, or if we get here
1216 // for something that isn't standalone.
1217 if (Object.Offset)
1219 switch (Object.SK) {
1220 case TrieRecord::StorageKind::Unknown:
1221 case TrieRecord::StorageKind::DataPool:
1222 llvm_unreachable("unexpected storage kind");
1223 case TrieRecord::StorageKind::Standalone:
1224 case TrieRecord::StorageKind::StandaloneLeaf0:
1225 case TrieRecord::StorageKind::StandaloneLeaf:
1226 break;
1227 }
1228
1229 // Load it from disk.
1230 //
1231 // Note: Creation logic guarantees that data that needs null-termination is
1232 // suitably 0-padded. Requiring null-termination here would be too expensive
1233 // for extremely large objects that happen to be page-aligned.
1234 SmallString<256> Path;
1235 getStandalonePath(TrieRecord::getStandaloneFilePrefix(Object.SK), *I, Path);
1236
1237 auto File = sys::fs::openNativeFileForRead(Path);
1238 if (!File)
1239 return createFileError(Path, File.takeError());
1240
1241 auto CloseFile = make_scope_exit([&]() { sys::fs::closeFile(*File); });
1242
1244 if (std::error_code EC = sys::fs::status(*File, Status))
1246
1247 std::error_code EC;
1248 auto Region = std::make_unique<sys::fs::mapped_file_region>(
1249 *File, sys::fs::mapped_file_region::readonly, Status.getSize(), 0, EC);
1250 if (EC)
1252
1254 static_cast<StandaloneDataMapTy *>(StandaloneData)
1255 ->insert(I->Hash, Object.SK, std::move(Region)));
1256}
1257
1259 auto Presence = getObjectPresence(Ref, /*CheckUpstream=*/true);
1260 if (!Presence)
1261 return Presence.takeError();
1262
1263 switch (*Presence) {
1264 case ObjectPresence::Missing:
1265 return false;
1266 case ObjectPresence::InPrimaryDB:
1267 return true;
1268 case ObjectPresence::OnlyInUpstreamDB:
1269 if (auto FaultInResult = faultInFromUpstream(Ref); !FaultInResult)
1270 return FaultInResult.takeError();
1271 return true;
1272 }
1273}
1274
1276OnDiskGraphDB::getObjectPresence(ObjectID ExternalRef,
1277 bool CheckUpstream) const {
1278 InternalRef Ref = getInternalRef(ExternalRef);
1279 auto I = getIndexProxyFromRef(Ref);
1280 if (!I)
1281 return I.takeError();
1282
1283 TrieRecord::Data Object = I->Ref.load();
1284 if (Object.SK != TrieRecord::StorageKind::Unknown)
1285 return ObjectPresence::InPrimaryDB;
1286 if (!CheckUpstream || !UpstreamDB)
1287 return ObjectPresence::Missing;
1288 std::optional<ObjectID> UpstreamID =
1289 UpstreamDB->getExistingReference(getDigest(*I));
1290 return UpstreamID.has_value() ? ObjectPresence::OnlyInUpstreamDB
1291 : ObjectPresence::Missing;
1292}
1293
1294InternalRef OnDiskGraphDB::makeInternalRef(FileOffset IndexOffset) {
1295 return InternalRef::getFromOffset(IndexOffset);
1296}
1297
1298void OnDiskGraphDB::getStandalonePath(StringRef Prefix, const IndexProxy &I,
1299 SmallVectorImpl<char> &Path) const {
1300 Path.assign(RootPath.begin(), RootPath.end());
1301 sys::path::append(Path,
1302 Prefix + Twine(I.Offset.get()) + "." + CASFormatVersion);
1303}
1304
1305OnDiskContent StandaloneDataInMemory::getContent() const {
1306 bool Leaf0 = false;
1307 bool Leaf = false;
1308 switch (SK) {
1309 default:
1310 llvm_unreachable("Storage kind must be standalone");
1311 case TrieRecord::StorageKind::Standalone:
1312 break;
1313 case TrieRecord::StorageKind::StandaloneLeaf0:
1314 Leaf = Leaf0 = true;
1315 break;
1316 case TrieRecord::StorageKind::StandaloneLeaf:
1317 Leaf = true;
1318 break;
1319 }
1320
1321 if (Leaf) {
1322 StringRef Data(Region->data(), Region->size());
1323 assert(Data.drop_back(Leaf0).end()[0] == 0 &&
1324 "Standalone node data missing null termination");
1325 return OnDiskContent{std::nullopt,
1326 arrayRefFromStringRef<char>(Data.drop_back(Leaf0))};
1327 }
1328
1329 DataRecordHandle Record = DataRecordHandle::get(Region->data());
1330 assert(Record.getData().end()[0] == 0 &&
1331 "Standalone object record missing null termination for data");
1332 return OnDiskContent{Record, std::nullopt};
1333}
1334
1336 uint64_t Size) {
1337 assert(Size && "Unexpected request for an empty temp file");
1338 Expected<TempFile> File = TempFile::create(FinalPath + ".%%%%%%");
1339 if (!File)
1340 return File.takeError();
1341
1342 if (Error E = preallocateFileTail(File->FD, 0, Size).takeError())
1343 return createFileError(File->TmpName, std::move(E));
1344
1345 if (auto EC = sys::fs::resize_file_before_mapping_readwrite(File->FD, Size))
1346 return createFileError(File->TmpName, EC);
1347
1348 std::error_code EC;
1351 0, EC);
1352 if (EC)
1353 return createFileError(File->TmpName, EC);
1354 return MappedTempFile(std::move(*File), std::move(Map));
1355}
1356
1357static size_t getPageSize() {
1359 return PageSize;
1360}
1361
1362Error OnDiskGraphDB::createStandaloneLeaf(IndexProxy &I, ArrayRef<char> Data) {
1363 assert(Data.size() > TrieRecord::MaxEmbeddedSize &&
1364 "Expected a bigger file for external content...");
1365
1366 bool Leaf0 = isAligned(Align(getPageSize()), Data.size());
1367 TrieRecord::StorageKind SK = Leaf0 ? TrieRecord::StorageKind::StandaloneLeaf0
1368 : TrieRecord::StorageKind::StandaloneLeaf;
1369
1370 SmallString<256> Path;
1371 int64_t FileSize = Data.size() + Leaf0;
1372 getStandalonePath(TrieRecord::getStandaloneFilePrefix(SK), I, Path);
1373
1374 // Write the file. Don't reuse this mapped_file_region, which is read/write.
1375 // Let load() pull up one that's read-only.
1376 Expected<MappedTempFile> File = createTempFile(Path, FileSize);
1377 if (!File)
1378 return File.takeError();
1379 assert(File->size() == (uint64_t)FileSize);
1380 llvm::copy(Data, File->data());
1381 if (Leaf0)
1382 File->data()[Data.size()] = 0;
1383 assert(File->data()[Data.size()] == 0);
1384 if (Error E = File->keep(Path))
1385 return E;
1386
1387 // Store the object reference.
1388 TrieRecord::Data Existing;
1389 {
1390 TrieRecord::Data Leaf{SK, FileOffset()};
1391 if (I.Ref.compare_exchange_strong(Existing, Leaf)) {
1392 recordStandaloneSizeIncrease(FileSize);
1393 return Error::success();
1394 }
1395 }
1396
1397 // If there was a race, confirm that the new value has valid storage.
1398 if (Existing.SK == TrieRecord::StorageKind::Unknown)
1399 return createCorruptObjectError(getDigest(I));
1400
1401 return Error::success();
1402}
1403
1406 auto I = getIndexProxyFromRef(getInternalRef(ID));
1407 if (LLVM_UNLIKELY(!I))
1408 return I.takeError();
1409
1410 // Early return in case the node exists.
1411 {
1412 TrieRecord::Data Existing = I->Ref.load();
1413 if (Existing.SK != TrieRecord::StorageKind::Unknown)
1414 return Error::success();
1415 }
1416
1417 // Big leaf nodes.
1418 if (Refs.empty() && Data.size() > TrieRecord::MaxEmbeddedSize)
1419 return createStandaloneLeaf(*I, Data);
1420
1421 // TODO: Check whether it's worth checking the index for an already existing
1422 // object (like storeTreeImpl() does) before building up the
1423 // InternalRefVector.
1424 InternalRefVector InternalRefs;
1425 for (ObjectID Ref : Refs)
1426 InternalRefs.push_back(getInternalRef(Ref));
1427
1428 // Create the object.
1429
1430 DataRecordHandle::Input Input{InternalRefs, Data};
1431
1432 // Compute the storage kind, allocate it, and create the record.
1433 TrieRecord::StorageKind SK = TrieRecord::StorageKind::Unknown;
1434 FileOffset PoolOffset;
1435 SmallString<256> Path;
1436 std::optional<MappedTempFile> File;
1437 std::optional<uint64_t> FileSize;
1438 auto AllocStandaloneFile = [&](size_t Size) -> Expected<char *> {
1439 getStandalonePath(TrieRecord::getStandaloneFilePrefix(
1440 TrieRecord::StorageKind::Standalone),
1441 *I, Path);
1442 if (Error E = createTempFile(Path, Size).moveInto(File))
1443 return std::move(E);
1444 assert(File->size() == Size);
1445 FileSize = Size;
1446 SK = TrieRecord::StorageKind::Standalone;
1447 return File->data();
1448 };
1449 auto Alloc = [&](size_t Size) -> Expected<char *> {
1450 if (Size <= TrieRecord::MaxEmbeddedSize) {
1451 SK = TrieRecord::StorageKind::DataPool;
1452 auto P = DataPool.allocate(Size);
1453 if (LLVM_UNLIKELY(!P)) {
1454 char *NewAlloc = nullptr;
1455 auto NewE = handleErrors(
1456 P.takeError(), [&](std::unique_ptr<StringError> E) -> Error {
1457 if (E->convertToErrorCode() == std::errc::not_enough_memory)
1458 return AllocStandaloneFile(Size).moveInto(NewAlloc);
1459 return Error(std::move(E));
1460 });
1461 if (!NewE)
1462 return NewAlloc;
1463 return std::move(NewE);
1464 }
1465 PoolOffset = P->getOffset();
1466 LLVM_DEBUG({
1467 dbgs() << "pool-alloc addr=" << (void *)PoolOffset.get()
1468 << " size=" << Size
1469 << " end=" << (void *)(PoolOffset.get() + Size) << "\n";
1470 });
1471 return (*P)->data();
1472 }
1473 return AllocStandaloneFile(Size);
1474 };
1475
1476 DataRecordHandle Record;
1477 if (Error E =
1478 DataRecordHandle::createWithError(Alloc, Input).moveInto(Record))
1479 return E;
1480 assert(Record.getData().end()[0] == 0 && "Expected null-termination");
1481 assert(Record.getData() == Input.Data && "Expected initialization");
1482 assert(SK != TrieRecord::StorageKind::Unknown);
1483 assert(bool(File) != bool(PoolOffset) &&
1484 "Expected either a mapped file or a pooled offset");
1485
1486 // Check for a race before calling MappedTempFile::keep().
1487 //
1488 // Then decide what to do with the file. Better to discard than overwrite if
1489 // another thread/process has already added this.
1490 TrieRecord::Data Existing = I->Ref.load();
1491 {
1492 TrieRecord::Data NewObject{SK, PoolOffset};
1493 if (File) {
1494 if (Existing.SK == TrieRecord::StorageKind::Unknown) {
1495 // Keep the file!
1496 if (Error E = File->keep(Path))
1497 return E;
1498 } else {
1499 File.reset();
1500 }
1501 }
1502
1503 // If we didn't already see a racing/existing write, then try storing the
1504 // new object. If that races, confirm that the new value has valid storage.
1505 //
1506 // TODO: Find a way to reuse the storage from the new-but-abandoned record
1507 // handle.
1508 if (Existing.SK == TrieRecord::StorageKind::Unknown) {
1509 if (I->Ref.compare_exchange_strong(Existing, NewObject)) {
1510 if (FileSize)
1511 recordStandaloneSizeIncrease(*FileSize);
1512 return Error::success();
1513 }
1514 }
1515 }
1516
1517 if (Existing.SK == TrieRecord::StorageKind::Unknown)
1519
1520 // Load existing object.
1521 return Error::success();
1522}
1523
1524void OnDiskGraphDB::recordStandaloneSizeIncrease(size_t SizeIncrease) {
1525 standaloneStorageSize().fetch_add(SizeIncrease, std::memory_order_relaxed);
1526}
1527
1528std::atomic<uint64_t> &OnDiskGraphDB::standaloneStorageSize() const {
1529 MutableArrayRef<uint8_t> UserHeader = DataPool.getUserHeader();
1530 assert(UserHeader.size() == sizeof(std::atomic<uint64_t>));
1531 assert(isAddrAligned(Align(8), UserHeader.data()));
1532 return *reinterpret_cast<std::atomic<uint64_t> *>(UserHeader.data());
1533}
1534
1535uint64_t OnDiskGraphDB::getStandaloneStorageSize() const {
1536 return standaloneStorageSize().load(std::memory_order_relaxed);
1537}
1538
1540 return Index.size() + DataPool.size() + getStandaloneStorageSize();
1541}
1542
1544 unsigned IndexPercent = Index.size() * 100ULL / Index.capacity();
1545 unsigned DataPercent = DataPool.size() * 100ULL / DataPool.capacity();
1546 return std::max(IndexPercent, DataPercent);
1547}
1548
1550 StringRef AbsPath, StringRef HashName, unsigned HashByteSize,
1551 std::unique_ptr<OnDiskGraphDB> UpstreamDB, FaultInPolicy Policy) {
1552 if (std::error_code EC = sys::fs::create_directories(AbsPath))
1553 return createFileError(AbsPath, EC);
1554
1555 constexpr uint64_t MB = 1024ull * 1024ull;
1556 constexpr uint64_t GB = 1024ull * 1024ull * 1024ull;
1557
1558 uint64_t MaxIndexSize = 12 * GB;
1559 uint64_t MaxDataPoolSize = 24 * GB;
1560
1561 if (useSmallMappingSize(AbsPath)) {
1562 MaxIndexSize = 1 * GB;
1563 MaxDataPoolSize = 2 * GB;
1564 }
1565
1566 auto CustomSize = getOverriddenMaxMappingSize();
1567 if (!CustomSize)
1568 return CustomSize.takeError();
1569 if (*CustomSize)
1570 MaxIndexSize = MaxDataPoolSize = **CustomSize;
1571
1572 SmallString<256> IndexPath(AbsPath);
1574 std::optional<OnDiskTrieRawHashMap> Index;
1576 IndexPath, IndexTableName + "[" + HashName + "]",
1577 HashByteSize * CHAR_BIT,
1578 /*DataSize=*/sizeof(TrieRecord), MaxIndexSize,
1579 /*MinFileSize=*/MB)
1580 .moveInto(Index))
1581 return std::move(E);
1582
1583 uint32_t UserHeaderSize = sizeof(std::atomic<uint64_t>);
1584
1585 SmallString<256> DataPoolPath(AbsPath);
1587 std::optional<OnDiskDataAllocator> DataPool;
1588 StringRef PolicyName =
1589 Policy == FaultInPolicy::SingleNode ? "single" : "full";
1591 DataPoolPath,
1592 DataPoolTableName + "[" + HashName + "]" + PolicyName,
1593 MaxDataPoolSize, /*MinFileSize=*/MB, UserHeaderSize,
1594 [](void *UserHeaderPtr) {
1595 new (UserHeaderPtr) std::atomic<uint64_t>(0);
1596 })
1597 .moveInto(DataPool))
1598 return std::move(E);
1599 if (DataPool->getUserHeader().size() != UserHeaderSize)
1601 "unexpected user header in '" + DataPoolPath +
1602 "'");
1603
1604 return std::unique_ptr<OnDiskGraphDB>(
1605 new OnDiskGraphDB(AbsPath, std::move(*Index), std::move(*DataPool),
1606 std::move(UpstreamDB), Policy));
1607}
1608
1609OnDiskGraphDB::OnDiskGraphDB(StringRef RootPath, OnDiskTrieRawHashMap Index,
1610 OnDiskDataAllocator DataPool,
1611 std::unique_ptr<OnDiskGraphDB> UpstreamDB,
1612 FaultInPolicy Policy)
1613 : Index(std::move(Index)), DataPool(std::move(DataPool)),
1614 RootPath(RootPath.str()), UpstreamDB(std::move(UpstreamDB)),
1615 FIPolicy(Policy) {
1616 /// Lifetime for "big" objects not in DataPool.
1617 ///
1618 /// NOTE: Could use ThreadSafeTrieRawHashMap here. For now, doing something
1619 /// simpler on the assumption there won't be much contention since most data
1620 /// is not big. If there is contention, and we've already fixed ObjectProxy
1621 /// object handles to be cheap enough to use consistently, the fix might be
1622 /// to use better use of them rather than optimizing this map.
1623 ///
1624 /// FIXME: Figure out the right number of shards, if any.
1625 StandaloneData = new StandaloneDataMapTy();
1626}
1627
1629 delete static_cast<StandaloneDataMapTy *>(StandaloneData);
1630}
1631
1632Error OnDiskGraphDB::importFullTree(ObjectID PrimaryID,
1633 ObjectHandle UpstreamNode) {
1634 // Copies the full CAS tree from upstream. Uses depth-first copying to protect
1635 // against the process dying during importing and leaving the database with an
1636 // incomplete tree. Note that if the upstream has missing nodes then the tree
1637 // will be copied with missing nodes as well, it won't be considered an error.
1638
1639 struct UpstreamCursor {
1641 size_t RefsCount;
1644 };
1645 /// Keeps track of the state of visitation for current node and all of its
1646 /// parents.
1648 /// Keeps track of the currently visited nodes as they are imported into
1649 /// primary database, from current node and its parents. When a node is
1650 /// entered for visitation it appends its own ID, then appends referenced IDs
1651 /// as they get imported. When a node is fully imported it removes the
1652 /// referenced IDs from the bottom of the stack which leaves its own ID at the
1653 /// bottom, adding to the list of referenced IDs for the parent node.
1654 SmallVector<ObjectID, 128> PrimaryNodesStack;
1655
1656 auto enqueueNode = [&](ObjectID PrimaryID, std::optional<ObjectHandle> Node) {
1657 PrimaryNodesStack.push_back(PrimaryID);
1658 if (!Node)
1659 return;
1660 auto Refs = UpstreamDB->getObjectRefs(*Node);
1661 CursorStack.push_back({*Node,
1662 (size_t)std::distance(Refs.begin(), Refs.end()),
1663 Refs.begin(), Refs.end()});
1664 };
1665
1666 enqueueNode(PrimaryID, UpstreamNode);
1667
1668 while (!CursorStack.empty()) {
1669 UpstreamCursor &Cur = CursorStack.back();
1670 if (Cur.RefI == Cur.RefE) {
1671 // Copy the node data into the primary store.
1672 // FIXME: Use hard-link or cloning if the file-system supports it and data
1673 // is stored into a separate file.
1674
1675 // The bottom of \p PrimaryNodesStack contains the primary ID for the
1676 // current node plus the list of imported referenced IDs.
1677 assert(PrimaryNodesStack.size() >= Cur.RefsCount + 1);
1678 ObjectID PrimaryID = *(PrimaryNodesStack.end() - Cur.RefsCount - 1);
1679 auto PrimaryRefs = ArrayRef(PrimaryNodesStack)
1680 .slice(PrimaryNodesStack.size() - Cur.RefsCount);
1681 auto Data = UpstreamDB->getObjectData(Cur.Node);
1682 if (Error E = store(PrimaryID, PrimaryRefs, Data))
1683 return E;
1684 // Remove the current node and its IDs from the stack.
1685 PrimaryNodesStack.truncate(PrimaryNodesStack.size() - Cur.RefsCount);
1686 CursorStack.pop_back();
1687 continue;
1688 }
1689
1690 ObjectID UpstreamID = *(Cur.RefI++);
1691 auto PrimaryID = getReference(UpstreamDB->getDigest(UpstreamID));
1692 if (LLVM_UNLIKELY(!PrimaryID))
1693 return PrimaryID.takeError();
1694 if (containsObject(*PrimaryID, /*CheckUpstream=*/false)) {
1695 // This \p ObjectID already exists in the primary. Either it was imported
1696 // via \p importFullTree or the client created it, in which case the
1697 // client takes responsibility for how it was formed.
1698 enqueueNode(*PrimaryID, std::nullopt);
1699 continue;
1700 }
1701 Expected<std::optional<ObjectHandle>> UpstreamNode =
1702 UpstreamDB->load(UpstreamID);
1703 if (!UpstreamNode)
1704 return UpstreamNode.takeError();
1705 enqueueNode(*PrimaryID, *UpstreamNode);
1706 }
1707
1708 assert(PrimaryNodesStack.size() == 1);
1709 assert(PrimaryNodesStack.front() == PrimaryID);
1710 return Error::success();
1711}
1712
1713Error OnDiskGraphDB::importSingleNode(ObjectID PrimaryID,
1714 ObjectHandle UpstreamNode) {
1715 // Copies only a single node, it doesn't copy the referenced nodes.
1716
1717 // Copy the node data into the primary store.
1718 // FIXME: Use hard-link or cloning if the file-system supports it and data is
1719 // stored into a separate file.
1720
1721 auto Data = UpstreamDB->getObjectData(UpstreamNode);
1722 auto UpstreamRefs = UpstreamDB->getObjectRefs(UpstreamNode);
1724 Refs.reserve(std::distance(UpstreamRefs.begin(), UpstreamRefs.end()));
1725 for (ObjectID UpstreamRef : UpstreamRefs) {
1726 auto Ref = getReference(UpstreamDB->getDigest(UpstreamRef));
1727 if (LLVM_UNLIKELY(!Ref))
1728 return Ref.takeError();
1729 Refs.push_back(*Ref);
1730 }
1731
1732 return store(PrimaryID, Refs, Data);
1733}
1734
1735Expected<std::optional<ObjectHandle>>
1736OnDiskGraphDB::faultInFromUpstream(ObjectID PrimaryID) {
1737 assert(UpstreamDB);
1738
1739 auto UpstreamID = UpstreamDB->getReference(getDigest(PrimaryID));
1740 if (LLVM_UNLIKELY(!UpstreamID))
1741 return UpstreamID.takeError();
1742
1743 Expected<std::optional<ObjectHandle>> UpstreamNode =
1744 UpstreamDB->load(*UpstreamID);
1745 if (!UpstreamNode)
1746 return UpstreamNode.takeError();
1747 if (!*UpstreamNode)
1748 return std::nullopt;
1749
1750 if (Error E = FIPolicy == FaultInPolicy::SingleNode
1751 ? importSingleNode(PrimaryID, **UpstreamNode)
1752 : importFullTree(PrimaryID, **UpstreamNode))
1753 return std::move(E);
1754 return load(PrimaryID);
1755}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Mark last scratch load
AMDGPU Prepare AGPR Alloc
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_UNLIKELY(EXPR)
Definition Compiler.h:336
This file defines the DenseMap class.
static cl::opt< int > PageSize("imp-null-check-page-size", cl::desc("The page size of the target in bytes"), cl::init(4096), cl::Hidden)
static bool lookup(const GsymReader &GR, DataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr, uint64_t Addr, SourceLocations &SrcLocs, llvm::Error &Err)
A Lookup helper functions.
#define I(x, y, z)
Definition MD5.cpp:58
#define H(x, y, z)
Definition MD5.cpp:57
This file declares interface for OnDiskDataAllocator, a file backed data pool can be used to allocate...
static constexpr StringLiteral FilePrefixLeaf0
static constexpr StringLiteral DataPoolTableName
static constexpr StringLiteral FilePrefixObject
static constexpr StringLiteral FilePrefixLeaf
static constexpr StringLiteral IndexFilePrefix
static OnDiskContent getContentFromHandle(const OnDiskDataAllocator &DataPool, ObjectHandle OH)
static constexpr StringLiteral DataPoolFilePrefix
static Error createCorruptObjectError(Expected< ArrayRef< uint8_t > > ID)
static size_t getPageSize()
static Expected< MappedTempFile > createTempFile(StringRef FinalPath, uint64_t Size)
static constexpr StringLiteral IndexTableName
This declares OnDiskGraphDB, an ondisk CAS database with a fixed length hash.
This file declares interface for OnDiskTrieRawHashMap, a thread-safe and (mostly) lock-free hash map ...
#define P(N)
Provides a library for accessing information about this process and other processes on the operating ...
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
static Split data
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:114
The Input class is used to parse a yaml document into in-memory structs and vectors.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
ArrayRef< T > drop_back(size_t N=1) const
Drop the last N elements of the array.
Definition ArrayRef.h:206
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:142
const T * data() const
Definition ArrayRef.h:144
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static ErrorSuccess success()
Create a success value.
Definition Error.h:336
Tagged union holding either a T or a Error.
Definition Error.h:485
Error takeError()
Take ownership of the stored error.
Definition Error.h:612
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:303
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void reserve(size_type N)
void truncate(size_type N)
Like resize, but requires that N is less than size().
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:854
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
FileOffset is a wrapper around uint64_t to represent the offset of data from the beginning of the fil...
Definition FileOffset.h:24
uint64_t get() const
Definition FileOffset.h:26
Handle to a loaded object in a ObjectStore instance.
Expected< ArrayRef< char > > get(FileOffset Offset, size_t Size) const
Get the data of Size stored at the given Offset.
static Expected< OnDiskDataAllocator > create(const Twine &Path, const Twine &TableName, uint64_t MaxFileSize, std::optional< uint64_t > NewFileInitialSize, uint32_t UserHeaderSize=0, function_ref< void(void *)> UserHeaderInit=nullptr)
OnDiskTrieRawHashMap is a persistent trie data structure used as hash maps.
static Expected< OnDiskTrieRawHashMap > create(const Twine &Path, const Twine &TrieName, size_t NumHashBits, uint64_t DataSize, uint64_t MaxFileSize, std::optional< uint64_t > NewFileInitialSize, std::optional< size_t > NewTableNumRootBits=std::nullopt, std::optional< size_t > NewTableNumSubtrieBits=std::nullopt)
Gets or creates a file at Path with a hash-mapped trie named TrieName.
static std::optional< InternalRef4B > tryToShrink(InternalRef Ref)
Shrink to 4B reference.
Array of internal node references.
Standard 8 byte reference inside OnDiskGraphDB.
static InternalRef getFromOffset(FileOffset Offset)
Handle for a loaded node object.
static ObjectHandle fromFileOffset(FileOffset Offset)
static ObjectHandle fromMemory(uintptr_t Ptr)
Reference to a node.
static Expected< std::unique_ptr< OnDiskGraphDB > > open(StringRef Path, StringRef HashName, unsigned HashByteSize, std::unique_ptr< OnDiskGraphDB > UpstreamDB=nullptr, FaultInPolicy Policy=FaultInPolicy::FullTree)
Open the on-disk store from a directory.
FaultInPolicy
How to fault-in nodes if an upstream database is used.
@ SingleNode
Copy only the requested node.
void print(raw_ostream &OS) const
Expected< std::optional< ObjectHandle > > load(ObjectID Ref)
Expected< bool > isMaterialized(ObjectID Ref)
Check whether the object associated with Ref is stored in the CAS.
Error validate(bool Deep, HashingFuncT Hasher) const
Validate the OnDiskGraphDB.
bool containsObject(ObjectID Ref) const
Check whether the object associated with Ref is stored in the CAS.
unsigned getHardStorageLimitUtilization() const
Error store(ObjectID ID, ArrayRef< ObjectID > Refs, ArrayRef< char > Data)
Associate data & references with a particular object ID.
ArrayRef< uint8_t > getDigest(ObjectID Ref) const
std::optional< ObjectID > getExistingReference(ArrayRef< uint8_t > Digest)
Get an existing reference to the object Digest.
Expected< ObjectID > getReference(ArrayRef< uint8_t > Hash)
Form a reference for the provided hash.
function_ref< void( ArrayRef< ArrayRef< uint8_t > >, ArrayRef< char >, SmallVectorImpl< uint8_t > &)> HashingFuncT
Hashing function type for validation.
ArrayRef< char > getObjectData(ObjectHandle Node) const
An efficient, type-erasing, non-owning reference to a callable.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
static unsigned getPageSizeEstimate()
Get the process's estimated page size.
Definition Process.h:62
LLVM_ABI Error keep(const Twine &Name)
static LLVM_ABI Expected< TempFile > create(const Twine &Model, unsigned Mode=all_read|all_write, OpenFlags ExtraFlags=OF_None)
This creates a temporary file with createUniqueFile and schedules it for deletion with sys::RemoveFil...
Represents the result of a call to sys::fs::status().
Definition FileSystem.h:222
This class represents a memory mapped file.
LLVM_ABI size_t size() const
Definition Path.cpp:1159
@ readonly
May only access map via const_data as read only.
@ readwrite
May access map via data and modify it. Written to path.
LLVM_ABI char * data() const
Definition Path.cpp:1164
#define UINT64_MAX
Definition DataTypes.h:77
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
constexpr StringLiteral CASFormatVersion
The version for all the ondisk database files.
Expected< std::optional< uint64_t > > getOverriddenMaxMappingSize()
Retrieves an overridden maximum mapping size for CAS files, if any, specified by LLVM_CAS_MAX_MAPPIN...
Expected< size_t > preallocateFileTail(int FD, size_t CurrentSize, size_t NewSize)
Allocate space for the file FD on disk, if the filesystem supports it.
bool useSmallMappingSize(const Twine &Path)
Whether to use a small file mapping for ondisk databases created in Path.
uint64_t getDataSize(const FuncRecordTy *Record)
Return the coverage map data size for the function.
uint64_t read64le(const void *P)
Definition Endian.h:435
void write64le(void *P, uint64_t V)
Definition Endian.h:478
void write32le(void *P, uint32_t V)
Definition Endian.h:475
uint32_t read32le(const void *P)
Definition Endian.h:432
LLVM_ABI std::error_code closeFile(file_t &F)
Close the file object.
LLVM_ABI std::error_code rename(const Twine &from, const Twine &to)
Rename from to to.
std::error_code resize_file_before_mapping_readwrite(int FD, uint64_t Size)
Resize FD to Size before mapping mapped_file_region::readwrite.
Definition FileSystem.h:410
LLVM_ABI bool exists(const basic_file_status &status)
Does file exist?
Definition Path.cpp:1077
LLVM_ABI std::error_code createUniqueFile(const Twine &Model, int &ResultFD, SmallVectorImpl< char > &ResultPath, OpenFlags Flags=OF_None, unsigned Mode=all_read|all_write)
Create a uniquely named file.
Definition Path.cpp:871
LLVM_ABI std::error_code remove(const Twine &path, bool IgnoreNonExisting=true)
Remove path.
LLVM_ABI Expected< file_t > openNativeFileForRead(const Twine &Name, OpenFlags Flags=OF_None, SmallVectorImpl< char > *RealPath=nullptr)
Opens the file with the given name in a read-only mode, returning its open file descriptor.
LLVM_ABI std::error_code create_directories(const Twine &path, bool IgnoreExisting=true, perms Perms=owner_all|group_all)
Create all the non-existent directories in path.
Definition Path.cpp:967
LLVM_ABI file_t convertFDToNativeFile(int FD)
Converts from a Posix file descriptor number to a native file handle.
Definition FileSystem.h:991
LLVM_ABI std::error_code status(const Twine &path, file_status &result, bool follow=true)
Get file status as if by POSIX stat().
LLVM_ABI void append(SmallVectorImpl< char > &path, const Twine &a, const Twine &b="", const Twine &c="", const Twine &d="")
Append to path.
Definition Path.cpp:456
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:477
Error createFileError(const Twine &F, Error E)
Concatenate a source file path and/or name with an Error.
Definition Error.h:1399
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1655
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
Definition ScopeExit.h:59
ArrayRef< CharT > arrayRefFromStringRef(StringRef Input)
Construct an array ref of unsigned chars from a string ref.
std::error_code make_error_code(BitcodeError E)
@ Done
Definition Threading.h:60
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:134
Error handleErrors(Error E, HandlerTs &&... Hs)
Pass the ErrorInfo(s) contained in E to their respective handlers.
Definition Error.h:967
FunctionAddr VTableAddr uintptr_t uintptr_t DataSize
Definition InstrProf.h:267
std::string utohexstr(uint64_t X, bool LowerCase=false, unsigned Width=0)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition Error.h:1305
@ argument_out_of_domain
Definition Errc.h:37
@ illegal_byte_sequence
Definition Errc.h:52
@ invalid_argument
Definition Errc.h:56
std::optional< T > expectedToOptional(Expected< T > &&E)
Convert an Expected to an Optional without doing anything.
Definition Error.h:1094
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1622
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
@ Ref
The access may reference the value stored in memory.
Definition ModRef.h:32
@ Other
Any other memory.
Definition ModRef.h:68
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
Definition Error.h:769
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:189
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:1954
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
OutputIt copy(R &&Range, OutputIt Out)
Definition STLExtras.h:1835
void toHex(ArrayRef< uint8_t > Input, bool LowerCase, SmallVectorImpl< char > &Output)
Convert buffer Input to its hexadecimal representation. The returned string is double the size of Inp...
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1867
LLVM_ABI Error errorCodeToError(std::error_code EC)
Helper for converting an std::error_code to a Error.
Definition Error.cpp:111
void consumeError(Error Err)
Consume a Error without doing anything.
Definition Error.h:1083
bool isAddrAligned(Align Lhs, const void *Addr)
Checks that Addr is a multiple of the alignment.
Definition Alignment.h:139
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:867
Proxy for an on-disk index record.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
static constexpr Align Of()
Allow constructions of constexpr Align from types.
Definition Alignment.h:94
Const value proxy to access the records stored in TrieRawHashMap.
Value proxy to access the records stored in TrieRawHashMap.