OnDiskGraphDB.cpp
1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file implements OnDiskGraphDB, an on-disk CAS node database that is
11/// independent of any particular hashing algorithm. It only needs to be
12/// configured with the hash size, and it controls the schema of the storage.
13///
14/// OnDiskGraphDB defines:
15///
16/// - How the data is stored inside the database, either as a standalone file
17/// or allocated inside a datapool.
18/// - How references to other objects inside the same database are stored. They
19/// are stored as internal references instead of full hash values, to save
20/// space.
21/// - How to chain databases together and import objects from upstream
22/// databases.
23///
24/// Here's a top-level description of the current layout:
25///
26/// - db/index.<version>: a file for the "index" table, named by \a
27/// IndexTableName and managed by \a TrieRawHashMap. The contents are 8B
28/// that are accessed atomically, describing the object kind and where/how
29/// it's stored (including an optional file offset). See \a TrieRecord for
30/// more details.
31/// - db/data.<version>: a file for the "data" table, named by \a
32/// DataPoolTableName and managed by \a DataStore. New objects within
33/// TrieRecord::MaxEmbeddedSize are inserted here as \a
34/// TrieRecord::StorageKind::DataPool.
35/// - db/obj.<offset>.<version>: a file storing an object outside the main
36/// "data" table, named by its offset into the "index" table, with the
37/// format of \a TrieRecord::StorageKind::Standalone.
38/// - db/leaf.<offset>.<version>: a file storing a leaf node outside the
39/// main "data" table, named by its offset into the "index" table, with
40/// the format of \a TrieRecord::StorageKind::StandaloneLeaf.
41/// - db/leaf+0.<offset>.<version>: a file storing a null-terminated leaf object
42/// outside the main "data" table, named by its offset into the "index" table,
43/// with the format of \a TrieRecord::StorageKind::StandaloneLeaf0.
44//
45//===----------------------------------------------------------------------===//
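// Illustrative sketch (not from the original file): what the database
// directory described above might contain after a few insertions. The version
// suffix and the offsets below are made up for the example.
//
//   db/index.<version>       - TrieRawHashMap of 8-byte TrieRecords
//   db/data.<version>        - data pool for objects within MaxEmbeddedSize
//   db/obj.1160.<version>    - standalone object with refs (index offset 1160)
//   db/leaf.2048.<version>   - standalone leaf; file size == data size
//   db/leaf+0.4096.<version> - standalone leaf with an extra trailing '\0'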
46
47#include "llvm/CAS/OnDiskGraphDB.h"
48#include "OnDiskCommon.h"
49#include "llvm/ADT/DenseMap.h"
50#include "llvm/ADT/ScopeExit.h"
56#include "llvm/Support/Errc.h"
57#include "llvm/Support/Error.h"
61#include "llvm/Support/Path.h"
63#include <atomic>
64#include <mutex>
65#include <optional>
66
67#define DEBUG_TYPE "on-disk-cas"
68
69using namespace llvm;
70using namespace llvm::cas;
71using namespace llvm::cas::ondisk;
72
73static constexpr StringLiteral IndexTableName = "llvm.cas.index";
74static constexpr StringLiteral DataPoolTableName = "llvm.cas.data";
75
76static constexpr StringLiteral IndexFilePrefix = "index.";
77static constexpr StringLiteral DataPoolFilePrefix = "data.";
78
79static constexpr StringLiteral FilePrefixObject = "obj.";
80static constexpr StringLiteral FilePrefixLeaf = "leaf.";
81static constexpr StringLiteral FilePrefixLeaf0 = "leaf+0.";
82
83static Error createCorruptObjectError(Expected<ArrayRef<uint8_t>> ID) {
84 if (!ID)
85 return ID.takeError();
86
87 return createStringError(llvm::errc::invalid_argument,
88 "corrupt object '" + toHex(*ID) + "'");
89}
90
91namespace {
92
93/// Trie record data: 8 bytes, atomic<uint64_t>
94/// - 1-byte: StorageKind
95/// - 7-bytes: DataStoreOffset (offset into referenced file)
96class TrieRecord {
97public:
98 enum class StorageKind : uint8_t {
99 /// Unknown object.
100 Unknown = 0,
101
102 /// data.vX: main pool, full DataStore record.
103 DataPool = 1,
104
105 /// obj.<TrieRecordOffset>.vX: standalone, with a full DataStore record.
106 Standalone = 10,
107
108 /// leaf.<TrieRecordOffset>.vX: standalone, just the data. The file contents
109 /// are exactly the data content and the file size matches the data size. No refs.
110 StandaloneLeaf = 11,
111
112 /// leaf+0.<TrieRecordOffset>.vX: standalone, just the data plus an
113 /// extra null character ('\0'). File size is 1 bigger than the data size.
114 /// No refs.
115 StandaloneLeaf0 = 12,
116 };
117
118 static StringRef getStandaloneFilePrefix(StorageKind SK) {
119 switch (SK) {
120 default:
121 llvm_unreachable("Expected standalone storage kind");
122 case TrieRecord::StorageKind::Standalone:
123 return FilePrefixObject;
124 case TrieRecord::StorageKind::StandaloneLeaf:
125 return FilePrefixLeaf;
126 case TrieRecord::StorageKind::StandaloneLeaf0:
127 return FilePrefixLeaf0;
128 }
129 }
130
131 enum Limits : int64_t {
132 /// Objects bigger than this size are stored standalone instead of embedded.
133 MaxEmbeddedSize = 64LL * 1024LL - 1,
134 };
135
136 struct Data {
137 StorageKind SK = StorageKind::Unknown;
138 FileOffset Offset;
139 };
140
141 /// Pack StorageKind and Offset from Data into 8 byte TrieRecord.
142 static uint64_t pack(Data D) {
143 assert(D.Offset.get() < (int64_t)(1ULL << 56));
144 uint64_t Packed = uint64_t(D.SK) << 56 | D.Offset.get();
145 assert(D.SK != StorageKind::Unknown || Packed == 0);
146#ifndef NDEBUG
147 Data RoundTrip = unpack(Packed);
148 assert(D.SK == RoundTrip.SK);
149 assert(D.Offset.get() == RoundTrip.Offset.get());
150#endif
151 return Packed;
152 }
153
154 /// Unpack an 8-byte packed TrieRecord value into Data.
155 static Data unpack(uint64_t Packed) {
156 Data D;
157 if (!Packed)
158 return D;
159 D.SK = (StorageKind)(Packed >> 56);
160 D.Offset = FileOffset(Packed & (UINT64_MAX >> 8));
161 return D;
162 }
163
164 TrieRecord() : Storage(0) {}
165
166 Data load() const { return unpack(Storage); }
167 bool compare_exchange_strong(Data &Existing, Data New);
168
169private:
170 std::atomic<uint64_t> Storage;
171};
172
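// Illustrative sketch (not part of the original file): how TrieRecord::pack()
// folds a StorageKind and a 7-byte file offset into one 8-byte word, and how
// unpack() recovers both. The offset value is made up for the example.
static void exampleTrieRecordPacking() {
  TrieRecord::Data D;
  D.SK = TrieRecord::StorageKind::DataPool;
  D.Offset = FileOffset(0x1234);
  uint64_t Packed = TrieRecord::pack(D); // StorageKind lands in the top byte.
  TrieRecord::Data RoundTrip = TrieRecord::unpack(Packed);
  assert(RoundTrip.SK == D.SK);
  assert(RoundTrip.Offset.get() == D.Offset.get());
  (void)RoundTrip;
}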
173/// DataStore record data: 4B + size? + refs? + data + 0
174/// - 4-bytes: Header
175/// - {0,4,8}-bytes: DataSize (may be packed in Header)
176/// - {0,4,8}-bytes: NumRefs (may be packed in Header)
177/// - NumRefs*{4,8}-bytes: Refs[] (end-ptr is 8-byte aligned)
178/// - <data>
179/// - 1-byte: 0-term
180struct DataRecordHandle {
181 /// NumRefs storage: 4B, 2B, 1B, or 0B (no refs). Or, 8B, for alignment
182 /// convenience to avoid computing padding later.
183 enum class NumRefsFlags : uint8_t {
184 Uses0B = 0U,
185 Uses1B = 1U,
186 Uses2B = 2U,
187 Uses4B = 3U,
188 Uses8B = 4U,
189 Max = Uses8B,
190 };
191
192 /// DataSize storage: 8B, 4B, 2B, or 1B.
193 enum class DataSizeFlags {
194 Uses1B = 0U,
195 Uses2B = 1U,
196 Uses4B = 2U,
197 Uses8B = 3U,
198 Max = Uses8B,
199 };
200
201 /// Kind of ref stored in Refs[]: InternalRef or InternalRef4B.
202 enum class RefKindFlags {
203 InternalRef = 0U,
204 InternalRef4B = 1U,
205 Max = InternalRef4B,
206 };
207
208 enum Counts : int {
209 NumRefsShift = 0,
210 NumRefsBits = 3,
211 DataSizeShift = NumRefsShift + NumRefsBits,
212 DataSizeBits = 2,
213 RefKindShift = DataSizeShift + DataSizeBits,
214 RefKindBits = 1,
215 };
216 static_assert(((UINT32_MAX << NumRefsBits) & (uint32_t)NumRefsFlags::Max) ==
217 0,
218 "Not enough bits");
219 static_assert(((UINT32_MAX << DataSizeBits) & (uint32_t)DataSizeFlags::Max) ==
220 0,
221 "Not enough bits");
222 static_assert(((UINT32_MAX << RefKindBits) & (uint32_t)RefKindFlags::Max) ==
223 0,
224 "Not enough bits");
225
226 /// Layout of the DataRecordHandle and how to decode it.
227 struct LayoutFlags {
228 NumRefsFlags NumRefs;
229 DataSizeFlags DataSize;
230 RefKindFlags RefKind;
231
232 static uint64_t pack(LayoutFlags LF) {
233 unsigned Packed = ((unsigned)LF.NumRefs << NumRefsShift) |
234 ((unsigned)LF.DataSize << DataSizeShift) |
235 ((unsigned)LF.RefKind << RefKindShift);
236#ifndef NDEBUG
237 LayoutFlags RoundTrip = unpack(Packed);
238 assert(LF.NumRefs == RoundTrip.NumRefs);
239 assert(LF.DataSize == RoundTrip.DataSize);
240 assert(LF.RefKind == RoundTrip.RefKind);
241#endif
242 return Packed;
243 }
244 static LayoutFlags unpack(uint64_t Storage) {
245 assert(Storage <= UINT8_MAX && "Expect storage to fit in a byte");
246 LayoutFlags LF;
247 LF.NumRefs =
248 (NumRefsFlags)((Storage >> NumRefsShift) & ((1U << NumRefsBits) - 1));
249 LF.DataSize = (DataSizeFlags)((Storage >> DataSizeShift) &
250 ((1U << DataSizeBits) - 1));
251 LF.RefKind =
252 (RefKindFlags)((Storage >> RefKindShift) & ((1U << RefKindBits) - 1));
253 return LF;
254 }
255 };
256
257 /// Header layout:
258 /// - 1-byte: LayoutFlags
259 /// - 1-byte: 1B size field
260 /// - {0,2}-bytes: 2B size field
261 struct Header {
262 using PackTy = uint32_t;
263 PackTy Packed;
264
265 static constexpr unsigned LayoutFlagsShift =
266 (sizeof(PackTy) - 1) * CHAR_BIT;
267 };
268
269 struct Input {
270 InternalRefArrayRef Refs;
271 ArrayRef<char> Data;
272 };
273
274 LayoutFlags getLayoutFlags() const {
275 return LayoutFlags::unpack(H->Packed >> Header::LayoutFlagsShift);
276 }
277
278 uint64_t getDataSize() const;
279 void skipDataSize(LayoutFlags LF, int64_t &RelOffset) const;
280 uint32_t getNumRefs() const;
281 void skipNumRefs(LayoutFlags LF, int64_t &RelOffset) const;
282 int64_t getRefsRelOffset() const;
283 int64_t getDataRelOffset() const;
284
285 static uint64_t getTotalSize(uint64_t DataRelOffset, uint64_t DataSize) {
286 return DataRelOffset + DataSize + 1;
287 }
288 uint64_t getTotalSize() const {
289 return getDataRelOffset() + getDataSize() + 1;
290 }
291
292 /// Describe the layout of data stored and how to decode from
293 /// DataRecordHandle.
294 struct Layout {
295 explicit Layout(const Input &I);
296
297 LayoutFlags Flags;
298 uint64_t DataSize = 0;
299 uint32_t NumRefs = 0;
300 int64_t RefsRelOffset = 0;
301 int64_t DataRelOffset = 0;
302 uint64_t getTotalSize() const {
303 return DataRecordHandle::getTotalSize(DataRelOffset, DataSize);
304 }
305 };
306
307 InternalRefArrayRef getRefs() const {
308 assert(H && "Expected valid handle");
309 auto *BeginByte = reinterpret_cast<const char *>(H) + getRefsRelOffset();
310 size_t Size = getNumRefs();
311 if (!Size)
312 return InternalRefArrayRef();
313 if (getLayoutFlags().RefKind == RefKindFlags::InternalRef4B)
314 return ArrayRef(reinterpret_cast<const InternalRef4B *>(BeginByte), Size);
315 return ArrayRef(reinterpret_cast<const InternalRef *>(BeginByte), Size);
316 }
317
318 ArrayRef<char> getData() const {
319 assert(H && "Expected valid handle");
320 return ArrayRef(reinterpret_cast<const char *>(H) + getDataRelOffset(),
321 getDataSize());
322 }
323
324 static DataRecordHandle create(function_ref<char *(size_t Size)> Alloc,
325 const Input &I);
326 static Expected<DataRecordHandle>
327 createWithError(function_ref<Expected<char *>(size_t Size)> Alloc,
328 const Input &I);
329 static DataRecordHandle construct(char *Mem, const Input &I);
330
331 static DataRecordHandle get(const char *Mem) {
332 return DataRecordHandle(
333 *reinterpret_cast<const DataRecordHandle::Header *>(Mem));
334 }
335 static Expected<DataRecordHandle>
336 getFromDataPool(const OnDiskDataAllocator &Pool, FileOffset Offset);
337
338 explicit operator bool() const { return H; }
339 const Header &getHeader() const { return *H; }
340
341 DataRecordHandle() = default;
342 explicit DataRecordHandle(const Header &H) : H(&H) {}
343
344private:
345 static DataRecordHandle constructImpl(char *Mem, const Input &I,
346 const Layout &L);
347 const Header *H = nullptr;
348};
349
350/// Proxy for any on-disk object or raw data.
351struct OnDiskContent {
352 std::optional<DataRecordHandle> Record;
353 std::optional<ArrayRef<char>> Bytes;
354};
355
356/// Data from a standalone file, loaded into memory.
357class StandaloneDataInMemory {
358public:
359 OnDiskContent getContent() const;
360
361 StandaloneDataInMemory(std::unique_ptr<sys::fs::mapped_file_region> Region,
362 TrieRecord::StorageKind SK)
363 : Region(std::move(Region)), SK(SK) {
364#ifndef NDEBUG
365 bool IsStandalone = false;
366 switch (SK) {
367 case TrieRecord::StorageKind::Standalone:
368 case TrieRecord::StorageKind::StandaloneLeaf:
369 case TrieRecord::StorageKind::StandaloneLeaf0:
370 IsStandalone = true;
371 break;
372 default:
373 break;
374 }
375 assert(IsStandalone);
376#endif
377 }
378
379private:
380 std::unique_ptr<sys::fs::mapped_file_region> Region;
381 TrieRecord::StorageKind SK;
382};
383
384/// Container to lookup loaded standalone objects.
385template <size_t NumShards> class StandaloneDataMap {
386 static_assert(isPowerOf2_64(NumShards), "Expected power of 2");
387
388public:
389 uintptr_t insert(ArrayRef<uint8_t> Hash, TrieRecord::StorageKind SK,
390 std::unique_ptr<sys::fs::mapped_file_region> Region);
391
392 const StandaloneDataInMemory *lookup(ArrayRef<uint8_t> Hash) const;
393 bool count(ArrayRef<uint8_t> Hash) const { return bool(lookup(Hash)); }
394
395private:
396 struct Shard {
397 /// Needs to store a std::unique_ptr for a stable address identity.
398 DenseMap<const uint8_t *, std::unique_ptr<StandaloneDataInMemory>> Map;
399 mutable std::mutex Mutex;
400 };
401 Shard &getShard(ArrayRef<uint8_t> Hash) {
402 return const_cast<Shard &>(
403 const_cast<const StandaloneDataMap *>(this)->getShard(Hash));
404 }
405 const Shard &getShard(ArrayRef<uint8_t> Hash) const {
406 static_assert(NumShards <= 256, "Expected only 8 bits of shard");
407 return Shards[Hash[0] % NumShards];
408 }
409
410 Shard Shards[NumShards];
411};
412
413using StandaloneDataMapTy = StandaloneDataMap<16>;
414
415/// A vector of internal node references.
416class InternalRefVector {
417public:
418 void push_back(InternalRef Ref) {
419 if (NeedsFull)
420 return FullRefs.push_back(Ref);
421 if (std::optional<InternalRef4B> Small = InternalRef4B::tryToShrink(Ref))
422 return SmallRefs.push_back(*Small);
423 NeedsFull = true;
424 assert(FullRefs.empty());
425 FullRefs.reserve(SmallRefs.size() + 1);
426 for (InternalRef4B Small : SmallRefs)
427 FullRefs.push_back(Small);
428 FullRefs.push_back(Ref);
429 SmallRefs.clear();
430 }
431
432 operator InternalRefArrayRef() const {
433 assert(SmallRefs.empty() || FullRefs.empty());
434 return NeedsFull ? InternalRefArrayRef(FullRefs)
435 : InternalRefArrayRef(SmallRefs);
436 }
437
438private:
439 bool NeedsFull = false;
440 SmallVector<InternalRef4B> SmallRefs;
441 SmallVector<InternalRef> FullRefs;
442};
443
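// Illustrative sketch (not part of the original file): InternalRefVector keeps
// shrunken 4-byte refs while every ref fits, and switches to full 8-byte refs
// once one does not. The offset used here is made up.
static void exampleInternalRefVector() {
  InternalRefVector Refs;
  Refs.push_back(InternalRef::getFromOffset(FileOffset(8)));
  InternalRefArrayRef Array = Refs;
  assert(Array.size() == 1);
  (void)Array;
}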
444} // namespace
445
446Expected<DataRecordHandle> DataRecordHandle::createWithError(
447 function_ref<Expected<char *>(size_t Size)> Alloc, const Input &I) {
448 Layout L(I);
449 if (Expected<char *> Mem = Alloc(L.getTotalSize()))
450 return constructImpl(*Mem, I, L);
451 else
452 return Mem.takeError();
453}
454
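// Illustrative sketch (not part of the original file): building a tiny record
// with no refs in a caller-provided buffer via createWithError() above, then
// reading it back through the accessors. The buffer size is an assumption that
// comfortably covers the computed layout for 3 bytes of data.
static void exampleDataRecordCreate() {
  alignas(8) char Storage[64];
  DataRecordHandle::Input In{InternalRefArrayRef(), ArrayRef<char>("abc", 3)};
  DataRecordHandle Record = cantFail(DataRecordHandle::createWithError(
      [&](size_t Size) -> Expected<char *> {
        assert(Size <= sizeof(Storage)); // Layout fits in the local buffer.
        return &Storage[0];
      },
      In));
  assert(Record.getNumRefs() == 0);
  assert(Record.getData().size() == 3);
  (void)Record;
}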
455ObjectHandle ObjectHandle::fromFileOffset(FileOffset Offset) {
456 // Store the file offset as it is.
457 assert(!(Offset.get() & 0x1));
458 return ObjectHandle(Offset.get());
459}
460
461ObjectHandle ObjectHandle::fromMemory(uintptr_t Ptr) {
462 // Store the pointer from memory with lowest bit set.
463 assert(!(Ptr & 0x1));
464 return ObjectHandle(Ptr | 1);
465}
466
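// Illustrative sketch (not part of the original file): the two factory
// functions above tag the opaque value with its low bit, clear for a DataPool
// file offset and set for a pointer to in-memory standalone data.
// getContentFromHandle() below decodes it the same way.
static bool isInMemoryObjectHandle(ObjectHandle OH) {
  return OH.getOpaqueData() & 1;
}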
467/// Proxy for an on-disk index record.
468struct OnDiskGraphDB::IndexProxy {
469 FileOffset Offset;
470 ArrayRef<uint8_t> Hash;
471 TrieRecord &Ref;
472};
473
474template <size_t N>
475uintptr_t StandaloneDataMap<N>::insert(
476 ArrayRef<uint8_t> Hash, TrieRecord::StorageKind SK,
477 std::unique_ptr<sys::fs::mapped_file_region> Region) {
478 auto &S = getShard(Hash);
479 std::lock_guard<std::mutex> Lock(S.Mutex);
480 auto &V = S.Map[Hash.data()];
481 if (!V)
482 V = std::make_unique<StandaloneDataInMemory>(std::move(Region), SK);
483 return reinterpret_cast<uintptr_t>(V.get());
484}
485
486template <size_t N>
487const StandaloneDataInMemory *
488StandaloneDataMap<N>::lookup(ArrayRef<uint8_t> Hash) const {
489 auto &S = getShard(Hash);
490 std::lock_guard<std::mutex> Lock(S.Mutex);
491 auto I = S.Map.find(Hash.data());
492 if (I == S.Map.end())
493 return nullptr;
494 return &*I->second;
495}
496
497namespace {
498
499/// Copy of \a sys::fs::TempFile that skips RemoveOnSignal, which is too
500/// expensive to register/unregister at this rate.
501///
502/// FIXME: Add a TempFileManager that maintains a thread-safe list of open temp
503/// files and has a signal handler registered that removes them all.
504class TempFile {
505 bool Done = false;
506 TempFile(StringRef Name, int FD) : TmpName(std::string(Name)), FD(FD) {}
507
508public:
509 /// This creates a temporary file with createUniqueFile.
510 static Expected<TempFile> create(const Twine &Model);
511 TempFile(TempFile &&Other) { *this = std::move(Other); }
512 TempFile &operator=(TempFile &&Other) {
513 TmpName = std::move(Other.TmpName);
514 FD = Other.FD;
515 Other.Done = true;
516 Other.FD = -1;
517 return *this;
518 }
519
520 // Name of the temporary file.
521 std::string TmpName;
522
523 // The open file descriptor.
524 int FD = -1;
525
526 // Keep this with the given name.
527 Error keep(const Twine &Name);
528 Error discard();
529
530 // The destructor discards the file if keep() was not called.
531 ~TempFile() { consumeError(discard()); }
532};
533
534class MappedTempFile {
535public:
536 char *data() const { return Map.data(); }
537 size_t size() const { return Map.size(); }
538
539 Error discard() {
540 assert(Map && "Map already destroyed");
541 Map.unmap();
542 return Temp.discard();
543 }
544
545 Error keep(const Twine &Name) {
546 assert(Map && "Map already destroyed");
547 Map.unmap();
548 return Temp.keep(Name);
549 }
550
551 MappedTempFile(TempFile Temp, sys::fs::mapped_file_region Map)
552 : Temp(std::move(Temp)), Map(std::move(Map)) {}
553
554private:
555 TempFile Temp;
556 sys::fs::mapped_file_region Map;
557};
558} // namespace
559
561 Done = true;
562 if (FD != -1) {
564 if (std::error_code EC = sys::fs::closeFile(File))
565 return errorCodeToError(EC);
566 }
567 FD = -1;
568
569 // Always try to close and remove.
570 std::error_code RemoveEC;
571 if (!TmpName.empty()) {
572 std::error_code EC = sys::fs::remove(TmpName);
573 if (EC)
574 return errorCodeToError(EC);
575 }
576 TmpName = "";
577
578 return Error::success();
579}
580
581Error TempFile::keep(const Twine &Name) {
582 assert(!Done);
583 Done = true;
584 // Always try to close and rename.
585 std::error_code RenameEC = sys::fs::rename(TmpName, Name);
586
587 if (!RenameEC)
588 TmpName = "";
589
590 sys::fs::file_t File = sys::fs::convertFDToNativeFile(FD);
591 if (std::error_code EC = sys::fs::closeFile(File))
592 return errorCodeToError(EC);
593 FD = -1;
594
595 return errorCodeToError(RenameEC);
596}
597
598Expected<TempFile> TempFile::create(const Twine &Model) {
599 int FD;
600 SmallString<128> ResultPath;
601 if (std::error_code EC = sys::fs::createUniqueFile(Model, FD, ResultPath))
602 return errorCodeToError(EC);
603
604 TempFile Ret(ResultPath, FD);
605 return std::move(Ret);
606}
607
608bool TrieRecord::compare_exchange_strong(Data &Existing, Data New) {
609 uint64_t ExistingPacked = pack(Existing);
610 uint64_t NewPacked = pack(New);
611 if (Storage.compare_exchange_strong(ExistingPacked, NewPacked))
612 return true;
613 Existing = unpack(ExistingPacked);
614 return false;
615}
616
617Expected<DataRecordHandle>
618DataRecordHandle::getFromDataPool(const OnDiskDataAllocator &Pool,
619 FileOffset Offset) {
620 auto HeaderData = Pool.get(Offset, sizeof(DataRecordHandle::Header));
621 if (!HeaderData)
622 return HeaderData.takeError();
623
624 auto Record = DataRecordHandle::get(HeaderData->data());
625 if (Record.getTotalSize() + Offset.get() > Pool.size())
626 return createStringError(
627 make_error_code(std::errc::illegal_byte_sequence),
628 "data record spans past the end of the data pool");
629
630 return Record;
631}
632
633DataRecordHandle DataRecordHandle::constructImpl(char *Mem, const Input &I,
634 const Layout &L) {
635 char *Next = Mem + sizeof(Header);
636
637 // Fill in Packed and set other data, then come back to construct the header.
638 Header::PackTy Packed = 0;
639 Packed |= LayoutFlags::pack(L.Flags) << Header::LayoutFlagsShift;
640
641 // Construct DataSize.
642 switch (L.Flags.DataSize) {
643 case DataSizeFlags::Uses1B:
644 assert(I.Data.size() <= UINT8_MAX);
645 Packed |= (Header::PackTy)I.Data.size()
646 << ((sizeof(Packed) - 2) * CHAR_BIT);
647 break;
648 case DataSizeFlags::Uses2B:
649 assert(I.Data.size() <= UINT16_MAX);
650 Packed |= (Header::PackTy)I.Data.size()
651 << ((sizeof(Packed) - 4) * CHAR_BIT);
652 break;
653 case DataSizeFlags::Uses4B:
654 support::endian::write32le(Next, I.Data.size());
655 Next += 4;
656 break;
657 case DataSizeFlags::Uses8B:
658 support::endian::write64le(Next, I.Data.size());
659 Next += 8;
660 break;
661 }
662
663 // Construct NumRefs.
664 //
665 // NOTE: May be writing NumRefs even if there are zero refs in order to fix
666 // alignment.
667 switch (L.Flags.NumRefs) {
668 case NumRefsFlags::Uses0B:
669 break;
670 case NumRefsFlags::Uses1B:
671 assert(I.Refs.size() <= UINT8_MAX);
672 Packed |= (Header::PackTy)I.Refs.size()
673 << ((sizeof(Packed) - 2) * CHAR_BIT);
674 break;
675 case NumRefsFlags::Uses2B:
676 assert(I.Refs.size() <= UINT16_MAX);
677 Packed |= (Header::PackTy)I.Refs.size()
678 << ((sizeof(Packed) - 4) * CHAR_BIT);
679 break;
680 case NumRefsFlags::Uses4B:
681 support::endian::write32le(Next, I.Refs.size());
682 Next += 4;
683 break;
684 case NumRefsFlags::Uses8B:
685 support::endian::write64le(Next, I.Refs.size());
686 Next += 8;
687 break;
688 }
689
690 // Construct Refs[].
691 if (!I.Refs.empty()) {
692 assert((L.Flags.RefKind == RefKindFlags::InternalRef4B) == I.Refs.is4B());
693 ArrayRef<uint8_t> RefsBuffer = I.Refs.getBuffer();
694 llvm::copy(RefsBuffer, Next);
695 Next += RefsBuffer.size();
696 }
697
698 // Construct Data and the trailing null.
700 llvm::copy(I.Data, Next);
701 Next[I.Data.size()] = 0;
702
703 // Construct the header itself and return.
704 Header *H = new (Mem) Header{Packed};
705 DataRecordHandle Record(*H);
706 assert(Record.getData() == I.Data);
707 assert(Record.getNumRefs() == I.Refs.size());
708 assert(Record.getRefs() == I.Refs);
709 assert(Record.getLayoutFlags().DataSize == L.Flags.DataSize);
710 assert(Record.getLayoutFlags().NumRefs == L.Flags.NumRefs);
711 assert(Record.getLayoutFlags().RefKind == L.Flags.RefKind);
712 return Record;
713}
714
715DataRecordHandle::Layout::Layout(const Input &I) {
716 // Start initial relative offsets right after the Header.
717 uint64_t RelOffset = sizeof(Header);
718
719 // Initialize the easy stuff.
720 DataSize = I.Data.size();
721 NumRefs = I.Refs.size();
722
723 // Check refs size.
724 Flags.RefKind =
725 I.Refs.is4B() ? RefKindFlags::InternalRef4B : RefKindFlags::InternalRef;
726
727 // Find the smallest slot available for DataSize.
728 bool Has1B = true;
729 bool Has2B = true;
730 if (DataSize <= UINT8_MAX && Has1B) {
731 Flags.DataSize = DataSizeFlags::Uses1B;
732 Has1B = false;
733 } else if (DataSize <= UINT16_MAX && Has2B) {
734 Flags.DataSize = DataSizeFlags::Uses2B;
735 Has2B = false;
736 } else if (DataSize <= UINT32_MAX) {
737 Flags.DataSize = DataSizeFlags::Uses4B;
738 RelOffset += 4;
739 } else {
740 Flags.DataSize = DataSizeFlags::Uses8B;
741 RelOffset += 8;
742 }
743
744 // Find the smallest slot available for NumRefs. Never sets NumRefs8B here.
745 if (!NumRefs) {
746 Flags.NumRefs = NumRefsFlags::Uses0B;
747 } else if (NumRefs <= UINT8_MAX && Has1B) {
748 Flags.NumRefs = NumRefsFlags::Uses1B;
749 Has1B = false;
750 } else if (NumRefs <= UINT16_MAX && Has2B) {
751 Flags.NumRefs = NumRefsFlags::Uses2B;
752 Has2B = false;
753 } else {
754 Flags.NumRefs = NumRefsFlags::Uses4B;
755 RelOffset += 4;
756 }
757
758 // Helper to "upgrade" either DataSize or NumRefs by 4B to avoid complicated
759 // padding rules when reading and writing. This also bumps RelOffset.
760 //
761 // The value for NumRefs is strictly limited to UINT32_MAX, but it can be
762 // stored as 8B. This means we can *always* find a size to grow.
763 //
764 // NOTE: Only call this once.
765 auto GrowSizeFieldsBy4B = [&]() {
766 assert(isAligned(Align(4), RelOffset));
767 RelOffset += 4;
768
769 assert(Flags.NumRefs != NumRefsFlags::Uses8B &&
770 "Expected to be able to grow NumRefs8B");
771
772 // First try to grow DataSize. NumRefs will not (yet) be 8B, and if
773 // DataSize is upgraded to 8B it'll already be aligned.
774 //
775 // Failing that, grow NumRefs.
776 if (Flags.DataSize < DataSizeFlags::Uses4B)
777 Flags.DataSize = DataSizeFlags::Uses4B; // DataSize: Packed => 4B.
778 else if (Flags.DataSize < DataSizeFlags::Uses8B)
779 Flags.DataSize = DataSizeFlags::Uses8B; // DataSize: 4B => 8B.
780 else if (Flags.NumRefs < NumRefsFlags::Uses4B)
781 Flags.NumRefs = NumRefsFlags::Uses4B; // NumRefs: Packed => 4B.
782 else
783 Flags.NumRefs = NumRefsFlags::Uses8B; // NumRefs: 4B => 8B.
784 };
785
786 assert(isAligned(Align(4), RelOffset));
787 if (Flags.RefKind == RefKindFlags::InternalRef) {
788 // List of 8B refs should be 8B-aligned. Grow one of the sizes to get this
789 // without padding.
790 if (!isAligned(Align(8), RelOffset))
791 GrowSizeFieldsBy4B();
792
793 assert(isAligned(Align(8), RelOffset));
794 RefsRelOffset = RelOffset;
795 RelOffset += 8 * NumRefs;
796 } else {
797 // The array of 4B refs doesn't need 8B alignment, but the data will need
798 // to be 8B-aligned. Detect this now, and, if necessary, shift everything
799 // by 4B by growing one of the sizes.
800 // If we removed the need for 8B-alignment of the data, it would save <1% of
801 // disk storage for a clang build using MCCAS, but the 8B-alignment may be
802 // useful in the future, so keep it for now.
803 uint64_t RefListSize = 4 * NumRefs;
804 if (!isAligned(Align(8), RelOffset + RefListSize))
805 GrowSizeFieldsBy4B();
806 RefsRelOffset = RelOffset;
807 RelOffset += RefListSize;
808 }
809
810 assert(isAligned(Align(8), RelOffset));
811 DataRelOffset = RelOffset;
812}
813
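// Illustrative sketch (not part of the original file): what the constructor
// above computes for a record with no refs and 100 bytes of data. The 1-byte
// size slot in the header would fit, but the data must start 8-byte aligned,
// so one size field is grown by 4 bytes and the data begins at offset 8; the
// total size is 8 + 100 + 1 for the trailing null.
static void exampleLayoutChoice() {
  char Data[100] = {};
  DataRecordHandle::Input In{InternalRefArrayRef(), ArrayRef<char>(Data, 100)};
  DataRecordHandle::Layout L(In);
  assert(L.DataRelOffset == 8);
  assert(L.getTotalSize() == 109);
  (void)L;
}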
814uint64_t DataRecordHandle::getDataSize() const {
815 int64_t RelOffset = sizeof(Header);
816 auto *DataSizePtr = reinterpret_cast<const char *>(H) + RelOffset;
817 switch (getLayoutFlags().DataSize) {
818 case DataSizeFlags::Uses1B:
819 return (H->Packed >> ((sizeof(Header::PackTy) - 2) * CHAR_BIT)) & UINT8_MAX;
820 case DataSizeFlags::Uses2B:
821 return (H->Packed >> ((sizeof(Header::PackTy) - 4) * CHAR_BIT)) &
822 UINT16_MAX;
823 case DataSizeFlags::Uses4B:
824 return support::endian::read32le(DataSizePtr);
825 case DataSizeFlags::Uses8B:
826 return support::endian::read64le(DataSizePtr);
827 }
828 llvm_unreachable("Unknown DataSizeFlags enum");
829}
830
831void DataRecordHandle::skipDataSize(LayoutFlags LF, int64_t &RelOffset) const {
832 if (LF.DataSize >= DataSizeFlags::Uses4B)
833 RelOffset += 4;
834 if (LF.DataSize >= DataSizeFlags::Uses8B)
835 RelOffset += 4;
836}
837
838uint32_t DataRecordHandle::getNumRefs() const {
839 LayoutFlags LF = getLayoutFlags();
840 int64_t RelOffset = sizeof(Header);
841 skipDataSize(LF, RelOffset);
842 auto *NumRefsPtr = reinterpret_cast<const char *>(H) + RelOffset;
843 switch (LF.NumRefs) {
844 case NumRefsFlags::Uses0B:
845 return 0;
846 case NumRefsFlags::Uses1B:
847 return (H->Packed >> ((sizeof(Header::PackTy) - 2) * CHAR_BIT)) & UINT8_MAX;
848 case NumRefsFlags::Uses2B:
849 return (H->Packed >> ((sizeof(Header::PackTy) - 4) * CHAR_BIT)) &
850 UINT16_MAX;
851 case NumRefsFlags::Uses4B:
852 return support::endian::read32le(NumRefsPtr);
853 case NumRefsFlags::Uses8B:
854 return support::endian::read64le(NumRefsPtr);
855 }
856 llvm_unreachable("Unknown NumRefsFlags enum");
857}
858
859void DataRecordHandle::skipNumRefs(LayoutFlags LF, int64_t &RelOffset) const {
860 if (LF.NumRefs >= NumRefsFlags::Uses4B)
861 RelOffset += 4;
862 if (LF.NumRefs >= NumRefsFlags::Uses8B)
863 RelOffset += 4;
864}
865
866int64_t DataRecordHandle::getRefsRelOffset() const {
867 LayoutFlags LF = getLayoutFlags();
868 int64_t RelOffset = sizeof(Header);
869 skipDataSize(LF, RelOffset);
870 skipNumRefs(LF, RelOffset);
871 return RelOffset;
872}
873
874int64_t DataRecordHandle::getDataRelOffset() const {
875 LayoutFlags LF = getLayoutFlags();
876 int64_t RelOffset = sizeof(Header);
877 skipDataSize(LF, RelOffset);
878 skipNumRefs(LF, RelOffset);
879 uint32_t RefSize = LF.RefKind == RefKindFlags::InternalRef4B ? 4 : 8;
880 RelOffset += RefSize * getNumRefs();
881 return RelOffset;
882}
883
884Error OnDiskGraphDB::validate(bool Deep, HashingFuncT Hasher) const {
885 if (UpstreamDB) {
886 if (auto E = UpstreamDB->validate(Deep, Hasher))
887 return E;
888 }
889 return Index.validate([&](FileOffset Offset,
891 -> Error {
892 auto formatError = [&](Twine Msg) {
893 return createStringError(
895 "bad record at 0x" +
896 utohexstr((unsigned)Offset.get(), /*LowerCase=*/true) + ": " +
897 Msg.str());
898 };
899
900 if (Record.Data.size() != sizeof(TrieRecord))
901 return formatError("wrong data record size");
902 if (!isAligned(Align::Of<TrieRecord>(), Record.Data.size()))
903 return formatError("wrong data record alignment");
904
905 auto *R = reinterpret_cast<const TrieRecord *>(Record.Data.data());
906 TrieRecord::Data D = R->load();
907 std::unique_ptr<MemoryBuffer> FileBuffer;
908 if ((uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::Unknown &&
909 (uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::DataPool &&
910 (uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::Standalone &&
911 (uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::StandaloneLeaf &&
912 (uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::StandaloneLeaf0)
913 return formatError("invalid record kind value");
914
916 auto I = getIndexProxyFromRef(Ref);
917 if (!I)
918 return I.takeError();
919
920 switch (D.SK) {
921 case TrieRecord::StorageKind::Unknown:
922 // This could be an abandoned entry due to a termination before updating
923 // the record. It can be reused by later insertion so just skip this entry
924 // for now.
925 return Error::success();
926 case TrieRecord::StorageKind::DataPool:
927 // Check that the offset is positive and that the data pool is large
928 // enough to hold the data record header at that offset.
929 if (D.Offset.get() <= 0 ||
930 D.Offset.get() + sizeof(DataRecordHandle::Header) >= DataPool.size())
931 return formatError("datapool record out of bound");
932 break;
933 case TrieRecord::StorageKind::Standalone:
934 case TrieRecord::StorageKind::StandaloneLeaf:
935 case TrieRecord::StorageKind::StandaloneLeaf0:
936 SmallString<256> Path;
937 getStandalonePath(TrieRecord::getStandaloneFilePrefix(D.SK), *I, Path);
938 // If we need to validate the content of the file later, load the buffer
939 // here. Otherwise, just check the existence of the file.
940 if (Deep) {
941 auto File = MemoryBuffer::getFile(Path, /*IsText=*/false,
942 /*RequiresNullTerminator=*/false);
943 if (!File || !*File)
944 return formatError("record file \'" + Path + "\' does not exist");
945
946 FileBuffer = std::move(*File);
947 } else if (!llvm::sys::fs::exists(Path))
948 return formatError("record file \'" + Path + "\' does not exist");
949 }
950
951 if (!Deep)
952 return Error::success();
953
954 auto dataError = [&](Twine Msg) {
956 "bad data for digest \'" + toHex(I->Hash) +
957 "\': " + Msg.str());
958 };
959 SmallVector<ArrayRef<uint8_t>> Refs;
960
961
962 switch (D.SK) {
963 case TrieRecord::StorageKind::Unknown:
964 llvm_unreachable("already handled");
965 case TrieRecord::StorageKind::DataPool: {
966 auto DataRecord = DataRecordHandle::getFromDataPool(DataPool, D.Offset);
967 if (!DataRecord)
968 return dataError(toString(DataRecord.takeError()));
969
970 for (auto InternRef : DataRecord->getRefs()) {
971 auto Index = getIndexProxyFromRef(InternRef);
972 if (!Index)
973 return Index.takeError();
974 Refs.push_back(Index->Hash);
975 }
976 StoredData = DataRecord->getData();
977 break;
978 }
979 case TrieRecord::StorageKind::Standalone: {
980 if (FileBuffer->getBufferSize() < sizeof(DataRecordHandle::Header))
981 return dataError("data record is not big enough to read the header");
982 auto DataRecord = DataRecordHandle::get(FileBuffer->getBufferStart());
983 if (DataRecord.getTotalSize() > FileBuffer->getBufferSize())
984 return dataError(
985 "data record spans past the end of the standalone file");
986 for (auto InternRef : DataRecord.getRefs()) {
987 auto Index = getIndexProxyFromRef(InternRef);
988 if (!Index)
989 return Index.takeError();
990 Refs.push_back(Index->Hash);
991 }
992 StoredData = DataRecord.getData();
993 break;
994 }
995 case TrieRecord::StorageKind::StandaloneLeaf:
996 case TrieRecord::StorageKind::StandaloneLeaf0: {
997 StoredData = arrayRefFromStringRef<char>(FileBuffer->getBuffer());
998 if (D.SK == TrieRecord::StorageKind::StandaloneLeaf0) {
999 if (!FileBuffer->getBuffer().ends_with('\0'))
1000 return dataError("standalone file is not zero terminated");
1001 StoredData = StoredData.drop_back(1);
1002 }
1003 break;
1004 }
1005 }
1006
1007 SmallVector<uint8_t> ComputedHash;
1008 Hasher(Refs, StoredData, ComputedHash);
1009 if (I->Hash != ArrayRef(ComputedHash))
1010 return dataError("hash mismatch, got \'" + toHex(ComputedHash) +
1011 "\' instead");
1012
1013 return Error::success();
1014 });
1015}
1016
1017void OnDiskGraphDB::print(raw_ostream &OS) const {
1018 OS << "on-disk-root-path: " << RootPath << "\n";
1019
1020 struct PoolInfo {
1021 int64_t Offset;
1022 };
1023 SmallVector<PoolInfo> Pool;
1024
1025 OS << "\n";
1026 OS << "index:\n";
1027 Index.print(OS, [&](ArrayRef<char> Data) {
1028 assert(Data.size() == sizeof(TrieRecord));
1030 auto *R = reinterpret_cast<const TrieRecord *>(Data.data());
1031 TrieRecord::Data D = R->load();
1032 OS << " SK=";
1033 switch (D.SK) {
1034 case TrieRecord::StorageKind::Unknown:
1035 OS << "unknown ";
1036 break;
1037 case TrieRecord::StorageKind::DataPool:
1038 OS << "datapool ";
1039 Pool.push_back({D.Offset.get()});
1040 break;
1041 case TrieRecord::StorageKind::Standalone:
1042 OS << "standalone-data ";
1043 break;
1044 case TrieRecord::StorageKind::StandaloneLeaf:
1045 OS << "standalone-leaf ";
1046 break;
1047 case TrieRecord::StorageKind::StandaloneLeaf0:
1048 OS << "standalone-leaf+0";
1049 break;
1050 }
1051 OS << " Offset=" << (void *)D.Offset.get();
1052 });
1053 if (Pool.empty())
1054 return;
1055
1056 OS << "\n";
1057 OS << "pool:\n";
1058 llvm::sort(
1059 Pool, [](PoolInfo LHS, PoolInfo RHS) { return LHS.Offset < RHS.Offset; });
1060 for (PoolInfo PI : Pool) {
1061 OS << "- addr=" << (void *)PI.Offset << " ";
1062 auto D = DataRecordHandle::getFromDataPool(DataPool, FileOffset(PI.Offset));
1063 if (!D) {
1064 OS << "error: " << toString(D.takeError());
1065 return;
1066 }
1067
1068 OS << "record refs=" << D->getNumRefs() << " data=" << D->getDataSize()
1069 << " size=" << D->getTotalSize()
1070 << " end=" << (void *)(PI.Offset + D->getTotalSize()) << "\n";
1071 }
1072}
1073
1074Expected<OnDiskGraphDB::IndexProxy>
1075OnDiskGraphDB::indexHash(ArrayRef<uint8_t> Hash) {
1076 auto P = Index.insertLazy(
1077 Hash, [](FileOffset TentativeOffset,
1078 OnDiskTrieRawHashMap::ValueProxy TentativeValue) {
1079 assert(TentativeValue.Data.size() == sizeof(TrieRecord));
1080 assert(
1081 isAddrAligned(Align::Of<TrieRecord>(), TentativeValue.Data.data()));
1082 new (TentativeValue.Data.data()) TrieRecord();
1083 });
1084 if (LLVM_UNLIKELY(!P))
1085 return P.takeError();
1086
1087 assert(*P && "Expected insertion");
1088 return getIndexProxyFromPointer(*P);
1089}
1090
1091OnDiskGraphDB::IndexProxy OnDiskGraphDB::getIndexProxyFromPointer(
1092 OnDiskTrieRawHashMap::ConstOnDiskPtr P) const {
1093 assert(P);
1094 assert(P.getOffset());
1095 return IndexProxy{P.getOffset(), P->Hash,
1096 *const_cast<TrieRecord *>(
1097 reinterpret_cast<const TrieRecord *>(P->Data.data()))};
1098}
1099
1100Expected<ObjectID> OnDiskGraphDB::getReference(ArrayRef<uint8_t> Hash) {
1101 auto I = indexHash(Hash);
1102 if (LLVM_UNLIKELY(!I))
1103 return I.takeError();
1104 return getExternalReference(*I);
1105}
1106
1107ObjectID OnDiskGraphDB::getExternalReference(const IndexProxy &I) {
1108 return getExternalReference(makeInternalRef(I.Offset));
1109}
1110
1111std::optional<ObjectID>
1112OnDiskGraphDB::getExistingReference(ArrayRef<uint8_t> Digest,
1113 bool CheckUpstream) {
1114 auto tryUpstream =
1115 [&](std::optional<IndexProxy> I) -> std::optional<ObjectID> {
1116 if (!CheckUpstream || !UpstreamDB)
1117 return std::nullopt;
1118 std::optional<ObjectID> UpstreamID =
1119 UpstreamDB->getExistingReference(Digest);
1120 if (LLVM_UNLIKELY(!UpstreamID))
1121 return std::nullopt;
1122 auto Ref = expectedToOptional(indexHash(Digest));
1123 if (!Ref)
1124 return std::nullopt;
1125 if (!I)
1126 I.emplace(*Ref);
1127 return getExternalReference(*I);
1128 };
1129
1130 OnDiskTrieRawHashMap::ConstOnDiskPtr P = Index.find(Digest);
1131 if (!P)
1132 return tryUpstream(std::nullopt);
1133 IndexProxy I = getIndexProxyFromPointer(P);
1134 TrieRecord::Data Obj = I.Ref.load();
1135 if (Obj.SK == TrieRecord::StorageKind::Unknown)
1136 return tryUpstream(I);
1137 return getExternalReference(makeInternalRef(I.Offset));
1138}
1139
1140Expected<OnDiskGraphDB::IndexProxy>
1141OnDiskGraphDB::getIndexProxyFromRef(InternalRef Ref) const {
1142 auto P = Index.recoverFromFileOffset(Ref.getFileOffset());
1143 if (LLVM_UNLIKELY(!P))
1144 return P.takeError();
1145 return getIndexProxyFromPointer(*P);
1146}
1147
1148Expected<ArrayRef<uint8_t>> OnDiskGraphDB::getDigest(InternalRef Ref) const {
1149 auto I = getIndexProxyFromRef(Ref);
1150 if (!I)
1151 return I.takeError();
1152 return I->Hash;
1153}
1154
1155ArrayRef<uint8_t> OnDiskGraphDB::getDigest(const IndexProxy &I) const {
1156 return I.Hash;
1157}
1158
1159static OnDiskContent getContentFromHandle(const OnDiskDataAllocator &DataPool,
1160 ObjectHandle OH) {
1161 // Decode ObjectHandle to locate the stored content.
1162 uint64_t Data = OH.getOpaqueData();
1163 if (Data & 1) {
1164 const auto *SDIM =
1165 reinterpret_cast<const StandaloneDataInMemory *>(Data & (-1ULL << 1));
1166 return SDIM->getContent();
1167 }
1168
1169 auto DataHandle =
1170 cantFail(DataRecordHandle::getFromDataPool(DataPool, FileOffset(Data)));
1171 assert(DataHandle.getData().end()[0] == 0 && "Null termination");
1172 return OnDiskContent{DataHandle, std::nullopt};
1173}
1174
1175ArrayRef<char> OnDiskGraphDB::getObjectData(ObjectHandle Node) const {
1176 OnDiskContent Content = getContentFromHandle(DataPool, Node);
1177 if (Content.Bytes)
1178 return *Content.Bytes;
1179 assert(Content.Record && "Expected record or bytes");
1180 return Content.Record->getData();
1181}
1182
1183InternalRefArrayRef OnDiskGraphDB::getInternalRefs(ObjectHandle Node) const {
1184 if (std::optional<DataRecordHandle> Record =
1185 getContentFromHandle(DataPool, Node).Record)
1186 return Record->getRefs();
1187 return std::nullopt;
1188}
1189
1190Expected<std::optional<ObjectHandle>>
1191OnDiskGraphDB::load(ObjectID ExternalRef) {
1192 InternalRef Ref = getInternalRef(ExternalRef);
1193 auto I = getIndexProxyFromRef(Ref);
1194 if (!I)
1195 return I.takeError();
1196 TrieRecord::Data Object = I->Ref.load();
1197
1198 if (Object.SK == TrieRecord::StorageKind::Unknown)
1199 return faultInFromUpstream(ExternalRef);
1200
1201 if (Object.SK == TrieRecord::StorageKind::DataPool)
1202 return ObjectHandle::fromFileOffset(Object.Offset);
1203
1204 // Only TrieRecord::StorageKind::Standalone (and variants) need to be
1205 // explicitly loaded.
1206 //
1207 // There's corruption if standalone objects have offsets, or if we get here
1208 // for something that isn't standalone.
1209 if (Object.Offset)
1210 return createCorruptObjectError(getDigest(*I));
1211 switch (Object.SK) {
1212 case TrieRecord::StorageKind::Unknown:
1213 case TrieRecord::StorageKind::DataPool:
1214 llvm_unreachable("unexpected storage kind");
1215 case TrieRecord::StorageKind::Standalone:
1216 case TrieRecord::StorageKind::StandaloneLeaf0:
1217 case TrieRecord::StorageKind::StandaloneLeaf:
1218 break;
1219 }
1220
1221 // Load it from disk.
1222 //
1223 // Note: Creation logic guarantees that data that needs null-termination is
1224 // suitably 0-padded. Requiring null-termination here would be too expensive
1225 // for extremely large objects that happen to be page-aligned.
1226 SmallString<256> Path;
1227 getStandalonePath(TrieRecord::getStandaloneFilePrefix(Object.SK), *I, Path);
1228
1229 auto File = sys::fs::openNativeFileForRead(Path);
1230 if (!File)
1231 return createFileError(Path, File.takeError());
1232
1233 auto CloseFile = make_scope_exit([&]() { sys::fs::closeFile(*File); });
1234
1235 sys::fs::file_status Status;
1236 if (std::error_code EC = sys::fs::status(*File, Status))
1237 return createFileError(Path, EC);
1238
1239 std::error_code EC;
1240 auto Region = std::make_unique<sys::fs::mapped_file_region>(
1241 *File, sys::fs::mapped_file_region::readonly, Status.getSize(), 0, EC);
1242 if (EC)
1243 return createFileError(Path, EC);
1244
1245 return ObjectHandle::fromMemory(
1246 static_cast<StandaloneDataMapTy *>(StandaloneData)
1247 ->insert(I->Hash, Object.SK, std::move(Region)));
1248}
1249
1251 auto Presence = getObjectPresence(Ref, /*CheckUpstream=*/true);
1252 if (!Presence)
1253 return Presence.takeError();
1254
1255 switch (*Presence) {
1256 case ObjectPresence::Missing:
1257 return false;
1258 case ObjectPresence::InPrimaryDB:
1259 return true;
1260 case ObjectPresence::OnlyInUpstreamDB:
1261 if (auto FaultInResult = faultInFromUpstream(Ref); !FaultInResult)
1262 return FaultInResult.takeError();
1263 return true;
1264 }
1265 llvm_unreachable("Unknown ObjectPresence enum");
1266}
1267
1268Expected<OnDiskGraphDB::ObjectPresence>
1269OnDiskGraphDB::getObjectPresence(ObjectID ExternalRef,
1270 bool CheckUpstream) const {
1271 InternalRef Ref = getInternalRef(ExternalRef);
1272 auto I = getIndexProxyFromRef(Ref);
1273 if (!I)
1274 return I.takeError();
1275
1276 TrieRecord::Data Object = I->Ref.load();
1277 if (Object.SK != TrieRecord::StorageKind::Unknown)
1278 return ObjectPresence::InPrimaryDB;
1279
1280 if (!CheckUpstream || !UpstreamDB)
1281 return ObjectPresence::Missing;
1282
1283 std::optional<ObjectID> UpstreamID =
1284 UpstreamDB->getExistingReference(getDigest(*I));
1285 return UpstreamID.has_value() ? ObjectPresence::OnlyInUpstreamDB
1286 : ObjectPresence::Missing;
1287}
1288
1289InternalRef OnDiskGraphDB::makeInternalRef(FileOffset IndexOffset) {
1290 return InternalRef::getFromOffset(IndexOffset);
1291}
1292
1293void OnDiskGraphDB::getStandalonePath(StringRef Prefix, const IndexProxy &I,
1294 SmallVectorImpl<char> &Path) const {
1295 Path.assign(RootPath.begin(), RootPath.end());
1296 sys::path::append(Path,
1297 Prefix + Twine(I.Offset.get()) + "." + CASFormatVersion);
1298}
1299
1300OnDiskContent StandaloneDataInMemory::getContent() const {
1301 bool Leaf0 = false;
1302 bool Leaf = false;
1303 switch (SK) {
1304 default:
1305 llvm_unreachable("Storage kind must be standalone");
1306 case TrieRecord::StorageKind::Standalone:
1307 break;
1308 case TrieRecord::StorageKind::StandaloneLeaf0:
1309 Leaf = Leaf0 = true;
1310 break;
1311 case TrieRecord::StorageKind::StandaloneLeaf:
1312 Leaf = true;
1313 break;
1314 }
1315
1316 if (Leaf) {
1317 StringRef Data(Region->data(), Region->size());
1318 assert(Data.drop_back(Leaf0).end()[0] == 0 &&
1319 "Standalone node data missing null termination");
1320 return OnDiskContent{std::nullopt,
1321 arrayRefFromStringRef<char>(Data.drop_back(Leaf0))};
1322 }
1323
1324 DataRecordHandle Record = DataRecordHandle::get(Region->data());
1325 assert(Record.getData().end()[0] == 0 &&
1326 "Standalone object record missing null termination for data");
1327 return OnDiskContent{Record, std::nullopt};
1328}
1329
1330static Expected<MappedTempFile> createTempFile(StringRef FinalPath,
1331 uint64_t Size) {
1332 assert(Size && "Unexpected request for an empty temp file");
1333 Expected<TempFile> File = TempFile::create(FinalPath + ".%%%%%%");
1334 if (!File)
1335 return File.takeError();
1336
1337 if (Error E = preallocateFileTail(File->FD, 0, Size).takeError())
1338 return createFileError(File->TmpName, std::move(E));
1339
1340 if (auto EC = sys::fs::resize_file_before_mapping_readwrite(File->FD, Size))
1341 return createFileError(File->TmpName, EC);
1342
1343 std::error_code EC;
1344 sys::fs::mapped_file_region Map(sys::fs::convertFDToNativeFile(File->FD),
1345 sys::fs::mapped_file_region::readwrite, Size,
1346 0, EC);
1347 if (EC)
1348 return createFileError(File->TmpName, EC);
1349 return MappedTempFile(std::move(*File), std::move(Map));
1350}
1351
1352static size_t getPageSize() {
1353 static size_t PageSize = sys::Process::getPageSizeEstimate();
1354 return PageSize;
1355}
1356
1357Error OnDiskGraphDB::createStandaloneLeaf(IndexProxy &I, ArrayRef<char> Data) {
1358 assert(Data.size() > TrieRecord::MaxEmbeddedSize &&
1359 "Expected a bigger file for external content...");
1360
1361 bool Leaf0 = isAligned(Align(getPageSize()), Data.size());
1362 TrieRecord::StorageKind SK = Leaf0 ? TrieRecord::StorageKind::StandaloneLeaf0
1363 : TrieRecord::StorageKind::StandaloneLeaf;
1364
1365 SmallString<256> Path;
1366 int64_t FileSize = Data.size() + Leaf0;
1367 getStandalonePath(TrieRecord::getStandaloneFilePrefix(SK), I, Path);
1368
1369 // Write the file. Don't reuse this mapped_file_region, which is read/write.
1370 // Let load() pull up one that's read-only.
1371 Expected<MappedTempFile> File = createTempFile(Path, FileSize);
1372 if (!File)
1373 return File.takeError();
1374 assert(File->size() == (uint64_t)FileSize);
1375 llvm::copy(Data, File->data());
1376 if (Leaf0)
1377 File->data()[Data.size()] = 0;
1378 assert(File->data()[Data.size()] == 0);
1379 if (Error E = File->keep(Path))
1380 return E;
1381
1382 // Store the object reference.
1383 TrieRecord::Data Existing;
1384 {
1385 TrieRecord::Data Leaf{SK, FileOffset()};
1386 if (I.Ref.compare_exchange_strong(Existing, Leaf)) {
1387 recordStandaloneSizeIncrease(FileSize);
1388 return Error::success();
1389 }
1390 }
1391
1392 // If there was a race, confirm that the new value has valid storage.
1393 if (Existing.SK == TrieRecord::StorageKind::Unknown)
1394 return createCorruptObjectError(getDigest(I));
1395
1396 return Error::success();
1397}
1398
1399Error OnDiskGraphDB::store(ObjectID ID, ArrayRef<ObjectID> Refs,
1400 ArrayRef<char> Data) {
1401 auto I = getIndexProxyFromRef(getInternalRef(ID));
1402 if (LLVM_UNLIKELY(!I))
1403 return I.takeError();
1404
1405 // Early return in case the node exists.
1406 {
1407 TrieRecord::Data Existing = I->Ref.load();
1408 if (Existing.SK != TrieRecord::StorageKind::Unknown)
1409 return Error::success();
1410 }
1411
1412 // Big leaf nodes.
1413 if (Refs.empty() && Data.size() > TrieRecord::MaxEmbeddedSize)
1414 return createStandaloneLeaf(*I, Data);
1415
1416 // TODO: Check whether it's worth checking the index for an already existing
1417 // object (like storeTreeImpl() does) before building up the
1418 // InternalRefVector.
1419 InternalRefVector InternalRefs;
1420 for (ObjectID Ref : Refs)
1421 InternalRefs.push_back(getInternalRef(Ref));
1422
1423 // Create the object.
1424
1425 DataRecordHandle::Input Input{InternalRefs, Data};
1426
1427 // Compute the storage kind, allocate it, and create the record.
1428 TrieRecord::StorageKind SK = TrieRecord::StorageKind::Unknown;
1429 FileOffset PoolOffset;
1430 SmallString<256> Path;
1431 std::optional<MappedTempFile> File;
1432 std::optional<uint64_t> FileSize;
1433 auto AllocStandaloneFile = [&](size_t Size) -> Expected<char *> {
1434 getStandalonePath(TrieRecord::getStandaloneFilePrefix(
1435 TrieRecord::StorageKind::Standalone),
1436 *I, Path);
1437 if (Error E = createTempFile(Path, Size).moveInto(File))
1438 return std::move(E);
1439 assert(File->size() == Size);
1440 FileSize = Size;
1441 SK = TrieRecord::StorageKind::Standalone;
1442 return File->data();
1443 };
1444 auto Alloc = [&](size_t Size) -> Expected<char *> {
1445 if (Size <= TrieRecord::MaxEmbeddedSize) {
1446 SK = TrieRecord::StorageKind::DataPool;
1447 auto P = DataPool.allocate(Size);
1448 if (LLVM_UNLIKELY(!P)) {
1449 char *NewAlloc = nullptr;
1450 auto NewE = handleErrors(
1451 P.takeError(), [&](std::unique_ptr<StringError> E) -> Error {
1452 if (E->convertToErrorCode() == std::errc::not_enough_memory)
1453 return AllocStandaloneFile(Size).moveInto(NewAlloc);
1454 return Error(std::move(E));
1455 });
1456 if (!NewE)
1457 return NewAlloc;
1458 return std::move(NewE);
1459 }
1460 PoolOffset = P->getOffset();
1461 LLVM_DEBUG({
1462 dbgs() << "pool-alloc addr=" << (void *)PoolOffset.get()
1463 << " size=" << Size
1464 << " end=" << (void *)(PoolOffset.get() + Size) << "\n";
1465 });
1466 return (*P)->data();
1467 }
1468 return AllocStandaloneFile(Size);
1469 };
1470
1471 DataRecordHandle Record;
1472 if (Error E =
1473 DataRecordHandle::createWithError(Alloc, Input).moveInto(Record))
1474 return E;
1475 assert(Record.getData().end()[0] == 0 && "Expected null-termination");
1476 assert(Record.getData() == Input.Data && "Expected initialization");
1477 assert(SK != TrieRecord::StorageKind::Unknown);
1478 assert(bool(File) != bool(PoolOffset) &&
1479 "Expected either a mapped file or a pooled offset");
1480
1481 // Check for a race before calling MappedTempFile::keep().
1482 //
1483 // Then decide what to do with the file. Better to discard than overwrite if
1484 // another thread/process has already added this.
1485 TrieRecord::Data Existing = I->Ref.load();
1486 {
1487 TrieRecord::Data NewObject{SK, PoolOffset};
1488 if (File) {
1489 if (Existing.SK == TrieRecord::StorageKind::Unknown) {
1490 // Keep the file!
1491 if (Error E = File->keep(Path))
1492 return E;
1493 } else {
1494 File.reset();
1495 }
1496 }
1497
1498 // If we didn't already see a racing/existing write, then try storing the
1499 // new object. If that races, confirm that the new value has valid storage.
1500 //
1501 // TODO: Find a way to reuse the storage from the new-but-abandoned record
1502 // handle.
1503 if (Existing.SK == TrieRecord::StorageKind::Unknown) {
1504 if (I->Ref.compare_exchange_strong(Existing, NewObject)) {
1505 if (FileSize)
1506 recordStandaloneSizeIncrease(*FileSize);
1507 return Error::success();
1508 }
1509 }
1510 }
1511
1512 if (Existing.SK == TrieRecord::StorageKind::Unknown)
1513 return createCorruptObjectError(getDigest(*I));
1514
1515 // Load existing object.
1516 return Error::success();
1517}
1518
1519void OnDiskGraphDB::recordStandaloneSizeIncrease(size_t SizeIncrease) {
1520 standaloneStorageSize().fetch_add(SizeIncrease, std::memory_order_relaxed);
1521}
1522
1523std::atomic<uint64_t> &OnDiskGraphDB::standaloneStorageSize() const {
1524 MutableArrayRef<uint8_t> UserHeader = DataPool.getUserHeader();
1525 assert(UserHeader.size() == sizeof(std::atomic<uint64_t>));
1526 assert(isAddrAligned(Align(8), UserHeader.data()));
1527 return *reinterpret_cast<std::atomic<uint64_t> *>(UserHeader.data());
1528}
1529
1530uint64_t OnDiskGraphDB::getStandaloneStorageSize() const {
1531 return standaloneStorageSize().load(std::memory_order_relaxed);
1532}
1533
1534size_t OnDiskGraphDB::getStorageSize() const {
1535 return Index.size() + DataPool.size() + getStandaloneStorageSize();
1536}
1537
1539 unsigned IndexPercent = Index.size() * 100ULL / Index.capacity();
1540 unsigned DataPercent = DataPool.size() * 100ULL / DataPool.capacity();
1541 return std::max(IndexPercent, DataPercent);
1542}
1543
1544Expected<std::unique_ptr<OnDiskGraphDB>>
1545OnDiskGraphDB::open(StringRef AbsPath, StringRef HashName,
1546 unsigned HashByteSize, OnDiskGraphDB *UpstreamDB,
1547 FaultInPolicy Policy) {
1548 if (std::error_code EC = sys::fs::create_directories(AbsPath))
1549 return createFileError(AbsPath, EC);
1550
1551 constexpr uint64_t MB = 1024ull * 1024ull;
1552 constexpr uint64_t GB = 1024ull * 1024ull * 1024ull;
1553
1554 uint64_t MaxIndexSize = 12 * GB;
1555 uint64_t MaxDataPoolSize = 24 * GB;
1556
1557 if (useSmallMappingSize(AbsPath)) {
1558 MaxIndexSize = 1 * GB;
1559 MaxDataPoolSize = 2 * GB;
1560 }
1561
1562 auto CustomSize = getOverriddenMaxMappingSize();
1563 if (!CustomSize)
1564 return CustomSize.takeError();
1565 if (*CustomSize)
1566 MaxIndexSize = MaxDataPoolSize = **CustomSize;
1567
1568 SmallString<256> IndexPath(AbsPath);
1570 std::optional<OnDiskTrieRawHashMap> Index;
1572 IndexPath, IndexTableName + "[" + HashName + "]",
1573 HashByteSize * CHAR_BIT,
1574 /*DataSize=*/sizeof(TrieRecord), MaxIndexSize,
1575 /*MinFileSize=*/MB)
1576 .moveInto(Index))
1577 return std::move(E);
1578
1579 uint32_t UserHeaderSize = sizeof(std::atomic<uint64_t>);
1580
1581 SmallString<256> DataPoolPath(AbsPath);
1583 std::optional<OnDiskDataAllocator> DataPool;
1584 StringRef PolicyName =
1585 Policy == FaultInPolicy::SingleNode ? "single" : "full";
1587 DataPoolPath,
1588 DataPoolTableName + "[" + HashName + "]" + PolicyName,
1589 MaxDataPoolSize, /*MinFileSize=*/MB, UserHeaderSize,
1590 [](void *UserHeaderPtr) {
1591 new (UserHeaderPtr) std::atomic<uint64_t>(0);
1592 })
1593 .moveInto(DataPool))
1594 return std::move(E);
1595 if (DataPool->getUserHeader().size() != UserHeaderSize)
1597 "unexpected user header in '" + DataPoolPath +
1598 "'");
1599
1600 return std::unique_ptr<OnDiskGraphDB>(new OnDiskGraphDB(
1601 AbsPath, std::move(*Index), std::move(*DataPool), UpstreamDB, Policy));
1602}
1603
1604OnDiskGraphDB::OnDiskGraphDB(StringRef RootPath, OnDiskTrieRawHashMap Index,
1605 OnDiskDataAllocator DataPool,
1606 OnDiskGraphDB *UpstreamDB, FaultInPolicy Policy)
1607 : Index(std::move(Index)), DataPool(std::move(DataPool)),
1608 RootPath(RootPath.str()), UpstreamDB(UpstreamDB), FIPolicy(Policy) {
1609 /// Lifetime for "big" objects not in DataPool.
1610 ///
1611 /// NOTE: Could use ThreadSafeTrieRawHashMap here. For now, doing something
1612 /// simpler on the assumption there won't be much contention since most data
1613 /// is not big. If there is contention, and we've already fixed ObjectProxy
1614 /// object handles to be cheap enough to use consistently, the fix might be
1615 /// to make better use of them rather than optimizing this map.
1616 ///
1617 /// FIXME: Figure out the right number of shards, if any.
1618 StandaloneData = new StandaloneDataMapTy();
1619}
1620
1621OnDiskGraphDB::~OnDiskGraphDB() {
1622 delete static_cast<StandaloneDataMapTy *>(StandaloneData);
1623}
1624
1625Error OnDiskGraphDB::importFullTree(ObjectID PrimaryID,
1626 ObjectHandle UpstreamNode) {
1627 // Copies the full CAS tree from upstream. Uses depth-first copying to protect
1628 // against the process dying during importing and leaving the database with an
1629 // incomplete tree. Note that if the upstream has missing nodes then the tree
1630 // will be copied with missing nodes as well; this is not considered an error.
1631 struct UpstreamCursor {
1632 ObjectHandle Node;
1633 size_t RefsCount;
1634 object_refs_iterator RefI;
1635 object_refs_iterator RefE;
1636 };
1637 /// Keeps track of the state of visitation for current node and all of its
1638 /// parents.
1639 SmallVector<UpstreamCursor> CursorStack;
1640 /// Keeps track of the currently visited nodes as they are imported into
1641 /// primary database, from current node and its parents. When a node is
1642 /// entered for visitation it appends its own ID, then appends referenced IDs
1643 /// as they get imported. When a node is fully imported it removes the
1644 /// referenced IDs from the bottom of the stack which leaves its own ID at the
1645 /// bottom, adding to the list of referenced IDs for the parent node.
1646 SmallVector<ObjectID, 128> PrimaryNodesStack;
1647
1648 auto enqueueNode = [&](ObjectID PrimaryID, std::optional<ObjectHandle> Node) {
1649 PrimaryNodesStack.push_back(PrimaryID);
1650 if (!Node)
1651 return;
1652 auto Refs = UpstreamDB->getObjectRefs(*Node);
1653 CursorStack.push_back(
1654 {*Node, (size_t)llvm::size(Refs), Refs.begin(), Refs.end()});
1655 };
1656
1657 enqueueNode(PrimaryID, UpstreamNode);
1658
1659 while (!CursorStack.empty()) {
1660 UpstreamCursor &Cur = CursorStack.back();
1661 if (Cur.RefI == Cur.RefE) {
1662 // Copy the node data into the primary store.
1663 // FIXME: Use hard-link or cloning if the file-system supports it and data
1664 // is stored into a separate file.
1665
1666 // The bottom of \p PrimaryNodesStack contains the primary ID for the
1667 // current node plus the list of imported referenced IDs.
1668 assert(PrimaryNodesStack.size() >= Cur.RefsCount + 1);
1669 ObjectID PrimaryID = *(PrimaryNodesStack.end() - Cur.RefsCount - 1);
1670 auto PrimaryRefs = ArrayRef(PrimaryNodesStack)
1671 .slice(PrimaryNodesStack.size() - Cur.RefsCount);
1672 auto Data = UpstreamDB->getObjectData(Cur.Node);
1673 if (Error E = store(PrimaryID, PrimaryRefs, Data))
1674 return E;
1675 // Remove the current node and its IDs from the stack.
1676 PrimaryNodesStack.truncate(PrimaryNodesStack.size() - Cur.RefsCount);
1677 CursorStack.pop_back();
1678 continue;
1679 }
1680
1681 ObjectID UpstreamID = *(Cur.RefI++);
1682 auto PrimaryID = getReference(UpstreamDB->getDigest(UpstreamID));
1683 if (LLVM_UNLIKELY(!PrimaryID))
1684 return PrimaryID.takeError();
1685 if (containsObject(*PrimaryID, /*CheckUpstream=*/false)) {
1686 // This \p ObjectID already exists in the primary. Either it was imported
1687 // via \p importFullTree or the client created it, in which case the
1688 // client takes responsibility for how it was formed.
1689 enqueueNode(*PrimaryID, std::nullopt);
1690 continue;
1691 }
1692 Expected<std::optional<ObjectHandle>> UpstreamNode =
1693 UpstreamDB->load(UpstreamID);
1694 if (!UpstreamNode)
1695 return UpstreamNode.takeError();
1696 enqueueNode(*PrimaryID, *UpstreamNode);
1697 }
1698
1699 assert(PrimaryNodesStack.size() == 1);
1700 assert(PrimaryNodesStack.front() == PrimaryID);
1701 return Error::success();
1702}
1703
1704Error OnDiskGraphDB::importSingleNode(ObjectID PrimaryID,
1705 ObjectHandle UpstreamNode) {
1706 // Copies only a single node, it doesn't copy the referenced nodes.
1707
1708 // Copy the node data into the primary store.
1709 // FIXME: Use hard-link or cloning if the file-system supports it and data is
1710 // stored into a separate file.
1711 auto Data = UpstreamDB->getObjectData(UpstreamNode);
1712 auto UpstreamRefs = UpstreamDB->getObjectRefs(UpstreamNode);
1713 SmallVector<ObjectID> Refs;
1714 Refs.reserve(llvm::size(UpstreamRefs));
1715 for (ObjectID UpstreamRef : UpstreamRefs) {
1716 auto Ref = getReference(UpstreamDB->getDigest(UpstreamRef));
1717 if (LLVM_UNLIKELY(!Ref))
1718 return Ref.takeError();
1719 Refs.push_back(*Ref);
1720 }
1721
1722 return store(PrimaryID, Refs, Data);
1723}
1724
1725Expected<std::optional<ObjectHandle>>
1726OnDiskGraphDB::faultInFromUpstream(ObjectID PrimaryID) {
1727 if (!UpstreamDB)
1728 return std::nullopt;
1729
1730 auto UpstreamID = UpstreamDB->getReference(getDigest(PrimaryID));
1731 if (LLVM_UNLIKELY(!UpstreamID))
1732 return UpstreamID.takeError();
1733
1734 Expected<std::optional<ObjectHandle>> UpstreamNode =
1735 UpstreamDB->load(*UpstreamID);
1736 if (!UpstreamNode)
1737 return UpstreamNode.takeError();
1738 if (!*UpstreamNode)
1739 return std::nullopt;
1740
1741 if (Error E = FIPolicy == FaultInPolicy::SingleNode
1742 ? importSingleNode(PrimaryID, **UpstreamNode)
1743 : importFullTree(PrimaryID, **UpstreamNode))
1744 return std::move(E);
1745 return load(PrimaryID);
1746}
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:854
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
FileOffset is a wrapper around uint64_t to represent the offset of data from the beginning of the fil...
Definition FileOffset.h:24
uint64_t get() const
Definition FileOffset.h:26
Handle to a loaded object in a ObjectStore instance.
LLVM_ABI_FOR_TEST Expected< ArrayRef< char > > get(FileOffset Offset, size_t Size) const
Get the data of Size stored at the given Offset.
static LLVM_ABI_FOR_TEST Expected< OnDiskDataAllocator > create(const Twine &Path, const Twine &TableName, uint64_t MaxFileSize, std::optional< uint64_t > NewFileInitialSize, uint32_t UserHeaderSize=0, function_ref< void(void *)> UserHeaderInit=nullptr)
LLVM_ABI_FOR_TEST size_t size() const
OnDiskTrieRawHashMap is a persistent trie data structure used as hash maps.
static LLVM_ABI_FOR_TEST Expected< OnDiskTrieRawHashMap > create(const Twine &Path, const Twine &TrieName, size_t NumHashBits, uint64_t DataSize, uint64_t MaxFileSize, std::optional< uint64_t > NewFileInitialSize, std::optional< size_t > NewTableNumRootBits=std::nullopt, std::optional< size_t > NewTableNumSubtrieBits=std::nullopt)
Gets or creates a file at Path with a hash-mapped trie named TrieName.
static std::optional< InternalRef4B > tryToShrink(InternalRef Ref)
Shrink to 4B reference.
Array of internal node references.
Standard 8 byte reference inside OnDiskGraphDB.
static InternalRef getFromOffset(FileOffset Offset)
Handle for a loaded node object.
static ObjectHandle fromFileOffset(FileOffset Offset)
static ObjectHandle fromMemory(uintptr_t Ptr)
Reference to a node.
On-disk CAS nodes database, independent of a particular hashing algorithm.
FaultInPolicy
How to fault-in nodes if an upstream database is used.
@ SingleNode
Copy only the requested node.
void print(raw_ostream &OS) const
LLVM_ABI_FOR_TEST Expected< std::optional< ObjectHandle > > load(ObjectID Ref)
Expected< bool > isMaterialized(ObjectID Ref)
Check whether the object associated with Ref is stored in the CAS.
Error validate(bool Deep, HashingFuncT Hasher) const
Validate the OnDiskGraphDB.
object_refs_range getObjectRefs(ObjectHandle Node) const
unsigned getHardStorageLimitUtilization() const
LLVM_ABI_FOR_TEST Error store(ObjectID ID, ArrayRef< ObjectID > Refs, ArrayRef< char > Data)
Associate data & references with a particular object ID.
ArrayRef< uint8_t > getDigest(ObjectID Ref) const
static LLVM_ABI_FOR_TEST Expected< std::unique_ptr< OnDiskGraphDB > > open(StringRef Path, StringRef HashName, unsigned HashByteSize, OnDiskGraphDB *UpstreamDB=nullptr, FaultInPolicy Policy=FaultInPolicy::FullTree)
Open the on-disk store from a directory.
LLVM_ABI_FOR_TEST size_t getStorageSize() const
bool containsObject(ObjectID Ref, bool CheckUpstream=true) const
Check whether the object associated with Ref is stored in the CAS.
LLVM_ABI_FOR_TEST Expected< ObjectID > getReference(ArrayRef< uint8_t > Hash)
Form a reference for the provided hash.
function_ref< void( ArrayRef< ArrayRef< uint8_t > >, ArrayRef< char >, SmallVectorImpl< uint8_t > &)> HashingFuncT
Hashing function type for validation.
LLVM_ABI_FOR_TEST ArrayRef< char > getObjectData(ObjectHandle Node) const
LLVM_ABI_FOR_TEST std::optional< ObjectID > getExistingReference(ArrayRef< uint8_t > Digest, bool CheckUpstream=true)
Get an existing reference to the object Digest.
An efficient, type-erasing, non-owning reference to a callable.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
static unsigned getPageSizeEstimate()
Get the process's estimated page size.
Definition Process.h:62
LLVM_ABI Error keep(const Twine &Name)
static LLVM_ABI Expected< TempFile > create(const Twine &Model, unsigned Mode=all_read|all_write, OpenFlags ExtraFlags=OF_None)
This creates a temporary file with createUniqueFile and schedules it for deletion with sys::RemoveFil...
Represents the result of a call to sys::fs::status().
Definition FileSystem.h:222
This class represents a memory mapped file.
LLVM_ABI size_t size() const
Definition Path.cpp:1178
@ readonly
May only access map via const_data as read only.
@ readwrite
May access map via data and modify it. Written to path.
LLVM_ABI char * data() const
Definition Path.cpp:1183
#define UINT64_MAX
Definition DataTypes.h:77
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
constexpr StringLiteral CASFormatVersion
The version for all the ondisk database files.
Expected< std::optional< uint64_t > > getOverriddenMaxMappingSize()
Retrieves an overridden maximum mapping size for CAS files, if any, speicified by LLVM_CAS_MAX_MAPPIN...
Expected< size_t > preallocateFileTail(int FD, size_t CurrentSize, size_t NewSize)
Allocate space for the file FD on disk, if the filesystem supports it.
bool useSmallMappingSize(const Twine &Path)
Whether to use a small file mapping for ondisk databases created in Path.
uint64_t getDataSize(const FuncRecordTy *Record)
Return the coverage map data size for the function.
uint64_t read64le(const void *P)
Definition Endian.h:435
void write64le(void *P, uint64_t V)
Definition Endian.h:478
void write32le(void *P, uint32_t V)
Definition Endian.h:475
uint32_t read32le(const void *P)
Definition Endian.h:432
LLVM_ABI std::error_code closeFile(file_t &F)
Close the file object.
LLVM_ABI std::error_code rename(const Twine &from, const Twine &to)
Rename from to to.
std::error_code resize_file_before_mapping_readwrite(int FD, uint64_t Size)
Resize FD to Size before mapping mapped_file_region::readwrite.
Definition FileSystem.h:410
LLVM_ABI bool exists(const basic_file_status &status)
Does file exist?
Definition Path.cpp:1086
LLVM_ABI std::error_code createUniqueFile(const Twine &Model, int &ResultFD, SmallVectorImpl< char > &ResultPath, OpenFlags Flags=OF_None, unsigned Mode=all_read|all_write)
Create a uniquely named file.
Definition Path.cpp:874
LLVM_ABI std::error_code remove(const Twine &path, bool IgnoreNonExisting=true)
Remove path.
LLVM_ABI Expected< file_t > openNativeFileForRead(const Twine &Name, OpenFlags Flags=OF_None, SmallVectorImpl< char > *RealPath=nullptr)
Opens the file with the given name in a read-only mode, returning its open file descriptor.
LLVM_ABI std::error_code create_directories(const Twine &path, bool IgnoreExisting=true, perms Perms=owner_all|group_all)
Create all the non-existent directories in path.
Definition Path.cpp:972
LLVM_ABI file_t convertFDToNativeFile(int FD)
Converts from a Posix file descriptor number to a native file handle.
Definition FileSystem.h:991
LLVM_ABI std::error_code status(const Twine &path, file_status &result, bool follow=true)
Get file status as if by POSIX stat().
LLVM_ABI void append(SmallVectorImpl< char > &path, const Twine &a, const Twine &b="", const Twine &c="", const Twine &d="")
Append to path.
Definition Path.cpp:457
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
Error createFileError(const Twine &F, Error E)
Concatenate a source file path and/or name with an Error.
Definition Error.h:1399
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1667
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
Definition ScopeExit.h:59
ArrayRef< CharT > arrayRefFromStringRef(StringRef Input)
Construct a string ref from an array ref of unsigned chars.
std::error_code make_error_code(BitcodeError E)
@ Done
Definition Threading.h:60
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:134
Error handleErrors(Error E, HandlerTs &&... Hs)
Pass the ErrorInfo(s) contained in E to their respective handlers.
Definition Error.h:967
FunctionAddr VTableAddr uintptr_t uintptr_t DataSize
Definition InstrProf.h:267
std::string utohexstr(uint64_t X, bool LowerCase=false, unsigned Width=0)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition Error.h:1305
@ argument_out_of_domain
Definition Errc.h:37
@ illegal_byte_sequence
Definition Errc.h:52
@ invalid_argument
Definition Errc.h:56
std::optional< T > expectedToOptional(Expected< T > &&E)
Convert an Expected to an Optional without doing anything.
Definition Error.h:1094
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1634
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
@ Ref
The access may reference the value stored in memory.
Definition ModRef.h:32
@ Other
Any other memory.
Definition ModRef.h:68
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
Definition Error.h:769
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:189
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:1966
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
OutputIt copy(R &&Range, OutputIt Out)
Definition STLExtras.h:1847
void toHex(ArrayRef< uint8_t > Input, bool LowerCase, SmallVectorImpl< char > &Output)
Convert buffer Input to its hexadecimal representation. The returned string is double the size of Inp...
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1879
LLVM_ABI Error errorCodeToError(std::error_code EC)
Helper for converting an std::error_code to a Error.
Definition Error.cpp:111
void consumeError(Error Err)
Consume a Error without doing anything.
Definition Error.h:1083
bool isAddrAligned(Align Lhs, const void *Addr)
Checks that Addr is a multiple of the alignment.
Definition Alignment.h:139
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:870
Proxy for an on-disk index record.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
static constexpr Align Of()
Allow constructions of constexpr Align from types.
Definition Alignment.h:94
Const value proxy to access the records stored in TrieRawHashMap.
Value proxy to access the records stored in TrieRawHashMap.