LLVM 22.0.0git
OnDiskGraphDB.cpp
Go to the documentation of this file.
1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file implements OnDiskGraphDB, an on-disk CAS nodes database,
11/// independent of a particular hashing algorithm. It only needs to be
12/// configured for the hash size and controls the schema of the storage.
13///
14/// OnDiskGraphDB defines:
15///
16/// - How the data is stored inside database, either as a standalone file, or
17/// allocated inside a datapool.
18/// - How references to other objects inside the same database is stored. They
19/// are stored as internal references, instead of full hash value to save
20/// space.
21/// - How to chain databases together and import objects from upstream
22/// databases.
23///
24/// Here's a top-level description of the current layout:
25///
26/// - db/index.<version>: a file for the "index" table, named by \a
27/// IndexTableName and managed by \a TrieRawHashMap. The contents are 8B
28/// that are accessed atomically, describing the object kind and where/how
29/// it's stored (including an optional file offset). See \a TrieRecord for
30/// more details.
31/// - db/data.<version>: a file for the "data" table, named by \a
32/// DataPoolTableName and managed by \a DataStore. New objects within
33/// TrieRecord::MaxEmbeddedSize are inserted here as \a
34/// TrieRecord::StorageKind::DataPool.
35/// - db/obj.<offset>.<version>: a file storing an object outside the main
36/// "data" table, named by its offset into the "index" table, with the
37/// format of \a TrieRecord::StorageKind::Standalone.
38/// - db/leaf.<offset>.<version>: a file storing a leaf node outside the
39/// main "data" table, named by its offset into the "index" table, with
40/// the format of \a TrieRecord::StorageKind::StandaloneLeaf.
41/// - db/leaf+0.<offset>.<version>: a file storing a null-terminated leaf object
42/// outside the main "data" table, named by its offset into the "index" table,
43/// with the format of \a TrieRecord::StorageKind::StandaloneLeaf0.
44//
45//===----------------------------------------------------------------------===//
46
48#include "OnDiskCommon.h"
49#include "llvm/ADT/DenseMap.h"
50#include "llvm/ADT/ScopeExit.h"
56#include "llvm/Support/Errc.h"
57#include "llvm/Support/Error.h"
62#include "llvm/Support/Path.h"
64#include <atomic>
65#include <mutex>
66#include <optional>
67
68#define DEBUG_TYPE "on-disk-cas"
69
70using namespace llvm;
71using namespace llvm::cas;
72using namespace llvm::cas::ondisk;
73
74static constexpr StringLiteral IndexTableName = "llvm.cas.index";
75static constexpr StringLiteral DataPoolTableName = "llvm.cas.data";
76
77static constexpr StringLiteral IndexFilePrefix = "index.";
78static constexpr StringLiteral DataPoolFilePrefix = "data.";
79
80static constexpr StringLiteral FilePrefixObject = "obj.";
81static constexpr StringLiteral FilePrefixLeaf = "leaf.";
82static constexpr StringLiteral FilePrefixLeaf0 = "leaf+0.";
83
85 if (!ID)
86 return ID.takeError();
87
89 "corrupt object '" + toHex(*ID) + "'");
90}
91
92namespace {
93
94/// Trie record data: 8 bytes, atomic<uint64_t>
95/// - 1-byte: StorageKind
96/// - 7-bytes: DataStoreOffset (offset into referenced file)
97class TrieRecord {
98public:
99 enum class StorageKind : uint8_t {
100 /// Unknown object.
101 Unknown = 0,
102
103 /// data.vX: main pool, full DataStore record.
104 DataPool = 1,
105
106 /// obj.<TrieRecordOffset>.vX: standalone, with a full DataStore record.
107 Standalone = 10,
108
109 /// leaf.<TrieRecordOffset>.vX: standalone, just the data. File contents
110 /// exactly the data content and file size matches the data size. No refs.
111 StandaloneLeaf = 11,
112
113 /// leaf+0.<TrieRecordOffset>.vX: standalone, just the data plus an
114 /// extra null character ('\0'). File size is 1 bigger than the data size.
115 /// No refs.
116 StandaloneLeaf0 = 12,
117 };
118
119 static StringRef getStandaloneFilePrefix(StorageKind SK) {
120 switch (SK) {
121 default:
122 llvm_unreachable("Expected standalone storage kind");
123 case TrieRecord::StorageKind::Standalone:
124 return FilePrefixObject;
125 case TrieRecord::StorageKind::StandaloneLeaf:
126 return FilePrefixLeaf;
127 case TrieRecord::StorageKind::StandaloneLeaf0:
128 return FilePrefixLeaf0;
129 }
130 }
131
132 enum Limits : int64_t {
133 /// Saves files bigger than 64KB standalone instead of embedding them.
134 MaxEmbeddedSize = 64LL * 1024LL - 1,
135 };
136
137 struct Data {
138 StorageKind SK = StorageKind::Unknown;
139 FileOffset Offset;
140 };
141
142 /// Pack StorageKind and Offset from Data into 8 byte TrieRecord.
143 static uint64_t pack(Data D) {
144 assert(D.Offset.get() < (int64_t)(1ULL << 56));
145 uint64_t Packed = uint64_t(D.SK) << 56 | D.Offset.get();
146 assert(D.SK != StorageKind::Unknown || Packed == 0);
147#ifndef NDEBUG
148 Data RoundTrip = unpack(Packed);
149 assert(D.SK == RoundTrip.SK);
150 assert(D.Offset.get() == RoundTrip.Offset.get());
151#endif
152 return Packed;
153 }
154
155 // Unpack TrieRecord into Data.
156 static Data unpack(uint64_t Packed) {
157 Data D;
158 if (!Packed)
159 return D;
160 D.SK = (StorageKind)(Packed >> 56);
161 D.Offset = FileOffset(Packed & (UINT64_MAX >> 8));
162 return D;
163 }
164
165 TrieRecord() : Storage(0) {}
166
167 Data load() const { return unpack(Storage); }
168 bool compare_exchange_strong(Data &Existing, Data New);
169
170private:
171 std::atomic<uint64_t> Storage;
172};
173
174/// DataStore record data: 4B + size? + refs? + data + 0
175/// - 4-bytes: Header
176/// - {0,4,8}-bytes: DataSize (may be packed in Header)
177/// - {0,4,8}-bytes: NumRefs (may be packed in Header)
178/// - NumRefs*{4,8}-bytes: Refs[] (end-ptr is 8-byte aligned)
179/// - <data>
180/// - 1-byte: 0-term
181struct DataRecordHandle {
182 /// NumRefs storage: 4B, 2B, 1B, or 0B (no refs). Or, 8B, for alignment
183 /// convenience to avoid computing padding later.
184 enum class NumRefsFlags : uint8_t {
185 Uses0B = 0U,
186 Uses1B = 1U,
187 Uses2B = 2U,
188 Uses4B = 3U,
189 Uses8B = 4U,
190 Max = Uses8B,
191 };
192
193 /// DataSize storage: 8B, 4B, 2B, or 1B.
194 enum class DataSizeFlags {
195 Uses1B = 0U,
196 Uses2B = 1U,
197 Uses4B = 2U,
198 Uses8B = 3U,
199 Max = Uses8B,
200 };
201
202 /// Kind of ref stored in Refs[]: InternalRef or InternalRef4B.
203 enum class RefKindFlags {
204 InternalRef = 0U,
205 InternalRef4B = 1U,
206 Max = InternalRef4B,
207 };
208
209 enum Counts : int {
210 NumRefsShift = 0,
211 NumRefsBits = 3,
212 DataSizeShift = NumRefsShift + NumRefsBits,
213 DataSizeBits = 2,
214 RefKindShift = DataSizeShift + DataSizeBits,
215 RefKindBits = 1,
216 };
217 static_assert(((UINT32_MAX << NumRefsBits) & (uint32_t)NumRefsFlags::Max) ==
218 0,
219 "Not enough bits");
220 static_assert(((UINT32_MAX << DataSizeBits) & (uint32_t)DataSizeFlags::Max) ==
221 0,
222 "Not enough bits");
223 static_assert(((UINT32_MAX << RefKindBits) & (uint32_t)RefKindFlags::Max) ==
224 0,
225 "Not enough bits");
226
227 /// Layout of the DataRecordHandle and how to decode it.
228 struct LayoutFlags {
229 NumRefsFlags NumRefs;
230 DataSizeFlags DataSize;
231 RefKindFlags RefKind;
232
233 static uint64_t pack(LayoutFlags LF) {
234 unsigned Packed = ((unsigned)LF.NumRefs << NumRefsShift) |
235 ((unsigned)LF.DataSize << DataSizeShift) |
236 ((unsigned)LF.RefKind << RefKindShift);
237#ifndef NDEBUG
238 LayoutFlags RoundTrip = unpack(Packed);
239 assert(LF.NumRefs == RoundTrip.NumRefs);
240 assert(LF.DataSize == RoundTrip.DataSize);
241 assert(LF.RefKind == RoundTrip.RefKind);
242#endif
243 return Packed;
244 }
245 static LayoutFlags unpack(uint64_t Storage) {
246 assert(Storage <= UINT8_MAX && "Expect storage to fit in a byte");
247 LayoutFlags LF;
248 LF.NumRefs =
249 (NumRefsFlags)((Storage >> NumRefsShift) & ((1U << NumRefsBits) - 1));
250 LF.DataSize = (DataSizeFlags)((Storage >> DataSizeShift) &
251 ((1U << DataSizeBits) - 1));
252 LF.RefKind =
253 (RefKindFlags)((Storage >> RefKindShift) & ((1U << RefKindBits) - 1));
254 return LF;
255 }
256 };
257
258 /// Header layout:
259 /// - 1-byte: LayoutFlags
260 /// - 1-byte: 1B size field
261 /// - {0,2}-bytes: 2B size field
262 struct Header {
263 using PackTy = uint32_t;
264 PackTy Packed;
265
266 static constexpr unsigned LayoutFlagsShift =
267 (sizeof(PackTy) - 1) * CHAR_BIT;
268 };
269
270 struct Input {
271 InternalRefArrayRef Refs;
272 ArrayRef<char> Data;
273 };
274
275 LayoutFlags getLayoutFlags() const {
276 return LayoutFlags::unpack(H->Packed >> Header::LayoutFlagsShift);
277 }
278
279 uint64_t getDataSize() const;
280 void skipDataSize(LayoutFlags LF, int64_t &RelOffset) const;
281 uint32_t getNumRefs() const;
282 void skipNumRefs(LayoutFlags LF, int64_t &RelOffset) const;
283 int64_t getRefsRelOffset() const;
284 int64_t getDataRelOffset() const;
285
286 static uint64_t getTotalSize(uint64_t DataRelOffset, uint64_t DataSize) {
287 return DataRelOffset + DataSize + 1;
288 }
289 uint64_t getTotalSize() const {
290 return getDataRelOffset() + getDataSize() + 1;
291 }
292
293 /// Describe the layout of data stored and how to decode from
294 /// DataRecordHandle.
295 struct Layout {
296 explicit Layout(const Input &I);
297
298 LayoutFlags Flags;
299 uint64_t DataSize = 0;
300 uint32_t NumRefs = 0;
301 int64_t RefsRelOffset = 0;
302 int64_t DataRelOffset = 0;
303 uint64_t getTotalSize() const {
304 return DataRecordHandle::getTotalSize(DataRelOffset, DataSize);
305 }
306 };
307
308 InternalRefArrayRef getRefs() const {
309 assert(H && "Expected valid handle");
310 auto *BeginByte = reinterpret_cast<const char *>(H) + getRefsRelOffset();
311 size_t Size = getNumRefs();
312 if (!Size)
313 return InternalRefArrayRef();
314 if (getLayoutFlags().RefKind == RefKindFlags::InternalRef4B)
315 return ArrayRef(reinterpret_cast<const InternalRef4B *>(BeginByte), Size);
316 return ArrayRef(reinterpret_cast<const InternalRef *>(BeginByte), Size);
317 }
318
319 ArrayRef<char> getData() const {
320 assert(H && "Expected valid handle");
321 return ArrayRef(reinterpret_cast<const char *>(H) + getDataRelOffset(),
322 getDataSize());
323 }
324
325 static DataRecordHandle create(function_ref<char *(size_t Size)> Alloc,
326 const Input &I);
327 static Expected<DataRecordHandle>
328 createWithError(function_ref<Expected<char *>(size_t Size)> Alloc,
329 const Input &I);
330 static DataRecordHandle construct(char *Mem, const Input &I);
331
332 static DataRecordHandle get(const char *Mem) {
333 return DataRecordHandle(
334 *reinterpret_cast<const DataRecordHandle::Header *>(Mem));
335 }
336 static Expected<DataRecordHandle>
337 getFromDataPool(const OnDiskDataAllocator &Pool, FileOffset Offset);
338
339 explicit operator bool() const { return H; }
340 const Header &getHeader() const { return *H; }
341
342 DataRecordHandle() = default;
343 explicit DataRecordHandle(const Header &H) : H(&H) {}
344
345private:
346 static DataRecordHandle constructImpl(char *Mem, const Input &I,
347 const Layout &L);
348 const Header *H = nullptr;
349};
350
351/// Proxy for any on-disk object or raw data.
352struct OnDiskContent {
353 std::optional<DataRecordHandle> Record;
354 std::optional<ArrayRef<char>> Bytes;
355};
356
357/// Data loaded inside the memory from standalone file.
358class StandaloneDataInMemory {
359public:
360 OnDiskContent getContent() const;
361
362 StandaloneDataInMemory(std::unique_ptr<sys::fs::mapped_file_region> Region,
363 TrieRecord::StorageKind SK)
364 : Region(std::move(Region)), SK(SK) {
365#ifndef NDEBUG
366 bool IsStandalone = false;
367 switch (SK) {
368 case TrieRecord::StorageKind::Standalone:
369 case TrieRecord::StorageKind::StandaloneLeaf:
370 case TrieRecord::StorageKind::StandaloneLeaf0:
371 IsStandalone = true;
372 break;
373 default:
374 break;
375 }
376 assert(IsStandalone);
377#endif
378 }
379
380private:
381 std::unique_ptr<sys::fs::mapped_file_region> Region;
382 TrieRecord::StorageKind SK;
383};
384
385/// Container to lookup loaded standalone objects.
386template <size_t NumShards> class StandaloneDataMap {
387 static_assert(isPowerOf2_64(NumShards), "Expected power of 2");
388
389public:
390 uintptr_t insert(ArrayRef<uint8_t> Hash, TrieRecord::StorageKind SK,
391 std::unique_ptr<sys::fs::mapped_file_region> Region);
392
393 const StandaloneDataInMemory *lookup(ArrayRef<uint8_t> Hash) const;
394 bool count(ArrayRef<uint8_t> Hash) const { return bool(lookup(Hash)); }
395
396private:
397 struct Shard {
398 /// Needs to store a std::unique_ptr for a stable address identity.
399 DenseMap<const uint8_t *, std::unique_ptr<StandaloneDataInMemory>> Map;
400 mutable std::mutex Mutex;
401 };
402 Shard &getShard(ArrayRef<uint8_t> Hash) {
403 return const_cast<Shard &>(
404 const_cast<const StandaloneDataMap *>(this)->getShard(Hash));
405 }
406 const Shard &getShard(ArrayRef<uint8_t> Hash) const {
407 static_assert(NumShards <= 256, "Expected only 8 bits of shard");
408 return Shards[Hash[0] % NumShards];
409 }
410
411 Shard Shards[NumShards];
412};
413
414using StandaloneDataMapTy = StandaloneDataMap<16>;
415
416/// A vector of internal node references.
417class InternalRefVector {
418public:
419 void push_back(InternalRef Ref) {
420 if (NeedsFull)
421 return FullRefs.push_back(Ref);
422 if (std::optional<InternalRef4B> Small = InternalRef4B::tryToShrink(Ref))
423 return SmallRefs.push_back(*Small);
424 NeedsFull = true;
425 assert(FullRefs.empty());
426 FullRefs.reserve(SmallRefs.size() + 1);
427 for (InternalRef4B Small : SmallRefs)
428 FullRefs.push_back(Small);
429 FullRefs.push_back(Ref);
430 SmallRefs.clear();
431 }
432
433 operator InternalRefArrayRef() const {
434 assert(SmallRefs.empty() || FullRefs.empty());
435 return NeedsFull ? InternalRefArrayRef(FullRefs)
436 : InternalRefArrayRef(SmallRefs);
437 }
438
439private:
440 bool NeedsFull = false;
443};
444
445} // namespace
446
447Expected<DataRecordHandle> DataRecordHandle::createWithError(
448 function_ref<Expected<char *>(size_t Size)> Alloc, const Input &I) {
449 Layout L(I);
450 if (Expected<char *> Mem = Alloc(L.getTotalSize()))
451 return constructImpl(*Mem, I, L);
452 else
453 return Mem.takeError();
454}
455
457 // Store the file offset as it is.
458 assert(!(Offset.get() & 0x1));
459 return ObjectHandle(Offset.get());
460}
461
463 // Store the pointer from memory with lowest bit set.
464 assert(!(Ptr & 0x1));
465 return ObjectHandle(Ptr | 1);
466}
467
468/// Proxy for an on-disk index record.
474
475template <size_t N>
476uintptr_t StandaloneDataMap<N>::insert(
477 ArrayRef<uint8_t> Hash, TrieRecord::StorageKind SK,
478 std::unique_ptr<sys::fs::mapped_file_region> Region) {
479 auto &S = getShard(Hash);
480 std::lock_guard<std::mutex> Lock(S.Mutex);
481 auto &V = S.Map[Hash.data()];
482 if (!V)
483 V = std::make_unique<StandaloneDataInMemory>(std::move(Region), SK);
484 return reinterpret_cast<uintptr_t>(V.get());
485}
486
487template <size_t N>
488const StandaloneDataInMemory *
489StandaloneDataMap<N>::lookup(ArrayRef<uint8_t> Hash) const {
490 auto &S = getShard(Hash);
491 std::lock_guard<std::mutex> Lock(S.Mutex);
492 auto I = S.Map.find(Hash.data());
493 if (I == S.Map.end())
494 return nullptr;
495 return &*I->second;
496}
497
498namespace {
499
500/// Copy of \a sys::fs::TempFile that skips RemoveOnSignal, which is too
501/// expensive to register/unregister at this rate.
502///
503/// FIXME: Add a TempFileManager that maintains a thread-safe list of open temp
504/// files and has a signal handler registerd that removes them all.
505class TempFile {
506 bool Done = false;
507 TempFile(StringRef Name, int FD) : TmpName(std::string(Name)), FD(FD) {}
508
509public:
510 /// This creates a temporary file with createUniqueFile.
511 static Expected<TempFile> create(const Twine &Model);
512 TempFile(TempFile &&Other) { *this = std::move(Other); }
513 TempFile &operator=(TempFile &&Other) {
514 TmpName = std::move(Other.TmpName);
515 FD = Other.FD;
516 Other.Done = true;
517 Other.FD = -1;
518 return *this;
519 }
520
521 // Name of the temporary file.
522 std::string TmpName;
523
524 // The open file descriptor.
525 int FD = -1;
526
527 // Keep this with the given name.
528 Error keep(const Twine &Name);
529 Error discard();
530
531 // This checks that keep or delete was called.
532 ~TempFile() { consumeError(discard()); }
533};
534
535class MappedTempFile {
536public:
537 char *data() const { return Map.data(); }
538 size_t size() const { return Map.size(); }
539
540 Error discard() {
541 assert(Map && "Map already destroyed");
542 Map.unmap();
543 return Temp.discard();
544 }
545
546 Error keep(const Twine &Name) {
547 assert(Map && "Map already destroyed");
548 Map.unmap();
549 return Temp.keep(Name);
550 }
551
552 MappedTempFile(TempFile Temp, sys::fs::mapped_file_region Map)
553 : Temp(std::move(Temp)), Map(std::move(Map)) {}
554
555private:
556 TempFile Temp;
557 sys::fs::mapped_file_region Map;
558};
559} // namespace
560
562 Done = true;
563 if (FD != -1) {
565 if (std::error_code EC = sys::fs::closeFile(File))
566 return errorCodeToError(EC);
567 }
568 FD = -1;
569
570 // Always try to close and remove.
571 std::error_code RemoveEC;
572 if (!TmpName.empty()) {
573 std::error_code EC = sys::fs::remove(TmpName);
574 if (EC)
575 return errorCodeToError(EC);
576 }
577 TmpName = "";
578
579 return Error::success();
580}
581
583 assert(!Done);
584 Done = true;
585 // Always try to close and rename.
586 std::error_code RenameEC = sys::fs::rename(TmpName, Name);
587
588 if (!RenameEC)
589 TmpName = "";
590
592 if (std::error_code EC = sys::fs::closeFile(File))
593 return errorCodeToError(EC);
594 FD = -1;
595
596 return errorCodeToError(RenameEC);
597}
598
600 int FD;
601 SmallString<128> ResultPath;
602 if (std::error_code EC = sys::fs::createUniqueFile(Model, FD, ResultPath))
603 return errorCodeToError(EC);
604
605 TempFile Ret(ResultPath, FD);
606 return std::move(Ret);
607}
608
609bool TrieRecord::compare_exchange_strong(Data &Existing, Data New) {
610 uint64_t ExistingPacked = pack(Existing);
611 uint64_t NewPacked = pack(New);
612 if (Storage.compare_exchange_strong(ExistingPacked, NewPacked))
613 return true;
614 Existing = unpack(ExistingPacked);
615 return false;
616}
617
619DataRecordHandle::getFromDataPool(const OnDiskDataAllocator &Pool,
621 auto HeaderData = Pool.get(Offset, sizeof(DataRecordHandle::Header));
622 if (!HeaderData)
623 return HeaderData.takeError();
624
625 auto Record = DataRecordHandle::get(HeaderData->data());
626 if (Record.getTotalSize() + Offset.get() > Pool.size())
627 return createStringError(
628 make_error_code(std::errc::illegal_byte_sequence),
629 "data record span passed the end of the data pool");
630
631 return Record;
632}
633
634DataRecordHandle DataRecordHandle::constructImpl(char *Mem, const Input &I,
635 const Layout &L) {
636 char *Next = Mem + sizeof(Header);
637
638 // Fill in Packed and set other data, then come back to construct the header.
639 Header::PackTy Packed = 0;
640 Packed |= LayoutFlags::pack(L.Flags) << Header::LayoutFlagsShift;
641
642 // Construct DataSize.
643 switch (L.Flags.DataSize) {
644 case DataSizeFlags::Uses1B:
645 assert(I.Data.size() <= UINT8_MAX);
646 Packed |= (Header::PackTy)I.Data.size()
647 << ((sizeof(Packed) - 2) * CHAR_BIT);
648 break;
649 case DataSizeFlags::Uses2B:
650 assert(I.Data.size() <= UINT16_MAX);
651 Packed |= (Header::PackTy)I.Data.size()
652 << ((sizeof(Packed) - 4) * CHAR_BIT);
653 break;
654 case DataSizeFlags::Uses4B:
655 support::endian::write32le(Next, I.Data.size());
656 Next += 4;
657 break;
658 case DataSizeFlags::Uses8B:
659 support::endian::write64le(Next, I.Data.size());
660 Next += 8;
661 break;
662 }
663
664 // Construct NumRefs.
665 //
666 // NOTE: May be writing NumRefs even if there are zero refs in order to fix
667 // alignment.
668 switch (L.Flags.NumRefs) {
669 case NumRefsFlags::Uses0B:
670 break;
671 case NumRefsFlags::Uses1B:
672 assert(I.Refs.size() <= UINT8_MAX);
673 Packed |= (Header::PackTy)I.Refs.size()
674 << ((sizeof(Packed) - 2) * CHAR_BIT);
675 break;
676 case NumRefsFlags::Uses2B:
677 assert(I.Refs.size() <= UINT16_MAX);
678 Packed |= (Header::PackTy)I.Refs.size()
679 << ((sizeof(Packed) - 4) * CHAR_BIT);
680 break;
681 case NumRefsFlags::Uses4B:
682 support::endian::write32le(Next, I.Refs.size());
683 Next += 4;
684 break;
685 case NumRefsFlags::Uses8B:
686 support::endian::write64le(Next, I.Refs.size());
687 Next += 8;
688 break;
689 }
690
691 // Construct Refs[].
692 if (!I.Refs.empty()) {
693 assert((L.Flags.RefKind == RefKindFlags::InternalRef4B) == I.Refs.is4B());
694 ArrayRef<uint8_t> RefsBuffer = I.Refs.getBuffer();
695 llvm::copy(RefsBuffer, Next);
696 Next += RefsBuffer.size();
697 }
698
699 // Construct Data and the trailing null.
701 llvm::copy(I.Data, Next);
702 Next[I.Data.size()] = 0;
703
704 // Construct the header itself and return.
705 Header *H = new (Mem) Header{Packed};
706 DataRecordHandle Record(*H);
707 assert(Record.getData() == I.Data);
708 assert(Record.getNumRefs() == I.Refs.size());
709 assert(Record.getRefs() == I.Refs);
710 assert(Record.getLayoutFlags().DataSize == L.Flags.DataSize);
711 assert(Record.getLayoutFlags().NumRefs == L.Flags.NumRefs);
712 assert(Record.getLayoutFlags().RefKind == L.Flags.RefKind);
713 return Record;
714}
715
716DataRecordHandle::Layout::Layout(const Input &I) {
717 // Start initial relative offsets right after the Header.
718 uint64_t RelOffset = sizeof(Header);
719
720 // Initialize the easy stuff.
721 DataSize = I.Data.size();
722 NumRefs = I.Refs.size();
723
724 // Check refs size.
725 Flags.RefKind =
726 I.Refs.is4B() ? RefKindFlags::InternalRef4B : RefKindFlags::InternalRef;
727
728 // Find the smallest slot available for DataSize.
729 bool Has1B = true;
730 bool Has2B = true;
731 if (DataSize <= UINT8_MAX && Has1B) {
732 Flags.DataSize = DataSizeFlags::Uses1B;
733 Has1B = false;
734 } else if (DataSize <= UINT16_MAX && Has2B) {
735 Flags.DataSize = DataSizeFlags::Uses2B;
736 Has2B = false;
737 } else if (DataSize <= UINT32_MAX) {
738 Flags.DataSize = DataSizeFlags::Uses4B;
739 RelOffset += 4;
740 } else {
741 Flags.DataSize = DataSizeFlags::Uses8B;
742 RelOffset += 8;
743 }
744
745 // Find the smallest slot available for NumRefs. Never sets NumRefs8B here.
746 if (!NumRefs) {
747 Flags.NumRefs = NumRefsFlags::Uses0B;
748 } else if (NumRefs <= UINT8_MAX && Has1B) {
749 Flags.NumRefs = NumRefsFlags::Uses1B;
750 Has1B = false;
751 } else if (NumRefs <= UINT16_MAX && Has2B) {
752 Flags.NumRefs = NumRefsFlags::Uses2B;
753 Has2B = false;
754 } else {
755 Flags.NumRefs = NumRefsFlags::Uses4B;
756 RelOffset += 4;
757 }
758
759 // Helper to "upgrade" either DataSize or NumRefs by 4B to avoid complicated
760 // padding rules when reading and writing. This also bumps RelOffset.
761 //
762 // The value for NumRefs is strictly limited to UINT32_MAX, but it can be
763 // stored as 8B. This means we can *always* find a size to grow.
764 //
765 // NOTE: Only call this once.
766 auto GrowSizeFieldsBy4B = [&]() {
767 assert(isAligned(Align(4), RelOffset));
768 RelOffset += 4;
769
770 assert(Flags.NumRefs != NumRefsFlags::Uses8B &&
771 "Expected to be able to grow NumRefs8B");
772
773 // First try to grow DataSize. NumRefs will not (yet) be 8B, and if
774 // DataSize is upgraded to 8B it'll already be aligned.
775 //
776 // Failing that, grow NumRefs.
777 if (Flags.DataSize < DataSizeFlags::Uses4B)
778 Flags.DataSize = DataSizeFlags::Uses4B; // DataSize: Packed => 4B.
779 else if (Flags.DataSize < DataSizeFlags::Uses8B)
780 Flags.DataSize = DataSizeFlags::Uses8B; // DataSize: 4B => 8B.
781 else if (Flags.NumRefs < NumRefsFlags::Uses4B)
782 Flags.NumRefs = NumRefsFlags::Uses4B; // NumRefs: Packed => 4B.
783 else
784 Flags.NumRefs = NumRefsFlags::Uses8B; // NumRefs: 4B => 8B.
785 };
786
787 assert(isAligned(Align(4), RelOffset));
788 if (Flags.RefKind == RefKindFlags::InternalRef) {
789 // List of 8B refs should be 8B-aligned. Grow one of the sizes to get this
790 // without padding.
791 if (!isAligned(Align(8), RelOffset))
792 GrowSizeFieldsBy4B();
793
794 assert(isAligned(Align(8), RelOffset));
795 RefsRelOffset = RelOffset;
796 RelOffset += 8 * NumRefs;
797 } else {
798 // The array of 4B refs doesn't need 8B alignment, but the data will need
799 // to be 8B-aligned. Detect this now, and, if necessary, shift everything
800 // by 4B by growing one of the sizes.
801 // If we remove the need for 8B-alignment for data there is <1% savings in
802 // disk storage for a clang build using MCCAS but the 8B-alignment may be
803 // useful in the future so keep it for now.
804 uint64_t RefListSize = 4 * NumRefs;
805 if (!isAligned(Align(8), RelOffset + RefListSize))
806 GrowSizeFieldsBy4B();
807 RefsRelOffset = RelOffset;
808 RelOffset += RefListSize;
809 }
810
811 assert(isAligned(Align(8), RelOffset));
812 DataRelOffset = RelOffset;
813}
814
815uint64_t DataRecordHandle::getDataSize() const {
816 int64_t RelOffset = sizeof(Header);
817 auto *DataSizePtr = reinterpret_cast<const char *>(H) + RelOffset;
818 switch (getLayoutFlags().DataSize) {
819 case DataSizeFlags::Uses1B:
820 return (H->Packed >> ((sizeof(Header::PackTy) - 2) * CHAR_BIT)) & UINT8_MAX;
821 case DataSizeFlags::Uses2B:
822 return (H->Packed >> ((sizeof(Header::PackTy) - 4) * CHAR_BIT)) &
823 UINT16_MAX;
824 case DataSizeFlags::Uses4B:
825 return support::endian::read32le(DataSizePtr);
826 case DataSizeFlags::Uses8B:
827 return support::endian::read64le(DataSizePtr);
828 }
829 llvm_unreachable("Unknown DataSizeFlags enum");
830}
831
832void DataRecordHandle::skipDataSize(LayoutFlags LF, int64_t &RelOffset) const {
833 if (LF.DataSize >= DataSizeFlags::Uses4B)
834 RelOffset += 4;
835 if (LF.DataSize >= DataSizeFlags::Uses8B)
836 RelOffset += 4;
837}
838
839uint32_t DataRecordHandle::getNumRefs() const {
840 LayoutFlags LF = getLayoutFlags();
841 int64_t RelOffset = sizeof(Header);
842 skipDataSize(LF, RelOffset);
843 auto *NumRefsPtr = reinterpret_cast<const char *>(H) + RelOffset;
844 switch (LF.NumRefs) {
845 case NumRefsFlags::Uses0B:
846 return 0;
847 case NumRefsFlags::Uses1B:
848 return (H->Packed >> ((sizeof(Header::PackTy) - 2) * CHAR_BIT)) & UINT8_MAX;
849 case NumRefsFlags::Uses2B:
850 return (H->Packed >> ((sizeof(Header::PackTy) - 4) * CHAR_BIT)) &
851 UINT16_MAX;
852 case NumRefsFlags::Uses4B:
853 return support::endian::read32le(NumRefsPtr);
854 case NumRefsFlags::Uses8B:
855 return support::endian::read64le(NumRefsPtr);
856 }
857 llvm_unreachable("Unknown NumRefsFlags enum");
858}
859
860void DataRecordHandle::skipNumRefs(LayoutFlags LF, int64_t &RelOffset) const {
861 if (LF.NumRefs >= NumRefsFlags::Uses4B)
862 RelOffset += 4;
863 if (LF.NumRefs >= NumRefsFlags::Uses8B)
864 RelOffset += 4;
865}
866
867int64_t DataRecordHandle::getRefsRelOffset() const {
868 LayoutFlags LF = getLayoutFlags();
869 int64_t RelOffset = sizeof(Header);
870 skipDataSize(LF, RelOffset);
871 skipNumRefs(LF, RelOffset);
872 return RelOffset;
873}
874
875int64_t DataRecordHandle::getDataRelOffset() const {
876 LayoutFlags LF = getLayoutFlags();
877 int64_t RelOffset = sizeof(Header);
878 skipDataSize(LF, RelOffset);
879 skipNumRefs(LF, RelOffset);
880 uint32_t RefSize = LF.RefKind == RefKindFlags::InternalRef4B ? 4 : 8;
881 RelOffset += RefSize * getNumRefs();
882 return RelOffset;
883}
884
886 if (UpstreamDB) {
887 if (auto E = UpstreamDB->validate(Deep, Hasher))
888 return E;
889 }
890 return Index.validate([&](FileOffset Offset,
892 -> Error {
893 auto formatError = [&](Twine Msg) {
894 return createStringError(
896 "bad record at 0x" +
897 utohexstr((unsigned)Offset.get(), /*LowerCase=*/true) + ": " +
898 Msg.str());
899 };
900
901 if (Record.Data.size() != sizeof(TrieRecord))
902 return formatError("wrong data record size");
903 if (!isAligned(Align::Of<TrieRecord>(), Record.Data.size()))
904 return formatError("wrong data record alignment");
905
906 auto *R = reinterpret_cast<const TrieRecord *>(Record.Data.data());
907 TrieRecord::Data D = R->load();
908 std::unique_ptr<MemoryBuffer> FileBuffer;
909 if ((uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::Unknown &&
910 (uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::DataPool &&
911 (uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::Standalone &&
912 (uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::StandaloneLeaf &&
913 (uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::StandaloneLeaf0)
914 return formatError("invalid record kind value");
915
917 auto I = getIndexProxyFromRef(Ref);
918 if (!I)
919 return I.takeError();
920
921 switch (D.SK) {
922 case TrieRecord::StorageKind::Unknown:
923 // This could be an abandoned entry due to a termination before updating
924 // the record. It can be reused by later insertion so just skip this entry
925 // for now.
926 return Error::success();
927 case TrieRecord::StorageKind::DataPool:
928 // Check offset is a postive value, and large enough to hold the
929 // header for the data record.
930 if (D.Offset.get() <= 0 ||
931 D.Offset.get() + sizeof(DataRecordHandle::Header) >= DataPool.size())
932 return formatError("datapool record out of bound");
933 break;
934 case TrieRecord::StorageKind::Standalone:
935 case TrieRecord::StorageKind::StandaloneLeaf:
936 case TrieRecord::StorageKind::StandaloneLeaf0:
937 SmallString<256> Path;
938 getStandalonePath(TrieRecord::getStandaloneFilePrefix(D.SK), *I, Path);
939 // If need to validate the content of the file later, just load the
940 // buffer here. Otherwise, just check the existance of the file.
941 if (Deep) {
942 auto File = MemoryBuffer::getFile(Path, /*IsText=*/false,
943 /*RequiresNullTerminator=*/false);
944 if (!File || !*File)
945 return formatError("record file \'" + Path + "\' does not exist");
946
947 FileBuffer = std::move(*File);
948 } else if (!llvm::sys::fs::exists(Path))
949 return formatError("record file \'" + Path + "\' does not exist");
950 }
951
952 if (!Deep)
953 return Error::success();
954
955 auto dataError = [&](Twine Msg) {
957 "bad data for digest \'" + toHex(I->Hash) +
958 "\': " + Msg.str());
959 };
961 ArrayRef<char> StoredData;
962
963 switch (D.SK) {
964 case TrieRecord::StorageKind::Unknown:
965 llvm_unreachable("already handled");
966 case TrieRecord::StorageKind::DataPool: {
967 auto DataRecord = DataRecordHandle::getFromDataPool(DataPool, D.Offset);
968 if (!DataRecord)
969 return dataError(toString(DataRecord.takeError()));
970
971 for (auto InternRef : DataRecord->getRefs()) {
972 auto Index = getIndexProxyFromRef(InternRef);
973 if (!Index)
974 return Index.takeError();
975 Refs.push_back(Index->Hash);
976 }
977 StoredData = DataRecord->getData();
978 break;
979 }
980 case TrieRecord::StorageKind::Standalone: {
981 if (FileBuffer->getBufferSize() < sizeof(DataRecordHandle::Header))
982 return dataError("data record is not big enough to read the header");
983 auto DataRecord = DataRecordHandle::get(FileBuffer->getBufferStart());
984 if (DataRecord.getTotalSize() < FileBuffer->getBufferSize())
985 return dataError(
986 "data record span passed the end of the standalone file");
987 for (auto InternRef : DataRecord.getRefs()) {
988 auto Index = getIndexProxyFromRef(InternRef);
989 if (!Index)
990 return Index.takeError();
991 Refs.push_back(Index->Hash);
992 }
993 StoredData = DataRecord.getData();
994 break;
995 }
996 case TrieRecord::StorageKind::StandaloneLeaf:
997 case TrieRecord::StorageKind::StandaloneLeaf0: {
998 StoredData = arrayRefFromStringRef<char>(FileBuffer->getBuffer());
999 if (D.SK == TrieRecord::StorageKind::StandaloneLeaf0) {
1000 if (!FileBuffer->getBuffer().ends_with('\0'))
1001 return dataError("standalone file is not zero terminated");
1002 StoredData = StoredData.drop_back(1);
1003 }
1004 break;
1005 }
1006 }
1007
1008 SmallVector<uint8_t> ComputedHash;
1009 Hasher(Refs, StoredData, ComputedHash);
1010 if (I->Hash != ArrayRef(ComputedHash))
1011 return dataError("hash mismatch, got \'" + toHex(ComputedHash) +
1012 "\' instead");
1013
1014 return Error::success();
1015 });
1016}
1017
1019 OS << "on-disk-root-path: " << RootPath << "\n";
1020
1021 struct PoolInfo {
1023 };
1025
1026 OS << "\n";
1027 OS << "index:\n";
1028 Index.print(OS, [&](ArrayRef<char> Data) {
1029 assert(Data.size() == sizeof(TrieRecord));
1031 auto *R = reinterpret_cast<const TrieRecord *>(Data.data());
1032 TrieRecord::Data D = R->load();
1033 OS << " SK=";
1034 switch (D.SK) {
1035 case TrieRecord::StorageKind::Unknown:
1036 OS << "unknown ";
1037 break;
1038 case TrieRecord::StorageKind::DataPool:
1039 OS << "datapool ";
1040 Pool.push_back({D.Offset.get()});
1041 break;
1042 case TrieRecord::StorageKind::Standalone:
1043 OS << "standalone-data ";
1044 break;
1045 case TrieRecord::StorageKind::StandaloneLeaf:
1046 OS << "standalone-leaf ";
1047 break;
1048 case TrieRecord::StorageKind::StandaloneLeaf0:
1049 OS << "standalone-leaf+0";
1050 break;
1051 }
1052 OS << " Offset=" << (void *)D.Offset.get();
1053 });
1054 if (Pool.empty())
1055 return;
1056
1057 OS << "\n";
1058 OS << "pool:\n";
1059 llvm::sort(
1060 Pool, [](PoolInfo LHS, PoolInfo RHS) { return LHS.Offset < RHS.Offset; });
1061 for (PoolInfo PI : Pool) {
1062 OS << "- addr=" << (void *)PI.Offset << " ";
1063 auto D = DataRecordHandle::getFromDataPool(DataPool, FileOffset(PI.Offset));
1064 if (!D) {
1065 OS << "error: " << toString(D.takeError());
1066 return;
1067 }
1068
1069 OS << "record refs=" << D->getNumRefs() << " data=" << D->getDataSize()
1070 << " size=" << D->getTotalSize()
1071 << " end=" << (void *)(PI.Offset + D->getTotalSize()) << "\n";
1072 }
1073}
1074
1076OnDiskGraphDB::indexHash(ArrayRef<uint8_t> Hash) {
1077 auto P = Index.insertLazy(
1078 Hash, [](FileOffset TentativeOffset,
1079 OnDiskTrieRawHashMap::ValueProxy TentativeValue) {
1080 assert(TentativeValue.Data.size() == sizeof(TrieRecord));
1081 assert(
1082 isAddrAligned(Align::Of<TrieRecord>(), TentativeValue.Data.data()));
1083 new (TentativeValue.Data.data()) TrieRecord();
1084 });
1085 if (LLVM_UNLIKELY(!P))
1086 return P.takeError();
1087
1088 assert(*P && "Expected insertion");
1089 return getIndexProxyFromPointer(*P);
1090}
1091
1092OnDiskGraphDB::IndexProxy OnDiskGraphDB::getIndexProxyFromPointer(
1094 assert(P);
1095 assert(P.getOffset());
1096 return IndexProxy{P.getOffset(), P->Hash,
1097 *const_cast<TrieRecord *>(
1098 reinterpret_cast<const TrieRecord *>(P->Data.data()))};
1099}
1100
1102 auto I = indexHash(Hash);
1103 if (LLVM_UNLIKELY(!I))
1104 return I.takeError();
1105 return getExternalReference(*I);
1106}
1107
1108ObjectID OnDiskGraphDB::getExternalReference(const IndexProxy &I) {
1109 return getExternalReference(makeInternalRef(I.Offset));
1110}
1111
1112std::optional<ObjectID>
1114 bool CheckUpstream) {
1115 auto tryUpstream =
1116 [&](std::optional<IndexProxy> I) -> std::optional<ObjectID> {
1117 if (!CheckUpstream || !UpstreamDB)
1118 return std::nullopt;
1119 std::optional<ObjectID> UpstreamID =
1120 UpstreamDB->getExistingReference(Digest);
1121 if (LLVM_UNLIKELY(!UpstreamID))
1122 return std::nullopt;
1123 auto Ref = expectedToOptional(indexHash(Digest));
1124 if (!Ref)
1125 return std::nullopt;
1126 if (!I)
1127 I.emplace(*Ref);
1128 return getExternalReference(*I);
1129 };
1130
1131 OnDiskTrieRawHashMap::ConstOnDiskPtr P = Index.find(Digest);
1132 if (!P)
1133 return tryUpstream(std::nullopt);
1134 IndexProxy I = getIndexProxyFromPointer(P);
1135 TrieRecord::Data Obj = I.Ref.load();
1136 if (Obj.SK == TrieRecord::StorageKind::Unknown)
1137 return tryUpstream(I);
1138 return getExternalReference(makeInternalRef(I.Offset));
1139}
1140
1142OnDiskGraphDB::getIndexProxyFromRef(InternalRef Ref) const {
1143 auto P = Index.recoverFromFileOffset(Ref.getFileOffset());
1144 if (LLVM_UNLIKELY(!P))
1145 return P.takeError();
1146 return getIndexProxyFromPointer(*P);
1147}
1148
1150 auto I = getIndexProxyFromRef(Ref);
1151 if (!I)
1152 return I.takeError();
1153 return I->Hash;
1154}
1155
1156ArrayRef<uint8_t> OnDiskGraphDB::getDigest(const IndexProxy &I) const {
1157 return I.Hash;
1158}
1159
1160static OnDiskContent getContentFromHandle(const OnDiskDataAllocator &DataPool,
1161 ObjectHandle OH) {
1162 // Decode ObjectHandle to locate the stored content.
1163 uint64_t Data = OH.getOpaqueData();
1164 if (Data & 1) {
1165 const auto *SDIM =
1166 reinterpret_cast<const StandaloneDataInMemory *>(Data & (-1ULL << 1));
1167 return SDIM->getContent();
1168 }
1169
1170 auto DataHandle =
1171 cantFail(DataRecordHandle::getFromDataPool(DataPool, FileOffset(Data)));
1172 assert(DataHandle.getData().end()[0] == 0 && "Null termination");
1173 return OnDiskContent{DataHandle, std::nullopt};
1174}
1175
1177 OnDiskContent Content = getContentFromHandle(DataPool, Node);
1178 if (Content.Bytes)
1179 return *Content.Bytes;
1180 assert(Content.Record && "Expected record or bytes");
1181 return Content.Record->getData();
1182}
1183
1184InternalRefArrayRef OnDiskGraphDB::getInternalRefs(ObjectHandle Node) const {
1185 if (std::optional<DataRecordHandle> Record =
1186 getContentFromHandle(DataPool, Node).Record)
1187 return Record->getRefs();
1188 return std::nullopt;
1189}
1190
1193 InternalRef Ref = getInternalRef(ExternalRef);
1194 auto I = getIndexProxyFromRef(Ref);
1195 if (!I)
1196 return I.takeError();
1197 TrieRecord::Data Object = I->Ref.load();
1198
1199 if (Object.SK == TrieRecord::StorageKind::Unknown)
1200 return faultInFromUpstream(ExternalRef);
1201
1202 if (Object.SK == TrieRecord::StorageKind::DataPool)
1203 return ObjectHandle::fromFileOffset(Object.Offset);
1204
1205 // Only TrieRecord::StorageKind::Standalone (and variants) need to be
1206 // explicitly loaded.
1207 //
1208 // There's corruption if standalone objects have offsets, or if we get here
1209 // for something that isn't standalone.
1210 if (Object.Offset)
1212 switch (Object.SK) {
1213 case TrieRecord::StorageKind::Unknown:
1214 case TrieRecord::StorageKind::DataPool:
1215 llvm_unreachable("unexpected storage kind");
1216 case TrieRecord::StorageKind::Standalone:
1217 case TrieRecord::StorageKind::StandaloneLeaf0:
1218 case TrieRecord::StorageKind::StandaloneLeaf:
1219 break;
1220 }
1221
1222 // Load it from disk.
1223 //
1224 // Note: Creation logic guarantees that data that needs null-termination is
1225 // suitably 0-padded. Requiring null-termination here would be too expensive
1226 // for extremely large objects that happen to be page-aligned.
1227 SmallString<256> Path;
1228 getStandalonePath(TrieRecord::getStandaloneFilePrefix(Object.SK), *I, Path);
1229
1230 auto BypassSandbox = sys::sandbox::scopedDisable();
1231
1232 auto File = sys::fs::openNativeFileForRead(Path);
1233 if (!File)
1234 return createFileError(Path, File.takeError());
1235
1236 auto CloseFile = make_scope_exit([&]() { sys::fs::closeFile(*File); });
1237
1239 if (std::error_code EC = sys::fs::status(*File, Status))
1241
1242 std::error_code EC;
1243 auto Region = std::make_unique<sys::fs::mapped_file_region>(
1244 *File, sys::fs::mapped_file_region::readonly, Status.getSize(), 0, EC);
1245 if (EC)
1247
1249 static_cast<StandaloneDataMapTy *>(StandaloneData)
1250 ->insert(I->Hash, Object.SK, std::move(Region)));
1251}
1252
1254 auto Presence = getObjectPresence(Ref, /*CheckUpstream=*/true);
1255 if (!Presence)
1256 return Presence.takeError();
1257
1258 switch (*Presence) {
1259 case ObjectPresence::Missing:
1260 return false;
1261 case ObjectPresence::InPrimaryDB:
1262 return true;
1263 case ObjectPresence::OnlyInUpstreamDB:
1264 if (auto FaultInResult = faultInFromUpstream(Ref); !FaultInResult)
1265 return FaultInResult.takeError();
1266 return true;
1267 }
1268 llvm_unreachable("Unknown ObjectPresence enum");
1269}
1270
1272OnDiskGraphDB::getObjectPresence(ObjectID ExternalRef,
1273 bool CheckUpstream) const {
1274 InternalRef Ref = getInternalRef(ExternalRef);
1275 auto I = getIndexProxyFromRef(Ref);
1276 if (!I)
1277 return I.takeError();
1278
1279 TrieRecord::Data Object = I->Ref.load();
1280 if (Object.SK != TrieRecord::StorageKind::Unknown)
1281 return ObjectPresence::InPrimaryDB;
1282
1283 if (!CheckUpstream || !UpstreamDB)
1284 return ObjectPresence::Missing;
1285
1286 std::optional<ObjectID> UpstreamID =
1287 UpstreamDB->getExistingReference(getDigest(*I));
1288 return UpstreamID.has_value() ? ObjectPresence::OnlyInUpstreamDB
1289 : ObjectPresence::Missing;
1290}
1291
1292InternalRef OnDiskGraphDB::makeInternalRef(FileOffset IndexOffset) {
1293 return InternalRef::getFromOffset(IndexOffset);
1294}
1295
1296void OnDiskGraphDB::getStandalonePath(StringRef Prefix, const IndexProxy &I,
1297 SmallVectorImpl<char> &Path) const {
1298 Path.assign(RootPath.begin(), RootPath.end());
1299 sys::path::append(Path,
1300 Prefix + Twine(I.Offset.get()) + "." + CASFormatVersion);
1301}
1302
1303OnDiskContent StandaloneDataInMemory::getContent() const {
1304 bool Leaf0 = false;
1305 bool Leaf = false;
1306 switch (SK) {
1307 default:
1308 llvm_unreachable("Storage kind must be standalone");
1309 case TrieRecord::StorageKind::Standalone:
1310 break;
1311 case TrieRecord::StorageKind::StandaloneLeaf0:
1312 Leaf = Leaf0 = true;
1313 break;
1314 case TrieRecord::StorageKind::StandaloneLeaf:
1315 Leaf = true;
1316 break;
1317 }
1318
1319 if (Leaf) {
1320 StringRef Data(Region->data(), Region->size());
1321 assert(Data.drop_back(Leaf0).end()[0] == 0 &&
1322 "Standalone node data missing null termination");
1323 return OnDiskContent{std::nullopt,
1324 arrayRefFromStringRef<char>(Data.drop_back(Leaf0))};
1325 }
1326
1327 DataRecordHandle Record = DataRecordHandle::get(Region->data());
1328 assert(Record.getData().end()[0] == 0 &&
1329 "Standalone object record missing null termination for data");
1330 return OnDiskContent{Record, std::nullopt};
1331}
1332
1334 uint64_t Size) {
1335 auto BypassSandbox = sys::sandbox::scopedDisable();
1336
1337 assert(Size && "Unexpected request for an empty temp file");
1338 Expected<TempFile> File = TempFile::create(FinalPath + ".%%%%%%");
1339 if (!File)
1340 return File.takeError();
1341
1342 if (Error E = preallocateFileTail(File->FD, 0, Size).takeError())
1343 return createFileError(File->TmpName, std::move(E));
1344
1345 if (auto EC = sys::fs::resize_file_before_mapping_readwrite(File->FD, Size))
1346 return createFileError(File->TmpName, EC);
1347
1348 std::error_code EC;
1351 0, EC);
1352 if (EC)
1353 return createFileError(File->TmpName, EC);
1354 return MappedTempFile(std::move(*File), std::move(Map));
1355}
1356
1357static size_t getPageSize() {
1359 return PageSize;
1360}
1361
1362Error OnDiskGraphDB::createStandaloneLeaf(IndexProxy &I, ArrayRef<char> Data) {
1363 assert(Data.size() > TrieRecord::MaxEmbeddedSize &&
1364 "Expected a bigger file for external content...");
1365
1366 bool Leaf0 = isAligned(Align(getPageSize()), Data.size());
1367 TrieRecord::StorageKind SK = Leaf0 ? TrieRecord::StorageKind::StandaloneLeaf0
1368 : TrieRecord::StorageKind::StandaloneLeaf;
1369
1370 SmallString<256> Path;
1371 int64_t FileSize = Data.size() + Leaf0;
1372 getStandalonePath(TrieRecord::getStandaloneFilePrefix(SK), I, Path);
1373
1374 auto BypassSandbox = sys::sandbox::scopedDisable();
1375
1376 // Write the file. Don't reuse this mapped_file_region, which is read/write.
1377 // Let load() pull up one that's read-only.
1378 Expected<MappedTempFile> File = createTempFile(Path, FileSize);
1379 if (!File)
1380 return File.takeError();
1381 assert(File->size() == (uint64_t)FileSize);
1382 llvm::copy(Data, File->data());
1383 if (Leaf0)
1384 File->data()[Data.size()] = 0;
1385 assert(File->data()[Data.size()] == 0);
1386 if (Error E = File->keep(Path))
1387 return E;
1388
1389 // Store the object reference.
1390 TrieRecord::Data Existing;
1391 {
1392 TrieRecord::Data Leaf{SK, FileOffset()};
1393 if (I.Ref.compare_exchange_strong(Existing, Leaf)) {
1394 recordStandaloneSizeIncrease(FileSize);
1395 return Error::success();
1396 }
1397 }
1398
1399 // If there was a race, confirm that the new value has valid storage.
1400 if (Existing.SK == TrieRecord::StorageKind::Unknown)
1401 return createCorruptObjectError(getDigest(I));
1402
1403 return Error::success();
1404}
1405
1408 auto I = getIndexProxyFromRef(getInternalRef(ID));
1409 if (LLVM_UNLIKELY(!I))
1410 return I.takeError();
1411
1412 // Early return in case the node exists.
1413 {
1414 TrieRecord::Data Existing = I->Ref.load();
1415 if (Existing.SK != TrieRecord::StorageKind::Unknown)
1416 return Error::success();
1417 }
1418
1419 // Big leaf nodes.
1420 if (Refs.empty() && Data.size() > TrieRecord::MaxEmbeddedSize)
1421 return createStandaloneLeaf(*I, Data);
1422
1423 // TODO: Check whether it's worth checking the index for an already existing
1424 // object (like storeTreeImpl() does) before building up the
1425 // InternalRefVector.
1426 InternalRefVector InternalRefs;
1427 for (ObjectID Ref : Refs)
1428 InternalRefs.push_back(getInternalRef(Ref));
1429
1430 // Create the object.
1431
1432 DataRecordHandle::Input Input{InternalRefs, Data};
1433
1434 // Compute the storage kind, allocate it, and create the record.
1435 TrieRecord::StorageKind SK = TrieRecord::StorageKind::Unknown;
1436 FileOffset PoolOffset;
1437 SmallString<256> Path;
1438 std::optional<MappedTempFile> File;
1439 std::optional<uint64_t> FileSize;
1440 auto AllocStandaloneFile = [&](size_t Size) -> Expected<char *> {
1441 getStandalonePath(TrieRecord::getStandaloneFilePrefix(
1442 TrieRecord::StorageKind::Standalone),
1443 *I, Path);
1444 if (Error E = createTempFile(Path, Size).moveInto(File))
1445 return std::move(E);
1446 assert(File->size() == Size);
1447 FileSize = Size;
1448 SK = TrieRecord::StorageKind::Standalone;
1449 return File->data();
1450 };
1451 auto Alloc = [&](size_t Size) -> Expected<char *> {
1452 if (Size <= TrieRecord::MaxEmbeddedSize) {
1453 SK = TrieRecord::StorageKind::DataPool;
1454 auto P = DataPool.allocate(Size);
1455 if (LLVM_UNLIKELY(!P)) {
1456 char *NewAlloc = nullptr;
1457 auto NewE = handleErrors(
1458 P.takeError(), [&](std::unique_ptr<StringError> E) -> Error {
1459 if (E->convertToErrorCode() == std::errc::not_enough_memory)
1460 return AllocStandaloneFile(Size).moveInto(NewAlloc);
1461 return Error(std::move(E));
1462 });
1463 if (!NewE)
1464 return NewAlloc;
1465 return std::move(NewE);
1466 }
1467 PoolOffset = P->getOffset();
1468 LLVM_DEBUG({
1469 dbgs() << "pool-alloc addr=" << (void *)PoolOffset.get()
1470 << " size=" << Size
1471 << " end=" << (void *)(PoolOffset.get() + Size) << "\n";
1472 });
1473 return (*P)->data();
1474 }
1475 return AllocStandaloneFile(Size);
1476 };
1477
1478 DataRecordHandle Record;
1479 if (Error E =
1480 DataRecordHandle::createWithError(Alloc, Input).moveInto(Record))
1481 return E;
1482 assert(Record.getData().end()[0] == 0 && "Expected null-termination");
1483 assert(Record.getData() == Input.Data && "Expected initialization");
1484 assert(SK != TrieRecord::StorageKind::Unknown);
1485 assert(bool(File) != bool(PoolOffset) &&
1486 "Expected either a mapped file or a pooled offset");
1487
1488 // Check for a race before calling MappedTempFile::keep().
1489 //
1490 // Then decide what to do with the file. Better to discard than overwrite if
1491 // another thread/process has already added this.
1492 TrieRecord::Data Existing = I->Ref.load();
1493 {
1494 TrieRecord::Data NewObject{SK, PoolOffset};
1495 if (File) {
1496 if (Existing.SK == TrieRecord::StorageKind::Unknown) {
1497 // Keep the file!
1498 if (Error E = File->keep(Path))
1499 return E;
1500 } else {
1501 File.reset();
1502 }
1503 }
1504
1505 // If we didn't already see a racing/existing write, then try storing the
1506 // new object. If that races, confirm that the new value has valid storage.
1507 //
1508 // TODO: Find a way to reuse the storage from the new-but-abandoned record
1509 // handle.
1510 if (Existing.SK == TrieRecord::StorageKind::Unknown) {
1511 if (I->Ref.compare_exchange_strong(Existing, NewObject)) {
1512 if (FileSize)
1513 recordStandaloneSizeIncrease(*FileSize);
1514 return Error::success();
1515 }
1516 }
1517 }
1518
1519 if (Existing.SK == TrieRecord::StorageKind::Unknown)
1521
1522 // Load existing object.
1523 return Error::success();
1524}
1525
1526void OnDiskGraphDB::recordStandaloneSizeIncrease(size_t SizeIncrease) {
1527 standaloneStorageSize().fetch_add(SizeIncrease, std::memory_order_relaxed);
1528}
1529
1530std::atomic<uint64_t> &OnDiskGraphDB::standaloneStorageSize() const {
1531 MutableArrayRef<uint8_t> UserHeader = DataPool.getUserHeader();
1532 assert(UserHeader.size() == sizeof(std::atomic<uint64_t>));
1533 assert(isAddrAligned(Align(8), UserHeader.data()));
1534 return *reinterpret_cast<std::atomic<uint64_t> *>(UserHeader.data());
1535}
1536
1537uint64_t OnDiskGraphDB::getStandaloneStorageSize() const {
1538 return standaloneStorageSize().load(std::memory_order_relaxed);
1539}
1540
1542 return Index.size() + DataPool.size() + getStandaloneStorageSize();
1543}
1544
1546 unsigned IndexPercent = Index.size() * 100ULL / Index.capacity();
1547 unsigned DataPercent = DataPool.size() * 100ULL / DataPool.capacity();
1548 return std::max(IndexPercent, DataPercent);
1549}
1550
1553 unsigned HashByteSize, OnDiskGraphDB *UpstreamDB,
1554 FaultInPolicy Policy) {
1555 if (std::error_code EC = sys::fs::create_directories(AbsPath))
1556 return createFileError(AbsPath, EC);
1557
1558 constexpr uint64_t MB = 1024ull * 1024ull;
1559 constexpr uint64_t GB = 1024ull * 1024ull * 1024ull;
1560
1561 uint64_t MaxIndexSize = 12 * GB;
1562 uint64_t MaxDataPoolSize = 24 * GB;
1563
1564 if (useSmallMappingSize(AbsPath)) {
1565 MaxIndexSize = 1 * GB;
1566 MaxDataPoolSize = 2 * GB;
1567 }
1568
1569 auto CustomSize = getOverriddenMaxMappingSize();
1570 if (!CustomSize)
1571 return CustomSize.takeError();
1572 if (*CustomSize)
1573 MaxIndexSize = MaxDataPoolSize = **CustomSize;
1574
1575 SmallString<256> IndexPath(AbsPath);
1577 std::optional<OnDiskTrieRawHashMap> Index;
1579 IndexPath, IndexTableName + "[" + HashName + "]",
1580 HashByteSize * CHAR_BIT,
1581 /*DataSize=*/sizeof(TrieRecord), MaxIndexSize,
1582 /*MinFileSize=*/MB)
1583 .moveInto(Index))
1584 return std::move(E);
1585
1586 uint32_t UserHeaderSize = sizeof(std::atomic<uint64_t>);
1587
1588 SmallString<256> DataPoolPath(AbsPath);
1590 std::optional<OnDiskDataAllocator> DataPool;
1591 StringRef PolicyName =
1592 Policy == FaultInPolicy::SingleNode ? "single" : "full";
1594 DataPoolPath,
1595 DataPoolTableName + "[" + HashName + "]" + PolicyName,
1596 MaxDataPoolSize, /*MinFileSize=*/MB, UserHeaderSize,
1597 [](void *UserHeaderPtr) {
1598 new (UserHeaderPtr) std::atomic<uint64_t>(0);
1599 })
1600 .moveInto(DataPool))
1601 return std::move(E);
1602 if (DataPool->getUserHeader().size() != UserHeaderSize)
1604 "unexpected user header in '" + DataPoolPath +
1605 "'");
1606
1607 return std::unique_ptr<OnDiskGraphDB>(new OnDiskGraphDB(
1608 AbsPath, std::move(*Index), std::move(*DataPool), UpstreamDB, Policy));
1609}
1610
1611OnDiskGraphDB::OnDiskGraphDB(StringRef RootPath, OnDiskTrieRawHashMap Index,
1612 OnDiskDataAllocator DataPool,
1613 OnDiskGraphDB *UpstreamDB, FaultInPolicy Policy)
1614 : Index(std::move(Index)), DataPool(std::move(DataPool)),
1615 RootPath(RootPath.str()), UpstreamDB(UpstreamDB), FIPolicy(Policy) {
1616 /// Lifetime for "big" objects not in DataPool.
1617 ///
1618 /// NOTE: Could use ThreadSafeTrieRawHashMap here. For now, doing something
1619 /// simpler on the assumption there won't be much contention since most data
1620 /// is not big. If there is contention, and we've already fixed ObjectProxy
1621 /// object handles to be cheap enough to use consistently, the fix might be
1622 /// to use better use of them rather than optimizing this map.
1623 ///
1624 /// FIXME: Figure out the right number of shards, if any.
1625 StandaloneData = new StandaloneDataMapTy();
1626}
1627
1629 delete static_cast<StandaloneDataMapTy *>(StandaloneData);
1630}
1631
1632Error OnDiskGraphDB::importFullTree(ObjectID PrimaryID,
1633 ObjectHandle UpstreamNode) {
1634 // Copies the full CAS tree from upstream. Uses depth-first copying to protect
1635 // against the process dying during importing and leaving the database with an
1636 // incomplete tree. Note that if the upstream has missing nodes then the tree
1637 // will be copied with missing nodes as well, it won't be considered an error.
1638 struct UpstreamCursor {
1640 size_t RefsCount;
1643 };
1644 /// Keeps track of the state of visitation for current node and all of its
1645 /// parents.
1647 /// Keeps track of the currently visited nodes as they are imported into
1648 /// primary database, from current node and its parents. When a node is
1649 /// entered for visitation it appends its own ID, then appends referenced IDs
1650 /// as they get imported. When a node is fully imported it removes the
1651 /// referenced IDs from the bottom of the stack which leaves its own ID at the
1652 /// bottom, adding to the list of referenced IDs for the parent node.
1653 SmallVector<ObjectID, 128> PrimaryNodesStack;
1654
1655 auto enqueueNode = [&](ObjectID PrimaryID, std::optional<ObjectHandle> Node) {
1656 PrimaryNodesStack.push_back(PrimaryID);
1657 if (!Node)
1658 return;
1659 auto Refs = UpstreamDB->getObjectRefs(*Node);
1660 CursorStack.push_back(
1661 {*Node, (size_t)llvm::size(Refs), Refs.begin(), Refs.end()});
1662 };
1663
1664 enqueueNode(PrimaryID, UpstreamNode);
1665
1666 while (!CursorStack.empty()) {
1667 UpstreamCursor &Cur = CursorStack.back();
1668 if (Cur.RefI == Cur.RefE) {
1669 // Copy the node data into the primary store.
1670 // FIXME: Use hard-link or cloning if the file-system supports it and data
1671 // is stored into a separate file.
1672
1673 // The bottom of \p PrimaryNodesStack contains the primary ID for the
1674 // current node plus the list of imported referenced IDs.
1675 assert(PrimaryNodesStack.size() >= Cur.RefsCount + 1);
1676 ObjectID PrimaryID = *(PrimaryNodesStack.end() - Cur.RefsCount - 1);
1677 auto PrimaryRefs = ArrayRef(PrimaryNodesStack)
1678 .slice(PrimaryNodesStack.size() - Cur.RefsCount);
1679 auto Data = UpstreamDB->getObjectData(Cur.Node);
1680 if (Error E = store(PrimaryID, PrimaryRefs, Data))
1681 return E;
1682 // Remove the current node and its IDs from the stack.
1683 PrimaryNodesStack.truncate(PrimaryNodesStack.size() - Cur.RefsCount);
1684 CursorStack.pop_back();
1685 continue;
1686 }
1687
1688 ObjectID UpstreamID = *(Cur.RefI++);
1689 auto PrimaryID = getReference(UpstreamDB->getDigest(UpstreamID));
1690 if (LLVM_UNLIKELY(!PrimaryID))
1691 return PrimaryID.takeError();
1692 if (containsObject(*PrimaryID, /*CheckUpstream=*/false)) {
1693 // This \p ObjectID already exists in the primary. Either it was imported
1694 // via \p importFullTree or the client created it, in which case the
1695 // client takes responsibility for how it was formed.
1696 enqueueNode(*PrimaryID, std::nullopt);
1697 continue;
1698 }
1699 Expected<std::optional<ObjectHandle>> UpstreamNode =
1700 UpstreamDB->load(UpstreamID);
1701 if (!UpstreamNode)
1702 return UpstreamNode.takeError();
1703 enqueueNode(*PrimaryID, *UpstreamNode);
1704 }
1705
1706 assert(PrimaryNodesStack.size() == 1);
1707 assert(PrimaryNodesStack.front() == PrimaryID);
1708 return Error::success();
1709}
1710
1711Error OnDiskGraphDB::importSingleNode(ObjectID PrimaryID,
1712 ObjectHandle UpstreamNode) {
1713 // Copies only a single node, it doesn't copy the referenced nodes.
1714
1715 // Copy the node data into the primary store.
1716 // FIXME: Use hard-link or cloning if the file-system supports it and data is
1717 // stored into a separate file.
1718 auto Data = UpstreamDB->getObjectData(UpstreamNode);
1719 auto UpstreamRefs = UpstreamDB->getObjectRefs(UpstreamNode);
1721 Refs.reserve(llvm::size(UpstreamRefs));
1722 for (ObjectID UpstreamRef : UpstreamRefs) {
1723 auto Ref = getReference(UpstreamDB->getDigest(UpstreamRef));
1724 if (LLVM_UNLIKELY(!Ref))
1725 return Ref.takeError();
1726 Refs.push_back(*Ref);
1727 }
1728
1729 return store(PrimaryID, Refs, Data);
1730}
1731
1732Expected<std::optional<ObjectHandle>>
1733OnDiskGraphDB::faultInFromUpstream(ObjectID PrimaryID) {
1734 if (!UpstreamDB)
1735 return std::nullopt;
1736
1737 auto UpstreamID = UpstreamDB->getReference(getDigest(PrimaryID));
1738 if (LLVM_UNLIKELY(!UpstreamID))
1739 return UpstreamID.takeError();
1740
1741 Expected<std::optional<ObjectHandle>> UpstreamNode =
1742 UpstreamDB->load(*UpstreamID);
1743 if (!UpstreamNode)
1744 return UpstreamNode.takeError();
1745 if (!*UpstreamNode)
1746 return std::nullopt;
1747
1748 if (Error E = FIPolicy == FaultInPolicy::SingleNode
1749 ? importSingleNode(PrimaryID, **UpstreamNode)
1750 : importFullTree(PrimaryID, **UpstreamNode))
1751 return std::move(E);
1752 return load(PrimaryID);
1753}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Mark last scratch load
AMDGPU Prepare AGPR Alloc
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_UNLIKELY(EXPR)
Definition Compiler.h:336
This file defines the DenseMap class.
static cl::opt< int > PageSize("imp-null-check-page-size", cl::desc("The page size of the target in bytes"), cl::init(4096), cl::Hidden)
static bool lookup(const GsymReader &GR, DataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr, uint64_t Addr, SourceLocations &SrcLocs, llvm::Error &Err)
A Lookup helper functions.
#define I(x, y, z)
Definition MD5.cpp:57
#define H(x, y, z)
Definition MD5.cpp:56
This file declares interface for OnDiskDataAllocator, a file backed data pool can be used to allocate...
static constexpr StringLiteral FilePrefixLeaf0
static constexpr StringLiteral DataPoolTableName
static constexpr StringLiteral FilePrefixObject
static constexpr StringLiteral FilePrefixLeaf
static constexpr StringLiteral IndexFilePrefix
static OnDiskContent getContentFromHandle(const OnDiskDataAllocator &DataPool, ObjectHandle OH)
static constexpr StringLiteral DataPoolFilePrefix
static Error createCorruptObjectError(Expected< ArrayRef< uint8_t > > ID)
static size_t getPageSize()
static Expected< MappedTempFile > createTempFile(StringRef FinalPath, uint64_t Size)
static constexpr StringLiteral IndexTableName
This declares OnDiskGraphDB, an ondisk CAS database with a fixed length hash.
This file declares interface for OnDiskTrieRawHashMap, a thread-safe and (mostly) lock-free hash map ...
#define P(N)
Provides a library for accessing information about this process and other processes on the operating ...
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
static Split data
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:114
The Input class is used to parse a yaml document into in-memory structs and vectors.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
ArrayRef< T > drop_back(size_t N=1) const
Drop the last N elements of the array.
Definition ArrayRef.h:201
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
const T * data() const
Definition ArrayRef.h:139
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static ErrorSuccess success()
Create a success value.
Definition Error.h:336
Tagged union holding either a T or a Error.
Definition Error.h:485
Error takeError()
Take ownership of the stored error.
Definition Error.h:612
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:298
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void reserve(size_type N)
void truncate(size_type N)
Like resize, but requires that N is less than size().
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:864
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
FileOffset is a wrapper around uint64_t to represent the offset of data from the beginning of the fil...
Definition FileOffset.h:24
uint64_t get() const
Definition FileOffset.h:26
Handle to a loaded object in a ObjectStore instance.
LLVM_ABI_FOR_TEST Expected< ArrayRef< char > > get(FileOffset Offset, size_t Size) const
Get the data of Size stored at the given Offset.
static LLVM_ABI_FOR_TEST Expected< OnDiskDataAllocator > create(const Twine &Path, const Twine &TableName, uint64_t MaxFileSize, std::optional< uint64_t > NewFileInitialSize, uint32_t UserHeaderSize=0, function_ref< void(void *)> UserHeaderInit=nullptr)
LLVM_ABI_FOR_TEST size_t size() const
OnDiskTrieRawHashMap is a persistent trie data structure used as hash maps.
static LLVM_ABI_FOR_TEST Expected< OnDiskTrieRawHashMap > create(const Twine &Path, const Twine &TrieName, size_t NumHashBits, uint64_t DataSize, uint64_t MaxFileSize, std::optional< uint64_t > NewFileInitialSize, std::optional< size_t > NewTableNumRootBits=std::nullopt, std::optional< size_t > NewTableNumSubtrieBits=std::nullopt)
Gets or creates a file at Path with a hash-mapped trie named TrieName.
static std::optional< InternalRef4B > tryToShrink(InternalRef Ref)
Shrink to 4B reference.
Array of internal node references.
Standard 8 byte reference inside OnDiskGraphDB.
static InternalRef getFromOffset(FileOffset Offset)
Handle for a loaded node object.
static ObjectHandle fromFileOffset(FileOffset Offset)
static ObjectHandle fromMemory(uintptr_t Ptr)
Reference to a node.
On-disk CAS nodes database, independent of a particular hashing algorithm.
FaultInPolicy
How to fault-in nodes if an upstream database is used.
@ SingleNode
Copy only the requested node.
void print(raw_ostream &OS) const
LLVM_ABI_FOR_TEST Expected< std::optional< ObjectHandle > > load(ObjectID Ref)
Expected< bool > isMaterialized(ObjectID Ref)
Check whether the object associated with Ref is stored in the CAS.
Error validate(bool Deep, HashingFuncT Hasher) const
Validate the OnDiskGraphDB.
object_refs_range getObjectRefs(ObjectHandle Node) const
unsigned getHardStorageLimitUtilization() const
LLVM_ABI_FOR_TEST Error store(ObjectID ID, ArrayRef< ObjectID > Refs, ArrayRef< char > Data)
Associate data & references with a particular object ID.
ArrayRef< uint8_t > getDigest(ObjectID Ref) const
static LLVM_ABI_FOR_TEST Expected< std::unique_ptr< OnDiskGraphDB > > open(StringRef Path, StringRef HashName, unsigned HashByteSize, OnDiskGraphDB *UpstreamDB=nullptr, FaultInPolicy Policy=FaultInPolicy::FullTree)
Open the on-disk store from a directory.
LLVM_ABI_FOR_TEST size_t getStorageSize() const
bool containsObject(ObjectID Ref, bool CheckUpstream=true) const
Check whether the object associated with Ref is stored in the CAS.
LLVM_ABI_FOR_TEST Expected< ObjectID > getReference(ArrayRef< uint8_t > Hash)
Form a reference for the provided hash.
function_ref< void( ArrayRef< ArrayRef< uint8_t > >, ArrayRef< char >, SmallVectorImpl< uint8_t > &)> HashingFuncT
Hashing function type for validation.
LLVM_ABI_FOR_TEST ArrayRef< char > getObjectData(ObjectHandle Node) const
LLVM_ABI_FOR_TEST std::optional< ObjectID > getExistingReference(ArrayRef< uint8_t > Digest, bool CheckUpstream=true)
Get an existing reference to the object Digest.
An efficient, type-erasing, non-owning reference to a callable.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
static unsigned getPageSizeEstimate()
Get the process's estimated page size.
Definition Process.h:62
LLVM_ABI Error keep(const Twine &Name)
static LLVM_ABI Expected< TempFile > create(const Twine &Model, unsigned Mode=all_read|all_write, OpenFlags ExtraFlags=OF_None)
This creates a temporary file with createUniqueFile and schedules it for deletion with sys::RemoveFil...
Represents the result of a call to sys::fs::status().
Definition FileSystem.h:222
This class represents a memory mapped file.
LLVM_ABI size_t size() const
Definition Path.cpp:1178
@ readonly
May only access map via const_data as read only.
@ readwrite
May access map via data and modify it. Written to path.
LLVM_ABI char * data() const
Definition Path.cpp:1183
#define UINT64_MAX
Definition DataTypes.h:77
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
constexpr StringLiteral CASFormatVersion
The version for all the ondisk database files.
Expected< std::optional< uint64_t > > getOverriddenMaxMappingSize()
Retrieves an overridden maximum mapping size for CAS files, if any, speicified by LLVM_CAS_MAX_MAPPIN...
Expected< size_t > preallocateFileTail(int FD, size_t CurrentSize, size_t NewSize)
Allocate space for the file FD on disk, if the filesystem supports it.
bool useSmallMappingSize(const Twine &Path)
Whether to use a small file mapping for ondisk databases created in Path.
uint64_t getDataSize(const FuncRecordTy *Record)
Return the coverage map data size for the function.
uint64_t read64le(const void *P)
Definition Endian.h:435
void write64le(void *P, uint64_t V)
Definition Endian.h:478
void write32le(void *P, uint32_t V)
Definition Endian.h:475
uint32_t read32le(const void *P)
Definition Endian.h:432
LLVM_ABI std::error_code closeFile(file_t &F)
Close the file object.
LLVM_ABI std::error_code rename(const Twine &from, const Twine &to)
Rename from to to.
std::error_code resize_file_before_mapping_readwrite(int FD, uint64_t Size)
Resize FD to Size before mapping mapped_file_region::readwrite.
Definition FileSystem.h:410
LLVM_ABI bool exists(const basic_file_status &status)
Does file exist?
Definition Path.cpp:1086
LLVM_ABI std::error_code createUniqueFile(const Twine &Model, int &ResultFD, SmallVectorImpl< char > &ResultPath, OpenFlags Flags=OF_None, unsigned Mode=all_read|all_write)
Create a uniquely named file.
Definition Path.cpp:874
LLVM_ABI std::error_code remove(const Twine &path, bool IgnoreNonExisting=true)
Remove path.
LLVM_ABI Expected< file_t > openNativeFileForRead(const Twine &Name, OpenFlags Flags=OF_None, SmallVectorImpl< char > *RealPath=nullptr)
Opens the file with the given name in a read-only mode, returning its open file descriptor.
LLVM_ABI std::error_code create_directories(const Twine &path, bool IgnoreExisting=true, perms Perms=owner_all|group_all)
Create all the non-existent directories in path.
Definition Path.cpp:972
LLVM_ABI file_t convertFDToNativeFile(int FD)
Converts from a Posix file descriptor number to a native file handle.
Definition FileSystem.h:991
LLVM_ABI std::error_code status(const Twine &path, file_status &result, bool follow=true)
Get file status as if by POSIX stat().
LLVM_ABI void append(SmallVectorImpl< char > &path, const Twine &a, const Twine &b="", const Twine &c="", const Twine &d="")
Append to path.
Definition Path.cpp:457
ScopedSetting scopedDisable()
Definition IOSandbox.h:36
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
Error createFileError(const Twine &F, Error E)
Concatenate a source file path and/or name with an Error.
Definition Error.h:1399
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1667
ArrayRef< CharT > arrayRefFromStringRef(StringRef Input)
Construct a string ref from an array ref of unsigned chars.
std::error_code make_error_code(BitcodeError E)
@ Done
Definition Threading.h:60
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:134
Error handleErrors(Error E, HandlerTs &&... Hs)
Pass the ErrorInfo(s) contained in E to their respective handlers.
Definition Error.h:967
FunctionAddr VTableAddr uintptr_t uintptr_t DataSize
Definition InstrProf.h:267
std::string utohexstr(uint64_t X, bool LowerCase=false, unsigned Width=0)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition Error.h:1305
@ argument_out_of_domain
Definition Errc.h:37
@ illegal_byte_sequence
Definition Errc.h:52
@ invalid_argument
Definition Errc.h:56
std::optional< T > expectedToOptional(Expected< T > &&E)
Convert an Expected to an Optional without doing anything.
Definition Error.h:1094
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1634
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
@ Ref
The access may reference the value stored in memory.
Definition ModRef.h:32
@ Other
Any other memory.
Definition ModRef.h:68
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
Definition Error.h:769
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:189
auto make_scope_exit(Callable &&F)
Definition ScopeExit.h:53
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:2002
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
OutputIt copy(R &&Range, OutputIt Out)
Definition STLExtras.h:1883
void toHex(ArrayRef< uint8_t > Input, bool LowerCase, SmallVectorImpl< char > &Output)
Convert buffer Input to its hexadecimal representation. The returned string is double the size of Inp...
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1915
LLVM_ABI Error errorCodeToError(std::error_code EC)
Helper for converting an std::error_code to a Error.
Definition Error.cpp:111
void consumeError(Error Err)
Consume a Error without doing anything.
Definition Error.h:1083
bool isAddrAligned(Align Lhs, const void *Addr)
Checks that Addr is a multiple of the alignment.
Definition Alignment.h:139
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:870
Proxy for an on-disk index record.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
static constexpr Align Of()
Allow constructions of constexpr Align from types.
Definition Alignment.h:94
Const value proxy to access the records stored in TrieRawHashMap.
Value proxy to access the records stored in TrieRawHashMap.