OnDiskGraphDB.cpp
1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file implements OnDiskGraphDB, an on-disk CAS node database that is
11/// independent of any particular hashing algorithm. It only needs to be
12/// configured with the hash size, and it controls the schema of the storage.
13///
14/// OnDiskGraphDB defines:
15///
16/// - How the data is stored inside the database, either as a standalone file, or
17/// allocated inside a datapool.
18/// - How references to other objects inside the same database are stored.
19/// They are stored as internal references instead of full hash values, to save
20/// space.
21/// - How to chain databases together and import objects from upstream
22/// databases.
23///
24/// Here's a top-level description of the current layout:
25///
26/// - db/index.<version>: a file for the "index" table, named by \a
27/// IndexTableName and managed by \a TrieRawHashMap. The contents are 8B
28/// that are accessed atomically, describing the object kind and where/how
29/// it's stored (including an optional file offset). See \a TrieRecord for
30/// more details.
31/// - db/data.<version>: a file for the "data" table, named by \a
32/// DataPoolTableName and managed by \a DataStore. New objects within
33/// TrieRecord::MaxEmbeddedSize are inserted here as \a
34/// TrieRecord::StorageKind::DataPool.
35/// - db/obj.<offset>.<version>: a file storing an object outside the main
36/// "data" table, named by its offset into the "index" table, with the
37/// format of \a TrieRecord::StorageKind::Standalone.
38/// - db/leaf.<offset>.<version>: a file storing a leaf node outside the
39/// main "data" table, named by its offset into the "index" table, with
40/// the format of \a TrieRecord::StorageKind::StandaloneLeaf.
41/// - db/leaf+0.<offset>.<version>: a file storing a null-terminated leaf object
42/// outside the main "data" table, named by its offset into the "index" table,
43/// with the format of \a TrieRecord::StorageKind::StandaloneLeaf0.
44//
45//===----------------------------------------------------------------------===//
46
48#include "OnDiskCommon.h"
49#include "llvm/ADT/DenseMap.h"
50#include "llvm/ADT/ScopeExit.h"
57#include "llvm/Support/Errc.h"
58#include "llvm/Support/Error.h"
63#include "llvm/Support/Path.h"
65#include <atomic>
66#include <mutex>
67#include <optional>
68
69#define DEBUG_TYPE "on-disk-cas"
70
71using namespace llvm;
72using namespace llvm::cas;
73using namespace llvm::cas::ondisk;
74
75static constexpr StringLiteral IndexTableName = "llvm.cas.index";
76static constexpr StringLiteral DataPoolTableName = "llvm.cas.data";
77
78static constexpr StringLiteral IndexFilePrefix = "index.";
79static constexpr StringLiteral DataPoolFilePrefix = "data.";
80
81static constexpr StringLiteral FilePrefixObject = "obj.";
82static constexpr StringLiteral FilePrefixLeaf = "leaf.";
83static constexpr StringLiteral FilePrefixLeaf0 = "leaf+0.";
84
86 if (!ID)
87 return ID.takeError();
88
90 "corrupt object '" + toHex(*ID) + "'");
91}
92
93namespace {
94
95/// Trie record data: 8 bytes, atomic<uint64_t>
96/// - 1-byte: StorageKind
97/// - 7-bytes: DataStoreOffset (offset into referenced file)
98class TrieRecord {
99public:
100 enum class StorageKind : uint8_t {
101 /// Unknown object.
102 Unknown = 0,
103
104 /// data.vX: main pool, full DataStore record.
105 DataPool = 1,
106
107 /// obj.<TrieRecordOffset>.vX: standalone, with a full DataStore record.
108 Standalone = 10,
109
110 /// leaf.<TrieRecordOffset>.vX: standalone, just the data. File contents
111 /// exactly the data content and file size matches the data size. No refs.
112 StandaloneLeaf = 11,
113
114 /// leaf+0.<TrieRecordOffset>.vX: standalone, just the data plus an
115 /// extra null character ('\0'). File size is 1 bigger than the data size.
116 /// No refs.
117 StandaloneLeaf0 = 12,
118 };
119
120 static StringRef getStandaloneFilePrefix(StorageKind SK) {
121 switch (SK) {
122 default:
123 llvm_unreachable("Expected standalone storage kind");
124 case TrieRecord::StorageKind::Standalone:
125 return FilePrefixObject;
126 case TrieRecord::StorageKind::StandaloneLeaf:
127 return FilePrefixLeaf;
128 case TrieRecord::StorageKind::StandaloneLeaf0:
129 return FilePrefixLeaf0;
130 }
131 }
132
133 enum Limits : int64_t {
 134 /// Objects of 64KB or more are saved standalone instead of being embedded.
135 MaxEmbeddedSize = 64LL * 1024LL - 1,
136 };
137
138 struct Data {
139 StorageKind SK = StorageKind::Unknown;
140 FileOffset Offset;
141 };
142
 143 /// Pack StorageKind and Offset from Data into an 8-byte TrieRecord.
144 static uint64_t pack(Data D) {
145 assert(D.Offset.get() < (int64_t)(1ULL << 56));
146 uint64_t Packed = uint64_t(D.SK) << 56 | D.Offset.get();
147 assert(D.SK != StorageKind::Unknown || Packed == 0);
148#ifndef NDEBUG
149 Data RoundTrip = unpack(Packed);
150 assert(D.SK == RoundTrip.SK);
151 assert(D.Offset.get() == RoundTrip.Offset.get());
152#endif
153 return Packed;
154 }
155
156 // Unpack TrieRecord into Data.
157 static Data unpack(uint64_t Packed) {
158 Data D;
159 if (!Packed)
160 return D;
161 D.SK = (StorageKind)(Packed >> 56);
162 D.Offset = FileOffset(Packed & (UINT64_MAX >> 8));
163 return D;
164 }
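  // Worked example (illustrative values): SK = StorageKind::DataPool (1) with
  // Offset = 0x1234 packs to 0x0100000000001234; unpack() recovers SK from the
  // top byte and the offset from the low 56 bits.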
165
166 TrieRecord() : Storage(0) {}
167
168 Data load() const { return unpack(Storage); }
169 bool compare_exchange_strong(Data &Existing, Data New);
170
171private:
172 std::atomic<uint64_t> Storage;
173};
174
175/// DataStore record data: 4B + size? + refs? + data + 0
176/// - 4-bytes: Header
177/// - {0,4,8}-bytes: DataSize (may be packed in Header)
178/// - {0,4,8}-bytes: NumRefs (may be packed in Header)
179/// - NumRefs*{4,8}-bytes: Refs[] (end-ptr is 8-byte aligned)
180/// - <data>
181/// - 1-byte: 0-term
182struct DataRecordHandle {
183 /// NumRefs storage: 4B, 2B, 1B, or 0B (no refs). Or, 8B, for alignment
184 /// convenience to avoid computing padding later.
185 enum class NumRefsFlags : uint8_t {
186 Uses0B = 0U,
187 Uses1B = 1U,
188 Uses2B = 2U,
189 Uses4B = 3U,
190 Uses8B = 4U,
191 Max = Uses8B,
192 };
193
194 /// DataSize storage: 8B, 4B, 2B, or 1B.
195 enum class DataSizeFlags {
196 Uses1B = 0U,
197 Uses2B = 1U,
198 Uses4B = 2U,
199 Uses8B = 3U,
200 Max = Uses8B,
201 };
202
203 /// Kind of ref stored in Refs[]: InternalRef or InternalRef4B.
204 enum class RefKindFlags {
205 InternalRef = 0U,
206 InternalRef4B = 1U,
207 Max = InternalRef4B,
208 };
209
210 enum Counts : int {
211 NumRefsShift = 0,
212 NumRefsBits = 3,
213 DataSizeShift = NumRefsShift + NumRefsBits,
214 DataSizeBits = 2,
215 RefKindShift = DataSizeShift + DataSizeBits,
216 RefKindBits = 1,
217 };
218 static_assert(((UINT32_MAX << NumRefsBits) & (uint32_t)NumRefsFlags::Max) ==
219 0,
220 "Not enough bits");
221 static_assert(((UINT32_MAX << DataSizeBits) & (uint32_t)DataSizeFlags::Max) ==
222 0,
223 "Not enough bits");
224 static_assert(((UINT32_MAX << RefKindBits) & (uint32_t)RefKindFlags::Max) ==
225 0,
226 "Not enough bits");
227
228 /// Layout of the DataRecordHandle and how to decode it.
229 struct LayoutFlags {
230 NumRefsFlags NumRefs;
231 DataSizeFlags DataSize;
232 RefKindFlags RefKind;
233
234 static uint64_t pack(LayoutFlags LF) {
235 unsigned Packed = ((unsigned)LF.NumRefs << NumRefsShift) |
236 ((unsigned)LF.DataSize << DataSizeShift) |
237 ((unsigned)LF.RefKind << RefKindShift);
238#ifndef NDEBUG
239 LayoutFlags RoundTrip = unpack(Packed);
240 assert(LF.NumRefs == RoundTrip.NumRefs);
241 assert(LF.DataSize == RoundTrip.DataSize);
242 assert(LF.RefKind == RoundTrip.RefKind);
243#endif
244 return Packed;
245 }
246 static LayoutFlags unpack(uint64_t Storage) {
247 assert(Storage <= UINT8_MAX && "Expect storage to fit in a byte");
248 LayoutFlags LF;
249 LF.NumRefs =
250 (NumRefsFlags)((Storage >> NumRefsShift) & ((1U << NumRefsBits) - 1));
251 LF.DataSize = (DataSizeFlags)((Storage >> DataSizeShift) &
252 ((1U << DataSizeBits) - 1));
253 LF.RefKind =
254 (RefKindFlags)((Storage >> RefKindShift) & ((1U << RefKindBits) - 1));
255 return LF;
256 }
257 };
258
259 /// Header layout:
260 /// - 1-byte: LayoutFlags
261 /// - 1-byte: 1B size field
262 /// - {0,2}-bytes: 2B size field
263 struct Header {
264 using PackTy = uint32_t;
265 PackTy Packed;
266
267 static constexpr unsigned LayoutFlagsShift =
268 (sizeof(PackTy) - 1) * CHAR_BIT;
269 };
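  // Example of Header packing (illustrative values): with DataSize stored in
  // the 1B field (value 42) and NumRefs stored in the 2B field (value 300),
  // Packed is (packed LayoutFlags byte << 24) | (42 << 16) | 300, matching the
  // shifts used in constructImpl(), getDataSize(), and getNumRefs().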
270
271 struct Input {
272 InternalRefArrayRef Refs;
273 ArrayRef<char> Data;
274 };
275
276 LayoutFlags getLayoutFlags() const {
277 return LayoutFlags::unpack(H->Packed >> Header::LayoutFlagsShift);
278 }
279
280 uint64_t getDataSize() const;
281 void skipDataSize(LayoutFlags LF, int64_t &RelOffset) const;
282 uint32_t getNumRefs() const;
283 void skipNumRefs(LayoutFlags LF, int64_t &RelOffset) const;
284 int64_t getRefsRelOffset() const;
285 int64_t getDataRelOffset() const;
286
287 static uint64_t getTotalSize(uint64_t DataRelOffset, uint64_t DataSize) {
288 return DataRelOffset + DataSize + 1;
289 }
290 uint64_t getTotalSize() const {
291 return getDataRelOffset() + getDataSize() + 1;
292 }
293
294 /// Describe the layout of data stored and how to decode from
295 /// DataRecordHandle.
296 struct Layout {
297 explicit Layout(const Input &I);
298
299 LayoutFlags Flags;
300 uint64_t DataSize = 0;
301 uint32_t NumRefs = 0;
302 int64_t RefsRelOffset = 0;
303 int64_t DataRelOffset = 0;
304 uint64_t getTotalSize() const {
305 return DataRecordHandle::getTotalSize(DataRelOffset, DataSize);
306 }
307 };
308
309 InternalRefArrayRef getRefs() const {
310 assert(H && "Expected valid handle");
311 auto *BeginByte = reinterpret_cast<const char *>(H) + getRefsRelOffset();
312 size_t Size = getNumRefs();
313 if (!Size)
314 return InternalRefArrayRef();
315 if (getLayoutFlags().RefKind == RefKindFlags::InternalRef4B)
316 return ArrayRef(reinterpret_cast<const InternalRef4B *>(BeginByte), Size);
317 return ArrayRef(reinterpret_cast<const InternalRef *>(BeginByte), Size);
318 }
319
320 ArrayRef<char> getData() const {
321 assert(H && "Expected valid handle");
322 return ArrayRef(reinterpret_cast<const char *>(H) + getDataRelOffset(),
323 getDataSize());
324 }
325
326 static DataRecordHandle create(function_ref<char *(size_t Size)> Alloc,
327 const Input &I);
328 static Expected<DataRecordHandle>
329 createWithError(function_ref<Expected<char *>(size_t Size)> Alloc,
330 const Input &I);
331 static DataRecordHandle construct(char *Mem, const Input &I);
332
333 static DataRecordHandle get(const char *Mem) {
334 return DataRecordHandle(
335 *reinterpret_cast<const DataRecordHandle::Header *>(Mem));
336 }
337 static Expected<DataRecordHandle>
338 getFromDataPool(const OnDiskDataAllocator &Pool, FileOffset Offset);
339
340 explicit operator bool() const { return H; }
341 const Header &getHeader() const { return *H; }
342
343 DataRecordHandle() = default;
344 explicit DataRecordHandle(const Header &H) : H(&H) {}
345
346private:
347 static DataRecordHandle constructImpl(char *Mem, const Input &I,
348 const Layout &L);
349 const Header *H = nullptr;
350};
351
352/// Proxy for any on-disk object or raw data.
353struct OnDiskContent {
354 std::optional<DataRecordHandle> Record;
355 std::optional<ArrayRef<char>> Bytes;
356};
357
358/// Data loaded into memory from a standalone file.
359class StandaloneDataInMemory {
360public:
361 OnDiskContent getContent() const;
362
363 StandaloneDataInMemory(std::unique_ptr<sys::fs::mapped_file_region> Region,
364 TrieRecord::StorageKind SK)
365 : Region(std::move(Region)), SK(SK) {
366#ifndef NDEBUG
367 bool IsStandalone = false;
368 switch (SK) {
369 case TrieRecord::StorageKind::Standalone:
370 case TrieRecord::StorageKind::StandaloneLeaf:
371 case TrieRecord::StorageKind::StandaloneLeaf0:
372 IsStandalone = true;
373 break;
374 default:
375 break;
376 }
377 assert(IsStandalone);
378#endif
379 }
380
381private:
382 std::unique_ptr<sys::fs::mapped_file_region> Region;
383 TrieRecord::StorageKind SK;
384};
385
386/// Container to lookup loaded standalone objects.
387template <size_t NumShards> class StandaloneDataMap {
388 static_assert(isPowerOf2_64(NumShards), "Expected power of 2");
389
390public:
391 uintptr_t insert(ArrayRef<uint8_t> Hash, TrieRecord::StorageKind SK,
392 std::unique_ptr<sys::fs::mapped_file_region> Region);
393
394 const StandaloneDataInMemory *lookup(ArrayRef<uint8_t> Hash) const;
395 bool count(ArrayRef<uint8_t> Hash) const { return bool(lookup(Hash)); }
396
397private:
398 struct Shard {
399 /// Needs to store a std::unique_ptr for a stable address identity.
400 DenseMap<const uint8_t *, std::unique_ptr<StandaloneDataInMemory>> Map;
401 mutable std::mutex Mutex;
402 };
403 Shard &getShard(ArrayRef<uint8_t> Hash) {
404 return const_cast<Shard &>(
405 const_cast<const StandaloneDataMap *>(this)->getShard(Hash));
406 }
407 const Shard &getShard(ArrayRef<uint8_t> Hash) const {
408 static_assert(NumShards <= 256, "Expected only 8 bits of shard");
409 return Shards[Hash[0] % NumShards];
410 }
411
412 Shard Shards[NumShards];
413};
414
415using StandaloneDataMapTy = StandaloneDataMap<16>;
416
417/// A vector of internal node references.
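/// Starts out storing compact 4-byte refs (InternalRef4B); the first ref that
/// cannot be shrunk switches the vector to full 8-byte refs and re-widens the
/// refs collected so far (see push_back below).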
418class InternalRefVector {
419public:
420 void push_back(InternalRef Ref) {
421 if (NeedsFull)
422 return FullRefs.push_back(Ref);
423 if (std::optional<InternalRef4B> Small = InternalRef4B::tryToShrink(Ref))
424 return SmallRefs.push_back(*Small);
425 NeedsFull = true;
426 assert(FullRefs.empty());
427 FullRefs.reserve(SmallRefs.size() + 1);
428 for (InternalRef4B Small : SmallRefs)
429 FullRefs.push_back(Small);
430 FullRefs.push_back(Ref);
431 SmallRefs.clear();
432 }
433
434 operator InternalRefArrayRef() const {
435 assert(SmallRefs.empty() || FullRefs.empty());
436 return NeedsFull ? InternalRefArrayRef(FullRefs)
437 : InternalRefArrayRef(SmallRefs);
438 }
439
440private:
441 bool NeedsFull = false;
444};
445
446} // namespace
447
448Expected<DataRecordHandle> DataRecordHandle::createWithError(
449 function_ref<Expected<char *>(size_t Size)> Alloc, const Input &I) {
450 Layout L(I);
451 if (Expected<char *> Mem = Alloc(L.getTotalSize()))
452 return constructImpl(*Mem, I, L);
453 else
454 return Mem.takeError();
455}
456
458 // Store the file offset as it is.
459 assert(!(Offset.get() & 0x1));
460 return ObjectHandle(Offset.get());
461}
462
464 // Store the pointer from memory with lowest bit set.
465 assert(!(Ptr & 0x1));
466 return ObjectHandle(Ptr | 1);
467}
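// The low bit therefore distinguishes the two encodings: an even opaque value
// is a file offset into the data pool, while an odd value is a tagged pointer
// to a StandaloneDataInMemory (decoded in getContentFromHandle below).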
468
469/// Proxy for an on-disk index record.
475
476template <size_t N>
477uintptr_t StandaloneDataMap<N>::insert(
478 ArrayRef<uint8_t> Hash, TrieRecord::StorageKind SK,
479 std::unique_ptr<sys::fs::mapped_file_region> Region) {
480 auto &S = getShard(Hash);
481 std::lock_guard<std::mutex> Lock(S.Mutex);
482 auto &V = S.Map[Hash.data()];
483 if (!V)
484 V = std::make_unique<StandaloneDataInMemory>(std::move(Region), SK);
485 return reinterpret_cast<uintptr_t>(V.get());
486}
487
488template <size_t N>
489const StandaloneDataInMemory *
490StandaloneDataMap<N>::lookup(ArrayRef<uint8_t> Hash) const {
491 auto &S = getShard(Hash);
492 std::lock_guard<std::mutex> Lock(S.Mutex);
493 auto I = S.Map.find(Hash.data());
494 if (I == S.Map.end())
495 return nullptr;
496 return &*I->second;
497}
498
499namespace {
500
501/// Copy of \a sys::fs::TempFile that skips RemoveOnSignal, which is too
502/// expensive to register/unregister at this rate.
503///
504/// FIXME: Add a TempFileManager that maintains a thread-safe list of open temp
505/// files and has a signal handler registered that removes them all.
506class TempFile {
507 bool Done = false;
508 TempFile(StringRef Name, int FD, OnDiskCASLogger *Logger)
509 : TmpName(std::string(Name)), FD(FD), Logger(Logger) {}
510
511public:
512 /// This creates a temporary file with createUniqueFile.
513 static Expected<TempFile> create(const Twine &Model, OnDiskCASLogger *Logger);
514 TempFile(TempFile &&Other) { *this = std::move(Other); }
515 TempFile &operator=(TempFile &&Other) {
516 TmpName = std::move(Other.TmpName);
517 FD = Other.FD;
518 Logger = Other.Logger;
519 Other.Done = true;
520 Other.FD = -1;
521 return *this;
522 }
523
524 // Name of the temporary file.
525 std::string TmpName;
526
527 // The open file descriptor.
528 int FD = -1;
529
530 OnDiskCASLogger *Logger = nullptr;
531
532 // Keep this with the given name.
533 Error keep(const Twine &Name);
534 Error discard();
535
 536 // The destructor discards the file unless keep() was called.
537 ~TempFile() { consumeError(discard()); }
538};
539
540class MappedTempFile {
541public:
542 char *data() const { return Map.data(); }
543 size_t size() const { return Map.size(); }
544
545 Error discard() {
546 assert(Map && "Map already destroyed");
547 Map.unmap();
548 return Temp.discard();
549 }
550
551 Error keep(const Twine &Name) {
552 assert(Map && "Map already destroyed");
553 Map.unmap();
554 return Temp.keep(Name);
555 }
556
557 MappedTempFile(TempFile Temp, sys::fs::mapped_file_region Map)
558 : Temp(std::move(Temp)), Map(std::move(Map)) {}
559
560private:
561 TempFile Temp;
562 sys::fs::mapped_file_region Map;
563};
564} // namespace
565
567 Done = true;
568 if (FD != -1) {
570 if (std::error_code EC = sys::fs::closeFile(File))
571 return errorCodeToError(EC);
572 }
573 FD = -1;
574
575 // Always try to close and remove.
576 std::error_code RemoveEC;
577 if (!TmpName.empty()) {
578 std::error_code EC = sys::fs::remove(TmpName);
579 if (Logger)
580 Logger->logTempFileRemove(TmpName, EC);
581 if (EC)
582 return errorCodeToError(EC);
583 }
584 TmpName = "";
585
586 return Error::success();
587}
588
590 assert(!Done);
591 Done = true;
592 // Always try to close and rename.
593 std::error_code RenameEC = sys::fs::rename(TmpName, Name);
594
595 if (Logger)
596 Logger->logTempFileKeep(TmpName, Name.str(), RenameEC);
597
598 if (!RenameEC)
599 TmpName = "";
600
602 if (std::error_code EC = sys::fs::closeFile(File))
603 return errorCodeToError(EC);
604 FD = -1;
605
606 return errorCodeToError(RenameEC);
607}
608
611 int FD;
612 SmallString<128> ResultPath;
613 if (std::error_code EC = sys::fs::createUniqueFile(Model, FD, ResultPath))
614 return errorCodeToError(EC);
615
616 if (Logger)
617 Logger->logTempFileCreate(ResultPath);
618
619 TempFile Ret(ResultPath, FD, Logger);
620 return std::move(Ret);
621}
622
623bool TrieRecord::compare_exchange_strong(Data &Existing, Data New) {
624 uint64_t ExistingPacked = pack(Existing);
625 uint64_t NewPacked = pack(New);
626 if (Storage.compare_exchange_strong(ExistingPacked, NewPacked))
627 return true;
628 Existing = unpack(ExistingPacked);
629 return false;
630}
631
633DataRecordHandle::getFromDataPool(const OnDiskDataAllocator &Pool,
635 auto HeaderData = Pool.get(Offset, sizeof(DataRecordHandle::Header));
636 if (!HeaderData)
637 return HeaderData.takeError();
638
639 auto Record = DataRecordHandle::get(HeaderData->data());
640 if (Record.getTotalSize() + Offset.get() > Pool.size())
641 return createStringError(
642 make_error_code(std::errc::illegal_byte_sequence),
643 "data record span passed the end of the data pool");
644
645 return Record;
646}
647
648DataRecordHandle DataRecordHandle::constructImpl(char *Mem, const Input &I,
649 const Layout &L) {
650 char *Next = Mem + sizeof(Header);
651
652 // Fill in Packed and set other data, then come back to construct the header.
653 Header::PackTy Packed = 0;
654 Packed |= LayoutFlags::pack(L.Flags) << Header::LayoutFlagsShift;
655
656 // Construct DataSize.
657 switch (L.Flags.DataSize) {
658 case DataSizeFlags::Uses1B:
659 assert(I.Data.size() <= UINT8_MAX);
660 Packed |= (Header::PackTy)I.Data.size()
661 << ((sizeof(Packed) - 2) * CHAR_BIT);
662 break;
663 case DataSizeFlags::Uses2B:
664 assert(I.Data.size() <= UINT16_MAX);
665 Packed |= (Header::PackTy)I.Data.size()
666 << ((sizeof(Packed) - 4) * CHAR_BIT);
667 break;
668 case DataSizeFlags::Uses4B:
669 support::endian::write32le(Next, I.Data.size());
670 Next += 4;
671 break;
672 case DataSizeFlags::Uses8B:
673 support::endian::write64le(Next, I.Data.size());
674 Next += 8;
675 break;
676 }
677
678 // Construct NumRefs.
679 //
680 // NOTE: May be writing NumRefs even if there are zero refs in order to fix
681 // alignment.
682 switch (L.Flags.NumRefs) {
683 case NumRefsFlags::Uses0B:
684 break;
685 case NumRefsFlags::Uses1B:
686 assert(I.Refs.size() <= UINT8_MAX);
687 Packed |= (Header::PackTy)I.Refs.size()
688 << ((sizeof(Packed) - 2) * CHAR_BIT);
689 break;
690 case NumRefsFlags::Uses2B:
691 assert(I.Refs.size() <= UINT16_MAX);
692 Packed |= (Header::PackTy)I.Refs.size()
693 << ((sizeof(Packed) - 4) * CHAR_BIT);
694 break;
695 case NumRefsFlags::Uses4B:
696 support::endian::write32le(Next, I.Refs.size());
697 Next += 4;
698 break;
699 case NumRefsFlags::Uses8B:
700 support::endian::write64le(Next, I.Refs.size());
701 Next += 8;
702 break;
703 }
704
705 // Construct Refs[].
706 if (!I.Refs.empty()) {
707 assert((L.Flags.RefKind == RefKindFlags::InternalRef4B) == I.Refs.is4B());
708 ArrayRef<uint8_t> RefsBuffer = I.Refs.getBuffer();
709 llvm::copy(RefsBuffer, Next);
710 Next += RefsBuffer.size();
711 }
712
713 // Construct Data and the trailing null.
715 llvm::copy(I.Data, Next);
716 Next[I.Data.size()] = 0;
717
718 // Construct the header itself and return.
719 Header *H = new (Mem) Header{Packed};
720 DataRecordHandle Record(*H);
721 assert(Record.getData() == I.Data);
722 assert(Record.getNumRefs() == I.Refs.size());
723 assert(Record.getRefs() == I.Refs);
724 assert(Record.getLayoutFlags().DataSize == L.Flags.DataSize);
725 assert(Record.getLayoutFlags().NumRefs == L.Flags.NumRefs);
726 assert(Record.getLayoutFlags().RefKind == L.Flags.RefKind);
727 return Record;
728}
729
730DataRecordHandle::Layout::Layout(const Input &I) {
731 // Start initial relative offsets right after the Header.
732 uint64_t RelOffset = sizeof(Header);
733
734 // Initialize the easy stuff.
735 DataSize = I.Data.size();
736 NumRefs = I.Refs.size();
737
738 // Check refs size.
739 Flags.RefKind =
740 I.Refs.is4B() ? RefKindFlags::InternalRef4B : RefKindFlags::InternalRef;
741
742 // Find the smallest slot available for DataSize.
743 bool Has1B = true;
744 bool Has2B = true;
745 if (DataSize <= UINT8_MAX && Has1B) {
746 Flags.DataSize = DataSizeFlags::Uses1B;
747 Has1B = false;
748 } else if (DataSize <= UINT16_MAX && Has2B) {
749 Flags.DataSize = DataSizeFlags::Uses2B;
750 Has2B = false;
751 } else if (DataSize <= UINT32_MAX) {
752 Flags.DataSize = DataSizeFlags::Uses4B;
753 RelOffset += 4;
754 } else {
755 Flags.DataSize = DataSizeFlags::Uses8B;
756 RelOffset += 8;
757 }
758
759 // Find the smallest slot available for NumRefs. Never sets NumRefs8B here.
760 if (!NumRefs) {
761 Flags.NumRefs = NumRefsFlags::Uses0B;
762 } else if (NumRefs <= UINT8_MAX && Has1B) {
763 Flags.NumRefs = NumRefsFlags::Uses1B;
764 Has1B = false;
765 } else if (NumRefs <= UINT16_MAX && Has2B) {
766 Flags.NumRefs = NumRefsFlags::Uses2B;
767 Has2B = false;
768 } else {
769 Flags.NumRefs = NumRefsFlags::Uses4B;
770 RelOffset += 4;
771 }
772
773 // Helper to "upgrade" either DataSize or NumRefs by 4B to avoid complicated
774 // padding rules when reading and writing. This also bumps RelOffset.
775 //
776 // The value for NumRefs is strictly limited to UINT32_MAX, but it can be
777 // stored as 8B. This means we can *always* find a size to grow.
778 //
779 // NOTE: Only call this once.
780 auto GrowSizeFieldsBy4B = [&]() {
781 assert(isAligned(Align(4), RelOffset));
782 RelOffset += 4;
783
784 assert(Flags.NumRefs != NumRefsFlags::Uses8B &&
785 "Expected to be able to grow NumRefs8B");
786
787 // First try to grow DataSize. NumRefs will not (yet) be 8B, and if
788 // DataSize is upgraded to 8B it'll already be aligned.
789 //
790 // Failing that, grow NumRefs.
791 if (Flags.DataSize < DataSizeFlags::Uses4B)
792 Flags.DataSize = DataSizeFlags::Uses4B; // DataSize: Packed => 4B.
793 else if (Flags.DataSize < DataSizeFlags::Uses8B)
794 Flags.DataSize = DataSizeFlags::Uses8B; // DataSize: 4B => 8B.
795 else if (Flags.NumRefs < NumRefsFlags::Uses4B)
796 Flags.NumRefs = NumRefsFlags::Uses4B; // NumRefs: Packed => 4B.
797 else
798 Flags.NumRefs = NumRefsFlags::Uses8B; // NumRefs: 4B => 8B.
799 };
800
801 assert(isAligned(Align(4), RelOffset));
802 if (Flags.RefKind == RefKindFlags::InternalRef) {
803 // List of 8B refs should be 8B-aligned. Grow one of the sizes to get this
804 // without padding.
805 if (!isAligned(Align(8), RelOffset))
806 GrowSizeFieldsBy4B();
807
808 assert(isAligned(Align(8), RelOffset));
809 RefsRelOffset = RelOffset;
810 RelOffset += 8 * NumRefs;
811 } else {
812 // The array of 4B refs doesn't need 8B alignment, but the data will need
813 // to be 8B-aligned. Detect this now, and, if necessary, shift everything
814 // by 4B by growing one of the sizes.
 815 // Removing the 8B-alignment requirement for data would save <1% of disk
 816 // storage for a clang build using MCCAS, but the alignment may be useful in
 817 // the future, so keep it for now.
818 uint64_t RefListSize = 4 * NumRefs;
819 if (!isAligned(Align(8), RelOffset + RefListSize))
820 GrowSizeFieldsBy4B();
821 RefsRelOffset = RelOffset;
822 RelOffset += RefListSize;
823 }
824
825 assert(isAligned(Align(8), RelOffset));
826 DataRelOffset = RelOffset;
827}
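// Worked example for Layout (illustrative values): 3 refs that each fit in 4B
// plus 10 bytes of data give DataSize=Uses1B and NumRefs=Uses2B (both packed
// into the header), RefsRelOffset=4, DataRelOffset=16 (already 8B-aligned),
// and a total size of 16 + 10 + 1 = 27 bytes including the trailing null.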
828
829uint64_t DataRecordHandle::getDataSize() const {
830 int64_t RelOffset = sizeof(Header);
831 auto *DataSizePtr = reinterpret_cast<const char *>(H) + RelOffset;
832 switch (getLayoutFlags().DataSize) {
833 case DataSizeFlags::Uses1B:
834 return (H->Packed >> ((sizeof(Header::PackTy) - 2) * CHAR_BIT)) & UINT8_MAX;
835 case DataSizeFlags::Uses2B:
836 return (H->Packed >> ((sizeof(Header::PackTy) - 4) * CHAR_BIT)) &
837 UINT16_MAX;
838 case DataSizeFlags::Uses4B:
839 return support::endian::read32le(DataSizePtr);
840 case DataSizeFlags::Uses8B:
841 return support::endian::read64le(DataSizePtr);
842 }
843 llvm_unreachable("Unknown DataSizeFlags enum");
844}
845
846void DataRecordHandle::skipDataSize(LayoutFlags LF, int64_t &RelOffset) const {
847 if (LF.DataSize >= DataSizeFlags::Uses4B)
848 RelOffset += 4;
849 if (LF.DataSize >= DataSizeFlags::Uses8B)
850 RelOffset += 4;
851}
852
853uint32_t DataRecordHandle::getNumRefs() const {
854 LayoutFlags LF = getLayoutFlags();
855 int64_t RelOffset = sizeof(Header);
856 skipDataSize(LF, RelOffset);
857 auto *NumRefsPtr = reinterpret_cast<const char *>(H) + RelOffset;
858 switch (LF.NumRefs) {
859 case NumRefsFlags::Uses0B:
860 return 0;
861 case NumRefsFlags::Uses1B:
862 return (H->Packed >> ((sizeof(Header::PackTy) - 2) * CHAR_BIT)) & UINT8_MAX;
863 case NumRefsFlags::Uses2B:
864 return (H->Packed >> ((sizeof(Header::PackTy) - 4) * CHAR_BIT)) &
865 UINT16_MAX;
866 case NumRefsFlags::Uses4B:
867 return support::endian::read32le(NumRefsPtr);
868 case NumRefsFlags::Uses8B:
869 return support::endian::read64le(NumRefsPtr);
870 }
871 llvm_unreachable("Unknown NumRefsFlags enum");
872}
873
874void DataRecordHandle::skipNumRefs(LayoutFlags LF, int64_t &RelOffset) const {
875 if (LF.NumRefs >= NumRefsFlags::Uses4B)
876 RelOffset += 4;
877 if (LF.NumRefs >= NumRefsFlags::Uses8B)
878 RelOffset += 4;
879}
880
881int64_t DataRecordHandle::getRefsRelOffset() const {
882 LayoutFlags LF = getLayoutFlags();
883 int64_t RelOffset = sizeof(Header);
884 skipDataSize(LF, RelOffset);
885 skipNumRefs(LF, RelOffset);
886 return RelOffset;
887}
888
889int64_t DataRecordHandle::getDataRelOffset() const {
890 LayoutFlags LF = getLayoutFlags();
891 int64_t RelOffset = sizeof(Header);
892 skipDataSize(LF, RelOffset);
893 skipNumRefs(LF, RelOffset);
894 uint32_t RefSize = LF.RefKind == RefKindFlags::InternalRef4B ? 4 : 8;
895 RelOffset += RefSize * getNumRefs();
896 return RelOffset;
897}
898
900 if (UpstreamDB) {
901 if (auto E = UpstreamDB->validate(Deep, Hasher))
902 return E;
903 }
904 return Index.validate([&](FileOffset Offset,
906 -> Error {
907 auto formatError = [&](Twine Msg) {
908 return createStringError(
910 "bad record at 0x" +
911 utohexstr((unsigned)Offset.get(), /*LowerCase=*/true) + ": " +
912 Msg.str());
913 };
914
915 if (Record.Data.size() != sizeof(TrieRecord))
916 return formatError("wrong data record size");
917 if (!isAligned(Align::Of<TrieRecord>(), Record.Data.size()))
918 return formatError("wrong data record alignment");
919
920 auto *R = reinterpret_cast<const TrieRecord *>(Record.Data.data());
921 TrieRecord::Data D = R->load();
922 std::unique_ptr<MemoryBuffer> FileBuffer;
923 if ((uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::Unknown &&
924 (uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::DataPool &&
925 (uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::Standalone &&
926 (uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::StandaloneLeaf &&
927 (uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::StandaloneLeaf0)
928 return formatError("invalid record kind value");
929
931 auto I = getIndexProxyFromRef(Ref);
932 if (!I)
933 return I.takeError();
934
935 switch (D.SK) {
936 case TrieRecord::StorageKind::Unknown:
937 // This could be an abandoned entry due to a termination before updating
938 // the record. It can be reused by later insertion so just skip this entry
939 // for now.
940 return Error::success();
941 case TrieRecord::StorageKind::DataPool:
 942 // Check that the offset is a positive value and that the data pool is
 943 // large enough to hold the header for the data record at that offset.
944 if (D.Offset.get() <= 0 ||
945 D.Offset.get() + sizeof(DataRecordHandle::Header) >= DataPool.size())
946 return formatError("datapool record out of bound");
947 break;
948 case TrieRecord::StorageKind::Standalone:
949 case TrieRecord::StorageKind::StandaloneLeaf:
950 case TrieRecord::StorageKind::StandaloneLeaf0:
951 SmallString<256> Path;
952 getStandalonePath(TrieRecord::getStandaloneFilePrefix(D.SK), *I, Path);
 953 // If we need to validate the content of the file later, load the buffer
 954 // here. Otherwise, just check the existence of the file.
955 if (Deep) {
956 auto File = MemoryBuffer::getFile(Path, /*IsText=*/false,
957 /*RequiresNullTerminator=*/false);
958 if (!File || !*File)
959 return formatError("record file \'" + Path + "\' does not exist");
960
961 FileBuffer = std::move(*File);
962 } else if (!llvm::sys::fs::exists(Path))
963 return formatError("record file \'" + Path + "\' does not exist");
964 }
965
966 if (!Deep)
967 return Error::success();
968
969 auto dataError = [&](Twine Msg) {
971 "bad data for digest \'" + toHex(I->Hash) +
972 "\': " + Msg.str());
973 };
975 ArrayRef<char> StoredData;
976
977 switch (D.SK) {
978 case TrieRecord::StorageKind::Unknown:
979 llvm_unreachable("already handled");
980 case TrieRecord::StorageKind::DataPool: {
981 auto DataRecord = DataRecordHandle::getFromDataPool(DataPool, D.Offset);
982 if (!DataRecord)
983 return dataError(toString(DataRecord.takeError()));
984
985 for (auto InternRef : DataRecord->getRefs()) {
986 auto Index = getIndexProxyFromRef(InternRef);
987 if (!Index)
988 return Index.takeError();
989 Refs.push_back(Index->Hash);
990 }
991 StoredData = DataRecord->getData();
992 break;
993 }
994 case TrieRecord::StorageKind::Standalone: {
995 if (FileBuffer->getBufferSize() < sizeof(DataRecordHandle::Header))
996 return dataError("data record is not big enough to read the header");
997 auto DataRecord = DataRecordHandle::get(FileBuffer->getBufferStart());
998 if (DataRecord.getTotalSize() < FileBuffer->getBufferSize())
999 return dataError(
1000 "data record span passed the end of the standalone file");
1001 for (auto InternRef : DataRecord.getRefs()) {
1002 auto Index = getIndexProxyFromRef(InternRef);
1003 if (!Index)
1004 return Index.takeError();
1005 Refs.push_back(Index->Hash);
1006 }
1007 StoredData = DataRecord.getData();
1008 break;
1009 }
1010 case TrieRecord::StorageKind::StandaloneLeaf:
1011 case TrieRecord::StorageKind::StandaloneLeaf0: {
1012 StoredData = arrayRefFromStringRef<char>(FileBuffer->getBuffer());
1013 if (D.SK == TrieRecord::StorageKind::StandaloneLeaf0) {
1014 if (!FileBuffer->getBuffer().ends_with('\0'))
1015 return dataError("standalone file is not zero terminated");
1016 StoredData = StoredData.drop_back(1);
1017 }
1018 break;
1019 }
1020 }
1021
1022 SmallVector<uint8_t> ComputedHash;
1023 Hasher(Refs, StoredData, ComputedHash);
1024 if (I->Hash != ArrayRef(ComputedHash))
1025 return dataError("hash mismatch, got \'" + toHex(ComputedHash) +
1026 "\' instead");
1027
1028 return Error::success();
1029 });
1030}
1031
1033 auto formatError = [&](Twine Msg) {
1034 return createStringError(
1036 "bad ref=0x" +
1037 utohexstr(ExternalRef.getOpaqueData(), /*LowerCase=*/true) + ": " +
1038 Msg.str());
1039 };
1040
1041 if (ExternalRef.getOpaqueData() == 0)
1042 return formatError("zero is not a valid ref");
1043
1044 InternalRef InternalRef = getInternalRef(ExternalRef);
1045 auto I = getIndexProxyFromRef(InternalRef);
1046 if (!I)
1047 return formatError(llvm::toString(I.takeError()));
1048 auto Hash = getDigest(*I);
1049
1050 OnDiskTrieRawHashMap::ConstOnDiskPtr P = Index.find(Hash);
1051 if (!P)
1052 return formatError("not found using hash " + toHex(Hash));
1053 IndexProxy OtherI = getIndexProxyFromPointer(P);
1054 ObjectID OtherRef = getExternalReference(makeInternalRef(OtherI.Offset));
1055 if (OtherRef != ExternalRef)
1056 return formatError("ref does not match indexed offset " +
1057 utohexstr(OtherRef.getOpaqueData(), /*LowerCase=*/true) +
1058 " for hash " + toHex(Hash));
1059 return Error::success();
1060}
1061
1063 OS << "on-disk-root-path: " << RootPath << "\n";
1064
1065 struct PoolInfo {
1067 };
1069
1070 OS << "\n";
1071 OS << "index:\n";
1072 Index.print(OS, [&](ArrayRef<char> Data) {
1073 assert(Data.size() == sizeof(TrieRecord));
1075 auto *R = reinterpret_cast<const TrieRecord *>(Data.data());
1076 TrieRecord::Data D = R->load();
1077 OS << " SK=";
1078 switch (D.SK) {
1079 case TrieRecord::StorageKind::Unknown:
1080 OS << "unknown ";
1081 break;
1082 case TrieRecord::StorageKind::DataPool:
1083 OS << "datapool ";
1084 Pool.push_back({D.Offset.get()});
1085 break;
1086 case TrieRecord::StorageKind::Standalone:
1087 OS << "standalone-data ";
1088 break;
1089 case TrieRecord::StorageKind::StandaloneLeaf:
1090 OS << "standalone-leaf ";
1091 break;
1092 case TrieRecord::StorageKind::StandaloneLeaf0:
1093 OS << "standalone-leaf+0";
1094 break;
1095 }
1096 OS << " Offset=" << (void *)D.Offset.get();
1097 });
1098 if (Pool.empty())
1099 return;
1100
1101 OS << "\n";
1102 OS << "pool:\n";
1103 llvm::sort(
1104 Pool, [](PoolInfo LHS, PoolInfo RHS) { return LHS.Offset < RHS.Offset; });
1105 for (PoolInfo PI : Pool) {
1106 OS << "- addr=" << (void *)PI.Offset << " ";
1107 auto D = DataRecordHandle::getFromDataPool(DataPool, FileOffset(PI.Offset));
1108 if (!D) {
1109 OS << "error: " << toString(D.takeError());
1110 return;
1111 }
1112
1113 OS << "record refs=" << D->getNumRefs() << " data=" << D->getDataSize()
1114 << " size=" << D->getTotalSize()
1115 << " end=" << (void *)(PI.Offset + D->getTotalSize()) << "\n";
1116 }
1117}
1118
1120OnDiskGraphDB::indexHash(ArrayRef<uint8_t> Hash) {
1121 auto P = Index.insertLazy(
1122 Hash, [](FileOffset TentativeOffset,
1123 OnDiskTrieRawHashMap::ValueProxy TentativeValue) {
1124 assert(TentativeValue.Data.size() == sizeof(TrieRecord));
1125 assert(
1126 isAddrAligned(Align::Of<TrieRecord>(), TentativeValue.Data.data()));
1127 new (TentativeValue.Data.data()) TrieRecord();
1128 });
1129 if (LLVM_UNLIKELY(!P))
1130 return P.takeError();
1131
1132 assert(*P && "Expected insertion");
1133 return getIndexProxyFromPointer(*P);
1134}
1135
1136OnDiskGraphDB::IndexProxy OnDiskGraphDB::getIndexProxyFromPointer(
1138 assert(P);
1139 assert(P.getOffset());
1140 return IndexProxy{P.getOffset(), P->Hash,
1141 *const_cast<TrieRecord *>(
1142 reinterpret_cast<const TrieRecord *>(P->Data.data()))};
1143}
1144
1146 auto I = indexHash(Hash);
1147 if (LLVM_UNLIKELY(!I))
1148 return I.takeError();
1149 return getExternalReference(*I);
1150}
1151
1152ObjectID OnDiskGraphDB::getExternalReference(const IndexProxy &I) {
1153 return getExternalReference(makeInternalRef(I.Offset));
1154}
1155
1156std::optional<ObjectID>
1158 bool CheckUpstream) {
1159 auto tryUpstream =
1160 [&](std::optional<IndexProxy> I) -> std::optional<ObjectID> {
1161 if (!CheckUpstream || !UpstreamDB)
1162 return std::nullopt;
1163 std::optional<ObjectID> UpstreamID =
1164 UpstreamDB->getExistingReference(Digest);
1165 if (LLVM_UNLIKELY(!UpstreamID))
1166 return std::nullopt;
1167 auto Ref = expectedToOptional(indexHash(Digest));
1168 if (!Ref)
1169 return std::nullopt;
1170 if (!I)
1171 I.emplace(*Ref);
1172 return getExternalReference(*I);
1173 };
1174
1175 OnDiskTrieRawHashMap::ConstOnDiskPtr P = Index.find(Digest);
1176 if (!P)
1177 return tryUpstream(std::nullopt);
1178 IndexProxy I = getIndexProxyFromPointer(P);
1179 TrieRecord::Data Obj = I.Ref.load();
1180 if (Obj.SK == TrieRecord::StorageKind::Unknown)
1181 return tryUpstream(I);
1182 return getExternalReference(makeInternalRef(I.Offset));
1183}
1184
1186OnDiskGraphDB::getIndexProxyFromRef(InternalRef Ref) const {
1187 auto P = Index.recoverFromFileOffset(Ref.getFileOffset());
1188 if (LLVM_UNLIKELY(!P))
1189 return P.takeError();
1190 return getIndexProxyFromPointer(*P);
1191}
1192
1194 auto I = getIndexProxyFromRef(Ref);
1195 if (!I)
1196 return I.takeError();
1197 return I->Hash;
1198}
1199
1200ArrayRef<uint8_t> OnDiskGraphDB::getDigest(const IndexProxy &I) const {
1201 return I.Hash;
1202}
1203
1204static OnDiskContent getContentFromHandle(const OnDiskDataAllocator &DataPool,
1205 ObjectHandle OH) {
1206 // Decode ObjectHandle to locate the stored content.
1207 uint64_t Data = OH.getOpaqueData();
1208 if (Data & 1) {
1209 const auto *SDIM =
1210 reinterpret_cast<const StandaloneDataInMemory *>(Data & (-1ULL << 1));
1211 return SDIM->getContent();
1212 }
1213
1214 auto DataHandle =
1215 cantFail(DataRecordHandle::getFromDataPool(DataPool, FileOffset(Data)));
1216 assert(DataHandle.getData().end()[0] == 0 && "Null termination");
1217 return OnDiskContent{DataHandle, std::nullopt};
1218}
1219
1221 OnDiskContent Content = getContentFromHandle(DataPool, Node);
1222 if (Content.Bytes)
1223 return *Content.Bytes;
1224 assert(Content.Record && "Expected record or bytes");
1225 return Content.Record->getData();
1226}
1227
1228InternalRefArrayRef OnDiskGraphDB::getInternalRefs(ObjectHandle Node) const {
1229 if (std::optional<DataRecordHandle> Record =
1230 getContentFromHandle(DataPool, Node).Record)
1231 return Record->getRefs();
1232 return std::nullopt;
1233}
1234
1237 InternalRef Ref = getInternalRef(ExternalRef);
1238 auto I = getIndexProxyFromRef(Ref);
1239 if (!I)
1240 return I.takeError();
1241 TrieRecord::Data Object = I->Ref.load();
1242
1243 if (Object.SK == TrieRecord::StorageKind::Unknown)
1244 return faultInFromUpstream(ExternalRef);
1245
1246 if (Object.SK == TrieRecord::StorageKind::DataPool)
1247 return ObjectHandle::fromFileOffset(Object.Offset);
1248
1249 // Only TrieRecord::StorageKind::Standalone (and variants) need to be
1250 // explicitly loaded.
1251 //
1252 // There's corruption if standalone objects have offsets, or if we get here
1253 // for something that isn't standalone.
1254 if (Object.Offset)
1256 switch (Object.SK) {
1257 case TrieRecord::StorageKind::Unknown:
1258 case TrieRecord::StorageKind::DataPool:
1259 llvm_unreachable("unexpected storage kind");
1260 case TrieRecord::StorageKind::Standalone:
1261 case TrieRecord::StorageKind::StandaloneLeaf0:
1262 case TrieRecord::StorageKind::StandaloneLeaf:
1263 break;
1264 }
1265
1266 // Load it from disk.
1267 //
1268 // Note: Creation logic guarantees that data that needs null-termination is
1269 // suitably 0-padded. Requiring null-termination here would be too expensive
1270 // for extremely large objects that happen to be page-aligned.
1271 SmallString<256> Path;
1272 getStandalonePath(TrieRecord::getStandaloneFilePrefix(Object.SK), *I, Path);
1273
1274 auto BypassSandbox = sys::sandbox::scopedDisable();
1275
1276 auto File = sys::fs::openNativeFileForRead(Path);
1277 if (!File)
1278 return createFileError(Path, File.takeError());
1279
1280 llvm::scope_exit CloseFile([&]() { sys::fs::closeFile(*File); });
1281
1283 if (std::error_code EC = sys::fs::status(*File, Status))
1285
1286 std::error_code EC;
1287 auto Region = std::make_unique<sys::fs::mapped_file_region>(
1288 *File, sys::fs::mapped_file_region::readonly, Status.getSize(), 0, EC);
1289 if (EC)
1291
1293 static_cast<StandaloneDataMapTy *>(StandaloneData)
1294 ->insert(I->Hash, Object.SK, std::move(Region)));
1295}
1296
1298 auto Presence = getObjectPresence(Ref, /*CheckUpstream=*/true);
1299 if (!Presence)
1300 return Presence.takeError();
1301
1302 switch (*Presence) {
1303 case ObjectPresence::Missing:
1304 return false;
1305 case ObjectPresence::InPrimaryDB:
1306 return true;
1307 case ObjectPresence::OnlyInUpstreamDB:
1308 if (auto FaultInResult = faultInFromUpstream(Ref); !FaultInResult)
1309 return FaultInResult.takeError();
1310 return true;
1311 }
1312 llvm_unreachable("Unknown ObjectPresence enum");
1313}
1314
1316OnDiskGraphDB::getObjectPresence(ObjectID ExternalRef,
1317 bool CheckUpstream) const {
1318 InternalRef Ref = getInternalRef(ExternalRef);
1319 auto I = getIndexProxyFromRef(Ref);
1320 if (!I)
1321 return I.takeError();
1322
1323 TrieRecord::Data Object = I->Ref.load();
1324 if (Object.SK != TrieRecord::StorageKind::Unknown)
1325 return ObjectPresence::InPrimaryDB;
1326
1327 if (!CheckUpstream || !UpstreamDB)
1328 return ObjectPresence::Missing;
1329
1330 std::optional<ObjectID> UpstreamID =
1331 UpstreamDB->getExistingReference(getDigest(*I));
1332 return UpstreamID.has_value() ? ObjectPresence::OnlyInUpstreamDB
1333 : ObjectPresence::Missing;
1334}
1335
1336InternalRef OnDiskGraphDB::makeInternalRef(FileOffset IndexOffset) {
1337 return InternalRef::getFromOffset(IndexOffset);
1338}
1339
1340void OnDiskGraphDB::getStandalonePath(StringRef Prefix, const IndexProxy &I,
1341 SmallVectorImpl<char> &Path) const {
1342 Path.assign(RootPath.begin(), RootPath.end());
1343 sys::path::append(Path,
1344 Prefix + Twine(I.Offset.get()) + "." + CASFormatVersion);
1345}
1346
1347OnDiskContent StandaloneDataInMemory::getContent() const {
1348 bool Leaf0 = false;
1349 bool Leaf = false;
1350 switch (SK) {
1351 default:
1352 llvm_unreachable("Storage kind must be standalone");
1353 case TrieRecord::StorageKind::Standalone:
1354 break;
1355 case TrieRecord::StorageKind::StandaloneLeaf0:
1356 Leaf = Leaf0 = true;
1357 break;
1358 case TrieRecord::StorageKind::StandaloneLeaf:
1359 Leaf = true;
1360 break;
1361 }
1362
1363 if (Leaf) {
1364 StringRef Data(Region->data(), Region->size());
1365 assert(Data.drop_back(Leaf0).end()[0] == 0 &&
1366 "Standalone node data missing null termination");
1367 return OnDiskContent{std::nullopt,
1368 arrayRefFromStringRef<char>(Data.drop_back(Leaf0))};
1369 }
1370
1371 DataRecordHandle Record = DataRecordHandle::get(Region->data());
1372 assert(Record.getData().end()[0] == 0 &&
1373 "Standalone object record missing null termination for data");
1374 return OnDiskContent{Record, std::nullopt};
1375}
1376
1377static Expected<MappedTempFile>
1379 auto BypassSandbox = sys::sandbox::scopedDisable();
1380
1381 assert(Size && "Unexpected request for an empty temp file");
1382 Expected<TempFile> File = TempFile::create(FinalPath + ".%%%%%%", Logger);
1383 if (!File)
1384 return File.takeError();
1385
1386 if (Error E = preallocateFileTail(File->FD, 0, Size).takeError())
1387 return createFileError(File->TmpName, std::move(E));
1388
1389 if (auto EC = sys::fs::resize_file_before_mapping_readwrite(File->FD, Size))
1390 return createFileError(File->TmpName, EC);
1391
1392 std::error_code EC;
1395 0, EC);
1396 if (EC)
1397 return createFileError(File->TmpName, EC);
1398 return MappedTempFile(std::move(*File), std::move(Map));
1399}
1400
1401static size_t getPageSize() {
1403 return PageSize;
1404}
1405
1406Error OnDiskGraphDB::createStandaloneLeaf(IndexProxy &I, ArrayRef<char> Data) {
1407 assert(Data.size() > TrieRecord::MaxEmbeddedSize &&
1408 "Expected a bigger file for external content...");
1409
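  // When the data size is an exact multiple of the page size, the mapped file
  // has no zero-fill past the end, so an explicit trailing null byte is stored
  // (StandaloneLeaf0); otherwise the zero-fill of the final mapped page
  // provides the null terminator that getContent() asserts.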
1410 bool Leaf0 = isAligned(Align(getPageSize()), Data.size());
1411 TrieRecord::StorageKind SK = Leaf0 ? TrieRecord::StorageKind::StandaloneLeaf0
1412 : TrieRecord::StorageKind::StandaloneLeaf;
1413
1414 SmallString<256> Path;
1415 int64_t FileSize = Data.size() + Leaf0;
1416 getStandalonePath(TrieRecord::getStandaloneFilePrefix(SK), I, Path);
1417
1418 auto BypassSandbox = sys::sandbox::scopedDisable();
1419
1420 // Write the file. Don't reuse this mapped_file_region, which is read/write.
1421 // Let load() pull up one that's read-only.
1422 Expected<MappedTempFile> File = createTempFile(Path, FileSize, Logger.get());
1423 if (!File)
1424 return File.takeError();
1425 assert(File->size() == (uint64_t)FileSize);
1426 llvm::copy(Data, File->data());
1427 if (Leaf0)
1428 File->data()[Data.size()] = 0;
1429 assert(File->data()[Data.size()] == 0);
1430 if (Error E = File->keep(Path))
1431 return E;
1432
1433 // Store the object reference.
1434 TrieRecord::Data Existing;
1435 {
1436 TrieRecord::Data Leaf{SK, FileOffset()};
1437 if (I.Ref.compare_exchange_strong(Existing, Leaf)) {
1438 recordStandaloneSizeIncrease(FileSize);
1439 return Error::success();
1440 }
1441 }
1442
1443 // If there was a race, confirm that the new value has valid storage.
1444 if (Existing.SK == TrieRecord::StorageKind::Unknown)
1445 return createCorruptObjectError(getDigest(I));
1446
1447 return Error::success();
1448}
1449
1452 auto I = getIndexProxyFromRef(getInternalRef(ID));
1453 if (LLVM_UNLIKELY(!I))
1454 return I.takeError();
1455
1456 // Early return in case the node exists.
1457 {
1458 TrieRecord::Data Existing = I->Ref.load();
1459 if (Existing.SK != TrieRecord::StorageKind::Unknown)
1460 return Error::success();
1461 }
1462
1463 // Big leaf nodes.
1464 if (Refs.empty() && Data.size() > TrieRecord::MaxEmbeddedSize)
1465 return createStandaloneLeaf(*I, Data);
1466
1467 // TODO: Check whether it's worth checking the index for an already existing
1468 // object (like storeTreeImpl() does) before building up the
1469 // InternalRefVector.
1470 InternalRefVector InternalRefs;
1471 for (ObjectID Ref : Refs)
1472 InternalRefs.push_back(getInternalRef(Ref));
1473
1474 // Create the object.
1475
1476 DataRecordHandle::Input Input{InternalRefs, Data};
1477
1478 // Compute the storage kind, allocate it, and create the record.
1479 TrieRecord::StorageKind SK = TrieRecord::StorageKind::Unknown;
1480 FileOffset PoolOffset;
1481 SmallString<256> Path;
1482 std::optional<MappedTempFile> File;
1483 std::optional<uint64_t> FileSize;
1484 auto AllocStandaloneFile = [&](size_t Size) -> Expected<char *> {
1485 getStandalonePath(TrieRecord::getStandaloneFilePrefix(
1486 TrieRecord::StorageKind::Standalone),
1487 *I, Path);
1488 if (Error E = createTempFile(Path, Size, Logger.get()).moveInto(File))
1489 return std::move(E);
1490 assert(File->size() == Size);
1491 FileSize = Size;
1492 SK = TrieRecord::StorageKind::Standalone;
1493 return File->data();
1494 };
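  // Alloc prefers the data pool for records within MaxEmbeddedSize; if the
  // pool allocation fails with not_enough_memory it falls back to a standalone
  // file, and any other error is propagated. Larger records always go to a
  // standalone file.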
1495 auto Alloc = [&](size_t Size) -> Expected<char *> {
1496 if (Size <= TrieRecord::MaxEmbeddedSize) {
1497 SK = TrieRecord::StorageKind::DataPool;
1498 auto P = DataPool.allocate(Size);
1499 if (LLVM_UNLIKELY(!P)) {
1500 char *NewAlloc = nullptr;
1501 auto NewE = handleErrors(
1502 P.takeError(), [&](std::unique_ptr<StringError> E) -> Error {
1503 if (E->convertToErrorCode() == std::errc::not_enough_memory)
1504 return AllocStandaloneFile(Size).moveInto(NewAlloc);
1505 return Error(std::move(E));
1506 });
1507 if (!NewE)
1508 return NewAlloc;
1509 return std::move(NewE);
1510 }
1511 PoolOffset = P->getOffset();
1512 LLVM_DEBUG({
1513 dbgs() << "pool-alloc addr=" << (void *)PoolOffset.get()
1514 << " size=" << Size
1515 << " end=" << (void *)(PoolOffset.get() + Size) << "\n";
1516 });
1517 return (*P)->data();
1518 }
1519 return AllocStandaloneFile(Size);
1520 };
1521
1522 DataRecordHandle Record;
1523 if (Error E =
1524 DataRecordHandle::createWithError(Alloc, Input).moveInto(Record))
1525 return E;
1526 assert(Record.getData().end()[0] == 0 && "Expected null-termination");
1527 assert(Record.getData() == Input.Data && "Expected initialization");
1528 assert(SK != TrieRecord::StorageKind::Unknown);
1529 assert(bool(File) != bool(PoolOffset) &&
1530 "Expected either a mapped file or a pooled offset");
1531
1532 // Check for a race before calling MappedTempFile::keep().
1533 //
1534 // Then decide what to do with the file. Better to discard than overwrite if
1535 // another thread/process has already added this.
1536 TrieRecord::Data Existing = I->Ref.load();
1537 {
1538 TrieRecord::Data NewObject{SK, PoolOffset};
1539 if (File) {
1540 if (Existing.SK == TrieRecord::StorageKind::Unknown) {
1541 // Keep the file!
1542 if (Error E = File->keep(Path))
1543 return E;
1544 } else {
1545 File.reset();
1546 }
1547 }
1548
1549 // If we didn't already see a racing/existing write, then try storing the
1550 // new object. If that races, confirm that the new value has valid storage.
1551 //
1552 // TODO: Find a way to reuse the storage from the new-but-abandoned record
1553 // handle.
1554 if (Existing.SK == TrieRecord::StorageKind::Unknown) {
1555 if (I->Ref.compare_exchange_strong(Existing, NewObject)) {
1556 if (FileSize)
1557 recordStandaloneSizeIncrease(*FileSize);
1558 return Error::success();
1559 }
1560 }
1561 }
1562
1563 if (Existing.SK == TrieRecord::StorageKind::Unknown)
1565
1566 // Load existing object.
1567 return Error::success();
1568}
1569
1570void OnDiskGraphDB::recordStandaloneSizeIncrease(size_t SizeIncrease) {
1571 standaloneStorageSize().fetch_add(SizeIncrease, std::memory_order_relaxed);
1572}
1573
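// The data pool's user header holds a single std::atomic<uint64_t> that tracks
// the total size of standalone files; it is placement-constructed to zero when
// the database is created (see below).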
1574std::atomic<uint64_t> &OnDiskGraphDB::standaloneStorageSize() const {
1575 MutableArrayRef<uint8_t> UserHeader = DataPool.getUserHeader();
1576 assert(UserHeader.size() == sizeof(std::atomic<uint64_t>));
1577 assert(isAddrAligned(Align(8), UserHeader.data()));
1578 return *reinterpret_cast<std::atomic<uint64_t> *>(UserHeader.data());
1579}
1580
1581uint64_t OnDiskGraphDB::getStandaloneStorageSize() const {
1582 return standaloneStorageSize().load(std::memory_order_relaxed);
1583}
1584
1586 return Index.size() + DataPool.size() + getStandaloneStorageSize();
1587}
1588
1590 unsigned IndexPercent = Index.size() * 100ULL / Index.capacity();
1591 unsigned DataPercent = DataPool.size() * 100ULL / DataPool.capacity();
1592 return std::max(IndexPercent, DataPercent);
1593}
1594
1597 unsigned HashByteSize, OnDiskGraphDB *UpstreamDB,
1598 std::shared_ptr<OnDiskCASLogger> Logger,
1599 FaultInPolicy Policy) {
1600 if (std::error_code EC = sys::fs::create_directories(AbsPath))
1601 return createFileError(AbsPath, EC);
1602
1603 constexpr uint64_t MB = 1024ull * 1024ull;
1604 constexpr uint64_t GB = 1024ull * 1024ull * 1024ull;
1605
1606 uint64_t MaxIndexSize = 12 * GB;
1607 uint64_t MaxDataPoolSize = 24 * GB;
1608
1609 if (useSmallMappingSize(AbsPath)) {
1610 MaxIndexSize = 1 * GB;
1611 MaxDataPoolSize = 2 * GB;
1612 }
1613
1614 auto CustomSize = getOverriddenMaxMappingSize();
1615 if (!CustomSize)
1616 return CustomSize.takeError();
1617 if (*CustomSize)
1618 MaxIndexSize = MaxDataPoolSize = **CustomSize;
1619
1620 SmallString<256> IndexPath(AbsPath);
1622 std::optional<OnDiskTrieRawHashMap> Index;
1624 IndexPath, IndexTableName + "[" + HashName + "]",
1625 HashByteSize * CHAR_BIT,
1626 /*DataSize=*/sizeof(TrieRecord), MaxIndexSize,
1627 /*MinFileSize=*/MB, Logger)
1628 .moveInto(Index))
1629 return std::move(E);
1630
1631 uint32_t UserHeaderSize = sizeof(std::atomic<uint64_t>);
1632
1633 SmallString<256> DataPoolPath(AbsPath);
1635 std::optional<OnDiskDataAllocator> DataPool;
1636 StringRef PolicyName =
1637 Policy == FaultInPolicy::SingleNode ? "single" : "full";
1639 DataPoolPath,
1640 DataPoolTableName + "[" + HashName + "]" + PolicyName,
1641 MaxDataPoolSize, /*MinFileSize=*/MB, UserHeaderSize, Logger,
1642 [](void *UserHeaderPtr) {
1643 new (UserHeaderPtr) std::atomic<uint64_t>(0);
1644 })
1645 .moveInto(DataPool))
1646 return std::move(E);
1647 if (DataPool->getUserHeader().size() != UserHeaderSize)
1649 "unexpected user header in '" + DataPoolPath +
1650 "'");
1651
1652 return std::unique_ptr<OnDiskGraphDB>(
1653 new OnDiskGraphDB(AbsPath, std::move(*Index), std::move(*DataPool),
1654 UpstreamDB, Policy, std::move(Logger)));
1655}
1656
1657OnDiskGraphDB::OnDiskGraphDB(StringRef RootPath, OnDiskTrieRawHashMap Index,
1658 OnDiskDataAllocator DataPool,
1659 OnDiskGraphDB *UpstreamDB, FaultInPolicy Policy,
1660 std::shared_ptr<OnDiskCASLogger> Logger)
1661 : Index(std::move(Index)), DataPool(std::move(DataPool)),
1662 RootPath(RootPath.str()), UpstreamDB(UpstreamDB), FIPolicy(Policy),
1663 Logger(std::move(Logger)) {
1664 /// Lifetime for "big" objects not in DataPool.
1665 ///
1666 /// NOTE: Could use ThreadSafeTrieRawHashMap here. For now, doing something
1667 /// simpler on the assumption there won't be much contention since most data
1668 /// is not big. If there is contention, and we've already fixed ObjectProxy
1669 /// object handles to be cheap enough to use consistently, the fix might be
 1670 /// to make better use of them rather than optimizing this map.
1671 ///
1672 /// FIXME: Figure out the right number of shards, if any.
1673 StandaloneData = new StandaloneDataMapTy();
1674}
1675
1677 delete static_cast<StandaloneDataMapTy *>(StandaloneData);
1678}
1679
1680Error OnDiskGraphDB::importFullTree(ObjectID PrimaryID,
1681 ObjectHandle UpstreamNode) {
 1682 // Copies the full CAS tree from upstream. Uses depth-first copying to protect
 1683 // against the process dying during the import and leaving the database with an
 1684 // incomplete tree. Note that if the upstream has missing nodes then the tree
 1685 // will be copied with missing nodes as well; it won't be considered an error.
1686 struct UpstreamCursor {
1688 size_t RefsCount;
1691 };
 1692 /// Keeps track of the visitation state for the current node and all of its
 1693 /// parents.
 1695 /// Keeps track of the currently visited nodes as they are imported into the
 1696 /// primary database, from the current node and its parents. When a node is
1697 /// entered for visitation it appends its own ID, then appends referenced IDs
1698 /// as they get imported. When a node is fully imported it removes the
1699 /// referenced IDs from the bottom of the stack which leaves its own ID at the
1700 /// bottom, adding to the list of referenced IDs for the parent node.
1701 SmallVector<ObjectID, 128> PrimaryNodesStack;
1702
1703 auto enqueueNode = [&](ObjectID PrimaryID, std::optional<ObjectHandle> Node) {
1704 PrimaryNodesStack.push_back(PrimaryID);
1705 if (!Node)
1706 return;
1707 auto Refs = UpstreamDB->getObjectRefs(*Node);
1708 CursorStack.push_back(
1709 {*Node, (size_t)llvm::size(Refs), Refs.begin(), Refs.end()});
1710 };
1711
1712 enqueueNode(PrimaryID, UpstreamNode);
1713
1714 while (!CursorStack.empty()) {
1715 UpstreamCursor &Cur = CursorStack.back();
1716 if (Cur.RefI == Cur.RefE) {
1717 // Copy the node data into the primary store.
1718 // FIXME: Use hard-link or cloning if the file-system supports it and data
1719 // is stored into a separate file.
1720
1721 // The bottom of \p PrimaryNodesStack contains the primary ID for the
1722 // current node plus the list of imported referenced IDs.
1723 assert(PrimaryNodesStack.size() >= Cur.RefsCount + 1);
1724 ObjectID PrimaryID = *(PrimaryNodesStack.end() - Cur.RefsCount - 1);
1725 auto PrimaryRefs = ArrayRef(PrimaryNodesStack)
1726 .slice(PrimaryNodesStack.size() - Cur.RefsCount);
1727 auto Data = UpstreamDB->getObjectData(Cur.Node);
1728 if (Error E = store(PrimaryID, PrimaryRefs, Data))
1729 return E;
1730 // Remove the current node and its IDs from the stack.
1731 PrimaryNodesStack.truncate(PrimaryNodesStack.size() - Cur.RefsCount);
1732 CursorStack.pop_back();
1733 continue;
1734 }
1735
1736 ObjectID UpstreamID = *(Cur.RefI++);
1737 auto PrimaryID = getReference(UpstreamDB->getDigest(UpstreamID));
1738 if (LLVM_UNLIKELY(!PrimaryID))
1739 return PrimaryID.takeError();
1740 if (containsObject(*PrimaryID, /*CheckUpstream=*/false)) {
1741 // This \p ObjectID already exists in the primary. Either it was imported
1742 // via \p importFullTree or the client created it, in which case the
1743 // client takes responsibility for how it was formed.
1744 enqueueNode(*PrimaryID, std::nullopt);
1745 continue;
1746 }
1747 Expected<std::optional<ObjectHandle>> UpstreamNode =
1748 UpstreamDB->load(UpstreamID);
1749 if (!UpstreamNode)
1750 return UpstreamNode.takeError();
1751 enqueueNode(*PrimaryID, *UpstreamNode);
1752 }
1753
1754 assert(PrimaryNodesStack.size() == 1);
1755 assert(PrimaryNodesStack.front() == PrimaryID);
1756 return Error::success();
1757}
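
The two-stack bookkeeping above can be hard to follow from the comments alone, so here is a minimal, self-contained sketch of the same discipline on a toy in-memory graph. ToyNode, importToy, and the index-based representation are invented for illustration; the real code above works on ObjectID/ObjectHandle and persists each node via store(). The property it demonstrates is the crash-safety argument: a node is only "stored" after everything it references has been.

#include <cassert>
#include <cstddef>
#include <string>
#include <vector>

// Toy stand-in for an upstream node: a payload plus indices of child nodes.
struct ToyNode {
  std::string Data;
  std::vector<size_t> Children;
};

// Mirrors the cursor/ID stack discipline of importFullTree: a node's ID is
// pushed when it is first visited, its imported children's IDs pile up after
// it, and the children's IDs are popped once the node itself is "stored".
inline void importToy(const std::vector<ToyNode> &Upstream, size_t Root,
                      std::vector<size_t> &ImportOrder) {
  struct Cursor {
    size_t Node;
    size_t NextChild = 0;
  };
  std::vector<Cursor> Cursors;
  std::vector<size_t> IDs;

  IDs.push_back(Root);
  Cursors.push_back({Root});

  while (!Cursors.empty()) {
    Cursor &Cur = Cursors.back();
    const ToyNode &N = Upstream[Cur.Node];
    if (Cur.NextChild == N.Children.size()) {
      // All children already imported: "store" this node, then drop the
      // children's IDs so only this node's ID remains for its parent.
      assert(IDs.size() >= N.Children.size() + 1);
      ImportOrder.push_back(Cur.Node);
      IDs.resize(IDs.size() - N.Children.size());
      Cursors.pop_back();
      continue;
    }
    size_t Child = N.Children[Cur.NextChild++];
    IDs.push_back(Child);
    Cursors.push_back({Child});
  }
  assert(IDs.size() == 1 && IDs.front() == Root);
}

For a root 0 with children 1 and 2, ImportOrder comes out as {1, 2, 0}: strictly post-order, which is why an interrupted import never leaves a stored parent referring to children that were never copied. (Unlike the real code, this toy revisits shared children instead of skipping ones that already exist in the primary store.)
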
1758
1759Error OnDiskGraphDB::importSingleNode(ObjectID PrimaryID,
1760 ObjectHandle UpstreamNode) {
1761 // Copies only a single node; it doesn't copy the referenced nodes.
1762
1763 // Copy the node data into the primary store.
1764 // FIXME: Use hard-link or cloning if the file-system supports it and data is
1765 // stored into a separate file.
1766 auto Data = UpstreamDB->getObjectData(UpstreamNode);
1767 auto UpstreamRefs = UpstreamDB->getObjectRefs(UpstreamNode);
1768 SmallVector<ObjectID, 64> Refs;
1769 Refs.reserve(llvm::size(UpstreamRefs));
1770 for (ObjectID UpstreamRef : UpstreamRefs) {
1771 auto Ref = getReference(UpstreamDB->getDigest(UpstreamRef));
1772 if (LLVM_UNLIKELY(!Ref))
1773 return Ref.takeError();
1774 Refs.push_back(*Ref);
1775 }
1776
1777 return store(PrimaryID, Refs, Data);
1778}
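
As a hedged note on what the single-node copy leaves behind, the helper below is hypothetical and only for illustration: after importSingleNode the node's data and its reference list are in the primary database, but the referenced objects themselves are typically still upstream-only, and each one is faulted in on demand when it is loaded (see faultInFromUpstream below).

// Hypothetical helper: count how many references of a freshly imported node
// are not yet stored locally. CheckUpstream=false mirrors the check used by
// importFullTree above, so this asks about the primary store only.
static unsigned countUnmaterializedRefs(OnDiskGraphDB &Primary,
                                        ObjectHandle Node) {
  unsigned Missing = 0;
  for (ObjectID Ref : Primary.getObjectRefs(Node))
    if (!Primary.containsObject(Ref, /*CheckUpstream=*/false))
      ++Missing; // Would be faulted in from upstream by a later load(Ref).
  return Missing;
}
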
1779
1780Expected<std::optional<ObjectHandle>>
1781OnDiskGraphDB::faultInFromUpstream(ObjectID PrimaryID) {
1782 if (!UpstreamDB)
1783 return std::nullopt;
1784
1785 auto UpstreamID = UpstreamDB->getReference(getDigest(PrimaryID));
1786 if (LLVM_UNLIKELY(!UpstreamID))
1787 return UpstreamID.takeError();
1788
1789 Expected<std::optional<ObjectHandle>> UpstreamNode =
1790 UpstreamDB->load(*UpstreamID);
1791 if (!UpstreamNode)
1792 return UpstreamNode.takeError();
1793 if (!*UpstreamNode)
1794 return std::nullopt;
1795
1796 if (Error E = FIPolicy == FaultInPolicy::SingleNode
1797 ? importSingleNode(PrimaryID, **UpstreamNode)
1798 : importFullTree(PrimaryID, **UpstreamNode))
1799 return std::move(E);
1800 return load(PrimaryID);
1801}
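
Putting the pieces together, here is a hypothetical usage sketch of the fault-in path, assuming the OnDiskGraphDB::open() overload that takes an optional upstream database, logger, and fault-in policy. The paths, the BLAKE3/32-byte hash configuration, the all-zero digest, and the use of cantFail() to collapse error handling are placeholders for illustration, not a recommended pattern.

// Hypothetical sketch: chain a primary database to an upstream one, then load
// an object that only exists upstream so the primary faults it in.
static void faultInUsageSketch() {
  using namespace llvm;
  using namespace llvm::cas::ondisk;

  std::unique_ptr<OnDiskGraphDB> Upstream = cantFail(
      OnDiskGraphDB::open("/tmp/cas.upstream", "BLAKE3", /*HashByteSize=*/32));
  std::unique_ptr<OnDiskGraphDB> Primary = cantFail(OnDiskGraphDB::open(
      "/tmp/cas.primary", "BLAKE3", /*HashByteSize=*/32, Upstream.get(),
      /*Logger=*/nullptr, OnDiskGraphDB::FaultInPolicy::SingleNode));

  // A digest assumed to name an object already stored in the upstream store;
  // it must match the configured hash size.
  SmallVector<uint8_t, 32> Digest(32, 0);
  ObjectID ID = cantFail(Primary->getReference(Digest));

  // load() misses in the primary index, calls faultInFromUpstream above, and
  // (under SingleNode) copies just this one node before returning a handle.
  std::optional<ObjectHandle> Handle = cantFail(Primary->load(ID));
  if (Handle) {
    ArrayRef<char> Data = Primary->getObjectData(*Handle);
    (void)Data;
  }
}
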