LLVM 20.0.0git
TrieRawHashMap.h
Go to the documentation of this file.
1//===- TrieRawHashMap.h -----------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_ADT_TRIERAWHASHMAP_H
10#define LLVM_ADT_TRIERAWHASHMAP_H
11
12#include "llvm/ADT/ArrayRef.h"
13#include <atomic>
14#include <optional>
15
16namespace llvm {
17
18class raw_ostream;
19
20/// TrieRawHashMap - is a lock-free thread-safe trie that is can be used to
21/// store/index data based on a hash value. It can be customized to work with
22/// any hash algorithm or store any data.
23///
24/// Data structure:
25/// Data node stored in the Trie contains both hash and data:
26/// struct {
27/// HashT Hash;
28/// DataT Data;
29/// };
30///
31/// Data is stored/indexed via a prefix tree, where each node in the tree can be
32/// either the root, a sub-trie or a data node. Assuming a 4-bit hash and two
33/// data objects {0001, A} and {0100, B}, it can be stored in a trie
34/// (assuming Root has 2 bits, SubTrie has 1 bit):
35/// +--------+
36/// |Root[00]| -> {0001, A}
37/// | [01]| -> {0100, B}
38/// | [10]| (empty)
39/// | [11]| (empty)
40/// +--------+
41///
42/// Inserting a new object {0010, C} will result in:
43/// +--------+ +----------+
44/// |Root[00]| -> |SubTrie[0]| -> {0001, A}
45/// | | | [1]| -> {0010, C}
46/// | | +----------+
47/// | [01]| -> {0100, B}
48/// | [10]| (empty)
49/// | [11]| (empty)
50/// +--------+
51/// Note object A is sunk down to a sub-trie during the insertion. All the
52/// nodes are inserted through compare-exchange to ensure thread-safe and
53/// lock-free.
54///
55/// To find an object in the trie, walk the tree with prefix of the hash until
56/// the data node is found. Then the hash is compared with the hash stored in
57/// the data node to see if the is the same object.
58///
59/// Hash collision is not allowed so it is recommended to use trie with a
60/// "strong" hashing algorithm. A well-distributed hash can also result in
61/// better performance and memory usage.
62///
63/// It currently does not support iteration and deletion.
64
65/// Base class for a lock-free thread-safe hash-mapped trie.
67public:
68 static constexpr size_t TrieContentBaseSize = 4;
69 static constexpr size_t DefaultNumRootBits = 6;
70 static constexpr size_t DefaultNumSubtrieBits = 4;
71
72private:
73 template <class T> struct AllocValueType {
75 std::aligned_union_t<sizeof(T), T> Content;
76 };
77
78protected:
79 template <class T>
80 static constexpr size_t DefaultContentAllocSize = sizeof(AllocValueType<T>);
81
82 template <class T>
83 static constexpr size_t DefaultContentAllocAlign = alignof(AllocValueType<T>);
84
85 template <class T>
86 static constexpr size_t DefaultContentOffset =
87 offsetof(AllocValueType<T>, Content);
88
89public:
90 static void *operator new(size_t Size) { return ::operator new(Size); }
91 void operator delete(void *Ptr) { ::operator delete(Ptr); }
92
93 LLVM_DUMP_METHOD void dump() const;
94 void print(raw_ostream &OS) const;
95
96protected:
97 /// Result of a lookup. Suitable for an insertion hint. Maybe could be
98 /// expanded into an iterator of sorts, but likely not useful (visiting
99 /// everything in the trie should probably be done some way other than
100 /// through an iterator pattern).
102 protected:
103 void *get() const { return I == -2u ? P : nullptr; }
104
105 public:
106 PointerBase() noexcept = default;
107
108 private:
110 explicit PointerBase(void *Content) : P(Content), I(-2u) {}
111 PointerBase(void *P, unsigned I, unsigned B) : P(P), I(I), B(B) {}
112
113 bool isHint() const { return I != -1u && I != -2u; }
114
115 void *P = nullptr;
116 unsigned I = -1u;
117 unsigned B = 0;
118 };
119
120 /// Find the stored content with hash.
121 PointerBase find(ArrayRef<uint8_t> Hash) const;
122
123 /// Insert and return the stored content.
124 PointerBase
125 insert(PointerBase Hint, ArrayRef<uint8_t> Hash,
126 function_ref<const uint8_t *(void *Mem, ArrayRef<uint8_t> Hash)>
127 Constructor);
128
130
132 size_t ContentAllocSize, size_t ContentAllocAlign, size_t ContentOffset,
133 std::optional<size_t> NumRootBits = std::nullopt,
134 std::optional<size_t> NumSubtrieBits = std::nullopt);
135
136 /// Destructor, which asserts if there's anything to do. Subclasses should
137 /// call \a destroyImpl().
138 ///
139 /// \pre \a destroyImpl() was already called.
141 void destroyImpl(function_ref<void(void *ValueMem)> Destructor);
142
144
145 // Move assignment is not supported as it is not thread-safe.
148
149 // No copy.
153
154 // Debug functions. Implementation details and not guaranteed to be
155 // thread-safe.
156 PointerBase getRoot() const;
157 unsigned getStartBit(PointerBase P) const;
158 unsigned getNumBits(PointerBase P) const;
159 unsigned getNumSlotUsed(PointerBase P) const;
160 std::string getTriePrefixAsString(PointerBase P) const;
161 unsigned getNumTries() const;
162 // Visit next trie in the allocation chain.
164
165private:
167 const unsigned short ContentAllocSize;
168 const unsigned short ContentAllocAlign;
169 const unsigned short ContentOffset;
170 unsigned short NumRootBits;
171 unsigned short NumSubtrieBits;
172 class ImplType;
173 // ImplPtr is owned by ThreadSafeTrieRawHashMapBase and needs to be freed in
174 // destroyImpl.
175 std::atomic<ImplType *> ImplPtr;
176 ImplType &getOrCreateImpl();
177 ImplType *getImpl() const;
178};
179
180/// Lock-free thread-safe hash-mapped trie.
181template <class T, size_t NumHashBytes>
183public:
184 using HashT = std::array<uint8_t, NumHashBytes>;
185
187 struct value_type {
188 const HashT Hash;
190
191 value_type(value_type &&) = default;
192 value_type(const value_type &) = default;
193
195 : Hash(makeHash(Hash)), Data(Data) {}
197 : Hash(makeHash(Hash)), Data(std::move(Data)) {}
198
199 private:
201
202 struct EmplaceTag {};
203 template <class... ArgsT>
204 value_type(ArrayRef<uint8_t> Hash, EmplaceTag, ArgsT &&...Args)
205 : Hash(makeHash(Hash)), Data(std::forward<ArgsT>(Args)...) {}
206
207 static HashT makeHash(ArrayRef<uint8_t> HashRef) {
208 HashT Hash;
209 std::copy(HashRef.begin(), HashRef.end(), Hash.data());
210 return Hash;
211 }
212 };
213
214 using ThreadSafeTrieRawHashMapBase::operator delete;
216
219
220private:
221 template <class ValueT> class PointerImpl : PointerBase {
222 friend class ThreadSafeTrieRawHashMap;
223
224 ValueT *get() const {
225 return reinterpret_cast<ValueT *>(PointerBase::get());
226 }
227
228 public:
229 ValueT &operator*() const {
230 assert(get());
231 return *get();
232 }
233 ValueT *operator->() const {
234 assert(get());
235 return get();
236 }
237 explicit operator bool() const { return get(); }
238
239 PointerImpl() = default;
240
241 protected:
242 PointerImpl(PointerBase Result) : PointerBase(Result) {}
243 };
244
245public:
246 class pointer;
247 class const_pointer;
248 class pointer : public PointerImpl<value_type> {
250 friend class const_pointer;
251
252 public:
253 pointer() = default;
254
255 private:
256 pointer(PointerBase Result) : pointer::PointerImpl(Result) {}
257 };
258
259 class const_pointer : public PointerImpl<const value_type> {
261
262 public:
263 const_pointer() = default;
264 const_pointer(const pointer &P) : const_pointer::PointerImpl(P) {}
265
266 private:
267 const_pointer(PointerBase Result) : const_pointer::PointerImpl(Result) {}
268 };
269
271 public:
273 assert(Mem && "Constructor already called, or moved away");
274 return assign(::new (Mem) value_type(Hash, std::move(RHS)));
275 }
277 assert(Mem && "Constructor already called, or moved away");
278 return assign(::new (Mem) value_type(Hash, RHS));
279 }
280 template <class... ArgsT> value_type &emplace(ArgsT &&...Args) {
281 assert(Mem && "Constructor already called, or moved away");
282 return assign(::new (Mem)
283 value_type(Hash, typename value_type::EmplaceTag{},
284 std::forward<ArgsT>(Args)...));
285 }
286
288 : Mem(RHS.Mem), Result(RHS.Result), Hash(RHS.Hash) {
289 RHS.Mem = nullptr; // Moved away, cannot call.
290 }
291 ~LazyValueConstructor() { assert(!Mem && "Constructor never called!"); }
292
293 private:
294 value_type &assign(value_type *V) {
295 Mem = nullptr;
296 Result = V;
297 return *V;
298 }
300 LazyValueConstructor() = delete;
301 LazyValueConstructor(void *Mem, value_type *&Result, ArrayRef<uint8_t> Hash)
302 : Mem(Mem), Result(Result), Hash(Hash) {
303 assert(Hash.size() == sizeof(HashT) && "Invalid hash");
304 assert(Mem && "Invalid memory for construction");
305 }
306 void *Mem;
307 value_type *&Result;
309 };
310
311 /// Insert with a hint. Default-constructed hint will work, but it's
312 /// recommended to start with a lookup to avoid overhead in object creation
313 /// if it already exists.
315 function_ref<void(LazyValueConstructor)> OnConstruct) {
317 Hint, Hash, [&](void *Mem, ArrayRef<uint8_t> Hash) {
318 value_type *Result = nullptr;
319 OnConstruct(LazyValueConstructor(Mem, Result, Hash));
320 return Result->Hash.data();
321 }));
322 }
323
325 function_ref<void(LazyValueConstructor)> OnConstruct) {
326 return insertLazy(const_pointer(), Hash, OnConstruct);
327 }
328
330 return insertLazy(Hint, HashedData.Hash, [&](LazyValueConstructor C) {
331 C(std::move(HashedData.Data));
332 });
333 }
334
335 pointer insert(const_pointer Hint, const value_type &HashedData) {
336 return insertLazy(Hint, HashedData.Hash,
337 [&](LazyValueConstructor C) { C(HashedData.Data); });
338 }
339
341 assert(Hash.size() == std::tuple_size<HashT>::value);
343 }
344
346 assert(Hash.size() == std::tuple_size<HashT>::value);
348 }
349
350 ThreadSafeTrieRawHashMap(std::optional<size_t> NumRootBits = std::nullopt,
351 std::optional<size_t> NumSubtrieBits = std::nullopt)
355 NumRootBits, NumSubtrieBits) {}
356
358 if constexpr (std::is_trivially_destructible<value_type>::value)
359 this->destroyImpl(nullptr);
360 else
361 this->destroyImpl(
362 [](void *P) { static_cast<value_type *>(P)->~value_type(); });
363 }
364
365 // Move constructor okay.
367
368 // No move assignment or any copy.
373};
374
375} // namespace llvm
376
377#endif // LLVM_ADT_TRIERAWHASHMAP_H
#define offsetof(TYPE, MEMBER)
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:622
T Content
uint64_t Size
#define I(x, y, z)
Definition: MD5.cpp:58
#define T
#define P(N)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
Value * RHS
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:157
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
iterator begin() const
Definition: ArrayRef.h:156
TrieRawHashMap - is a lock-free thread-safe trie that is can be used to store/index data based on a h...
PointerBase getNextTrie(PointerBase P) const
unsigned getNumBits(PointerBase P) const
ThreadSafeTrieRawHashMapBase & operator=(const ThreadSafeTrieRawHashMapBase &)=delete
unsigned getNumSlotUsed(PointerBase P) const
~ThreadSafeTrieRawHashMapBase()
Destructor, which asserts if there's anything to do.
void destroyImpl(function_ref< void(void *ValueMem)> Destructor)
static constexpr size_t DefaultContentAllocSize
static constexpr size_t DefaultContentAllocAlign
static constexpr size_t DefaultContentOffset
static constexpr size_t DefaultNumSubtrieBits
PointerBase find(ArrayRef< uint8_t > Hash) const
Find the stored content with hash.
LLVM_DUMP_METHOD void dump() const
PointerBase insert(PointerBase Hint, ArrayRef< uint8_t > Hash, function_ref< const uint8_t *(void *Mem, ArrayRef< uint8_t > Hash)> Constructor)
Insert and return the stored content.
unsigned getStartBit(PointerBase P) const
void print(raw_ostream &OS) const
ThreadSafeTrieRawHashMapBase & operator=(ThreadSafeTrieRawHashMapBase &&RHS)=delete
std::string getTriePrefixAsString(PointerBase P) const
static constexpr size_t TrieContentBaseSize
static constexpr size_t DefaultNumRootBits
ThreadSafeTrieRawHashMapBase(const ThreadSafeTrieRawHashMapBase &)=delete
Lock-free thread-safe hash-mapped trie.
ThreadSafeTrieRawHashMap & operator=(const ThreadSafeTrieRawHashMap &)=delete
pointer insertLazy(ArrayRef< uint8_t > Hash, function_ref< void(LazyValueConstructor)> OnConstruct)
ThreadSafeTrieRawHashMap & operator=(ThreadSafeTrieRawHashMap &&)=delete
pointer find(ArrayRef< uint8_t > Hash)
std::array< uint8_t, NumHashBytes > HashT
pointer insertLazy(const_pointer Hint, ArrayRef< uint8_t > Hash, function_ref< void(LazyValueConstructor)> OnConstruct)
Insert with a hint.
const_pointer find(ArrayRef< uint8_t > Hash) const
pointer insert(const_pointer Hint, value_type &&HashedData)
pointer insert(const_pointer Hint, const value_type &HashedData)
ThreadSafeTrieRawHashMap(std::optional< size_t > NumRootBits=std::nullopt, std::optional< size_t > NumSubtrieBits=std::nullopt)
ThreadSafeTrieRawHashMap(const ThreadSafeTrieRawHashMap &)=delete
ThreadSafeTrieRawHashMap(ThreadSafeTrieRawHashMap &&)=default
An efficient, type-erasing, non-owning reference to a callable.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
APInt operator*(APInt a, uint64_t RHS)
Definition: APInt.h:2204
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1873
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
value_type(const value_type &)=default
value_type(ArrayRef< uint8_t > Hash, T &&Data)
value_type(ArrayRef< uint8_t > Hash, const T &Data)