LLVM 18.0.0git
ConcurrentHashtable.h
Go to the documentation of this file.
1//===- ConcurrentHashtable.h ------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_ADT_CONCURRENTHASHTABLE_H
10#define LLVM_ADT_CONCURRENTHASHTABLE_H
11
12#include "llvm/ADT/DenseMap.h"
13#include "llvm/ADT/Hashing.h"
15#include "llvm/ADT/STLExtras.h"
18#include "llvm/Support/Debug.h"
21#include "llvm/Support/xxhash.h"
22#include <atomic>
23#include <cstddef>
24#include <iomanip>
25#include <mutex>
26#include <sstream>
27#include <type_traits>
28
29namespace llvm {
30
31/// ConcurrentHashTable - is a resizeable concurrent hashtable.
32/// The number of resizings limited up to x2^31. This hashtable is
33/// useful to have efficient access to aggregate data(like strings,
34/// type descriptors...) and to keep only single copy of such
35/// an aggregate. The hashtable allows only concurrent insertions:
36///
37/// KeyDataTy* = insert ( const KeyTy& );
38///
39/// Data structure:
40///
41/// Inserted value KeyTy is mapped to 64-bit hash value ->
42///
43/// [------- 64-bit Hash value --------]
44/// [ StartEntryIndex ][ Bucket Index ]
45/// | |
46/// points to the points to
47/// first probe the bucket.
48/// position inside
49/// bucket entries
50///
51/// After initialization, all buckets have an initial size. During insertions,
52/// buckets might be extended to contain more entries. Each bucket can be
53/// independently resized and rehashed(no need to lock the whole table).
54/// Different buckets may have different sizes. If the single bucket is full
55/// then the bucket is resized.
56///
57/// BucketsArray keeps all buckets. Each bucket keeps an array of Entries
58/// (pointers to KeyDataTy) and another array of entries hashes:
59///
60/// BucketsArray[BucketIdx].Hashes[EntryIdx]:
61/// BucketsArray[BucketIdx].Entries[EntryIdx]:
62///
63/// [Bucket 0].Hashes -> [uint32_t][uint32_t]
64/// [Bucket 0].Entries -> [KeyDataTy*][KeyDataTy*]
65///
66/// [Bucket 1].Hashes -> [uint32_t][uint32_t][uint32_t][uint32_t]
67/// [Bucket 1].Entries -> [KeyDataTy*][KeyDataTy*][KeyDataTy*][KeyDataTy*]
68/// .........................
69/// [Bucket N].Hashes -> [uint32_t][uint32_t][uint32_t]
70/// [Bucket N].Entries -> [KeyDataTy*][KeyDataTy*][KeyDataTy*]
71///
72/// ConcurrentHashTableByPtr uses an external thread-safe allocator to allocate
73/// KeyDataTy items.
74
75template <typename KeyTy, typename KeyDataTy, typename AllocatorTy>
77public:
78 /// \returns Hash value for the specified \p Key.
79 static inline uint64_t getHashValue(const KeyTy &Key) {
80 return xxh3_64bits(Key);
81 }
82
83 /// \returns true if both \p LHS and \p RHS are equal.
84 static inline bool isEqual(const KeyTy &LHS, const KeyTy &RHS) {
85 return LHS == RHS;
86 }
87
88 /// \returns key for the specified \p KeyData.
89 static inline const KeyTy &getKey(const KeyDataTy &KeyData) {
90 return KeyData.getKey();
91 }
92
93 /// \returns newly created object of KeyDataTy type.
94 static inline KeyDataTy *create(const KeyTy &Key, AllocatorTy &Allocator) {
95 return KeyDataTy::create(Key, Allocator);
96 }
97};
98
99template <typename KeyTy, typename KeyDataTy, typename AllocatorTy,
100 typename Info =
101 ConcurrentHashTableInfoByPtr<KeyTy, KeyDataTy, AllocatorTy>>
103public:
105 AllocatorTy &Allocator, uint64_t EstimatedSize = 100000,
106 size_t ThreadsNum = parallel::strategy.compute_thread_count(),
107 size_t InitialNumberOfBuckets = 128)
109 assert((ThreadsNum > 0) && "ThreadsNum must be greater than 0");
110 assert((InitialNumberOfBuckets > 0) &&
111 "InitialNumberOfBuckets must be greater than 0");
112
113 // Calculate number of buckets.
114 uint64_t EstimatedNumberOfBuckets = ThreadsNum;
115 if (ThreadsNum > 1) {
116 EstimatedNumberOfBuckets *= InitialNumberOfBuckets;
117 EstimatedNumberOfBuckets *= std::max(
118 1,
119 countr_zero(PowerOf2Ceil(EstimatedSize / InitialNumberOfBuckets)) >>
120 2);
121 }
122 EstimatedNumberOfBuckets = PowerOf2Ceil(EstimatedNumberOfBuckets);
124 std::min(EstimatedNumberOfBuckets, (uint64_t)(1Ull << 31));
125
126 // Allocate buckets.
127 BucketsArray = std::make_unique<Bucket[]>(NumberOfBuckets);
128
129 InitialBucketSize = EstimatedSize / NumberOfBuckets;
132
133 // Initialize each bucket.
134 for (uint32_t Idx = 0; Idx < NumberOfBuckets; Idx++) {
136 memset(Hashes, 0, sizeof(ExtHashBitsTy) * InitialBucketSize);
137
139 memset(Entries, 0, sizeof(EntryDataTy) * InitialBucketSize);
140
142 BucketsArray[Idx].Hashes = Hashes;
143 BucketsArray[Idx].Entries = Entries;
144 }
145
146 // Calculate masks.
148
149 size_t LeadingZerosNumber = countl_zero(HashMask);
150 HashBitsNum = 64 - LeadingZerosNumber;
151
152 // We keep only high 32-bits of hash value. So bucket size cannot
153 // exceed 2^31. Bucket size is always power of two.
154 MaxBucketSize = 1Ull << (std::min((size_t)31, LeadingZerosNumber));
155
156 // Calculate mask for extended hash bits.
158 }
159
161 // Deallocate buckets.
162 for (uint32_t Idx = 0; Idx < NumberOfBuckets; Idx++) {
163 delete[] BucketsArray[Idx].Hashes;
164 delete[] BucketsArray[Idx].Entries;
165 }
166 }
167
168 /// Insert new value \p NewValue or return already existing entry.
169 ///
170 /// \returns entry and "true" if an entry is just inserted or
171 /// "false" if an entry already exists.
172 std::pair<KeyDataTy *, bool> insert(const KeyTy &NewValue) {
173 // Calculate bucket index.
174 uint64_t Hash = Info::getHashValue(NewValue);
175 Bucket &CurBucket = BucketsArray[getBucketIdx(Hash)];
176 uint32_t ExtHashBits = getExtHashBits(Hash);
177
178#if LLVM_ENABLE_THREADS
179 // Lock bucket.
180 CurBucket.Guard.lock();
181#endif
182
183 HashesPtr BucketHashes = CurBucket.Hashes;
184 DataPtr BucketEntries = CurBucket.Entries;
185 uint32_t CurEntryIdx = getStartIdx(ExtHashBits, CurBucket.Size);
186
187 while (true) {
188 uint32_t CurEntryHashBits = BucketHashes[CurEntryIdx];
189
190 if (CurEntryHashBits == 0 && BucketEntries[CurEntryIdx] == nullptr) {
191 // Found empty slot. Insert data.
192 KeyDataTy *NewData = Info::create(NewValue, MultiThreadAllocator);
193 BucketEntries[CurEntryIdx] = NewData;
194 BucketHashes[CurEntryIdx] = ExtHashBits;
195
196 CurBucket.NumberOfEntries++;
197 RehashBucket(CurBucket);
198
199#if LLVM_ENABLE_THREADS
200 CurBucket.Guard.unlock();
201#endif
202
203 return {NewData, true};
204 }
205
206 if (CurEntryHashBits == ExtHashBits) {
207 // Hash matched. Check value for equality.
208 KeyDataTy *EntryData = BucketEntries[CurEntryIdx];
209 if (Info::isEqual(Info::getKey(*EntryData), NewValue)) {
210 // Already existed entry matched with inserted data is found.
211#if LLVM_ENABLE_THREADS
212 CurBucket.Guard.unlock();
213#endif
214
215 return {EntryData, false};
216 }
217 }
218
219 CurEntryIdx++;
220 CurEntryIdx &= (CurBucket.Size - 1);
221 }
222
223 llvm_unreachable("Insertion error.");
224 return {};
225 }
226
227 /// Print information about current state of hash table structures.
229 OS << "\n--- HashTable statistic:\n";
230 OS << "\nNumber of buckets = " << NumberOfBuckets;
231 OS << "\nInitial bucket size = " << InitialBucketSize;
232
233 uint64_t NumberOfNonEmptyBuckets = 0;
234 uint64_t NumberOfEntriesPlusEmpty = 0;
235 uint64_t OverallNumberOfEntries = 0;
236 uint64_t OverallSize = sizeof(*this) + NumberOfBuckets * sizeof(Bucket);
237
238 DenseMap<uint32_t, uint32_t> BucketSizesMap;
239
240 // For each bucket...
241 for (uint32_t Idx = 0; Idx < NumberOfBuckets; Idx++) {
242 Bucket &CurBucket = BucketsArray[Idx];
243
244 BucketSizesMap[CurBucket.Size]++;
245
246 if (CurBucket.NumberOfEntries != 0)
247 NumberOfNonEmptyBuckets++;
248 NumberOfEntriesPlusEmpty += CurBucket.Size;
249 OverallNumberOfEntries += CurBucket.NumberOfEntries;
250 OverallSize +=
251 (sizeof(ExtHashBitsTy) + sizeof(EntryDataTy)) * CurBucket.Size;
252 }
253
254 OS << "\nOverall number of entries = " << OverallNumberOfEntries;
255 OS << "\nOverall number of non empty buckets = " << NumberOfNonEmptyBuckets;
256 for (auto &BucketSize : BucketSizesMap)
257 OS << "\n Number of buckets with size " << BucketSize.first << ": "
258 << BucketSize.second;
259
260 std::stringstream stream;
261 stream << std::fixed << std::setprecision(2)
262 << ((float)OverallNumberOfEntries / (float)NumberOfEntriesPlusEmpty);
263 std::string str = stream.str();
264
265 OS << "\nLoad factor = " << str;
266 OS << "\nOverall allocated size = " << OverallSize;
267 }
268
269protected:
271 using EntryDataTy = KeyDataTy *;
272
275
276 // Bucket structure. Keeps bucket data.
277 struct Bucket {
278 Bucket() = default;
279
280 // Size of bucket.
282
283 // Number of non-null entries.
285
286 // Hashes for [Size] entries.
287 HashesPtr Hashes = nullptr;
288
289 // [Size] entries.
290 DataPtr Entries = nullptr;
291
292#if LLVM_ENABLE_THREADS
293 // Mutex for this bucket.
294 std::mutex Guard;
295#endif
296 };
297
298 // Reallocate and rehash bucket if this is full enough.
299 void RehashBucket(Bucket &CurBucket) {
300 assert((CurBucket.Size > 0) && "Uninitialised bucket");
301 if (CurBucket.NumberOfEntries < CurBucket.Size * 0.9)
302 return;
303
304 if (CurBucket.Size >= MaxBucketSize)
305 report_fatal_error("ConcurrentHashTable is full");
306
307 uint32_t NewBucketSize = CurBucket.Size << 1;
308 assert((NewBucketSize <= MaxBucketSize) && "New bucket size is too big");
309 assert((CurBucket.Size < NewBucketSize) &&
310 "New bucket size less than size of current bucket");
311
312 // Store old entries & hashes arrays.
313 HashesPtr SrcHashes = CurBucket.Hashes;
314 DataPtr SrcEntries = CurBucket.Entries;
315
316 // Allocate new entries&hashes arrays.
317 HashesPtr DestHashes = new ExtHashBitsTy[NewBucketSize];
318 memset(DestHashes, 0, sizeof(ExtHashBitsTy) * NewBucketSize);
319
320 DataPtr DestEntries = new EntryDataTy[NewBucketSize];
321 memset(DestEntries, 0, sizeof(EntryDataTy) * NewBucketSize);
322
323 // For each entry in source arrays...
324 for (uint32_t CurSrcEntryIdx = 0; CurSrcEntryIdx < CurBucket.Size;
325 CurSrcEntryIdx++) {
326 uint32_t CurSrcEntryHashBits = SrcHashes[CurSrcEntryIdx];
327
328 // Check for null entry.
329 if (CurSrcEntryHashBits == 0 && SrcEntries[CurSrcEntryIdx] == nullptr)
330 continue;
331
332 uint32_t StartDestIdx = getStartIdx(CurSrcEntryHashBits, NewBucketSize);
333
334 // Insert non-null entry into the new arrays.
335 while (true) {
336 uint32_t CurDestEntryHashBits = DestHashes[StartDestIdx];
337
338 if (CurDestEntryHashBits == 0 && DestEntries[StartDestIdx] == nullptr) {
339 // Found empty slot. Insert data.
340 DestHashes[StartDestIdx] = CurSrcEntryHashBits;
341 DestEntries[StartDestIdx] = SrcEntries[CurSrcEntryIdx];
342 break;
343 }
344
345 StartDestIdx++;
346 StartDestIdx = StartDestIdx & (NewBucketSize - 1);
347 }
348 }
349
350 // Update bucket fields.
351 CurBucket.Hashes = DestHashes;
352 CurBucket.Entries = DestEntries;
353 CurBucket.Size = NewBucketSize;
354
355 // Delete old bucket entries.
356 if (SrcHashes != nullptr)
357 delete[] SrcHashes;
358 if (SrcEntries != nullptr)
359 delete[] SrcEntries;
360 }
361
362 uint32_t getBucketIdx(hash_code Hash) { return Hash & HashMask; }
363
365 return (Hash & ExtHashMask) >> HashBitsNum;
366 }
367
368 uint32_t getStartIdx(uint32_t ExtHashBits, uint32_t BucketSize) {
369 assert((BucketSize > 0) && "Empty bucket");
370
371 return ExtHashBits & (BucketSize - 1);
372 }
373
374 // Number of bits in hash mask.
376
377 // Hash mask.
379
380 // Hash mask for the extended hash bits.
382
383 // The maximal bucket size.
385
386 // Initial size of bucket.
388
389 // The number of buckets.
391
392 // Array of buckets.
393 std::unique_ptr<Bucket[]> BucketsArray;
394
395 // Used for allocating KeyDataTy values.
397};
398
399} // end namespace llvm
400
401#endif // LLVM_ADT_CONCURRENTHASHTABLE_H
This file defines the BumpPtrAllocator interface.
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
This file defines the PointerIntPair class.
Basic Register Allocator
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
This file defines the SmallVector class.
Value * RHS
Value * LHS
std::pair< KeyDataTy *, bool > insert(const KeyTy &NewValue)
Insert new value NewValue or return already existing entry.
uint32_t getStartIdx(uint32_t ExtHashBits, uint32_t BucketSize)
std::unique_ptr< Bucket[]> BucketsArray
uint32_t getBucketIdx(hash_code Hash)
void printStatistic(raw_ostream &OS)
Print information about current state of hash table structures.
void RehashBucket(Bucket &CurBucket)
uint32_t getExtHashBits(uint64_t Hash)
ConcurrentHashTableByPtr(AllocatorTy &Allocator, uint64_t EstimatedSize=100000, size_t ThreadsNum=parallel::strategy.compute_thread_count(), size_t InitialNumberOfBuckets=128)
ConcurrentHashTable - is a resizeable concurrent hashtable.
static KeyDataTy * create(const KeyTy &Key, AllocatorTy &Allocator)
static const KeyTy & getKey(const KeyDataTy &KeyData)
static bool isEqual(const KeyTy &LHS, const KeyTy &RHS)
static uint64_t getHashValue(const KeyTy &Key)
An opaque object representing a hash code.
Definition: Hashing.h:74
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
ThreadPoolStrategy strategy
Definition: Parallel.cpp:20
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
uint64_t xxh3_64bits(ArrayRef< uint8_t > data)
Definition: xxhash.cpp:397
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:361
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:179
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: bit.h:245
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156