LLVM 22.0.0git
ConcurrentHashtable.h
Go to the documentation of this file.
1//===- ConcurrentHashtable.h ------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_ADT_CONCURRENTHASHTABLE_H
10#define LLVM_ADT_CONCURRENTHASHTABLE_H
11
12#include "llvm/ADT/DenseMap.h"
13#include "llvm/ADT/Hashing.h"
14#include "llvm/ADT/STLExtras.h"
16#include "llvm/Config/llvm-config.h" // for LLVM_ENABLE_THREADS
18#include "llvm/Support/Debug.h"
21#include "llvm/Support/xxhash.h"
22#include <atomic>
23#include <cstddef>
24#include <iomanip>
25#include <mutex>
26#include <sstream>
27#include <type_traits>
28
29namespace llvm {
30
31/// ConcurrentHashTable - is a resizeable concurrent hashtable.
32/// The number of resizings limited up to x2^31. This hashtable is
33/// useful to have efficient access to aggregate data(like strings,
34/// type descriptors...) and to keep only single copy of such
35/// an aggregate. The hashtable allows only concurrent insertions:
36///
37/// KeyDataTy* = insert ( const KeyTy& );
38///
39/// Data structure:
40///
41/// Inserted value KeyTy is mapped to 64-bit hash value ->
42///
43/// [------- 64-bit Hash value --------]
44/// [ StartEntryIndex ][ Bucket Index ]
45/// | |
46/// points to the points to
47/// first probe the bucket.
48/// position inside
49/// bucket entries
50///
51/// After initialization, all buckets have an initial size. During insertions,
52/// buckets might be extended to contain more entries. Each bucket can be
53/// independently resized and rehashed(no need to lock the whole table).
54/// Different buckets may have different sizes. If the single bucket is full
55/// then the bucket is resized.
56///
57/// BucketsArray keeps all buckets. Each bucket keeps an array of Entries
58/// (pointers to KeyDataTy) and another array of entries hashes:
59///
60/// BucketsArray[BucketIdx].Hashes[EntryIdx]:
61/// BucketsArray[BucketIdx].Entries[EntryIdx]:
62///
63/// [Bucket 0].Hashes -> [uint32_t][uint32_t]
64/// [Bucket 0].Entries -> [KeyDataTy*][KeyDataTy*]
65///
66/// [Bucket 1].Hashes -> [uint32_t][uint32_t][uint32_t][uint32_t]
67/// [Bucket 1].Entries -> [KeyDataTy*][KeyDataTy*][KeyDataTy*][KeyDataTy*]
68/// .........................
69/// [Bucket N].Hashes -> [uint32_t][uint32_t][uint32_t]
70/// [Bucket N].Entries -> [KeyDataTy*][KeyDataTy*][KeyDataTy*]
71///
72/// ConcurrentHashTableByPtr uses an external thread-safe allocator to allocate
73/// KeyDataTy items.
74
75template <typename KeyTy, typename KeyDataTy, typename AllocatorTy>
77public:
78 /// \returns Hash value for the specified \p Key.
79 static inline uint64_t getHashValue(const KeyTy &Key) {
80 return xxh3_64bits(Key);
81 }
82
83 /// \returns true if both \p LHS and \p RHS are equal.
84 static inline bool isEqual(const KeyTy &LHS, const KeyTy &RHS) {
85 return LHS == RHS;
86 }
87
88 /// \returns key for the specified \p KeyData.
89 static inline const KeyTy &getKey(const KeyDataTy &KeyData) {
90 return KeyData.getKey();
91 }
92
93 /// \returns newly created object of KeyDataTy type.
94 static inline KeyDataTy *create(const KeyTy &Key, AllocatorTy &Allocator) {
95 return KeyDataTy::create(Key, Allocator);
96 }
97};
98
99template <typename KeyTy, typename KeyDataTy, typename AllocatorTy,
100 typename Info =
101 ConcurrentHashTableInfoByPtr<KeyTy, KeyDataTy, AllocatorTy>>
103public:
105 AllocatorTy &Allocator, uint64_t EstimatedSize = 100000,
106 size_t ThreadsNum = parallel::strategy.compute_thread_count(),
107 size_t InitialNumberOfBuckets = 128)
109 assert((ThreadsNum > 0) && "ThreadsNum must be greater than 0");
110 assert((InitialNumberOfBuckets > 0) &&
111 "InitialNumberOfBuckets must be greater than 0");
112
113 // Calculate number of buckets.
114 uint64_t EstimatedNumberOfBuckets = ThreadsNum;
115 if (ThreadsNum > 1) {
116 EstimatedNumberOfBuckets *= InitialNumberOfBuckets;
117 EstimatedNumberOfBuckets *= std::max(
118 1,
119 countr_zero(PowerOf2Ceil(EstimatedSize / InitialNumberOfBuckets)) >>
120 2);
121 }
122 EstimatedNumberOfBuckets = PowerOf2Ceil(EstimatedNumberOfBuckets);
124 std::min(EstimatedNumberOfBuckets, (uint64_t)(1Ull << 31));
125
126 // Allocate buckets.
127 BucketsArray = std::make_unique<Bucket[]>(NumberOfBuckets);
128
129 InitialBucketSize = EstimatedSize / NumberOfBuckets;
132
133 // Initialize each bucket.
134 for (uint32_t Idx = 0; Idx < NumberOfBuckets; Idx++) {
136 memset(Hashes, 0, sizeof(ExtHashBitsTy) * InitialBucketSize);
137
139 memset(Entries, 0, sizeof(EntryDataTy) * InitialBucketSize);
140
142 BucketsArray[Idx].Hashes = Hashes;
143 BucketsArray[Idx].Entries = Entries;
144 }
145
146 // Calculate masks.
148
149 size_t LeadingZerosNumber = countl_zero(HashMask);
150 HashBitsNum = 64 - LeadingZerosNumber;
151
152 // We keep only high 32-bits of hash value. So bucket size cannot
153 // exceed 2^31. Bucket size is always power of two.
154 MaxBucketSize = 1Ull << (std::min((size_t)31, LeadingZerosNumber));
155
156 // Calculate mask for extended hash bits.
158 }
159
161 // Deallocate buckets.
162 for (uint32_t Idx = 0; Idx < NumberOfBuckets; Idx++) {
163 delete[] BucketsArray[Idx].Hashes;
164 delete[] BucketsArray[Idx].Entries;
165 }
166 }
167
168 /// Insert new value \p NewValue or return already existing entry.
169 ///
170 /// \returns entry and "true" if an entry is just inserted or
171 /// "false" if an entry already exists.
172 std::pair<KeyDataTy *, bool> insert(const KeyTy &NewValue) {
173 // Calculate bucket index.
174 uint64_t Hash = Info::getHashValue(NewValue);
175 Bucket &CurBucket = BucketsArray[getBucketIdx(Hash)];
176 uint32_t ExtHashBits = getExtHashBits(Hash);
177
178#if LLVM_ENABLE_THREADS
179 // Lock bucket.
180 CurBucket.Guard.lock();
181#endif
182
183 HashesPtr BucketHashes = CurBucket.Hashes;
184 DataPtr BucketEntries = CurBucket.Entries;
185 uint32_t CurEntryIdx = getStartIdx(ExtHashBits, CurBucket.Size);
186
187 while (true) {
188 uint32_t CurEntryHashBits = BucketHashes[CurEntryIdx];
189
190 if (CurEntryHashBits == 0 && BucketEntries[CurEntryIdx] == nullptr) {
191 // Found empty slot. Insert data.
192 KeyDataTy *NewData = Info::create(NewValue, MultiThreadAllocator);
193 BucketEntries[CurEntryIdx] = NewData;
194 BucketHashes[CurEntryIdx] = ExtHashBits;
195
196 CurBucket.NumberOfEntries++;
197 RehashBucket(CurBucket);
198
199#if LLVM_ENABLE_THREADS
200 CurBucket.Guard.unlock();
201#endif
202
203 return {NewData, true};
204 }
205
206 if (CurEntryHashBits == ExtHashBits) {
207 // Hash matched. Check value for equality.
208 KeyDataTy *EntryData = BucketEntries[CurEntryIdx];
209 if (Info::isEqual(Info::getKey(*EntryData), NewValue)) {
210 // Already existed entry matched with inserted data is found.
211#if LLVM_ENABLE_THREADS
212 CurBucket.Guard.unlock();
213#endif
214
215 return {EntryData, false};
216 }
217 }
218
219 CurEntryIdx++;
220 CurEntryIdx &= (CurBucket.Size - 1);
221 }
222
223 llvm_unreachable("Insertion error.");
224 return {};
225 }
226
227 /// Print information about current state of hash table structures.
229 OS << "\n--- HashTable statistic:\n";
230 OS << "\nNumber of buckets = " << NumberOfBuckets;
231 OS << "\nInitial bucket size = " << InitialBucketSize;
232
233 uint64_t NumberOfNonEmptyBuckets = 0;
234 uint64_t NumberOfEntriesPlusEmpty = 0;
235 uint64_t OverallNumberOfEntries = 0;
236 uint64_t OverallSize = sizeof(*this) + NumberOfBuckets * sizeof(Bucket);
237
238 DenseMap<uint32_t, uint32_t> BucketSizesMap;
239
240 // For each bucket...
241 for (uint32_t Idx = 0; Idx < NumberOfBuckets; Idx++) {
242 Bucket &CurBucket = BucketsArray[Idx];
243
244 BucketSizesMap[CurBucket.Size]++;
245
246 if (CurBucket.NumberOfEntries != 0)
247 NumberOfNonEmptyBuckets++;
248 NumberOfEntriesPlusEmpty += CurBucket.Size;
249 OverallNumberOfEntries += CurBucket.NumberOfEntries;
250 OverallSize +=
251 (sizeof(ExtHashBitsTy) + sizeof(EntryDataTy)) * CurBucket.Size;
252 }
253
254 OS << "\nOverall number of entries = " << OverallNumberOfEntries;
255 OS << "\nOverall number of non empty buckets = " << NumberOfNonEmptyBuckets;
256 for (auto [Size, Count] : BucketSizesMap)
257 OS << "\n Number of buckets with size " << Size << ": " << Count;
258
259 std::stringstream stream;
260 stream << std::fixed << std::setprecision(2)
261 << ((float)OverallNumberOfEntries / (float)NumberOfEntriesPlusEmpty);
262 std::string str = stream.str();
263
264 OS << "\nLoad factor = " << str;
265 OS << "\nOverall allocated size = " << OverallSize;
266 }
267
268protected:
270 using EntryDataTy = KeyDataTy *;
271
274
275 // Bucket structure. Keeps bucket data.
276 struct Bucket {
277 Bucket() = default;
278
279 // Size of bucket.
281
282 // Number of non-null entries.
284
285 // Hashes for [Size] entries.
286 HashesPtr Hashes = nullptr;
287
288 // [Size] entries.
289 DataPtr Entries = nullptr;
290
291#if LLVM_ENABLE_THREADS
292 // Mutex for this bucket.
293 std::mutex Guard;
294#endif
295 };
296
297 // Reallocate and rehash bucket if this is full enough.
298 void RehashBucket(Bucket &CurBucket) {
299 assert((CurBucket.Size > 0) && "Uninitialised bucket");
300 if (CurBucket.NumberOfEntries < CurBucket.Size * 0.9)
301 return;
302
303 if (CurBucket.Size >= MaxBucketSize)
304 report_fatal_error("ConcurrentHashTable is full");
305
306 uint32_t NewBucketSize = CurBucket.Size << 1;
307 assert((NewBucketSize <= MaxBucketSize) && "New bucket size is too big");
308 assert((CurBucket.Size < NewBucketSize) &&
309 "New bucket size less than size of current bucket");
310
311 // Store old entries & hashes arrays.
312 HashesPtr SrcHashes = CurBucket.Hashes;
313 DataPtr SrcEntries = CurBucket.Entries;
314
315 // Allocate new entries&hashes arrays.
316 HashesPtr DestHashes = new ExtHashBitsTy[NewBucketSize];
317 memset(DestHashes, 0, sizeof(ExtHashBitsTy) * NewBucketSize);
318
319 DataPtr DestEntries = new EntryDataTy[NewBucketSize];
320 memset(DestEntries, 0, sizeof(EntryDataTy) * NewBucketSize);
321
322 // For each entry in source arrays...
323 for (uint32_t CurSrcEntryIdx = 0; CurSrcEntryIdx < CurBucket.Size;
324 CurSrcEntryIdx++) {
325 uint32_t CurSrcEntryHashBits = SrcHashes[CurSrcEntryIdx];
326
327 // Check for null entry.
328 if (CurSrcEntryHashBits == 0 && SrcEntries[CurSrcEntryIdx] == nullptr)
329 continue;
330
331 uint32_t StartDestIdx = getStartIdx(CurSrcEntryHashBits, NewBucketSize);
332
333 // Insert non-null entry into the new arrays.
334 while (true) {
335 uint32_t CurDestEntryHashBits = DestHashes[StartDestIdx];
336
337 if (CurDestEntryHashBits == 0 && DestEntries[StartDestIdx] == nullptr) {
338 // Found empty slot. Insert data.
339 DestHashes[StartDestIdx] = CurSrcEntryHashBits;
340 DestEntries[StartDestIdx] = SrcEntries[CurSrcEntryIdx];
341 break;
342 }
343
344 StartDestIdx++;
345 StartDestIdx = StartDestIdx & (NewBucketSize - 1);
346 }
347 }
348
349 // Update bucket fields.
350 CurBucket.Hashes = DestHashes;
351 CurBucket.Entries = DestEntries;
352 CurBucket.Size = NewBucketSize;
353
354 // Delete old bucket entries.
355 if (SrcHashes != nullptr)
356 delete[] SrcHashes;
357 if (SrcEntries != nullptr)
358 delete[] SrcEntries;
359 }
360
361 uint32_t getBucketIdx(hash_code Hash) { return Hash & HashMask; }
362
364 return (Hash & ExtHashMask) >> HashBitsNum;
365 }
366
367 uint32_t getStartIdx(uint32_t ExtHashBits, uint32_t BucketSize) {
368 assert((BucketSize > 0) && "Empty bucket");
369
370 return ExtHashBits & (BucketSize - 1);
371 }
372
373 // Number of bits in hash mask.
375
376 // Hash mask.
378
379 // Hash mask for the extended hash bits.
381
382 // The maximal bucket size.
384
385 // Initial size of bucket.
387
388 // The number of buckets.
390
391 // Array of buckets.
392 std::unique_ptr<Bucket[]> BucketsArray;
393
394 // Used for allocating KeyDataTy values.
396};
397
398} // end namespace llvm
399
400#endif // LLVM_ADT_CONCURRENTHASHTABLE_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file defines the BumpPtrAllocator interface.
Analysis containing CSE Info
Definition CSEInfo.cpp:27
This file defines the DenseMap class.
Basic Register Allocator
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
Value * RHS
Value * LHS
std::pair< KeyDataTy *, bool > insert(const KeyTy &NewValue)
Insert new value NewValue or return already existing entry.
uint32_t getStartIdx(uint32_t ExtHashBits, uint32_t BucketSize)
std::unique_ptr< Bucket[]> BucketsArray
uint32_t getBucketIdx(hash_code Hash)
void printStatistic(raw_ostream &OS)
Print information about current state of hash table structures.
void RehashBucket(Bucket &CurBucket)
uint32_t getExtHashBits(uint64_t Hash)
ConcurrentHashTableByPtr(AllocatorTy &Allocator, uint64_t EstimatedSize=100000, size_t ThreadsNum=parallel::strategy.compute_thread_count(), size_t InitialNumberOfBuckets=128)
ConcurrentHashTable - is a resizeable concurrent hashtable.
static KeyDataTy * create(const KeyTy &Key, AllocatorTy &Allocator)
static const KeyTy & getKey(const KeyDataTy &KeyData)
static bool isEqual(const KeyTy &LHS, const KeyTy &RHS)
static uint64_t getHashValue(const KeyTy &Key)
An opaque object representing a hash code.
Definition Hashing.h:76
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI ThreadPoolStrategy strategy
Definition Parallel.cpp:24
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI uint64_t xxh3_64bits(ArrayRef< uint8_t > data)
Definition xxhash.cpp:553
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:396
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:186
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition bit.h:222
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key