LLVM 22.0.0git
ConcurrentHashtable.h
Go to the documentation of this file.
1//===- ConcurrentHashtable.h ------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_ADT_CONCURRENTHASHTABLE_H
10#define LLVM_ADT_CONCURRENTHASHTABLE_H
11
12#include "llvm/ADT/DenseMap.h"
13#include "llvm/ADT/Hashing.h"
14#include "llvm/ADT/STLExtras.h"
16#include "llvm/Config/llvm-config.h" // for LLVM_ENABLE_THREADS
18#include "llvm/Support/Debug.h"
21#include "llvm/Support/xxhash.h"
22#include <atomic>
23#include <cstddef>
24#include <iomanip>
25#include <mutex>
26#include <sstream>
27#include <type_traits>
28
29namespace llvm {
30
31/// ConcurrentHashTable - is a resizeable concurrent hashtable.
32/// The number of resizings limited up to x2^31. This hashtable is
33/// useful to have efficient access to aggregate data(like strings,
34/// type descriptors...) and to keep only single copy of such
35/// an aggregate. The hashtable allows only concurrent insertions:
36///
37/// KeyDataTy* = insert ( const KeyTy& );
38///
39/// Data structure:
40///
41/// Inserted value KeyTy is mapped to 64-bit hash value ->
42///
43/// [------- 64-bit Hash value --------]
44/// [ StartEntryIndex ][ Bucket Index ]
45/// | |
46/// points to the points to
47/// first probe the bucket.
48/// position inside
49/// bucket entries
50///
51/// After initialization, all buckets have an initial size. During insertions,
52/// buckets might be extended to contain more entries. Each bucket can be
53/// independently resized and rehashed(no need to lock the whole table).
54/// Different buckets may have different sizes. If the single bucket is full
55/// then the bucket is resized.
56///
57/// BucketsArray keeps all buckets. Each bucket keeps an array of Entries
58/// (pointers to KeyDataTy) and another array of entries hashes:
59///
60/// BucketsArray[BucketIdx].Hashes[EntryIdx]:
61/// BucketsArray[BucketIdx].Entries[EntryIdx]:
62///
63/// [Bucket 0].Hashes -> [uint32_t][uint32_t]
64/// [Bucket 0].Entries -> [KeyDataTy*][KeyDataTy*]
65///
66/// [Bucket 1].Hashes -> [uint32_t][uint32_t][uint32_t][uint32_t]
67/// [Bucket 1].Entries -> [KeyDataTy*][KeyDataTy*][KeyDataTy*][KeyDataTy*]
68/// .........................
69/// [Bucket N].Hashes -> [uint32_t][uint32_t][uint32_t]
70/// [Bucket N].Entries -> [KeyDataTy*][KeyDataTy*][KeyDataTy*]
71///
72/// ConcurrentHashTableByPtr uses an external thread-safe allocator to allocate
73/// KeyDataTy items.
74
75template <typename KeyTy, typename KeyDataTy, typename AllocatorTy>
77public:
78 /// \returns Hash value for the specified \p Key.
79 static inline uint64_t getHashValue(const KeyTy &Key) {
80 return xxh3_64bits(Key);
81 }
82
83 /// \returns true if both \p LHS and \p RHS are equal.
84 static inline bool isEqual(const KeyTy &LHS, const KeyTy &RHS) {
85 return LHS == RHS;
86 }
87
88 /// \returns key for the specified \p KeyData.
89 static inline const KeyTy &getKey(const KeyDataTy &KeyData) {
90 return KeyData.getKey();
91 }
92
93 /// \returns newly created object of KeyDataTy type.
94 static inline KeyDataTy *create(const KeyTy &Key, AllocatorTy &Allocator) {
95 return KeyDataTy::create(Key, Allocator);
96 }
97};
98
99template <typename KeyTy, typename KeyDataTy, typename AllocatorTy,
100 typename Info =
101 ConcurrentHashTableInfoByPtr<KeyTy, KeyDataTy, AllocatorTy>>
103public:
105 AllocatorTy &Allocator, uint64_t EstimatedSize = 100000,
106 size_t ThreadsNum = parallel::strategy.compute_thread_count(),
107 size_t InitialNumberOfBuckets = 128)
109 assert((ThreadsNum > 0) && "ThreadsNum must be greater than 0");
110 assert((InitialNumberOfBuckets > 0) &&
111 "InitialNumberOfBuckets must be greater than 0");
112
113 // Calculate number of buckets.
114 uint64_t EstimatedNumberOfBuckets = ThreadsNum;
115 if (ThreadsNum > 1) {
116 EstimatedNumberOfBuckets *= InitialNumberOfBuckets;
117 EstimatedNumberOfBuckets *= std::max(
118 1,
119 countr_zero(PowerOf2Ceil(EstimatedSize / InitialNumberOfBuckets)) >>
120 2);
121 }
122 EstimatedNumberOfBuckets = PowerOf2Ceil(EstimatedNumberOfBuckets);
124 std::min(EstimatedNumberOfBuckets, (uint64_t)(1Ull << 31));
125
126 // Allocate buckets.
127 BucketsArray = std::make_unique<Bucket[]>(NumberOfBuckets);
128
129 InitialBucketSize = EstimatedSize / NumberOfBuckets;
132
133 // Initialize each bucket.
134 for (uint32_t Idx = 0; Idx < NumberOfBuckets; Idx++) {
136 memset(Hashes, 0, sizeof(ExtHashBitsTy) * InitialBucketSize);
137
139 memset(Entries, 0, sizeof(EntryDataTy) * InitialBucketSize);
140
142 BucketsArray[Idx].Hashes = Hashes;
143 BucketsArray[Idx].Entries = Entries;
144 }
145
146 // Calculate masks.
148
149 size_t LeadingZerosNumber = countl_zero(HashMask);
150 HashBitsNum = 64 - LeadingZerosNumber;
151
152 // We keep only high 32-bits of hash value. So bucket size cannot
153 // exceed 2^31. Bucket size is always power of two.
154 MaxBucketSize = 1Ull << (std::min((size_t)31, LeadingZerosNumber));
155
156 // Calculate mask for extended hash bits.
158 }
159
161 // Deallocate buckets.
162 for (uint32_t Idx = 0; Idx < NumberOfBuckets; Idx++) {
163 delete[] BucketsArray[Idx].Hashes;
164 delete[] BucketsArray[Idx].Entries;
165 }
166 }
167
168 /// Insert new value \p NewValue or return already existing entry.
169 ///
170 /// \returns entry and "true" if an entry is just inserted or
171 /// "false" if an entry already exists.
172 std::pair<KeyDataTy *, bool> insert(const KeyTy &NewValue) {
173 // Calculate bucket index.
174 uint64_t Hash = Info::getHashValue(NewValue);
175 Bucket &CurBucket = BucketsArray[getBucketIdx(Hash)];
176 uint32_t ExtHashBits = getExtHashBits(Hash);
177
178#if LLVM_ENABLE_THREADS
179 // Lock bucket.
180 std::scoped_lock<std::mutex> Lock(CurBucket.Guard);
181#endif
182
183 HashesPtr BucketHashes = CurBucket.Hashes;
184 DataPtr BucketEntries = CurBucket.Entries;
185 uint32_t CurEntryIdx = getStartIdx(ExtHashBits, CurBucket.Size);
186
187 while (true) {
188 uint32_t CurEntryHashBits = BucketHashes[CurEntryIdx];
189
190 if (CurEntryHashBits == 0 && BucketEntries[CurEntryIdx] == nullptr) {
191 // Found empty slot. Insert data.
192 KeyDataTy *NewData = Info::create(NewValue, MultiThreadAllocator);
193 BucketEntries[CurEntryIdx] = NewData;
194 BucketHashes[CurEntryIdx] = ExtHashBits;
195
196 CurBucket.NumberOfEntries++;
197 RehashBucket(CurBucket);
198 return {NewData, true};
199 }
200
201 if (CurEntryHashBits == ExtHashBits) {
202 // Hash matched. Check value for equality.
203 KeyDataTy *EntryData = BucketEntries[CurEntryIdx];
204 if (Info::isEqual(Info::getKey(*EntryData), NewValue)) {
205 // Already existed entry matched with inserted data is found.
206 return {EntryData, false};
207 }
208 }
209
210 CurEntryIdx++;
211 CurEntryIdx &= (CurBucket.Size - 1);
212 }
213
214 llvm_unreachable("Insertion error.");
215 return {};
216 }
217
218 /// Print information about current state of hash table structures.
220 OS << "\n--- HashTable statistic:\n";
221 OS << "\nNumber of buckets = " << NumberOfBuckets;
222 OS << "\nInitial bucket size = " << InitialBucketSize;
223
224 uint64_t NumberOfNonEmptyBuckets = 0;
225 uint64_t NumberOfEntriesPlusEmpty = 0;
226 uint64_t OverallNumberOfEntries = 0;
227 uint64_t OverallSize = sizeof(*this) + NumberOfBuckets * sizeof(Bucket);
228
229 DenseMap<uint32_t, uint32_t> BucketSizesMap;
230
231 // For each bucket...
232 for (uint32_t Idx = 0; Idx < NumberOfBuckets; Idx++) {
233 Bucket &CurBucket = BucketsArray[Idx];
234
235 BucketSizesMap[CurBucket.Size]++;
236
237 if (CurBucket.NumberOfEntries != 0)
238 NumberOfNonEmptyBuckets++;
239 NumberOfEntriesPlusEmpty += CurBucket.Size;
240 OverallNumberOfEntries += CurBucket.NumberOfEntries;
241 OverallSize +=
242 (sizeof(ExtHashBitsTy) + sizeof(EntryDataTy)) * CurBucket.Size;
243 }
244
245 OS << "\nOverall number of entries = " << OverallNumberOfEntries;
246 OS << "\nOverall number of non empty buckets = " << NumberOfNonEmptyBuckets;
247 for (auto [Size, Count] : BucketSizesMap)
248 OS << "\n Number of buckets with size " << Size << ": " << Count;
249
250 std::stringstream stream;
251 stream << std::fixed << std::setprecision(2)
252 << ((float)OverallNumberOfEntries / (float)NumberOfEntriesPlusEmpty);
253 std::string str = stream.str();
254
255 OS << "\nLoad factor = " << str;
256 OS << "\nOverall allocated size = " << OverallSize;
257 }
258
259protected:
261 using EntryDataTy = KeyDataTy *;
262
265
266 // Bucket structure. Keeps bucket data.
267 struct Bucket {
268 Bucket() = default;
269
270 // Size of bucket.
272
273 // Number of non-null entries.
275
276 // Hashes for [Size] entries.
277 HashesPtr Hashes = nullptr;
278
279 // [Size] entries.
280 DataPtr Entries = nullptr;
281
282#if LLVM_ENABLE_THREADS
283 // Mutex for this bucket.
284 std::mutex Guard;
285#endif
286 };
287
288 // Reallocate and rehash bucket if this is full enough.
289 void RehashBucket(Bucket &CurBucket) {
290 assert((CurBucket.Size > 0) && "Uninitialised bucket");
291 if (CurBucket.NumberOfEntries < CurBucket.Size * 0.9)
292 return;
293
294 if (CurBucket.Size >= MaxBucketSize)
295 report_fatal_error("ConcurrentHashTable is full");
296
297 uint32_t NewBucketSize = CurBucket.Size << 1;
298 assert((NewBucketSize <= MaxBucketSize) && "New bucket size is too big");
299 assert((CurBucket.Size < NewBucketSize) &&
300 "New bucket size less than size of current bucket");
301
302 // Store old entries & hashes arrays.
303 HashesPtr SrcHashes = CurBucket.Hashes;
304 DataPtr SrcEntries = CurBucket.Entries;
305
306 // Allocate new entries&hashes arrays.
307 HashesPtr DestHashes = new ExtHashBitsTy[NewBucketSize];
308 memset(DestHashes, 0, sizeof(ExtHashBitsTy) * NewBucketSize);
309
310 DataPtr DestEntries = new EntryDataTy[NewBucketSize];
311 memset(DestEntries, 0, sizeof(EntryDataTy) * NewBucketSize);
312
313 // For each entry in source arrays...
314 for (uint32_t CurSrcEntryIdx = 0; CurSrcEntryIdx < CurBucket.Size;
315 CurSrcEntryIdx++) {
316 uint32_t CurSrcEntryHashBits = SrcHashes[CurSrcEntryIdx];
317
318 // Check for null entry.
319 if (CurSrcEntryHashBits == 0 && SrcEntries[CurSrcEntryIdx] == nullptr)
320 continue;
321
322 uint32_t StartDestIdx = getStartIdx(CurSrcEntryHashBits, NewBucketSize);
323
324 // Insert non-null entry into the new arrays.
325 while (true) {
326 uint32_t CurDestEntryHashBits = DestHashes[StartDestIdx];
327
328 if (CurDestEntryHashBits == 0 && DestEntries[StartDestIdx] == nullptr) {
329 // Found empty slot. Insert data.
330 DestHashes[StartDestIdx] = CurSrcEntryHashBits;
331 DestEntries[StartDestIdx] = SrcEntries[CurSrcEntryIdx];
332 break;
333 }
334
335 StartDestIdx++;
336 StartDestIdx = StartDestIdx & (NewBucketSize - 1);
337 }
338 }
339
340 // Update bucket fields.
341 CurBucket.Hashes = DestHashes;
342 CurBucket.Entries = DestEntries;
343 CurBucket.Size = NewBucketSize;
344
345 // Delete old bucket entries.
346 if (SrcHashes != nullptr)
347 delete[] SrcHashes;
348 if (SrcEntries != nullptr)
349 delete[] SrcEntries;
350 }
351
352 uint32_t getBucketIdx(hash_code Hash) { return Hash & HashMask; }
353
355 return (Hash & ExtHashMask) >> HashBitsNum;
356 }
357
358 uint32_t getStartIdx(uint32_t ExtHashBits, uint32_t BucketSize) {
359 assert((BucketSize > 0) && "Empty bucket");
360
361 return ExtHashBits & (BucketSize - 1);
362 }
363
364 // Number of bits in hash mask.
366
367 // Hash mask.
369
370 // Hash mask for the extended hash bits.
372
373 // The maximal bucket size.
375
376 // Initial size of bucket.
378
379 // The number of buckets.
381
382 // Array of buckets.
383 std::unique_ptr<Bucket[]> BucketsArray;
384
385 // Used for allocating KeyDataTy values.
387};
388
389} // end namespace llvm
390
391#endif // LLVM_ADT_CONCURRENTHASHTABLE_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file defines the BumpPtrAllocator interface.
Analysis containing CSE Info
Definition CSEInfo.cpp:27
This file defines the DenseMap class.
Basic Register Allocator
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
Value * RHS
Value * LHS
std::pair< KeyDataTy *, bool > insert(const KeyTy &NewValue)
Insert new value NewValue or return already existing entry.
uint32_t getStartIdx(uint32_t ExtHashBits, uint32_t BucketSize)
std::unique_ptr< Bucket[]> BucketsArray
uint32_t getBucketIdx(hash_code Hash)
void printStatistic(raw_ostream &OS)
Print information about current state of hash table structures.
void RehashBucket(Bucket &CurBucket)
uint32_t getExtHashBits(uint64_t Hash)
ConcurrentHashTableByPtr(AllocatorTy &Allocator, uint64_t EstimatedSize=100000, size_t ThreadsNum=parallel::strategy.compute_thread_count(), size_t InitialNumberOfBuckets=128)
ConcurrentHashTable - is a resizeable concurrent hashtable.
static KeyDataTy * create(const KeyTy &Key, AllocatorTy &Allocator)
static const KeyTy & getKey(const KeyDataTy &KeyData)
static bool isEqual(const KeyTy &LHS, const KeyTy &RHS)
static uint64_t getHashValue(const KeyTy &Key)
An opaque object representing a hash code.
Definition Hashing.h:76
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI ThreadPoolStrategy strategy
Definition Parallel.cpp:24
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI uint64_t xxh3_64bits(ArrayRef< uint8_t > data)
Definition xxhash.cpp:553
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:385
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition bit.h:236
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key