LLVM 22.0.0git
ConcurrentHashtable.h
Go to the documentation of this file.
1//===- ConcurrentHashtable.h ------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_ADT_CONCURRENTHASHTABLE_H
10#define LLVM_ADT_CONCURRENTHASHTABLE_H
11
12#include "llvm/ADT/DenseMap.h"
13#include "llvm/ADT/Hashing.h"
14#include "llvm/ADT/STLExtras.h"
16#include "llvm/Config/llvm-config.h" // for LLVM_ENABLE_THREADS
18#include "llvm/Support/Debug.h"
21#include "llvm/Support/xxhash.h"
22#include <atomic>
23#include <cstddef>
24#include <iomanip>
25#include <mutex>
26#include <sstream>
27
28namespace llvm {
29
30/// ConcurrentHashTable - is a resizeable concurrent hashtable.
31/// The number of resizings limited up to x2^31. This hashtable is
32/// useful to have efficient access to aggregate data(like strings,
33/// type descriptors...) and to keep only single copy of such
34/// an aggregate. The hashtable allows only concurrent insertions:
35///
36/// KeyDataTy* = insert ( const KeyTy& );
37///
38/// Data structure:
39///
40/// Inserted value KeyTy is mapped to 64-bit hash value ->
41///
42/// [------- 64-bit Hash value --------]
43/// [ StartEntryIndex ][ Bucket Index ]
44/// | |
45/// points to the points to
46/// first probe the bucket.
47/// position inside
48/// bucket entries
49///
50/// After initialization, all buckets have an initial size. During insertions,
51/// buckets might be extended to contain more entries. Each bucket can be
52/// independently resized and rehashed(no need to lock the whole table).
53/// Different buckets may have different sizes. If the single bucket is full
54/// then the bucket is resized.
55///
56/// BucketsArray keeps all buckets. Each bucket keeps an array of Entries
57/// (pointers to KeyDataTy) and another array of entries hashes:
58///
59/// BucketsArray[BucketIdx].Hashes[EntryIdx]:
60/// BucketsArray[BucketIdx].Entries[EntryIdx]:
61///
62/// [Bucket 0].Hashes -> [uint32_t][uint32_t]
63/// [Bucket 0].Entries -> [KeyDataTy*][KeyDataTy*]
64///
65/// [Bucket 1].Hashes -> [uint32_t][uint32_t][uint32_t][uint32_t]
66/// [Bucket 1].Entries -> [KeyDataTy*][KeyDataTy*][KeyDataTy*][KeyDataTy*]
67/// .........................
68/// [Bucket N].Hashes -> [uint32_t][uint32_t][uint32_t]
69/// [Bucket N].Entries -> [KeyDataTy*][KeyDataTy*][KeyDataTy*]
70///
71/// ConcurrentHashTableByPtr uses an external thread-safe allocator to allocate
72/// KeyDataTy items.
73
74template <typename KeyTy, typename KeyDataTy, typename AllocatorTy>
76public:
77 /// \returns Hash value for the specified \p Key.
78 static inline uint64_t getHashValue(const KeyTy &Key) {
79 return xxh3_64bits(Key);
80 }
81
82 /// \returns true if both \p LHS and \p RHS are equal.
83 static inline bool isEqual(const KeyTy &LHS, const KeyTy &RHS) {
84 return LHS == RHS;
85 }
86
87 /// \returns key for the specified \p KeyData.
88 static inline const KeyTy &getKey(const KeyDataTy &KeyData) {
89 return KeyData.getKey();
90 }
91
92 /// \returns newly created object of KeyDataTy type.
93 static inline KeyDataTy *create(const KeyTy &Key, AllocatorTy &Allocator) {
94 return KeyDataTy::create(Key, Allocator);
95 }
96};
97
98template <typename KeyTy, typename KeyDataTy, typename AllocatorTy,
99 typename Info =
100 ConcurrentHashTableInfoByPtr<KeyTy, KeyDataTy, AllocatorTy>>
102public:
104 AllocatorTy &Allocator, uint64_t EstimatedSize = 100000,
105 size_t ThreadsNum = parallel::strategy.compute_thread_count(),
106 size_t InitialNumberOfBuckets = 128)
108 assert((ThreadsNum > 0) && "ThreadsNum must be greater than 0");
109 assert((InitialNumberOfBuckets > 0) &&
110 "InitialNumberOfBuckets must be greater than 0");
111
112 // Calculate number of buckets.
113 uint64_t EstimatedNumberOfBuckets = ThreadsNum;
114 if (ThreadsNum > 1) {
115 EstimatedNumberOfBuckets *= InitialNumberOfBuckets;
116 EstimatedNumberOfBuckets *= std::max(
117 1,
118 countr_zero(PowerOf2Ceil(EstimatedSize / InitialNumberOfBuckets)) >>
119 2);
120 }
121 EstimatedNumberOfBuckets = PowerOf2Ceil(EstimatedNumberOfBuckets);
123 std::min(EstimatedNumberOfBuckets, (uint64_t)(1Ull << 31));
124
125 // Allocate buckets.
126 BucketsArray = std::make_unique<Bucket[]>(NumberOfBuckets);
127
128 InitialBucketSize = EstimatedSize / NumberOfBuckets;
131
132 // Initialize each bucket.
133 for (uint32_t Idx = 0; Idx < NumberOfBuckets; Idx++) {
135 memset(Hashes, 0, sizeof(ExtHashBitsTy) * InitialBucketSize);
136
138 memset(Entries, 0, sizeof(EntryDataTy) * InitialBucketSize);
139
141 BucketsArray[Idx].Hashes = Hashes;
142 BucketsArray[Idx].Entries = Entries;
143 }
144
145 // Calculate masks.
147
148 size_t LeadingZerosNumber = countl_zero(HashMask);
149 HashBitsNum = 64 - LeadingZerosNumber;
150
151 // We keep only high 32-bits of hash value. So bucket size cannot
152 // exceed 2^31. Bucket size is always power of two.
153 MaxBucketSize = 1Ull << (std::min((size_t)31, LeadingZerosNumber));
154
155 // Calculate mask for extended hash bits.
157 }
158
160 // Deallocate buckets.
161 for (uint32_t Idx = 0; Idx < NumberOfBuckets; Idx++) {
162 delete[] BucketsArray[Idx].Hashes;
163 delete[] BucketsArray[Idx].Entries;
164 }
165 }
166
167 /// Insert new value \p NewValue or return already existing entry.
168 ///
169 /// \returns entry and "true" if an entry is just inserted or
170 /// "false" if an entry already exists.
171 std::pair<KeyDataTy *, bool> insert(const KeyTy &NewValue) {
172 // Calculate bucket index.
173 uint64_t Hash = Info::getHashValue(NewValue);
174 Bucket &CurBucket = BucketsArray[getBucketIdx(Hash)];
175 uint32_t ExtHashBits = getExtHashBits(Hash);
176
177#if LLVM_ENABLE_THREADS
178 // Lock bucket.
179 std::scoped_lock<std::mutex> Lock(CurBucket.Guard);
180#endif
181
182 HashesPtr BucketHashes = CurBucket.Hashes;
183 DataPtr BucketEntries = CurBucket.Entries;
184 uint32_t CurEntryIdx = getStartIdx(ExtHashBits, CurBucket.Size);
185
186 while (true) {
187 uint32_t CurEntryHashBits = BucketHashes[CurEntryIdx];
188
189 if (CurEntryHashBits == 0 && BucketEntries[CurEntryIdx] == nullptr) {
190 // Found empty slot. Insert data.
191 KeyDataTy *NewData = Info::create(NewValue, MultiThreadAllocator);
192 BucketEntries[CurEntryIdx] = NewData;
193 BucketHashes[CurEntryIdx] = ExtHashBits;
194
195 CurBucket.NumberOfEntries++;
196 RehashBucket(CurBucket);
197 return {NewData, true};
198 }
199
200 if (CurEntryHashBits == ExtHashBits) {
201 // Hash matched. Check value for equality.
202 KeyDataTy *EntryData = BucketEntries[CurEntryIdx];
203 if (Info::isEqual(Info::getKey(*EntryData), NewValue)) {
204 // Already existed entry matched with inserted data is found.
205 return {EntryData, false};
206 }
207 }
208
209 CurEntryIdx++;
210 CurEntryIdx &= (CurBucket.Size - 1);
211 }
212
213 llvm_unreachable("Insertion error.");
214 return {};
215 }
216
217 /// Print information about current state of hash table structures.
219 OS << "\n--- HashTable statistic:\n";
220 OS << "\nNumber of buckets = " << NumberOfBuckets;
221 OS << "\nInitial bucket size = " << InitialBucketSize;
222
223 uint64_t NumberOfNonEmptyBuckets = 0;
224 uint64_t NumberOfEntriesPlusEmpty = 0;
225 uint64_t OverallNumberOfEntries = 0;
226 uint64_t OverallSize = sizeof(*this) + NumberOfBuckets * sizeof(Bucket);
227
228 DenseMap<uint32_t, uint32_t> BucketSizesMap;
229
230 // For each bucket...
231 for (uint32_t Idx = 0; Idx < NumberOfBuckets; Idx++) {
232 Bucket &CurBucket = BucketsArray[Idx];
233
234 BucketSizesMap[CurBucket.Size]++;
235
236 if (CurBucket.NumberOfEntries != 0)
237 NumberOfNonEmptyBuckets++;
238 NumberOfEntriesPlusEmpty += CurBucket.Size;
239 OverallNumberOfEntries += CurBucket.NumberOfEntries;
240 OverallSize +=
241 (sizeof(ExtHashBitsTy) + sizeof(EntryDataTy)) * CurBucket.Size;
242 }
243
244 OS << "\nOverall number of entries = " << OverallNumberOfEntries;
245 OS << "\nOverall number of non empty buckets = " << NumberOfNonEmptyBuckets;
246 for (auto [Size, Count] : BucketSizesMap)
247 OS << "\n Number of buckets with size " << Size << ": " << Count;
248
249 std::stringstream stream;
250 stream << std::fixed << std::setprecision(2)
251 << ((float)OverallNumberOfEntries / (float)NumberOfEntriesPlusEmpty);
252 std::string str = stream.str();
253
254 OS << "\nLoad factor = " << str;
255 OS << "\nOverall allocated size = " << OverallSize;
256 }
257
258protected:
260 using EntryDataTy = KeyDataTy *;
261
264
265 // Bucket structure. Keeps bucket data.
266 struct Bucket {
267 Bucket() = default;
268
269 // Size of bucket.
271
272 // Number of non-null entries.
274
275 // Hashes for [Size] entries.
276 HashesPtr Hashes = nullptr;
277
278 // [Size] entries.
279 DataPtr Entries = nullptr;
280
281#if LLVM_ENABLE_THREADS
282 // Mutex for this bucket.
283 std::mutex Guard;
284#endif
285 };
286
287 // Reallocate and rehash bucket if this is full enough.
288 void RehashBucket(Bucket &CurBucket) {
289 assert((CurBucket.Size > 0) && "Uninitialised bucket");
290 if (CurBucket.NumberOfEntries < CurBucket.Size * 0.9)
291 return;
292
293 if (CurBucket.Size >= MaxBucketSize)
294 report_fatal_error("ConcurrentHashTable is full");
295
296 uint32_t NewBucketSize = CurBucket.Size << 1;
297 assert((NewBucketSize <= MaxBucketSize) && "New bucket size is too big");
298 assert((CurBucket.Size < NewBucketSize) &&
299 "New bucket size less than size of current bucket");
300
301 // Store old entries & hashes arrays.
302 HashesPtr SrcHashes = CurBucket.Hashes;
303 DataPtr SrcEntries = CurBucket.Entries;
304
305 // Allocate new entries&hashes arrays.
306 HashesPtr DestHashes = new ExtHashBitsTy[NewBucketSize];
307 memset(DestHashes, 0, sizeof(ExtHashBitsTy) * NewBucketSize);
308
309 DataPtr DestEntries = new EntryDataTy[NewBucketSize];
310 memset(DestEntries, 0, sizeof(EntryDataTy) * NewBucketSize);
311
312 // For each entry in source arrays...
313 for (uint32_t CurSrcEntryIdx = 0; CurSrcEntryIdx < CurBucket.Size;
314 CurSrcEntryIdx++) {
315 uint32_t CurSrcEntryHashBits = SrcHashes[CurSrcEntryIdx];
316
317 // Check for null entry.
318 if (CurSrcEntryHashBits == 0 && SrcEntries[CurSrcEntryIdx] == nullptr)
319 continue;
320
321 uint32_t StartDestIdx = getStartIdx(CurSrcEntryHashBits, NewBucketSize);
322
323 // Insert non-null entry into the new arrays.
324 while (true) {
325 uint32_t CurDestEntryHashBits = DestHashes[StartDestIdx];
326
327 if (CurDestEntryHashBits == 0 && DestEntries[StartDestIdx] == nullptr) {
328 // Found empty slot. Insert data.
329 DestHashes[StartDestIdx] = CurSrcEntryHashBits;
330 DestEntries[StartDestIdx] = SrcEntries[CurSrcEntryIdx];
331 break;
332 }
333
334 StartDestIdx++;
335 StartDestIdx = StartDestIdx & (NewBucketSize - 1);
336 }
337 }
338
339 // Update bucket fields.
340 CurBucket.Hashes = DestHashes;
341 CurBucket.Entries = DestEntries;
342 CurBucket.Size = NewBucketSize;
343
344 // Delete old bucket entries.
345 delete[] SrcHashes;
346 delete[] SrcEntries;
347 }
348
349 uint32_t getBucketIdx(hash_code Hash) { return Hash & HashMask; }
350
352 return (Hash & ExtHashMask) >> HashBitsNum;
353 }
354
355 uint32_t getStartIdx(uint32_t ExtHashBits, uint32_t BucketSize) {
356 assert((BucketSize > 0) && "Empty bucket");
357
358 return ExtHashBits & (BucketSize - 1);
359 }
360
361 // Number of bits in hash mask.
363
364 // Hash mask.
366
367 // Hash mask for the extended hash bits.
369
370 // The maximal bucket size.
372
373 // Initial size of bucket.
375
376 // The number of buckets.
378
379 // Array of buckets.
380 std::unique_ptr<Bucket[]> BucketsArray;
381
382 // Used for allocating KeyDataTy values.
384};
385
386} // end namespace llvm
387
388#endif // LLVM_ADT_CONCURRENTHASHTABLE_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file defines the BumpPtrAllocator interface.
Analysis containing CSE Info
Definition CSEInfo.cpp:27
This file defines the DenseMap class.
Basic Register Allocator
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
Value * RHS
Value * LHS
std::pair< KeyDataTy *, bool > insert(const KeyTy &NewValue)
Insert new value NewValue or return already existing entry.
uint32_t getStartIdx(uint32_t ExtHashBits, uint32_t BucketSize)
std::unique_ptr< Bucket[]> BucketsArray
uint32_t getBucketIdx(hash_code Hash)
void printStatistic(raw_ostream &OS)
Print information about current state of hash table structures.
void RehashBucket(Bucket &CurBucket)
uint32_t getExtHashBits(uint64_t Hash)
ConcurrentHashTableByPtr(AllocatorTy &Allocator, uint64_t EstimatedSize=100000, size_t ThreadsNum=parallel::strategy.compute_thread_count(), size_t InitialNumberOfBuckets=128)
ConcurrentHashTable - is a resizeable concurrent hashtable.
static KeyDataTy * create(const KeyTy &Key, AllocatorTy &Allocator)
static const KeyTy & getKey(const KeyDataTy &KeyData)
static bool isEqual(const KeyTy &LHS, const KeyTy &RHS)
static uint64_t getHashValue(const KeyTy &Key)
An opaque object representing a hash code.
Definition Hashing.h:76
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI ThreadPoolStrategy strategy
Definition Parallel.cpp:24
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI uint64_t xxh3_64bits(ArrayRef< uint8_t > data)
Definition xxhash.cpp:553
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:385
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition bit.h:236
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key