LLVM 20.0.0git
HashTable.h
Go to the documentation of this file.
1//===- HashTable.h - PDB Hash Table -----------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H
10#define LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H
11
13#include "llvm/ADT/iterator.h"
17#include "llvm/Support/Endian.h"
18#include "llvm/Support/Error.h"
19#include <cstdint>
20#include <iterator>
21#include <utility>
22#include <vector>
23
24namespace llvm {
25
26namespace pdb {
27
28Error readSparseBitVector(BinaryStreamReader &Stream, SparseBitVector<> &V);
29Error writeSparseBitVector(BinaryStreamWriter &Writer, SparseBitVector<> &Vec);
30
31template <typename ValueT> class HashTable;
32
33template <typename ValueT>
35 : public iterator_facade_base<HashTableIterator<ValueT>,
36 std::forward_iterator_tag,
37 const std::pair<uint32_t, ValueT>> {
38 using BaseT = typename HashTableIterator::iterator_facade_base;
39 friend HashTable<ValueT>;
40
42 bool IsEnd)
43 : Map(&Map), Index(Index), IsEnd(IsEnd) {}
44
45public:
46 HashTableIterator(const HashTable<ValueT> &Map) : Map(&Map) {
47 int I = Map.Present.find_first();
48 if (I == -1) {
49 Index = 0;
50 IsEnd = true;
51 } else {
52 Index = static_cast<uint32_t>(I);
53 IsEnd = false;
54 }
55 }
56
59 Map = R.Map;
60 return *this;
61 }
62 bool operator==(const HashTableIterator &R) const {
63 if (IsEnd && R.IsEnd)
64 return true;
65 if (IsEnd != R.IsEnd)
66 return false;
67
68 return (Map == R.Map) && (Index == R.Index);
69 }
70 const std::pair<uint32_t, ValueT> &operator*() const {
71 assert(Map->Present.test(Index));
72 return Map->Buckets[Index];
73 }
74
75 // Implement postfix op++ in terms of prefix op++ by using the superclass
76 // implementation.
77 using BaseT::operator++;
79 while (Index < Map->Buckets.size()) {
80 ++Index;
81 if (Map->Present.test(Index))
82 return *this;
83 }
84
85 IsEnd = true;
86 return *this;
87 }
88
89private:
90 bool isEnd() const { return IsEnd; }
91 uint32_t index() const { return Index; }
92
93 const HashTable<ValueT> *Map;
94 uint32_t Index;
95 bool IsEnd;
96};
97
98template <typename ValueT>
99class HashTable {
100 struct Header {
102 support::ulittle32_t Capacity;
103 };
104
105 using BucketList = std::vector<std::pair<uint32_t, ValueT>>;
106
107public:
110
111 HashTable() { Buckets.resize(8); }
112 explicit HashTable(uint32_t Capacity) {
113 Buckets.resize(Capacity);
114 }
115
117 const Header *H;
118 if (auto EC = Stream.readObject(H))
119 return EC;
120 if (H->Capacity == 0)
121 return make_error<RawError>(raw_error_code::corrupt_file,
122 "Invalid Hash Table Capacity");
123 if (H->Size > maxLoad(H->Capacity))
124 return make_error<RawError>(raw_error_code::corrupt_file,
125 "Invalid Hash Table Size");
126
127 Buckets.resize(H->Capacity);
128
129 if (auto EC = readSparseBitVector(Stream, Present))
130 return EC;
131 if (Present.count() != H->Size)
132 return make_error<RawError>(raw_error_code::corrupt_file,
133 "Present bit vector does not match size!");
134
135 if (auto EC = readSparseBitVector(Stream, Deleted))
136 return EC;
138 return make_error<RawError>(raw_error_code::corrupt_file,
139 "Present bit vector intersects deleted!");
140
141 for (uint32_t P : Present) {
142 if (auto EC = Stream.readInteger(Buckets[P].first))
143 return EC;
144 const ValueT *Value;
145 if (auto EC = Stream.readObject(Value))
146 return EC;
147 Buckets[P].second = *Value;
148 }
149
150 return Error::success();
151 }
152
154 uint32_t Size = sizeof(Header);
155
156 constexpr int BitsPerWord = 8 * sizeof(uint32_t);
157
158 int NumBitsP = Present.find_last() + 1;
159 int NumBitsD = Deleted.find_last() + 1;
160
161 uint32_t NumWordsP = alignTo(NumBitsP, BitsPerWord) / BitsPerWord;
162 uint32_t NumWordsD = alignTo(NumBitsD, BitsPerWord) / BitsPerWord;
163
164 // Present bit set number of words (4 bytes), followed by that many actual
165 // words (4 bytes each).
166 Size += sizeof(uint32_t);
167 Size += NumWordsP * sizeof(uint32_t);
168
169 // Deleted bit set number of words (4 bytes), followed by that many actual
170 // words (4 bytes each).
171 Size += sizeof(uint32_t);
172 Size += NumWordsD * sizeof(uint32_t);
173
174 // One (Key, ValueT) pair for each entry Present.
175 Size += (sizeof(uint32_t) + sizeof(ValueT)) * size();
176
177 return Size;
178 }
179
181 Header H;
182 H.Size = size();
183 H.Capacity = capacity();
184 if (auto EC = Writer.writeObject(H))
185 return EC;
186
187 if (auto EC = writeSparseBitVector(Writer, Present))
188 return EC;
189
190 if (auto EC = writeSparseBitVector(Writer, Deleted))
191 return EC;
192
193 for (const auto &Entry : *this) {
194 if (auto EC = Writer.writeInteger(Entry.first))
195 return EC;
196 if (auto EC = Writer.writeObject(Entry.second))
197 return EC;
198 }
199 return Error::success();
200 }
201
202 void clear() {
203 Buckets.resize(8);
204 Present.clear();
205 Deleted.clear();
206 }
207
208 bool empty() const { return size() == 0; }
209 uint32_t capacity() const { return Buckets.size(); }
210 uint32_t size() const { return Present.count(); }
211
212 const_iterator begin() const { return const_iterator(*this); }
213 const_iterator end() const { return const_iterator(*this, 0, true); }
214
215 /// Find the entry whose key has the specified hash value, using the specified
216 /// traits defining hash function and equality.
217 template <typename Key, typename TraitsT>
218 const_iterator find_as(const Key &K, TraitsT &Traits) const {
219 uint32_t H = Traits.hashLookupKey(K) % capacity();
220 uint32_t I = H;
221 std::optional<uint32_t> FirstUnused;
222 do {
223 if (isPresent(I)) {
224 if (Traits.storageKeyToLookupKey(Buckets[I].first) == K)
225 return const_iterator(*this, I, false);
226 } else {
227 if (!FirstUnused)
228 FirstUnused = I;
229 // Insertion occurs via linear probing from the slot hint, and will be
230 // inserted at the first empty / deleted location. Therefore, if we are
231 // probing and find a location that is neither present nor deleted, then
232 // nothing must have EVER been inserted at this location, and thus it is
233 // not possible for a matching value to occur later.
234 if (!isDeleted(I))
235 break;
236 }
237 I = (I + 1) % capacity();
238 } while (I != H);
239
240 // The only way FirstUnused would not be set is if every single entry in the
241 // table were Present. But this would violate the load factor constraints
242 // that we impose, so it should never happen.
243 assert(FirstUnused);
244 return const_iterator(*this, *FirstUnused, true);
245 }
246
247 /// Set the entry using a key type that the specified Traits can convert
248 /// from a real key to an internal key.
249 template <typename Key, typename TraitsT>
250 bool set_as(const Key &K, ValueT V, TraitsT &Traits) {
251 return set_as_internal(K, std::move(V), Traits, std::nullopt);
252 }
253
254 template <typename Key, typename TraitsT>
255 ValueT get(const Key &K, TraitsT &Traits) const {
256 auto Iter = find_as(K, Traits);
257 assert(Iter != end());
258 return (*Iter).second;
259 }
260
261protected:
262 bool isPresent(uint32_t K) const { return Present.test(K); }
263 bool isDeleted(uint32_t K) const { return Deleted.test(K); }
264
265 BucketList Buckets;
268
269private:
270 /// Set the entry using a key type that the specified Traits can convert
271 /// from a real key to an internal key.
272 template <typename Key, typename TraitsT>
273 bool set_as_internal(const Key &K, ValueT V, TraitsT &Traits,
274 std::optional<uint32_t> InternalKey) {
275 auto Entry = find_as(K, Traits);
276 if (Entry != end()) {
277 assert(isPresent(Entry.index()));
278 assert(Traits.storageKeyToLookupKey(Buckets[Entry.index()].first) == K);
279 // We're updating, no need to do anything special.
280 Buckets[Entry.index()].second = V;
281 return false;
282 }
283
284 auto &B = Buckets[Entry.index()];
285 assert(!isPresent(Entry.index()));
286 assert(Entry.isEnd());
287 B.first = InternalKey ? *InternalKey : Traits.lookupKeyToStorageKey(K);
288 B.second = V;
289 Present.set(Entry.index());
290 Deleted.reset(Entry.index());
291
292 grow(Traits);
293
294 assert((find_as(K, Traits)) != end());
295 return true;
296 }
297
298 static uint32_t maxLoad(uint32_t capacity) { return capacity * 2 / 3 + 1; }
299
300 template <typename TraitsT>
301 void grow(TraitsT &Traits) {
302 uint32_t S = size();
303 uint32_t MaxLoad = maxLoad(capacity());
304 if (S < maxLoad(capacity()))
305 return;
306 assert(capacity() != UINT32_MAX && "Can't grow Hash table!");
307
308 uint32_t NewCapacity = (capacity() <= INT32_MAX) ? MaxLoad * 2 : UINT32_MAX;
309
310 // Growing requires rebuilding the table and re-hashing every item. Make a
311 // copy with a larger capacity, insert everything into the copy, then swap
312 // it in.
313 HashTable NewMap(NewCapacity);
314 for (auto I : Present) {
315 auto LookupKey = Traits.storageKeyToLookupKey(Buckets[I].first);
316 NewMap.set_as_internal(LookupKey, Buckets[I].second, Traits,
317 Buckets[I].first);
318 }
319
320 Buckets.swap(NewMap.Buckets);
321 std::swap(Present, NewMap.Present);
322 std::swap(Deleted, NewMap.Deleted);
323 assert(capacity() == NewCapacity);
324 assert(size() == S);
325 }
326};
327
328} // end namespace pdb
329
330} // end namespace llvm
331
332#endif // LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
uint64_t Size
#define I(x, y, z)
Definition: MD5.cpp:58
#define H(x, y, z)
Definition: MD5.cpp:57
#define P(N)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SparseBitVector class.
Provides read only access to a subclass of BinaryStream.
Error readObject(const T *&Dest)
Get a pointer to an object of type T from the underlying stream, as if by memcpy, and store the resul...
Error readInteger(T &Dest)
Read an integer of the specified endianness into Dest and update the stream's offset.
Provides write only access to a subclass of WritableBinaryStream.
Error writeInteger(T Value)
Write the integer Value to the underlying stream in the specified endianness.
Error writeObject(const T &Obj)
Writes the object Obj to the underlying stream, as if by using memcpy.
Lightweight error class with error context and mandatory checking.
Definition: Error.h:160
static ErrorSuccess success()
Create a success value.
Definition: Error.h:337
void set(unsigned Idx)
unsigned count() const
bool test(unsigned Idx) const
void reset(unsigned Idx)
bool intersects(const SparseBitVector< ElementSize > *RHS) const
LLVM Value Representation.
Definition: Value.h:74
CRTP base class which implements the entire standard iterator facade in terms of a minimal subset of ...
Definition: iterator.h:80
HashTableIterator & operator++()
Definition: HashTable.h:78
bool operator==(const HashTableIterator &R) const
Definition: HashTable.h:62
HashTableIterator(const HashTable< ValueT > &Map)
Definition: HashTable.h:46
const std::pair< uint32_t, ValueT > & operator*() const
Definition: HashTable.h:70
HashTableIterator & operator=(const HashTableIterator &R)
Definition: HashTable.h:58
HashTableIterator(const HashTableIterator &R)=default
Error load(BinaryStreamReader &Stream)
Definition: HashTable.h:116
bool isDeleted(uint32_t K) const
Definition: HashTable.h:263
bool set_as(const Key &K, ValueT V, TraitsT &Traits)
Set the entry using a key type that the specified Traits can convert from a real key to an internal k...
Definition: HashTable.h:250
uint32_t size() const
Definition: HashTable.h:210
Error commit(BinaryStreamWriter &Writer) const
Definition: HashTable.h:180
HashTableIterator< ValueT > const_iterator
Definition: HashTable.h:108
BucketList Buckets
Definition: HashTable.h:265
const_iterator begin() const
Definition: HashTable.h:212
ValueT get(const Key &K, TraitsT &Traits) const
Definition: HashTable.h:255
uint32_t capacity() const
Definition: HashTable.h:209
bool empty() const
Definition: HashTable.h:208
bool isPresent(uint32_t K) const
Definition: HashTable.h:262
SparseBitVector Present
Definition: HashTable.h:266
const_iterator find_as(const Key &K, TraitsT &Traits) const
Find the entry whose key has the specified hash value, using the specified traits defining hash funct...
Definition: HashTable.h:218
const_iterator end() const
Definition: HashTable.h:213
uint32_t calculateSerializedLength() const
Definition: HashTable.h:153
HashTable(uint32_t Capacity)
Definition: HashTable.h:112
SparseBitVector Deleted
Definition: HashTable.h:267
Error readSparseBitVector(BinaryStreamReader &Stream, SparseBitVector<> &V)
Definition: HashTable.cpp:21
Error writeSparseBitVector(BinaryStreamWriter &Writer, SparseBitVector<> &Vec)
Definition: HashTable.cpp:43
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860