Line data Source code
1 : //===- HashTable.h - PDB Hash Table -----------------------------*- C++ -*-===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 :
10 : #ifndef LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H
11 : #define LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H
12 :
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/iterator.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/BinaryStreamWriter.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>
#include <vector>
24 :
25 : namespace llvm {
26 :
27 : class BinaryStreamReader;
28 : class BinaryStreamWriter;
29 :
30 : namespace pdb {
31 :
32 : Error readSparseBitVector(BinaryStreamReader &Stream, SparseBitVector<> &V);
33 : Error writeSparseBitVector(BinaryStreamWriter &Writer, SparseBitVector<> &Vec);
34 :
35 : template <typename ValueT, typename TraitsT> class HashTable;
36 :
37 : template <typename ValueT, typename TraitsT>
38 : class HashTableIterator
39 : : public iterator_facade_base<HashTableIterator<ValueT, TraitsT>,
40 : std::forward_iterator_tag,
41 : std::pair<uint32_t, ValueT>> {
42 : friend HashTable<ValueT, TraitsT>;
43 :
44 : HashTableIterator(const HashTable<ValueT, TraitsT> &Map, uint32_t Index,
45 : bool IsEnd)
46 : : Map(&Map), Index(Index), IsEnd(IsEnd) {}
47 :
48 : public:
49 126 : HashTableIterator(const HashTable<ValueT, TraitsT> &Map) : Map(&Map) {
50 : int I = Map.Present.find_first();
51 122 : if (I == -1) {
52 4 : Index = 0;
53 4 : IsEnd = true;
54 : } else {
55 122 : Index = static_cast<uint32_t>(I);
56 122 : IsEnd = false;
57 : }
58 126 : }
59 1 :
60 : HashTableIterator &operator=(const HashTableIterator &R) {
61 1 : Map = R.Map;
62 0 : return *this;
63 0 : }
64 : bool operator==(const HashTableIterator &R) const {
65 20805 : if (IsEnd && R.IsEnd)
66 1 : return true;
67 : if (IsEnd != R.IsEnd)
68 1 : return false;
69 1 :
70 : return (Map == R.Map) && (Index == R.Index);
71 1 : }
72 0 : const std::pair<uint32_t, ValueT> &operator*() const {
73 0 : assert(Map->Present.test(Index));
74 20458 : return Map->Buckets[Index];
75 1 : }
76 250 : HashTableIterator &operator++() {
77 996 : while (Index < Map->Buckets.size()) {
78 379 : ++Index;
79 378 : if (Map->Present.test(Index))
80 : return *this;
81 : }
82 :
83 120 : IsEnd = true;
84 120 : return *this;
85 34 : }
86 :
87 16 : private:
88 : bool isEnd() const { return IsEnd; }
89 0 : uint32_t index() const { return Index; }
90 0 :
91 : const HashTable<ValueT, TraitsT> *Map;
92 0 : uint32_t Index;
93 : bool IsEnd;
94 32 : };
95 :
/// Primary traits template.  Intentionally empty: key types without a
/// specialization provide no hashing support for HashTable.
template <typename T> struct PdbHashTraits {};

/// Traits for uint32_t keys: the key is its own hash value and its own
/// on-disk storage representation, so every conversion is the identity.
template <> struct PdbHashTraits<uint32_t> {
  uint32_t hashLookupKey(uint32_t K) const { return K; }
  uint32_t storageKeyToLookupKey(uint32_t K) const { return K; }
  uint32_t lookupKeyToStorageKey(uint32_t K) { return K; }
};
103 :
104 16 : template <typename ValueT, typename TraitsT = PdbHashTraits<ValueT>>
105 44 : class HashTable {
106 20 : using iterator = HashTableIterator<ValueT, TraitsT>;
107 20 : friend iterator;
108 :
109 : struct Header {
110 : support::ulittle32_t Size;
111 2 : support::ulittle32_t Capacity;
112 2 : };
113 :
114 8 : using BucketList = std::vector<std::pair<uint32_t, ValueT>>;
115 18 :
116 8 : public:
117 342 : HashTable() { Buckets.resize(8); }
118 :
119 : explicit HashTable(TraitsT Traits) : HashTable(8, std::move(Traits)) {}
120 17912 : HashTable(uint32_t Capacity, TraitsT Traits) : Traits(Traits) {
121 17913 : Buckets.resize(Capacity);
122 1 : }
123 :
124 94 : Error load(BinaryStreamReader &Stream) {
125 26 : const Header *H;
126 184 : if (auto EC = Stream.readObject(H))
127 12 : return EC;
128 172 : if (H->Capacity == 0)
129 : return make_error<RawError>(raw_error_code::corrupt_file,
130 : "Invalid Hash Table Capacity");
131 87 : if (H->Size > maxLoad(H->Capacity))
132 1 : return make_error<RawError>(raw_error_code::corrupt_file,
133 : "Invalid Hash Table Size");
134 :
135 86 : Buckets.resize(H->Capacity);
136 :
137 172 : if (auto EC = readSparseBitVector(Stream, Present))
138 : return EC;
139 172 : if (Present.count() != H->Size)
140 : return make_error<RawError>(raw_error_code::corrupt_file,
141 : "Present bit vector does not match size!");
142 :
143 172 : if (auto EC = readSparseBitVector(Stream, Deleted))
144 : return EC;
145 86 : if (Present.intersects(Deleted))
146 : return make_error<RawError>(raw_error_code::corrupt_file,
147 0 : "Present bit vector interesects deleted!");
148 0 :
149 202 : for (uint32_t P : Present) {
150 606 : if (auto EC = Stream.readInteger(Buckets[P].first))
151 : return EC;
152 : const ValueT *Value;
153 404 : if (auto EC = Stream.readObject(Value))
154 : return EC;
155 404 : Buckets[P].second = *Value;
156 : }
157 :
158 : return Error::success();
159 : }
160 :
161 222 : uint32_t calculateSerializedLength() const {
162 : uint32_t Size = sizeof(Header);
163 :
164 : constexpr int BitsPerWord = 8 * sizeof(uint32_t);
165 16 :
166 222 : int NumBitsP = Present.find_last() + 1;
167 222 : int NumBitsD = Deleted.find_last() + 1;
168 3 :
169 225 : uint32_t NumWordsP = alignTo(NumBitsP, BitsPerWord) / BitsPerWord;
170 223 : uint32_t NumWordsD = alignTo(NumBitsD, BitsPerWord) / BitsPerWord;
171 :
172 2 : // Present bit set number of words (4 bytes), followed by that many actual
173 : // words (4 bytes each).
174 4 : Size += sizeof(uint32_t);
175 222 : Size += NumWordsP * sizeof(uint32_t);
176 4 :
177 : // Deleted bit set number of words (4 bytes), followed by that many actual
178 : // words (4 bytes each).
179 224 : Size += sizeof(uint32_t);
180 222 : Size += NumWordsD * sizeof(uint32_t);
181 :
182 : // One (Key, ValueT) pair for each entry Present.
183 224 : Size += (sizeof(uint32_t) + sizeof(ValueT)) * size();
184 :
185 226 : return Size;
186 : }
187 4 :
188 111 : Error commit(BinaryStreamWriter &Writer) const {
189 : Header H;
190 : H.Size = size();
191 4 : H.Capacity = capacity();
192 111 : if (auto EC = Writer.writeObject(H))
193 2 : return EC;
194 :
195 222 : if (auto EC = writeSparseBitVector(Writer, Present))
196 : return EC;
197 16 :
198 270 : if (auto EC = writeSparseBitVector(Writer, Deleted))
199 : return EC;
200 :
201 476 : for (const auto &Entry : *this) {
202 444 : if (auto EC = Writer.writeInteger(Entry.first))
203 32 : return EC;
204 444 : if (auto EC = Writer.writeObject(Entry.second))
205 : return EC;
206 : }
207 : return Error::success();
208 1 : }
209 :
210 2 : void clear() {
211 : Buckets.resize(8);
212 2 : Present.clear();
213 : Deleted.clear();
214 : }
215 1 :
216 : bool empty() const { return size() == 0; }
217 73202 : uint32_t capacity() const { return Buckets.size(); }
218 : uint32_t size() const { return Present.count(); }
219 1 :
220 124 : iterator begin() const { return iterator(*this); }
221 2 : iterator end() const { return iterator(*this, 0, true); }
222 :
223 2 : /// Find the entry whose key has the specified hash value, using the specified
224 : /// traits defining hash function and equality.
225 93411 : template <typename Key> iterator find_as(const Key &K) const {
226 93411 : uint32_t H = Traits.hashLookupKey(K) % capacity();
227 2 : uint32_t I = H;
228 : Optional<uint32_t> FirstUnused;
229 1 : do {
230 110911 : if (isPresent(I)) {
231 75862 : if (Traits.storageKeyToLookupKey(Buckets[I].first) == K)
232 20431 : return iterator(*this, I, false);
233 8 : } else {
234 73004 : if (!FirstUnused)
235 : FirstUnused = I;
236 : // Insertion occurs via linear probing from the slot hint, and will be
237 16 : // inserted at the first empty / deleted location. Therefore, if we are
238 : // probing and find a location that is neither present nor deleted, then
239 16 : // nothing must have EVER been inserted at this location, and thus it is
240 : // not possible for a matching value to occur later.
241 72980 : if (!isDeleted(I))
242 : break;
243 : }
244 17501 : I = (I + 1) % capacity();
245 17500 : } while (I != H);
246 2 :
247 : // The only way FirstUnused would not be set is if every single entry in the
248 2 : // table were Present. But this would violate the load factor constraints
249 : // that we impose, so it should never happen.
250 : assert(FirstUnused);
251 72981 : return iterator(*this, *FirstUnused, true);
252 : }
253 :
254 : /// Set the entry using a key type that the specified Traits can convert
255 1 : /// from a real key to an internal key.
256 : template <typename Key> bool set_as(const Key &K, ValueT V) {
257 20503 : return set_as_internal(K, std::move(V), None);
258 : }
259 2 :
260 : template <typename Key> ValueT get(const Key &K) const {
261 : auto Iter = find_as(K);
262 : assert(Iter != end());
263 2 : return (*Iter).second;
264 : }
265 1 :
266 : protected:
267 110911 : bool isPresent(uint32_t K) const { return Present.test(K); }
268 72980 : bool isDeleted(uint32_t K) const { return Deleted.test(K); }
269 8 :
270 24 : TraitsT Traits;
271 : BucketList Buckets;
272 : mutable SparseBitVector<> Present;
273 16 : mutable SparseBitVector<> Deleted;
274 :
275 16 : private:
276 : /// Set the entry using a key type that the specified Traits can convert
277 : /// from a real key to an internal key.
278 : template <typename Key>
279 72980 : bool set_as_internal(const Key &K, ValueT V, Optional<uint32_t> InternalKey) {
280 72980 : auto Entry = find_as(K);
281 2 : if (Entry != end()) {
282 : assert(isPresent(Entry.index()));
283 : assert(Traits.storageKeyToLookupKey(Buckets[Entry.index()].first) == K);
284 : // We're updating, no need to do anything special.
285 0 : Buckets[Entry.index()].second = V;
286 2 : return false;
287 2 : }
288 :
289 72982 : auto &B = Buckets[Entry.index()];
290 2 : assert(!isPresent(Entry.index()));
291 : assert(Entry.isEnd());
292 72980 : B.first = InternalKey ? *InternalKey : Traits.lookupKeyToStorageKey(K);
293 72980 : B.second = V;
294 72980 : Present.set(Entry.index());
295 72982 : Deleted.reset(Entry.index());
296 :
297 72980 : grow();
298 :
299 2 : assert((find_as(K)) != end());
300 72982 : return true;
301 : }
302 :
303 88 : static uint32_t maxLoad(uint32_t capacity) { return capacity * 2 / 3 + 1; }
304 :
305 72982 : void grow() {
306 : uint32_t S = size();
307 1 : uint32_t MaxLoad = maxLoad(capacity());
308 72980 : if (S < maxLoad(capacity()))
309 58273 : return;
310 : assert(capacity() != UINT32_MAX && "Can't grow Hash table!");
311 :
312 14708 : uint32_t NewCapacity = (capacity() <= INT32_MAX) ? MaxLoad * 2 : UINT32_MAX;
313 1 :
314 : // Growing requires rebuilding the table and re-hashing every item. Make a
315 1 : // copy with a larger capacity, insert everything into the copy, then swap
316 1 : // it in.
317 14707 : HashTable NewMap(NewCapacity, Traits);
318 14707 : for (auto I : Present) {
319 104958 : auto LookupKey = Traits.storageKeyToLookupKey(Buckets[I].first);
320 104958 : NewMap.set_as_internal(LookupKey, Buckets[I].second, Buckets[I].first);
321 1 : }
322 :
323 : Buckets.swap(NewMap.Buckets);
324 14707 : std::swap(Present, NewMap.Present);
325 14708 : std::swap(Deleted, NewMap.Deleted);
326 1 : assert(capacity() == NewCapacity);
327 : assert(size() == S);
328 : }
329 1 : };
330 :
331 1 : } // end namespace pdb
332 :
333 1 : } // end namespace llvm
334 :
335 : #endif // LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H
|