Line data Source code
1 : //===- HashTable.cpp - PDB Hash Table ---------------------------*- C++ -*-===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 :
10 : #include "llvm/DebugInfo/PDB/Raw/HashTable.h"
11 :
12 : #include "llvm/ADT/Optional.h"
13 : #include "llvm/ADT/SparseBitVector.h"
14 : #include "llvm/DebugInfo/PDB/Raw/RawError.h"
15 :
16 : #include <assert.h>
17 :
18 : using namespace llvm;
19 : using namespace llvm::pdb;
20 :
21 41 : HashTable::HashTable() : HashTable(8) {}
22 :
23 172 : HashTable::HashTable(uint32_t Capacity) { Buckets.resize(Capacity); }
24 :
25 16 : Error HashTable::load(msf::StreamReader &Stream) {
26 : const Header *H;
27 48 : if (auto EC = Stream.readObject(H))
28 0 : return EC;
29 32 : if (H->Capacity == 0)
30 : return make_error<RawError>(raw_error_code::corrupt_file,
31 0 : "Invalid Hash Table Capacity");
32 48 : if (H->Size > maxLoad(H->Capacity))
33 : return make_error<RawError>(raw_error_code::corrupt_file,
34 0 : "Invalid Hash Table Size");
35 :
36 32 : Buckets.resize(H->Capacity);
37 :
38 48 : if (auto EC = readSparseBitVector(Stream, Present))
39 0 : return EC;
40 48 : if (Present.count() != H->Size)
41 : return make_error<RawError>(raw_error_code::corrupt_file,
42 0 : "Present bit vector does not match size!");
43 :
44 48 : if (auto EC = readSparseBitVector(Stream, Deleted))
45 0 : return EC;
46 16 : if (Present.intersects(Deleted))
47 : return make_error<RawError>(raw_error_code::corrupt_file,
48 0 : "Present bit vector interesects deleted!");
49 :
50 101 : for (uint32_t P : Present) {
51 212 : if (auto EC = Stream.readInteger(Buckets[P].first))
52 0 : return EC;
53 212 : if (auto EC = Stream.readInteger(Buckets[P].second))
54 0 : return EC;
55 : }
56 :
57 48 : return Error::success();
58 : }
59 :
60 5 : uint32_t HashTable::calculateSerializedLength() const {
61 5 : uint32_t Size = sizeof(Header);
62 :
63 5 : int NumBitsP = Present.find_last() + 1;
64 5 : int NumBitsD = Deleted.find_last() + 1;
65 :
66 : // Present bit set number of words, followed by that many actual words.
67 5 : Size += sizeof(uint32_t);
68 10 : Size += alignTo(NumBitsP, sizeof(uint32_t));
69 :
70 : // Deleted bit set number of words, followed by that many actual words.
71 5 : Size += sizeof(uint32_t);
72 10 : Size += alignTo(NumBitsD, sizeof(uint32_t));
73 :
74 : // One (Key, Value) pair for each entry Present.
75 5 : Size += 2 * sizeof(uint32_t) * size();
76 :
77 5 : return Size;
78 : }
79 :
80 5 : Error HashTable::commit(msf::StreamWriter &Writer) const {
81 5 : Header H;
82 10 : H.Size = size();
83 10 : H.Capacity = capacity();
84 15 : if (auto EC = Writer.writeObject(H))
85 0 : return EC;
86 :
87 15 : if (auto EC = writeSparseBitVector(Writer, Present))
88 0 : return EC;
89 :
90 15 : if (auto EC = writeSparseBitVector(Writer, Deleted))
91 0 : return EC;
92 :
93 50 : for (const auto &Entry : *this) {
94 60 : if (auto EC = Writer.writeInteger(Entry.first))
95 0 : return EC;
96 60 : if (auto EC = Writer.writeInteger(Entry.second))
97 0 : return EC;
98 : }
99 15 : return Error::success();
100 : }
101 :
102 19 : void HashTable::clear() {
103 19 : Buckets.resize(8);
104 38 : Present.clear();
105 38 : Deleted.clear();
106 19 : }
107 :
108 348 : uint32_t HashTable::capacity() const { return Buckets.size(); }
109 144 : uint32_t HashTable::size() const { return Present.count(); }
110 :
111 20 : HashTableIterator HashTable::begin() const { return HashTableIterator(*this); }
112 92 : HashTableIterator HashTable::end() const {
113 92 : return HashTableIterator(*this, 0, true);
114 : }
115 :
116 92 : HashTableIterator HashTable::find(uint32_t K) {
117 92 : uint32_t H = K % capacity();
118 92 : uint32_t I = H;
119 92 : Optional<uint32_t> FirstUnused;
120 : do {
121 210 : if (isPresent(I)) {
122 104 : if (Buckets[I].first == K)
123 42 : return HashTableIterator(*this, I, false);
124 : } else {
125 53 : if (!FirstUnused)
126 : FirstUnused = I;
127 : // Insertion occurs via linear probing from the slot hint, and will be
128 : // inserted at the first empty / deleted location. Therefore, if we are
129 : // probing and find a location that is neither present nor deleted, then
130 : // nothing must have EVER been inserted at this location, and thus it is
131 : // not possible for a matching value to occur later.
132 106 : if (!isDeleted(I))
133 : break;
134 : }
135 13 : I = (I + 1) % capacity();
136 13 : } while (I != H);
137 :
138 : // The only way FirstUnused would not be set is if every single entry in the
139 : // table were Present. But this would violate the load factor constraints
140 : // that we impose, so it should never happen.
141 : assert(FirstUnused);
142 50 : return HashTableIterator(*this, *FirstUnused, true);
143 : }
144 :
145 49 : void HashTable::set(uint32_t K, uint32_t V) {
146 49 : auto Entry = find(K);
147 98 : if (Entry != end()) {
148 : assert(isPresent(Entry.index()));
149 : assert(Buckets[Entry.index()].first == K);
150 : // We're updating, no need to do anything special.
151 0 : Buckets[Entry.index()].second = V;
152 0 : return;
153 : }
154 :
155 98 : auto &B = Buckets[Entry.index()];
156 : assert(!isPresent(Entry.index()));
157 : assert(Entry.isEnd());
158 49 : B.first = K;
159 49 : B.second = V;
160 49 : Present.set(Entry.index());
161 49 : Deleted.reset(Entry.index());
162 :
163 49 : grow();
164 :
165 : assert(find(K) != end());
166 : }
167 :
168 2 : void HashTable::remove(uint32_t K) {
169 2 : auto Iter = find(K);
170 : // It wasn't here to begin with, just exit.
171 2 : if (Iter == end())
172 0 : return;
173 :
174 : assert(Present.test(Iter.index()));
175 : assert(!Deleted.test(Iter.index()));
176 2 : Deleted.set(Iter.index());
177 2 : Present.reset(Iter.index());
178 : }
179 :
180 20 : uint32_t HashTable::get(uint32_t K) {
181 20 : auto I = find(K);
182 : assert(I != end());
183 20 : return (*I).second;
184 : }
185 :
186 65 : uint32_t HashTable::maxLoad(uint32_t capacity) { return capacity * 2 / 3 + 1; }
187 :
188 49 : void HashTable::grow() {
189 49 : uint32_t S = size();
190 49 : if (S < maxLoad(capacity()))
191 47 : return;
192 : assert(capacity() != UINT32_MAX && "Can't grow Hash table!");
193 :
194 : uint32_t NewCapacity =
195 2 : (capacity() <= INT32_MAX) ? capacity() * 2 : UINT32_MAX;
196 :
197 : // Growing requires rebuilding the table and re-hashing every item. Make a
198 : // copy with a larger capacity, insert everything into the copy, then swap
199 : // it in.
200 4 : HashTable NewMap(NewCapacity);
201 18 : for (auto I : Present) {
202 24 : NewMap.set(Buckets[I].first, Buckets[I].second);
203 : }
204 :
205 4 : Buckets.swap(NewMap.Buckets);
206 2 : std::swap(Present, NewMap.Present);
207 2 : std::swap(Deleted, NewMap.Deleted);
208 : assert(capacity() == NewCapacity);
209 : assert(size() == S);
210 : }
211 :
212 32 : Error HashTable::readSparseBitVector(msf::StreamReader &Stream,
213 : SparseBitVector<> &V) {
214 : uint32_t NumWords;
215 96 : if (auto EC = Stream.readInteger(NumWords))
216 : return joinErrors(
217 0 : std::move(EC),
218 0 : make_error<RawError>(raw_error_code::corrupt_file,
219 0 : "Expected hash table number of words"));
220 :
221 54 : for (uint32_t I = 0; I != NumWords; ++I) {
222 : uint32_t Word;
223 66 : if (auto EC = Stream.readInteger(Word))
224 0 : return joinErrors(std::move(EC),
225 0 : make_error<RawError>(raw_error_code::corrupt_file,
226 0 : "Expected hash table word"));
227 726 : for (unsigned Idx = 0; Idx < 32; ++Idx)
228 704 : if (Word & (1U << Idx))
229 53 : V.set((I * 32) + Idx);
230 : }
231 96 : return Error::success();
232 : }
233 :
234 10 : Error HashTable::writeSparseBitVector(msf::StreamWriter &Writer,
235 : SparseBitVector<> &Vec) {
236 10 : int ReqBits = Vec.find_last() + 1;
237 20 : uint32_t NumWords = alignTo(ReqBits, sizeof(uint32_t)) / sizeof(uint32_t);
238 30 : if (auto EC = Writer.writeInteger(NumWords))
239 : return joinErrors(
240 0 : std::move(EC),
241 0 : make_error<RawError>(raw_error_code::corrupt_file,
242 0 : "Could not write linear map number of words"));
243 :
244 10 : uint32_t Idx = 0;
245 22 : for (uint32_t I = 0; I != NumWords; ++I) {
246 : uint32_t Word = 0;
247 780 : for (uint32_t WordIdx = 0; WordIdx < 32; ++WordIdx, ++Idx) {
248 384 : if (Vec.test(Idx))
249 20 : Word |= (1 << WordIdx);
250 : }
251 36 : if (auto EC = Writer.writeInteger(Word))
252 0 : return joinErrors(std::move(EC), make_error<RawError>(
253 : raw_error_code::corrupt_file,
254 0 : "Could not write linear map word"));
255 : }
256 30 : return Error::success();
257 : }
258 :
259 184 : HashTableIterator::HashTableIterator(const HashTable &Map, uint32_t Index,
260 184 : bool IsEnd)
261 184 : : Map(&Map), Index(Index), IsEnd(IsEnd) {}
262 :
263 40 : HashTableIterator::HashTableIterator(const HashTable &Map) : Map(&Map) {
264 40 : int I = Map.Present.find_first();
265 20 : if (I == -1) {
266 0 : Index = 0;
267 0 : IsEnd = true;
268 : } else {
269 20 : Index = static_cast<uint32_t>(I);
270 20 : IsEnd = false;
271 : }
272 20 : }
273 :
274 0 : HashTableIterator &HashTableIterator::operator=(const HashTableIterator &R) {
275 0 : Map = R.Map;
276 0 : return *this;
277 : }
278 :
279 157 : bool HashTableIterator::operator==(const HashTableIterator &R) const {
280 157 : if (IsEnd && R.IsEnd)
281 : return true;
282 87 : if (IsEnd != R.IsEnd)
283 : return false;
284 :
285 0 : return (Map == R.Map) && (Index == R.Index);
286 : }
287 :
288 85 : const std::pair<uint32_t, uint32_t> &HashTableIterator::operator*() const {
289 : assert(Map->Present.test(Index));
290 170 : return Map->Buckets[Index];
291 : }
292 :
293 65 : HashTableIterator &HashTableIterator::operator++() {
294 302 : while (Index < Map->Buckets.size()) {
295 131 : ++Index;
296 131 : if (Map->Present.test(Index))
297 : return *this;
298 : }
299 :
300 20 : IsEnd = true;
301 20 : return *this;
302 : }
|