LLVM  9.0.0svn
HashTable.h
Go to the documentation of this file.
1 //===- HashTable.h - PDB Hash Table -----------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H
10 #define LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H
11 
13 #include "llvm/ADT/iterator.h"
17 #include "llvm/Support/Endian.h"
18 #include "llvm/Support/Error.h"
19 #include <cstdint>
20 #include <iterator>
21 #include <utility>
22 #include <vector>
23 
24 namespace llvm {
25 
26 class BinaryStreamReader;
27 class BinaryStreamWriter;
28 
29 namespace pdb {
30 
31 Error readSparseBitVector(BinaryStreamReader &Stream, SparseBitVector<> &V);
32 Error writeSparseBitVector(BinaryStreamWriter &Writer, SparseBitVector<> &Vec);
33 
34 template <typename ValueT, typename TraitsT> class HashTable;
35 
36 template <typename ValueT, typename TraitsT>
38  : public iterator_facade_base<HashTableIterator<ValueT, TraitsT>,
39  std::forward_iterator_tag,
40  std::pair<uint32_t, ValueT>> {
42 
44  bool IsEnd)
45  : Map(&Map), Index(Index), IsEnd(IsEnd) {}
46 
47 public:
49  int I = Map.Present.find_first();
50  if (I == -1) {
51  Index = 0;
52  IsEnd = true;
53  } else {
54  Index = static_cast<uint32_t>(I);
55  IsEnd = false;
56  }
57  }
58 
60  Map = R.Map;
61  return *this;
62  }
63  bool operator==(const HashTableIterator &R) const {
64  if (IsEnd && R.IsEnd)
65  return true;
66  if (IsEnd != R.IsEnd)
67  return false;
68 
69  return (Map == R.Map) && (Index == R.Index);
70  }
71  const std::pair<uint32_t, ValueT> &operator*() const {
72  assert(Map->Present.test(Index));
73  return Map->Buckets[Index];
74  }
76  while (Index < Map->Buckets.size()) {
77  ++Index;
78  if (Map->Present.test(Index))
79  return *this;
80  }
81 
82  IsEnd = true;
83  return *this;
84  }
85 
86 private:
87  bool isEnd() const { return IsEnd; }
88  uint32_t index() const { return Index; }
89 
90  const HashTable<ValueT, TraitsT> *Map;
91  uint32_t Index;
92  bool IsEnd;
93 };
94 
/// Traits used by HashTable to hash lookup keys and to convert between lookup
/// keys and the 32-bit keys stored in the buckets. The primary template is
/// intentionally empty; a specialization (or a custom traits type) supplies the
/// operations.
template <typename T> struct PdbHashTraits {};

/// Identity traits for tables keyed directly by a 32-bit integer: the lookup
/// key, its hash and the stored key are all the same value.
template <> struct PdbHashTraits<uint32_t> {
  uint32_t hashLookupKey(uint32_t N) const { return N; }
  uint32_t storageKeyToLookupKey(uint32_t N) const { return N; }
  uint32_t lookupKeyToStorageKey(uint32_t N) { return N; }
};
102 
103 template <typename ValueT, typename TraitsT = PdbHashTraits<ValueT>>
104 class HashTable {
105  using iterator = HashTableIterator<ValueT, TraitsT>;
106  friend iterator;
107 
108  struct Header {
110  support::ulittle32_t Capacity;
111  };
112 
113  using BucketList = std::vector<std::pair<uint32_t, ValueT>>;
114 
115 public:
116  HashTable() { Buckets.resize(8); }
117 
118  explicit HashTable(TraitsT Traits) : HashTable(8, std::move(Traits)) {}
119  HashTable(uint32_t Capacity, TraitsT Traits) : Traits(Traits) {
120  Buckets.resize(Capacity);
121  }
122 
124  const Header *H;
125  if (auto EC = Stream.readObject(H))
126  return EC;
127  if (H->Capacity == 0)
128  return make_error<RawError>(raw_error_code::corrupt_file,
129  "Invalid Hash Table Capacity");
130  if (H->Size > maxLoad(H->Capacity))
131  return make_error<RawError>(raw_error_code::corrupt_file,
132  "Invalid Hash Table Size");
133 
134  Buckets.resize(H->Capacity);
135 
136  if (auto EC = readSparseBitVector(Stream, Present))
137  return EC;
138  if (Present.count() != H->Size)
139  return make_error<RawError>(raw_error_code::corrupt_file,
140  "Present bit vector does not match size!");
141 
142  if (auto EC = readSparseBitVector(Stream, Deleted))
143  return EC;
144  if (Present.intersects(Deleted))
145  return make_error<RawError>(raw_error_code::corrupt_file,
146  "Present bit vector interesects deleted!");
147 
148  for (uint32_t P : Present) {
149  if (auto EC = Stream.readInteger(Buckets[P].first))
150  return EC;
151  const ValueT *Value;
152  if (auto EC = Stream.readObject(Value))
153  return EC;
154  Buckets[P].second = *Value;
155  }
156 
157  return Error::success();
158  }
159 
161  uint32_t Size = sizeof(Header);
162 
163  constexpr int BitsPerWord = 8 * sizeof(uint32_t);
164 
165  int NumBitsP = Present.find_last() + 1;
166  int NumBitsD = Deleted.find_last() + 1;
167 
168  uint32_t NumWordsP = alignTo(NumBitsP, BitsPerWord) / BitsPerWord;
169  uint32_t NumWordsD = alignTo(NumBitsD, BitsPerWord) / BitsPerWord;
170 
171  // Present bit set number of words (4 bytes), followed by that many actual
172  // words (4 bytes each).
173  Size += sizeof(uint32_t);
174  Size += NumWordsP * sizeof(uint32_t);
175 
176  // Deleted bit set number of words (4 bytes), followed by that many actual
177  // words (4 bytes each).
178  Size += sizeof(uint32_t);
179  Size += NumWordsD * sizeof(uint32_t);
180 
181  // One (Key, ValueT) pair for each entry Present.
182  Size += (sizeof(uint32_t) + sizeof(ValueT)) * size();
183 
184  return Size;
185  }
186 
187  Error commit(BinaryStreamWriter &Writer) const {
188  Header H;
189  H.Size = size();
190  H.Capacity = capacity();
191  if (auto EC = Writer.writeObject(H))
192  return EC;
193 
194  if (auto EC = writeSparseBitVector(Writer, Present))
195  return EC;
196 
197  if (auto EC = writeSparseBitVector(Writer, Deleted))
198  return EC;
199 
200  for (const auto &Entry : *this) {
201  if (auto EC = Writer.writeInteger(Entry.first))
202  return EC;
203  if (auto EC = Writer.writeObject(Entry.second))
204  return EC;
205  }
206  return Error::success();
207  }
208 
209  void clear() {
210  Buckets.resize(8);
211  Present.clear();
212  Deleted.clear();
213  }
214 
215  bool empty() const { return size() == 0; }
216  uint32_t capacity() const { return Buckets.size(); }
217  uint32_t size() const { return Present.count(); }
218 
219  iterator begin() const { return iterator(*this); }
220  iterator end() const { return iterator(*this, 0, true); }
221 
222  /// Find the entry whose key has the specified hash value, using the specified
223  /// traits defining hash function and equality.
224  template <typename Key> iterator find_as(const Key &K) const {
225  uint32_t H = Traits.hashLookupKey(K) % capacity();
226  uint32_t I = H;
227  Optional<uint32_t> FirstUnused;
228  do {
229  if (isPresent(I)) {
230  if (Traits.storageKeyToLookupKey(Buckets[I].first) == K)
231  return iterator(*this, I, false);
232  } else {
233  if (!FirstUnused)
234  FirstUnused = I;
235  // Insertion occurs via linear probing from the slot hint, and will be
236  // inserted at the first empty / deleted location. Therefore, if we are
237  // probing and find a location that is neither present nor deleted, then
238  // nothing must have EVER been inserted at this location, and thus it is
239  // not possible for a matching value to occur later.
240  if (!isDeleted(I))
241  break;
242  }
243  I = (I + 1) % capacity();
244  } while (I != H);
245 
246  // The only way FirstUnused would not be set is if every single entry in the
247  // table were Present. But this would violate the load factor constraints
248  // that we impose, so it should never happen.
249  assert(FirstUnused);
250  return iterator(*this, *FirstUnused, true);
251  }
252 
253  /// Set the entry using a key type that the specified Traits can convert
254  /// from a real key to an internal key.
255  template <typename Key> bool set_as(const Key &K, ValueT V) {
256  return set_as_internal(K, std::move(V), None);
257  }
258 
259  template <typename Key> ValueT get(const Key &K) const {
260  auto Iter = find_as(K);
261  assert(Iter != end());
262  return (*Iter).second;
263  }
264 
265 protected:
266  bool isPresent(uint32_t K) const { return Present.test(K); }
267  bool isDeleted(uint32_t K) const { return Deleted.test(K); }
268 
270  BucketList Buckets;
273 
274 private:
275  /// Set the entry using a key type that the specified Traits can convert
276  /// from a real key to an internal key.
277  template <typename Key>
278  bool set_as_internal(const Key &K, ValueT V, Optional<uint32_t> InternalKey) {
279  auto Entry = find_as(K);
280  if (Entry != end()) {
281  assert(isPresent(Entry.index()));
282  assert(Traits.storageKeyToLookupKey(Buckets[Entry.index()].first) == K);
283  // We're updating, no need to do anything special.
284  Buckets[Entry.index()].second = V;
285  return false;
286  }
287 
288  auto &B = Buckets[Entry.index()];
289  assert(!isPresent(Entry.index()));
290  assert(Entry.isEnd());
291  B.first = InternalKey ? *InternalKey : Traits.lookupKeyToStorageKey(K);
292  B.second = V;
293  Present.set(Entry.index());
294  Deleted.reset(Entry.index());
295 
296  grow();
297 
298  assert((find_as(K)) != end());
299  return true;
300  }
301 
302  static uint32_t maxLoad(uint32_t capacity) { return capacity * 2 / 3 + 1; }
303 
304  void grow() {
305  uint32_t S = size();
306  uint32_t MaxLoad = maxLoad(capacity());
307  if (S < maxLoad(capacity()))
308  return;
309  assert(capacity() != UINT32_MAX && "Can't grow Hash table!");
310 
311  uint32_t NewCapacity = (capacity() <= INT32_MAX) ? MaxLoad * 2 : UINT32_MAX;
312 
313  // Growing requires rebuilding the table and re-hashing every item. Make a
314  // copy with a larger capacity, insert everything into the copy, then swap
315  // it in.
316  HashTable NewMap(NewCapacity, Traits);
317  for (auto I : Present) {
318  auto LookupKey = Traits.storageKeyToLookupKey(Buckets[I].first);
319  NewMap.set_as_internal(LookupKey, Buckets[I].second, Buckets[I].first);
320  }
321 
322  Buckets.swap(NewMap.Buckets);
323  std::swap(Present, NewMap.Present);
324  std::swap(Deleted, NewMap.Deleted);
325  assert(capacity() == NewCapacity);
326  assert(size() == S);
327  }
328 };
329 
330 } // end namespace pdb
331 
332 } // end namespace llvm
333 
334 #endif // LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H
Error writeObject(const T &Obj)
Writes the object Obj to the underlying stream, as if by using memcpy.
iterator end() const
Definition: HashTable.h:220
Error load(BinaryStreamReader &Stream)
Definition: HashTable.h:123
SparseBitVector Deleted
Definition: HashTable.h:272
uint32_t lookupKeyToStorageKey(uint32_t N)
Definition: HashTable.h:100
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:233
uint32_t size() const
Definition: HashTable.h:217
HashTableIterator & operator++()
Definition: HashTable.h:75
This class represents lattice values for constants.
Definition: AllocatorList.h:23
Error writeSparseBitVector(BinaryStreamWriter &Writer, SparseBitVector<> &Vec)
Definition: HashTable.cpp:46
Error readInteger(T &Dest)
Read an integer of the specified endianness into Dest and update the stream's offset.
void set(unsigned Idx)
Error readObject(const T *&Dest)
Get a pointer to an object of type T from the underlying stream, as if by memcpy, and store the resul...
unsigned second
iterator find_as(const Key &K) const
Find the entry whose key has the specified hash value, using the specified traits defining hash function and equality.
Definition: HashTable.h:224
uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the next integer (mod 2**64) that is greater than or equal to Value and is a multiple of Alig...
Definition: MathExtras.h:684
iterator begin() const
Definition: HashTable.h:219
uint32_t capacity() const
Definition: HashTable.h:216
const std::pair< uint32_t, ValueT > & operator*() const
Definition: HashTable.h:71
HashTableIterator & operator=(const HashTableIterator &R)
Definition: HashTable.h:59
Definition: BitVector.h:937
Key
PAL metadata keys.
CRTP base class which implements the entire standard iterator facade in terms of a minimal subset of ...
Definition: iterator.h:67
Error readSparseBitVector(BinaryStreamReader &Stream, SparseBitVector<> &V)
Definition: HashTable.cpp:24
#define P(N)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define H(x, y, z)
Definition: MD5.cpp:57
Provides write only access to a subclass of WritableBinaryStream.
Error writeInteger(T Value)
Write the integer Value to the underlying stream in the specified endianness.
void reset(unsigned Idx)
bool set_as(const Key &K, ValueT V)
Set the entry using a key type that the specified Traits can convert from a real key to an internal key.
Definition: HashTable.h:255
SparseBitVector Present
Definition: HashTable.h:271
unsigned first
static ErrorSuccess success()
Create a success value.
Definition: Error.h:326
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
Definition: STLExtras.h:1166
HashTable(TraitsT Traits)
Definition: HashTable.h:118
bool operator==(const HashTableIterator &R) const
Definition: HashTable.h:63
uint32_t hashLookupKey(uint32_t N) const
Definition: HashTable.h:98
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
bool empty() const
Definition: HashTable.h:215
HashTable(uint32_t Capacity, TraitsT Traits)
Definition: HashTable.h:119
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
uint32_t Size
Definition: Profile.cpp:46
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
BucketList Buckets
Definition: HashTable.h:270
HashTableIterator(const HashTable< ValueT, TraitsT > &Map)
Definition: HashTable.h:48
LLVM Value Representation.
Definition: Value.h:72
Lightweight error class with error context and mandatory checking.
Definition: Error.h:157
Error commit(BinaryStreamWriter &Writer) const
Definition: HashTable.h:187
uint32_t calculateSerializedLength() const
Definition: HashTable.h:160
uint32_t storageKeyToLookupKey(uint32_t N) const
Definition: HashTable.h:99
bool isPresent(uint32_t K) const
Definition: HashTable.h:266
Provides read only access to a subclass of BinaryStream.
bool isDeleted(uint32_t K) const
Definition: HashTable.h:267