LLVM  3.7.0
SparseMultiSet.h
Go to the documentation of this file.
1 //===--- llvm/ADT/SparseMultiSet.h - Sparse multiset ------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the SparseMultiSet class, which adds multiset behavior to
11 // the SparseSet.
12 //
13 // A sparse multiset holds a small number of objects identified by integer keys
14 // from a moderately sized universe. The sparse multiset uses more memory than
15 // other containers in order to provide faster operations. Any key can map to
16 // multiple values. A SparseMultiSetNode class is provided, which serves as a
17 // convenient base class for the contents of a SparseMultiSet.
18 //
19 //===----------------------------------------------------------------------===//
20 
21 #ifndef LLVM_ADT_SPARSEMULTISET_H
22 #define LLVM_ADT_SPARSEMULTISET_H
23 
24 #include "llvm/ADT/SparseSet.h"
25 
26 namespace llvm {
27 
28 /// Fast multiset implementation for objects that can be identified by small
29 /// unsigned keys.
30 ///
31 /// SparseMultiSet allocates memory proportional to the size of the key
32 /// universe, so it is not recommended for building composite data structures.
33 /// It is useful for algorithms that require a single set with fast operations.
34 ///
35 /// Compared to DenseSet and DenseMap, SparseMultiSet provides a constant-time
36 /// clear() that is as fast as a vector's. The find(), insert(), and erase()
37 /// operations are all constant time, and typically faster than a hash table.
38 /// The iteration order doesn't depend on numerical key values, it only depends
39 /// on the order of insert() and erase() operations. Iteration order is the
40 /// insertion order. Iteration is only provided over elements of equivalent
41 /// keys, but iterators are bidirectional.
42 ///
43 /// Compared to BitVector, SparseMultiSet<unsigned> uses 8x-40x more memory, but
44 /// offers constant-time clear() and size() operations as well as fast iteration
45 /// independent of the size of the universe.
46 ///
47 /// SparseMultiSet contains a dense vector holding all the objects and a sparse
48 /// array holding indexes into the dense vector. Most of the memory is used by
49 /// the sparse array which is the size of the key universe. The SparseT template
50 /// parameter provides a space/speed tradeoff for sets holding many elements.
51 ///
52 /// When SparseT is uint32_t, find() only touches up to 3 cache lines, but the
53 /// sparse array uses 4 x Universe bytes.
54 ///
55 /// When SparseT is uint8_t (the default), find() touches up to 3+[N/256] cache
56 /// lines, but the sparse array is 4x smaller. N is the number of elements in
57 /// the set.
58 ///
59 /// For sets that may grow to thousands of elements, SparseT should be set to
60 /// uint16_t or uint32_t.
61 ///
62 /// Multiset behavior is implemented with doubly linked lists of values
63 /// that are inlined in the dense vector. SparseMultiSet is a good choice when
64 /// one desires a growable number of entries per key, as it will retain the
65 /// SparseSet algorithmic properties despite being growable. Thus, it is often a
66 /// better choice than a SparseSet of growable containers or a vector of
67 /// vectors. SparseMultiSet also keeps iterators valid after erasure (provided
68 /// the iterators don't point to the element erased), allowing for more
69 /// intuitive and fast removal.
70 ///
71 /// @tparam ValueT The type of objects in the set.
72 /// @tparam KeyFunctorT A functor that computes an unsigned index from KeyT.
73 /// @tparam SparseT An unsigned integer type. See above.
74 ///
75 template<typename ValueT,
76  typename KeyFunctorT = llvm::identity<unsigned>,
77  typename SparseT = uint8_t>
79  static_assert(std::numeric_limits<SparseT>::is_integer &&
80  !std::numeric_limits<SparseT>::is_signed,
81  "SparseT must be an unsigned integer type");
82 
83  /// The actual data that's stored, as a doubly-linked list implemented via
84  /// indices into the DenseVector. The doubly linked list is implemented
85  /// circular in Prev indices, and INVALID-terminated in Next indices. This
86  /// provides efficient access to list tails. These nodes can also be
87  /// tombstones, in which case they are actually nodes in a single-linked
88  /// freelist of recyclable slots.
89  struct SMSNode {
90  static const unsigned INVALID = ~0U;
91 
92  ValueT Data;
93  unsigned Prev;
94  unsigned Next;
95 
96  SMSNode(ValueT D, unsigned P, unsigned N) : Data(D), Prev(P), Next(N) { }
97 
98  /// List tails have invalid Nexts.
99  bool isTail() const {
100  return Next == INVALID;
101  }
102 
103  /// Whether this node is a tombstone node, and thus is in our freelist.
104  bool isTombstone() const {
105  return Prev == INVALID;
106  }
107 
108  /// Since the list is circular in Prev, all non-tombstone nodes have a valid
109  /// Prev.
110  bool isValid() const { return Prev != INVALID; }
111  };
112 
113  typedef typename KeyFunctorT::argument_type KeyT;
115  DenseT Dense;
116  SparseT *Sparse;
117  unsigned Universe;
118  KeyFunctorT KeyIndexOf;
120 
121  /// We have a built-in recycler for reusing tombstone slots. This recycler
122  /// puts a singly-linked free list into tombstone slots, allowing us quick
123  /// erasure, iterator preservation, and dense size.
124  unsigned FreelistIdx;
125  unsigned NumFree;
126 
127  unsigned sparseIndex(const ValueT &Val) const {
128  assert(ValIndexOf(Val) < Universe &&
129  "Invalid key in set. Did object mutate?");
130  return ValIndexOf(Val);
131  }
132  unsigned sparseIndex(const SMSNode &N) const { return sparseIndex(N.Data); }
133 
134  // Disable copy construction and assignment.
135  // This data structure is not meant to be used that way.
136  SparseMultiSet(const SparseMultiSet&) = delete;
137  SparseMultiSet &operator=(const SparseMultiSet&) = delete;
138 
139  /// Whether the given entry is the head of the list. List heads's previous
140  /// pointers are to the tail of the list, allowing for efficient access to the
141  /// list tail. D must be a valid entry node.
142  bool isHead(const SMSNode &D) const {
143  assert(D.isValid() && "Invalid node for head");
144  return Dense[D.Prev].isTail();
145  }
146 
147  /// Whether the given entry is a singleton entry, i.e. the only entry with
148  /// that key.
149  bool isSingleton(const SMSNode &N) const {
150  assert(N.isValid() && "Invalid node for singleton");
151  // Is N its own predecessor?
152  return &Dense[N.Prev] == &N;
153  }
154 
155  /// Add in the given SMSNode. Uses a free entry in our freelist if
156  /// available. Returns the index of the added node.
157  unsigned addValue(const ValueT& V, unsigned Prev, unsigned Next) {
158  if (NumFree == 0) {
159  Dense.push_back(SMSNode(V, Prev, Next));
160  return Dense.size() - 1;
161  }
162 
163  // Peel off a free slot
164  unsigned Idx = FreelistIdx;
165  unsigned NextFree = Dense[Idx].Next;
166  assert(Dense[Idx].isTombstone() && "Non-tombstone free?");
167 
168  Dense[Idx] = SMSNode(V, Prev, Next);
169  FreelistIdx = NextFree;
170  --NumFree;
171  return Idx;
172  }
173 
174  /// Make the current index a new tombstone. Pushes it onto the freelist.
175  void makeTombstone(unsigned Idx) {
176  Dense[Idx].Prev = SMSNode::INVALID;
177  Dense[Idx].Next = FreelistIdx;
178  FreelistIdx = Idx;
179  ++NumFree;
180  }
181 
182 public:
183  typedef ValueT value_type;
184  typedef ValueT &reference;
185  typedef const ValueT &const_reference;
186  typedef ValueT *pointer;
187  typedef const ValueT *const_pointer;
188  typedef unsigned size_type;
189 
191  : Sparse(nullptr), Universe(0), FreelistIdx(SMSNode::INVALID), NumFree(0) {}
192 
193  ~SparseMultiSet() { free(Sparse); }
194 
195  /// Set the universe size which determines the largest key the set can hold.
196  /// The universe must be sized before any elements can be added.
197  ///
198  /// @param U Universe size. All object keys must be less than U.
199  ///
200  void setUniverse(unsigned U) {
201  // It's not hard to resize the universe on a non-empty set, but it doesn't
202  // seem like a likely use case, so we can add that code when we need it.
203  assert(empty() && "Can only resize universe on an empty map");
204  // Hysteresis prevents needless reallocations.
205  if (U >= Universe/4 && U <= Universe)
206  return;
207  free(Sparse);
208  // The Sparse array doesn't actually need to be initialized, so malloc
209  // would be enough here, but that will cause tools like valgrind to
210  // complain about branching on uninitialized data.
211  Sparse = reinterpret_cast<SparseT*>(calloc(U, sizeof(SparseT)));
212  Universe = U;
213  }
214 
215  /// Our iterators are iterators over the collection of objects that share a
216  /// key.
217  template<typename SMSPtrTy>
218  class iterator_base : public std::iterator<std::bidirectional_iterator_tag,
219  ValueT> {
220  friend class SparseMultiSet;
221  SMSPtrTy SMS;
222  unsigned Idx;
223  unsigned SparseIdx;
224 
225  iterator_base(SMSPtrTy P, unsigned I, unsigned SI)
226  : SMS(P), Idx(I), SparseIdx(SI) { }
227 
228  /// Whether our iterator has fallen outside our dense vector.
229  bool isEnd() const {
230  if (Idx == SMSNode::INVALID)
231  return true;
232 
233  assert(Idx < SMS->Dense.size() && "Out of range, non-INVALID Idx?");
234  return false;
235  }
236 
237  /// Whether our iterator is properly keyed, i.e. the SparseIdx is valid
238  bool isKeyed() const { return SparseIdx < SMS->Universe; }
239 
240  unsigned Prev() const { return SMS->Dense[Idx].Prev; }
241  unsigned Next() const { return SMS->Dense[Idx].Next; }
242 
243  void setPrev(unsigned P) { SMS->Dense[Idx].Prev = P; }
244  void setNext(unsigned N) { SMS->Dense[Idx].Next = N; }
245 
246  public:
247  typedef std::iterator<std::bidirectional_iterator_tag, ValueT> super;
248  typedef typename super::value_type value_type;
249  typedef typename super::difference_type difference_type;
250  typedef typename super::pointer pointer;
251  typedef typename super::reference reference;
252 
254  assert(isKeyed() && SMS->sparseIndex(SMS->Dense[Idx].Data) == SparseIdx &&
255  "Dereferencing iterator of invalid key or index");
256 
257  return SMS->Dense[Idx].Data;
258  }
259  pointer operator->() const { return &operator*(); }
260 
261  /// Comparison operators
262  bool operator==(const iterator_base &RHS) const {
263  // end compares equal
264  if (SMS == RHS.SMS && Idx == RHS.Idx) {
265  assert((isEnd() || SparseIdx == RHS.SparseIdx) &&
266  "Same dense entry, but different keys?");
267  return true;
268  }
269 
270  return false;
271  }
272 
273  bool operator!=(const iterator_base &RHS) const {
274  return !operator==(RHS);
275  }
276 
277  /// Increment and decrement operators
278  iterator_base &operator--() { // predecrement - Back up
279  assert(isKeyed() && "Decrementing an invalid iterator");
280  assert((isEnd() || !SMS->isHead(SMS->Dense[Idx])) &&
281  "Decrementing head of list");
282 
283  // If we're at the end, then issue a new find()
284  if (isEnd())
285  Idx = SMS->findIndex(SparseIdx).Prev();
286  else
287  Idx = Prev();
288 
289  return *this;
290  }
291  iterator_base &operator++() { // preincrement - Advance
292  assert(!isEnd() && isKeyed() && "Incrementing an invalid/end iterator");
293  Idx = Next();
294  return *this;
295  }
296  iterator_base operator--(int) { // postdecrement
297  iterator_base I(*this);
298  --*this;
299  return I;
300  }
301  iterator_base operator++(int) { // postincrement
302  iterator_base I(*this);
303  ++*this;
304  return I;
305  }
306  };
307  typedef iterator_base<SparseMultiSet *> iterator;
308  typedef iterator_base<const SparseMultiSet *> const_iterator;
309 
310  // Convenience types
311  typedef std::pair<iterator, iterator> RangePair;
312 
313  /// Returns an iterator past this container. Note that such an iterator cannot
314  /// be decremented, but will compare equal to other end iterators.
315  iterator end() { return iterator(this, SMSNode::INVALID, SMSNode::INVALID); }
316  const_iterator end() const {
317  return const_iterator(this, SMSNode::INVALID, SMSNode::INVALID);
318  }
319 
320  /// Returns true if the set is empty.
321  ///
322  /// This is not the same as BitVector::empty().
323  ///
324  bool empty() const { return size() == 0; }
325 
326  /// Returns the number of elements in the set.
327  ///
328  /// This is not the same as BitVector::size() which returns the size of the
329  /// universe.
330  ///
331  size_type size() const {
332  assert(NumFree <= Dense.size() && "Out-of-bounds free entries");
333  return Dense.size() - NumFree;
334  }
335 
336  /// Clears the set. This is a very fast constant time operation.
337  ///
338  void clear() {
339  // Sparse does not need to be cleared, see find().
340  Dense.clear();
341  NumFree = 0;
342  FreelistIdx = SMSNode::INVALID;
343  }
344 
345  /// Find an element by its index.
346  ///
347  /// @param Idx A valid index to find.
348  /// @returns An iterator to the element identified by key, or end().
349  ///
350  iterator findIndex(unsigned Idx) {
351  assert(Idx < Universe && "Key out of range");
352  const unsigned Stride = std::numeric_limits<SparseT>::max() + 1u;
353  for (unsigned i = Sparse[Idx], e = Dense.size(); i < e; i += Stride) {
354  const unsigned FoundIdx = sparseIndex(Dense[i]);
355  // Check that we're pointing at the correct entry and that it is the head
356  // of a valid list.
357  if (Idx == FoundIdx && Dense[i].isValid() && isHead(Dense[i]))
358  return iterator(this, i, Idx);
359  // Stride is 0 when SparseT >= unsigned. We don't need to loop.
360  if (!Stride)
361  break;
362  }
363  return end();
364  }
365 
366  /// Find an element by its key.
367  ///
368  /// @param Key A valid key to find.
369  /// @returns An iterator to the element identified by key, or end().
370  ///
371  iterator find(const KeyT &Key) {
372  return findIndex(KeyIndexOf(Key));
373  }
374 
375  const_iterator find(const KeyT &Key) const {
376  iterator I = const_cast<SparseMultiSet*>(this)->findIndex(KeyIndexOf(Key));
377  return const_iterator(I.SMS, I.Idx, KeyIndexOf(Key));
378  }
379 
380  /// Returns the number of elements identified by Key. This will be linear in
381  /// the number of elements of that key.
382  size_type count(const KeyT &Key) const {
383  unsigned Ret = 0;
384  for (const_iterator It = find(Key); It != end(); ++It)
385  ++Ret;
386 
387  return Ret;
388  }
389 
390  /// Returns true if this set contains an element identified by Key.
391  bool contains(const KeyT &Key) const {
392  return find(Key) != end();
393  }
394 
395  /// Return the head and tail of the subset's list, otherwise returns end().
396  iterator getHead(const KeyT &Key) { return find(Key); }
397  iterator getTail(const KeyT &Key) {
398  iterator I = find(Key);
399  if (I != end())
400  I = iterator(this, I.Prev(), KeyIndexOf(Key));
401  return I;
402  }
403 
404  /// The bounds of the range of items sharing Key K. First member is the head
405  /// of the list, and the second member is a decrementable end iterator for
406  /// that key.
407  RangePair equal_range(const KeyT &K) {
408  iterator B = find(K);
409  iterator E = iterator(this, SMSNode::INVALID, B.SparseIdx);
410  return make_pair(B, E);
411  }
412 
413  /// Insert a new element at the tail of the subset list. Returns an iterator
414  /// to the newly added entry.
415  iterator insert(const ValueT &Val) {
416  unsigned Idx = sparseIndex(Val);
417  iterator I = findIndex(Idx);
418 
419  unsigned NodeIdx = addValue(Val, SMSNode::INVALID, SMSNode::INVALID);
420 
421  if (I == end()) {
422  // Make a singleton list
423  Sparse[Idx] = NodeIdx;
424  Dense[NodeIdx].Prev = NodeIdx;
425  return iterator(this, NodeIdx, Idx);
426  }
427 
428  // Stick it at the end.
429  unsigned HeadIdx = I.Idx;
430  unsigned TailIdx = I.Prev();
431  Dense[TailIdx].Next = NodeIdx;
432  Dense[HeadIdx].Prev = NodeIdx;
433  Dense[NodeIdx].Prev = TailIdx;
434 
435  return iterator(this, NodeIdx, Idx);
436  }
437 
438  /// Erases an existing element identified by a valid iterator.
439  ///
440  /// This invalidates iterators pointing at the same entry, but erase() returns
441  /// an iterator pointing to the next element in the subset's list. This makes
442  /// it possible to erase selected elements while iterating over the subset:
443  ///
444  /// tie(I, E) = Set.equal_range(Key);
445  /// while (I != E)
446  /// if (test(*I))
447  /// I = Set.erase(I);
448  /// else
449  /// ++I;
450  ///
451  /// Note that if the last element in the subset list is erased, this will
452  /// return an end iterator which can be decremented to get the new tail (if it
453  /// exists):
454  ///
455  /// tie(B, I) = Set.equal_range(Key);
456  /// for (bool isBegin = B == I; !isBegin; /* empty */) {
457  /// isBegin = (--I) == B;
458  /// if (test(I))
459  /// break;
460  /// I = erase(I);
461  /// }
463  assert(I.isKeyed() && !I.isEnd() && !Dense[I.Idx].isTombstone() &&
464  "erasing invalid/end/tombstone iterator");
465 
466  // First, unlink the node from its list. Then swap the node out with the
467  // dense vector's last entry
468  iterator NextI = unlink(Dense[I.Idx]);
469 
470  // Put in a tombstone.
471  makeTombstone(I.Idx);
472 
473  return NextI;
474  }
475 
476  /// Erase all elements with the given key. This invalidates all
477  /// iterators of that key.
478  void eraseAll(const KeyT &K) {
479  for (iterator I = find(K); I != end(); /* empty */)
480  I = erase(I);
481  }
482 
483 private:
484  /// Unlink the node from its list. Returns the next node in the list.
485  iterator unlink(const SMSNode &N) {
486  if (isSingleton(N)) {
487  // Singleton is already unlinked
488  assert(N.Next == SMSNode::INVALID && "Singleton has next?");
489  return iterator(this, SMSNode::INVALID, ValIndexOf(N.Data));
490  }
491 
492  if (isHead(N)) {
493  // If we're the head, then update the sparse array and our next.
494  Sparse[sparseIndex(N)] = N.Next;
495  Dense[N.Next].Prev = N.Prev;
496  return iterator(this, N.Next, ValIndexOf(N.Data));
497  }
498 
499  if (N.isTail()) {
500  // If we're the tail, then update our head and our previous.
501  findIndex(sparseIndex(N)).setPrev(N.Prev);
502  Dense[N.Prev].Next = N.Next;
503 
504  // Give back an end iterator that can be decremented
505  iterator I(this, N.Prev, ValIndexOf(N.Data));
506  return ++I;
507  }
508 
509  // Otherwise, just drop us
510  Dense[N.Next].Prev = N.Prev;
511  Dense[N.Prev].Next = N.Next;
512  return iterator(this, N.Next, ValIndexOf(N.Data));
513  }
514 };
515 
516 } // end namespace llvm
517 
518 #endif
iterator end()
Returns an iterator past this container.
void push_back(const T &Elt)
Definition: SmallVector.h:222
iterator insert(const ValueT &Val)
Insert a new element at the tail of the subset list.
bool contains(const KeyT &Key) const
Returns true if this set contains an element identified by Key.
size_type size() const
Returns the number of elements in the set.
iterator_base & operator--()
Increment and decrement operators.
const_iterator find(const KeyT &Key) const
RangePair equal_range(const KeyT &K)
The bounds of the range of items sharing Key K.
iterator_base< SparseMultiSet * > iterator
iterator getHead(const KeyT &Key)
Return the head and tail of the subset's list, otherwise returns end().
const ValueT * const_pointer
iterator getTail(const KeyT &Key)
iterator_base< const SparseMultiSet * > const_iterator
#define P(N)
void setUniverse(unsigned U)
Set the universe size which determines the largest key the set can hold.
void clear()
Clears the set.
iterator find(const KeyT &Key)
Find an element by its key.
const_iterator end() const
super::difference_type difference_type
void eraseAll(const KeyT &K)
Erase all elements with the given key.
iterator findIndex(unsigned Idx)
Find an element by its index.
iterator erase(iterator I)
SparseSetValFunctor - Helper class for selecting SparseSetValTraits.
Definition: SparseSet.h:64
#define N
std::iterator< std::bidirectional_iterator_tag, ValueT > super
bool operator!=(const iterator_base &RHS) const
Fast multiset implementation for objects that can be identified by small unsigned keys...
bool empty() const
Returns true if the set is empty.
Our iterators are iterators over the collection of objects that share a key.
const ValueT & const_reference
bool operator==(const iterator_base &RHS) const
Comparison operators.
std::pair< iterator, iterator > RangePair
size_type count(const KeyT &Key) const
Returns the number of elements identified by Key.