LLVM  4.0.0
SparseMultiSet.h
Go to the documentation of this file.
1 //===--- llvm/ADT/SparseMultiSet.h - Sparse multiset ------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the SparseMultiSet class, which adds multiset behavior to
11 // the SparseSet.
12 //
13 // A sparse multiset holds a small number of objects identified by integer keys
14 // from a moderately sized universe. The sparse multiset uses more memory than
15 // other containers in order to provide faster operations. Any key can map to
16 // multiple values. A SparseMultiSetNode class is provided, which serves as a
17 // convenient base class for the contents of a SparseMultiSet.
18 //
19 //===----------------------------------------------------------------------===//
20 
21 #ifndef LLVM_ADT_SPARSEMULTISET_H
22 #define LLVM_ADT_SPARSEMULTISET_H
23 
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/ADT/SparseSet.h"
26 #include "llvm/ADT/STLExtras.h"
27 #include <cassert>
28 #include <cstdint>
29 #include <cstdlib>
30 #include <iterator>
31 #include <limits>
32 #include <utility>
33 
34 namespace llvm {
35 
36 /// Fast multiset implementation for objects that can be identified by small
37 /// unsigned keys.
38 ///
39 /// SparseMultiSet allocates memory proportional to the size of the key
40 /// universe, so it is not recommended for building composite data structures.
41 /// It is useful for algorithms that require a single set with fast operations.
42 ///
43 /// Compared to DenseSet and DenseMap, SparseMultiSet provides a constant-time
44 /// clear() that is as fast as a vector's. The find(), insert(), and erase()
45 /// operations are all constant time, and typically faster than a hash table.
46 /// The iteration order doesn't depend on numerical key values, it only depends
47 /// on the order of insert() and erase() operations. Iteration order is the
48 /// insertion order. Iteration is only provided over elements of equivalent
49 /// keys, but iterators are bidirectional.
50 ///
51 /// Compared to BitVector, SparseMultiSet<unsigned> uses 8x-40x more memory, but
52 /// offers constant-time clear() and size() operations as well as fast iteration
53 /// independent of the size of the universe.
54 ///
55 /// SparseMultiSet contains a dense vector holding all the objects and a sparse
56 /// array holding indexes into the dense vector. Most of the memory is used by
57 /// the sparse array which is the size of the key universe. The SparseT template
58 /// parameter provides a space/speed tradeoff for sets holding many elements.
59 ///
60 /// When SparseT is uint32_t, find() only touches up to 3 cache lines, but the
61 /// sparse array uses 4 x Universe bytes.
62 ///
63 /// When SparseT is uint8_t (the default), find() touches up to 3+[N/256] cache
64 /// lines, but the sparse array is 4x smaller. N is the number of elements in
65 /// the set.
66 ///
67 /// For sets that may grow to thousands of elements, SparseT should be set to
68 /// uint16_t or uint32_t.
69 ///
70 /// Multiset behavior is provided by providing doubly linked lists for values
71 /// that are inlined in the dense vector. SparseMultiSet is a good choice when
72 /// one desires a growable number of entries per key, as it will retain the
73 /// SparseSet algorithmic properties despite being growable. Thus, it is often a
74 /// better choice than a SparseSet of growable containers or a vector of
75 /// vectors. SparseMultiSet also keeps iterators valid after erasure (provided
76 /// the iterators don't point to the element erased), allowing for more
77 /// intuitive and fast removal.
78 ///
79 /// @tparam ValueT The type of objects in the set.
80 /// @tparam KeyFunctorT A functor that computes an unsigned index from KeyT.
81 /// @tparam SparseT An unsigned integer type. See above.
82 ///
83 template<typename ValueT,
84  typename KeyFunctorT = identity<unsigned>,
85  typename SparseT = uint8_t>
87  static_assert(std::numeric_limits<SparseT>::is_integer &&
88  !std::numeric_limits<SparseT>::is_signed,
89  "SparseT must be an unsigned integer type");
90 
91  /// The actual data that's stored, as a doubly-linked list implemented via
92  /// indices into the DenseVector. The doubly linked list is implemented
93  /// circular in Prev indices, and INVALID-terminated in Next indices. This
94  /// provides efficient access to list tails. These nodes can also be
95  /// tombstones, in which case they are actually nodes in a single-linked
96  /// freelist of recyclable slots.
97  struct SMSNode {
98  static const unsigned INVALID = ~0U;
99 
100  ValueT Data;
101  unsigned Prev;
102  unsigned Next;
103 
104  SMSNode(ValueT D, unsigned P, unsigned N) : Data(D), Prev(P), Next(N) { }
105 
106  /// List tails have invalid Nexts.
107  bool isTail() const {
108  return Next == INVALID;
109  }
110 
111  /// Whether this node is a tombstone node, and thus is in our freelist.
112  bool isTombstone() const {
113  return Prev == INVALID;
114  }
115 
116  /// Since the list is circular in Prev, all non-tombstone nodes have a valid
117  /// Prev.
118  bool isValid() const { return Prev != INVALID; }
119  };
120 
121  typedef typename KeyFunctorT::argument_type KeyT;
123  DenseT Dense;
124  SparseT *Sparse = nullptr;
125  unsigned Universe = 0;
126  KeyFunctorT KeyIndexOf;
128 
129  /// We have a built-in recycler for reusing tombstone slots. This recycler
130  /// puts a singly-linked free list into tombstone slots, allowing us quick
131  /// erasure, iterator preservation, and dense size.
132  unsigned FreelistIdx = SMSNode::INVALID;
133  unsigned NumFree = 0;
134 
135  unsigned sparseIndex(const ValueT &Val) const {
136  assert(ValIndexOf(Val) < Universe &&
137  "Invalid key in set. Did object mutate?");
138  return ValIndexOf(Val);
139  }
140  unsigned sparseIndex(const SMSNode &N) const { return sparseIndex(N.Data); }
141 
142  /// Whether the given entry is the head of the list. List heads's previous
143  /// pointers are to the tail of the list, allowing for efficient access to the
144  /// list tail. D must be a valid entry node.
145  bool isHead(const SMSNode &D) const {
146  assert(D.isValid() && "Invalid node for head");
147  return Dense[D.Prev].isTail();
148  }
149 
150  /// Whether the given entry is a singleton entry, i.e. the only entry with
151  /// that key.
152  bool isSingleton(const SMSNode &N) const {
153  assert(N.isValid() && "Invalid node for singleton");
154  // Is N its own predecessor?
155  return &Dense[N.Prev] == &N;
156  }
157 
158  /// Add in the given SMSNode. Uses a free entry in our freelist if
159  /// available. Returns the index of the added node.
160  unsigned addValue(const ValueT& V, unsigned Prev, unsigned Next) {
161  if (NumFree == 0) {
162  Dense.push_back(SMSNode(V, Prev, Next));
163  return Dense.size() - 1;
164  }
165 
166  // Peel off a free slot
167  unsigned Idx = FreelistIdx;
168  unsigned NextFree = Dense[Idx].Next;
169  assert(Dense[Idx].isTombstone() && "Non-tombstone free?");
170 
171  Dense[Idx] = SMSNode(V, Prev, Next);
172  FreelistIdx = NextFree;
173  --NumFree;
174  return Idx;
175  }
176 
177  /// Make the current index a new tombstone. Pushes it onto the freelist.
178  void makeTombstone(unsigned Idx) {
179  Dense[Idx].Prev = SMSNode::INVALID;
180  Dense[Idx].Next = FreelistIdx;
181  FreelistIdx = Idx;
182  ++NumFree;
183  }
184 
185 public:
187  typedef ValueT &reference;
188  typedef const ValueT &const_reference;
189  typedef ValueT *pointer;
190  typedef const ValueT *const_pointer;
191  typedef unsigned size_type;
192 
193  SparseMultiSet() = default;
194  SparseMultiSet(const SparseMultiSet &) = delete;
195  SparseMultiSet &operator=(const SparseMultiSet &) = delete;
196  ~SparseMultiSet() { free(Sparse); }
197 
198  /// Set the universe size which determines the largest key the set can hold.
199  /// The universe must be sized before any elements can be added.
200  ///
201  /// @param U Universe size. All object keys must be less than U.
202  ///
203  void setUniverse(unsigned U) {
204  // It's not hard to resize the universe on a non-empty set, but it doesn't
205  // seem like a likely use case, so we can add that code when we need it.
206  assert(empty() && "Can only resize universe on an empty map");
207  // Hysteresis prevents needless reallocations.
208  if (U >= Universe/4 && U <= Universe)
209  return;
210  free(Sparse);
211  // The Sparse array doesn't actually need to be initialized, so malloc
212  // would be enough here, but that will cause tools like valgrind to
213  // complain about branching on uninitialized data.
214  Sparse = reinterpret_cast<SparseT*>(calloc(U, sizeof(SparseT)));
215  Universe = U;
216  }
217 
218  /// Our iterators are iterators over the collection of objects that share a
219  /// key.
220  template<typename SMSPtrTy>
221  class iterator_base : public std::iterator<std::bidirectional_iterator_tag,
222  ValueT> {
223  friend class SparseMultiSet;
224 
225  SMSPtrTy SMS;
226  unsigned Idx;
227  unsigned SparseIdx;
228 
229  iterator_base(SMSPtrTy P, unsigned I, unsigned SI)
230  : SMS(P), Idx(I), SparseIdx(SI) { }
231 
232  /// Whether our iterator has fallen outside our dense vector.
233  bool isEnd() const {
234  if (Idx == SMSNode::INVALID)
235  return true;
236 
237  assert(Idx < SMS->Dense.size() && "Out of range, non-INVALID Idx?");
238  return false;
239  }
240 
241  /// Whether our iterator is properly keyed, i.e. the SparseIdx is valid
242  bool isKeyed() const { return SparseIdx < SMS->Universe; }
243 
244  unsigned Prev() const { return SMS->Dense[Idx].Prev; }
245  unsigned Next() const { return SMS->Dense[Idx].Next; }
246 
247  void setPrev(unsigned P) { SMS->Dense[Idx].Prev = P; }
248  void setNext(unsigned N) { SMS->Dense[Idx].Next = N; }
249 
250  public:
251  typedef std::iterator<std::bidirectional_iterator_tag, ValueT> super;
252  typedef typename super::value_type value_type;
253  typedef typename super::difference_type difference_type;
254  typedef typename super::pointer pointer;
255  typedef typename super::reference reference;
256 
258  assert(isKeyed() && SMS->sparseIndex(SMS->Dense[Idx].Data) == SparseIdx &&
259  "Dereferencing iterator of invalid key or index");
260 
261  return SMS->Dense[Idx].Data;
262  }
263  pointer operator->() const { return &operator*(); }
264 
265  /// Comparison operators
266  bool operator==(const iterator_base &RHS) const {
267  // end compares equal
268  if (SMS == RHS.SMS && Idx == RHS.Idx) {
269  assert((isEnd() || SparseIdx == RHS.SparseIdx) &&
270  "Same dense entry, but different keys?");
271  return true;
272  }
273 
274  return false;
275  }
276 
277  bool operator!=(const iterator_base &RHS) const {
278  return !operator==(RHS);
279  }
280 
281  /// Increment and decrement operators
282  iterator_base &operator--() { // predecrement - Back up
283  assert(isKeyed() && "Decrementing an invalid iterator");
284  assert((isEnd() || !SMS->isHead(SMS->Dense[Idx])) &&
285  "Decrementing head of list");
286 
287  // If we're at the end, then issue a new find()
288  if (isEnd())
289  Idx = SMS->findIndex(SparseIdx).Prev();
290  else
291  Idx = Prev();
292 
293  return *this;
294  }
295  iterator_base &operator++() { // preincrement - Advance
296  assert(!isEnd() && isKeyed() && "Incrementing an invalid/end iterator");
297  Idx = Next();
298  return *this;
299  }
300  iterator_base operator--(int) { // postdecrement
301  iterator_base I(*this);
302  --*this;
303  return I;
304  }
305  iterator_base operator++(int) { // postincrement
306  iterator_base I(*this);
307  ++*this;
308  return I;
309  }
310  };
311  typedef iterator_base<SparseMultiSet *> iterator;
312  typedef iterator_base<const SparseMultiSet *> const_iterator;
313 
314  // Convenience types
315  typedef std::pair<iterator, iterator> RangePair;
316 
317  /// Returns an iterator past this container. Note that such an iterator cannot
318  /// be decremented, but will compare equal to other end iterators.
319  iterator end() { return iterator(this, SMSNode::INVALID, SMSNode::INVALID); }
320  const_iterator end() const {
321  return const_iterator(this, SMSNode::INVALID, SMSNode::INVALID);
322  }
323 
324  /// Returns true if the set is empty.
325  ///
326  /// This is not the same as BitVector::empty().
327  ///
328  bool empty() const { return size() == 0; }
329 
330  /// Returns the number of elements in the set.
331  ///
332  /// This is not the same as BitVector::size() which returns the size of the
333  /// universe.
334  ///
335  size_type size() const {
336  assert(NumFree <= Dense.size() && "Out-of-bounds free entries");
337  return Dense.size() - NumFree;
338  }
339 
340  /// Clears the set. This is a very fast constant time operation.
341  ///
342  void clear() {
343  // Sparse does not need to be cleared, see find().
344  Dense.clear();
345  NumFree = 0;
346  FreelistIdx = SMSNode::INVALID;
347  }
348 
349  /// Find an element by its index.
350  ///
351  /// @param Idx A valid index to find.
352  /// @returns An iterator to the element identified by key, or end().
353  ///
354  iterator findIndex(unsigned Idx) {
355  assert(Idx < Universe && "Key out of range");
356  const unsigned Stride = std::numeric_limits<SparseT>::max() + 1u;
357  for (unsigned i = Sparse[Idx], e = Dense.size(); i < e; i += Stride) {
358  const unsigned FoundIdx = sparseIndex(Dense[i]);
359  // Check that we're pointing at the correct entry and that it is the head
360  // of a valid list.
361  if (Idx == FoundIdx && Dense[i].isValid() && isHead(Dense[i]))
362  return iterator(this, i, Idx);
363  // Stride is 0 when SparseT >= unsigned. We don't need to loop.
364  if (!Stride)
365  break;
366  }
367  return end();
368  }
369 
370  /// Find an element by its key.
371  ///
372  /// @param Key A valid key to find.
373  /// @returns An iterator to the element identified by key, or end().
374  ///
375  iterator find(const KeyT &Key) {
376  return findIndex(KeyIndexOf(Key));
377  }
378 
379  const_iterator find(const KeyT &Key) const {
380  iterator I = const_cast<SparseMultiSet*>(this)->findIndex(KeyIndexOf(Key));
381  return const_iterator(I.SMS, I.Idx, KeyIndexOf(Key));
382  }
383 
384  /// Returns the number of elements identified by Key. This will be linear in
385  /// the number of elements of that key.
386  size_type count(const KeyT &Key) const {
387  unsigned Ret = 0;
388  for (const_iterator It = find(Key); It != end(); ++It)
389  ++Ret;
390 
391  return Ret;
392  }
393 
394  /// Returns true if this set contains an element identified by Key.
395  bool contains(const KeyT &Key) const {
396  return find(Key) != end();
397  }
398 
399  /// Return the head and tail of the subset's list, otherwise returns end().
400  iterator getHead(const KeyT &Key) { return find(Key); }
401  iterator getTail(const KeyT &Key) {
402  iterator I = find(Key);
403  if (I != end())
404  I = iterator(this, I.Prev(), KeyIndexOf(Key));
405  return I;
406  }
407 
408  /// The bounds of the range of items sharing Key K. First member is the head
409  /// of the list, and the second member is a decrementable end iterator for
410  /// that key.
411  RangePair equal_range(const KeyT &K) {
412  iterator B = find(K);
413  iterator E = iterator(this, SMSNode::INVALID, B.SparseIdx);
414  return make_pair(B, E);
415  }
416 
417  /// Insert a new element at the tail of the subset list. Returns an iterator
418  /// to the newly added entry.
419  iterator insert(const ValueT &Val) {
420  unsigned Idx = sparseIndex(Val);
421  iterator I = findIndex(Idx);
422 
423  unsigned NodeIdx = addValue(Val, SMSNode::INVALID, SMSNode::INVALID);
424 
425  if (I == end()) {
426  // Make a singleton list
427  Sparse[Idx] = NodeIdx;
428  Dense[NodeIdx].Prev = NodeIdx;
429  return iterator(this, NodeIdx, Idx);
430  }
431 
432  // Stick it at the end.
433  unsigned HeadIdx = I.Idx;
434  unsigned TailIdx = I.Prev();
435  Dense[TailIdx].Next = NodeIdx;
436  Dense[HeadIdx].Prev = NodeIdx;
437  Dense[NodeIdx].Prev = TailIdx;
438 
439  return iterator(this, NodeIdx, Idx);
440  }
441 
442  /// Erases an existing element identified by a valid iterator.
443  ///
444  /// This invalidates iterators pointing at the same entry, but erase() returns
445  /// an iterator pointing to the next element in the subset's list. This makes
446  /// it possible to erase selected elements while iterating over the subset:
447  ///
448  /// tie(I, E) = Set.equal_range(Key);
449  /// while (I != E)
450  /// if (test(*I))
451  /// I = Set.erase(I);
452  /// else
453  /// ++I;
454  ///
455  /// Note that if the last element in the subset list is erased, this will
456  /// return an end iterator which can be decremented to get the new tail (if it
457  /// exists):
458  ///
459  /// tie(B, I) = Set.equal_range(Key);
460  /// for (bool isBegin = B == I; !isBegin; /* empty */) {
461  /// isBegin = (--I) == B;
462  /// if (test(I))
463  /// break;
464  /// I = erase(I);
465  /// }
467  assert(I.isKeyed() && !I.isEnd() && !Dense[I.Idx].isTombstone() &&
468  "erasing invalid/end/tombstone iterator");
469 
470  // First, unlink the node from its list. Then swap the node out with the
471  // dense vector's last entry
472  iterator NextI = unlink(Dense[I.Idx]);
473 
474  // Put in a tombstone.
475  makeTombstone(I.Idx);
476 
477  return NextI;
478  }
479 
480  /// Erase all elements with the given key. This invalidates all
481  /// iterators of that key.
482  void eraseAll(const KeyT &K) {
483  for (iterator I = find(K); I != end(); /* empty */)
484  I = erase(I);
485  }
486 
487 private:
488  /// Unlink the node from its list. Returns the next node in the list.
489  iterator unlink(const SMSNode &N) {
490  if (isSingleton(N)) {
491  // Singleton is already unlinked
492  assert(N.Next == SMSNode::INVALID && "Singleton has next?");
493  return iterator(this, SMSNode::INVALID, ValIndexOf(N.Data));
494  }
495 
496  if (isHead(N)) {
497  // If we're the head, then update the sparse array and our next.
498  Sparse[sparseIndex(N)] = N.Next;
499  Dense[N.Next].Prev = N.Prev;
500  return iterator(this, N.Next, ValIndexOf(N.Data));
501  }
502 
503  if (N.isTail()) {
504  // If we're the tail, then update our head and our previous.
505  findIndex(sparseIndex(N)).setPrev(N.Prev);
506  Dense[N.Prev].Next = N.Next;
507 
508  // Give back an end iterator that can be decremented
509  iterator I(this, N.Prev, ValIndexOf(N.Data));
510  return ++I;
511  }
512 
513  // Otherwise, just drop us
514  Dense[N.Next].Prev = N.Prev;
515  Dense[N.Prev].Next = N.Next;
516  return iterator(this, N.Next, ValIndexOf(N.Data));
517  }
518 };
519 
520 } // end namespace llvm
521 
522 #endif // LLVM_ADT_SPARSEMULTISET_H
iterator end()
Returns an iterator past this container.
void push_back(const T &Elt)
Definition: SmallVector.h:211
iterator insert(const ValueT &Val)
Insert a new element at the tail of the subset list.
bool contains(const KeyT &Key) const
Returns true if this set contains an element identified by Key.
size_t i
size_type size() const
Returns the number of elements in the set.
SparseMultiSet & operator=(const SparseMultiSet &)=delete
iterator_base & operator--()
Increment and decrement operators.
const_iterator find(const KeyT &Key) const
RangePair equal_range(const KeyT &K)
The bounds of the range of items sharing Key K.
iterator_base< SparseMultiSet * > iterator
iterator getHead(const KeyT &Key)
Return the head and tail of the subset's list, otherwise returns end().
const ValueT * const_pointer
static GCRegistry::Add< StatepointGC > D("statepoint-example","an example strategy for statepoint")
iterator getTail(const KeyT &Key)
static GCRegistry::Add< OcamlGC > B("ocaml","ocaml 3.10-compatible GC")
iterator_base< const SparseMultiSet * > const_iterator
static GCRegistry::Add< CoreCLRGC > E("coreclr","CoreCLR-compatible GC")
#define P(N)
void setUniverse(unsigned U)
Set the universe size which determines the largest key the set can hold.
void clear()
Clears the set.
iterator find(const KeyT &Key)
Find an element by its key.
const_iterator end() const
super::difference_type difference_type
void eraseAll(const KeyT &K)
Erase all elements with the given key.
INITIALIZE_PASS(HexagonGenMux,"hexagon-mux","Hexagon generate mux instructions", false, false) void HexagonGenMux I isValid()
iterator findIndex(unsigned Idx)
Find an element by its index.
iterator erase(iterator I)
SparseSetValFunctor - Helper class for selecting SparseSetValTraits.
Definition: SparseSet.h:67
#define N
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:135
std::iterator< std::bidirectional_iterator_tag, ValueT > super
bool operator!=(const iterator_base &RHS) const
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Fast multiset implementation for objects that can be identified by small unsigned keys...
bool empty() const
Returns true if the set is empty.
Our iterators are iterators over the collection of objects that share a key.
const ValueT & const_reference
SparseMultiSet()=default
bool operator==(const iterator_base &RHS) const
Comparison operators.
std::pair< iterator, iterator > RangePair
size_type count(const KeyT &Key) const
Returns the number of elements identified by Key.