LLVM 22.0.0git
SparseMultiSet.h
Go to the documentation of this file.
1//===- llvm/ADT/SparseMultiSet.h - Sparse multiset --------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file defines the SparseMultiSet class, which adds multiset behavior to
11/// the SparseSet.
12///
13/// A sparse multiset holds a small number of objects identified by integer keys
14/// from a moderately sized universe. The sparse multiset uses more memory than
15/// other containers in order to provide faster operations. Any key can map to
16/// multiple values. A SparseMultiSetNode class is provided, which serves as a
17/// convenient base class for the contents of a SparseMultiSet.
18///
19//===----------------------------------------------------------------------===//
20
21#ifndef LLVM_ADT_SPARSEMULTISET_H
22#define LLVM_ADT_SPARSEMULTISET_H
23
25#include "llvm/ADT/SparseSet.h"
26#include "llvm/ADT/identity.h"
27#include <cassert>
28#include <cstdint>
29#include <cstdlib>
30#include <iterator>
31#include <limits>
32#include <utility>
33
34namespace llvm {
35
36/// Fast multiset implementation for objects that can be identified by small
37/// unsigned keys.
38///
39/// SparseMultiSet allocates memory proportional to the size of the key
40/// universe, so it is not recommended for building composite data structures.
41/// It is useful for algorithms that require a single set with fast operations.
42///
43/// Compared to DenseSet and DenseMap, SparseMultiSet provides constant-time
44/// fast clear() as fast as a vector. The find(), insert(), and erase()
45/// operations are all constant time, and typically faster than a hash table.
46/// The iteration order doesn't depend on numerical key values, it only depends
47/// on the order of insert() and erase() operations. Iteration order is the
48/// insertion order. Iteration is only provided over elements of equivalent
49/// keys, but iterators are bidirectional.
50///
51/// Compared to BitVector, SparseMultiSet<unsigned> uses 8x-40x more memory, but
52/// offers constant-time clear() and size() operations as well as fast iteration
53/// independent on the size of the universe.
54///
55/// SparseMultiSet contains a dense vector holding all the objects and a sparse
56/// array holding indexes into the dense vector. Most of the memory is used by
57/// the sparse array which is the size of the key universe. The SparseT template
58/// parameter provides a space/speed tradeoff for sets holding many elements.
59///
60/// When SparseT is uint32_t, find() only touches up to 3 cache lines, but the
61/// sparse array uses 4 x Universe bytes.
62///
63/// When SparseT is uint8_t (the default), find() touches up to 3+[N/256] cache
64/// lines, but the sparse array is 4x smaller. N is the number of elements in
65/// the set.
66///
67/// For sets that may grow to thousands of elements, SparseT should be set to
68/// uint16_t or uint32_t.
69///
70/// Multiset behavior is provided by providing doubly linked lists for values
71/// that are inlined in the dense vector. SparseMultiSet is a good choice when
72/// one desires a growable number of entries per key, as it will retain the
73/// SparseSet algorithmic properties despite being growable. Thus, it is often a
74/// better choice than a SparseSet of growable containers or a vector of
75/// vectors. SparseMultiSet also keeps iterators valid after erasure (provided
76/// the iterators don't point to the element erased), allowing for more
77/// intuitive and fast removal.
78///
79/// @tparam ValueT The type of objects in the set.
80/// @tparam KeyFunctorT A functor that computes an unsigned index from KeyT.
81/// @tparam SparseT An unsigned integer type. See above.
82///
83template <typename ValueT, typename KeyFunctorT = identity<unsigned>,
84 typename SparseT = uint8_t>
86 static_assert(std::is_unsigned_v<SparseT>,
87 "SparseT must be an unsigned integer type");
88
89 /// The actual data that's stored, as a doubly-linked list implemented via
90 /// indices into the DenseVector. The doubly linked list is implemented
91 /// circular in Prev indices, and INVALID-terminated in Next indices. This
92 /// provides efficient access to list tails. These nodes can also be
93 /// tombstones, in which case they are actually nodes in a single-linked
94 /// freelist of recyclable slots.
95 struct SMSNode {
96 static constexpr unsigned INVALID = ~0U;
97
98 ValueT Data;
99 unsigned Prev;
100 unsigned Next;
101
102 SMSNode(ValueT D, unsigned P, unsigned N) : Data(D), Prev(P), Next(N) {}
103
104 /// List tails have invalid Nexts.
105 bool isTail() const { return Next == INVALID; }
106
107 /// Whether this node is a tombstone node, and thus is in our freelist.
108 bool isTombstone() const { return Prev == INVALID; }
109
110 /// Since the list is circular in Prev, all non-tombstone nodes have a valid
111 /// Prev.
112 bool isValid() const { return Prev != INVALID; }
113 };
114
115 using KeyT = typename KeyFunctorT::argument_type;
116 using DenseT = SmallVector<SMSNode, 8>;
117 DenseT Dense;
118 SparseT *Sparse = nullptr;
119 unsigned Universe = 0;
120 KeyFunctorT KeyIndexOf;
122
123 /// We have a built-in recycler for reusing tombstone slots. This recycler
124 /// puts a singly-linked free list into tombstone slots, allowing us quick
125 /// erasure, iterator preservation, and dense size.
126 unsigned FreelistIdx = SMSNode::INVALID;
127 unsigned NumFree = 0;
128
129 unsigned sparseIndex(const ValueT &Val) const {
130 assert(ValIndexOf(Val) < Universe &&
131 "Invalid key in set. Did object mutate?");
132 return ValIndexOf(Val);
133 }
134 unsigned sparseIndex(const SMSNode &N) const { return sparseIndex(N.Data); }
135
136 /// Whether the given entry is the head of the list. List heads's previous
137 /// pointers are to the tail of the list, allowing for efficient access to the
138 /// list tail. D must be a valid entry node.
139 bool isHead(const SMSNode &D) const {
140 assert(D.isValid() && "Invalid node for head");
141 return Dense[D.Prev].isTail();
142 }
143
144 /// Whether the given entry is a singleton entry, i.e. the only entry with
145 /// that key.
146 bool isSingleton(const SMSNode &N) const {
147 assert(N.isValid() && "Invalid node for singleton");
148 // Is N its own predecessor?
149 return &Dense[N.Prev] == &N;
150 }
151
152 /// Add in the given SMSNode. Uses a free entry in our freelist if
153 /// available. Returns the index of the added node.
154 unsigned addValue(const ValueT &V, unsigned Prev, unsigned Next) {
155 if (NumFree == 0) {
156 Dense.push_back(SMSNode(V, Prev, Next));
157 return Dense.size() - 1;
158 }
159
160 // Peel off a free slot
161 unsigned Idx = FreelistIdx;
162 unsigned NextFree = Dense[Idx].Next;
163 assert(Dense[Idx].isTombstone() && "Non-tombstone free?");
164
165 Dense[Idx] = SMSNode(V, Prev, Next);
166 FreelistIdx = NextFree;
167 --NumFree;
168 return Idx;
169 }
170
171 /// Make the current index a new tombstone. Pushes it onto the freelist.
172 void makeTombstone(unsigned Idx) {
173 Dense[Idx].Prev = SMSNode::INVALID;
174 Dense[Idx].Next = FreelistIdx;
175 FreelistIdx = Idx;
176 ++NumFree;
177 }
178
179public:
180 using value_type = ValueT;
181 using reference = ValueT &;
182 using const_reference = const ValueT &;
183 using pointer = ValueT *;
184 using const_pointer = const ValueT *;
186
187 SparseMultiSet() = default;
190 ~SparseMultiSet() { free(Sparse); }
191
192 /// Set the universe size which determines the largest key the set can hold.
193 /// The universe must be sized before any elements can be added.
194 ///
195 /// @param U Universe size. All object keys must be less than U.
196 ///
197 void setUniverse(unsigned U) {
198 // It's not hard to resize the universe on a non-empty set, but it doesn't
199 // seem like a likely use case, so we can add that code when we need it.
200 assert(empty() && "Can only resize universe on an empty map");
201 // Hysteresis prevents needless reallocations.
202 if (U >= Universe / 4 && U <= Universe)
203 return;
204 free(Sparse);
205 // The Sparse array doesn't actually need to be initialized, so malloc
206 // would be enough here, but that will cause tools like valgrind to
207 // complain about branching on uninitialized data.
208 Sparse = static_cast<SparseT *>(safe_calloc(U, sizeof(SparseT)));
209 Universe = U;
210 }
211
212 /// Our iterators are iterators over the collection of objects that share a
213 /// key.
214 template <typename SMSPtrTy> class iterator_base {
215 friend class SparseMultiSet;
216
217 public:
218 using iterator_category = std::bidirectional_iterator_tag;
219 using value_type = ValueT;
220 using difference_type = std::ptrdiff_t;
223
224 private:
225 SMSPtrTy SMS;
226 unsigned Idx;
227 unsigned SparseIdx;
228
229 iterator_base(SMSPtrTy P, unsigned I, unsigned SI)
230 : SMS(P), Idx(I), SparseIdx(SI) {}
231
232 /// Whether our iterator has fallen outside our dense vector.
233 bool isEnd() const {
234 if (Idx == SMSNode::INVALID)
235 return true;
236
237 assert(Idx < SMS->Dense.size() && "Out of range, non-INVALID Idx?");
238 return false;
239 }
240
241 /// Whether our iterator is properly keyed, i.e. the SparseIdx is valid
242 bool isKeyed() const { return SparseIdx < SMS->Universe; }
243
244 unsigned Prev() const { return SMS->Dense[Idx].Prev; }
245 unsigned Next() const { return SMS->Dense[Idx].Next; }
246
247 void setPrev(unsigned P) { SMS->Dense[Idx].Prev = P; }
248 void setNext(unsigned N) { SMS->Dense[Idx].Next = N; }
249
250 public:
252 assert(isKeyed() && SMS->sparseIndex(SMS->Dense[Idx].Data) == SparseIdx &&
253 "Dereferencing iterator of invalid key or index");
254
255 return SMS->Dense[Idx].Data;
256 }
257 pointer operator->() const { return &operator*(); }
258
259 /// Comparison operators
260 bool operator==(const iterator_base &RHS) const {
261 // end compares equal
262 if (SMS == RHS.SMS && Idx == RHS.Idx) {
263 assert((isEnd() || SparseIdx == RHS.SparseIdx) &&
264 "Same dense entry, but different keys?");
265 return true;
266 }
267
268 return false;
269 }
270
271 bool operator!=(const iterator_base &RHS) const { return !operator==(RHS); }
272
273 /// Increment and decrement operators
274 iterator_base &operator--() { // predecrement - Back up
275 assert(isKeyed() && "Decrementing an invalid iterator");
276 assert((isEnd() || !SMS->isHead(SMS->Dense[Idx])) &&
277 "Decrementing head of list");
278
279 // If we're at the end, then issue a new find()
280 if (isEnd())
281 Idx = SMS->findIndex(SparseIdx).Prev();
282 else
283 Idx = Prev();
284
285 return *this;
286 }
287 iterator_base &operator++() { // preincrement - Advance
288 assert(!isEnd() && isKeyed() && "Incrementing an invalid/end iterator");
289 Idx = Next();
290 return *this;
291 }
292 iterator_base operator--(int) { // postdecrement
293 iterator_base I(*this);
294 --*this;
295 return I;
296 }
297 iterator_base operator++(int) { // postincrement
298 iterator_base I(*this);
299 ++*this;
300 return I;
301 }
302 };
303
306
307 // Convenience types
308 using RangePair = std::pair<iterator, iterator>;
309
310 /// Returns an iterator past this container. Note that such an iterator cannot
311 /// be decremented, but will compare equal to other end iterators.
312 iterator end() { return iterator(this, SMSNode::INVALID, SMSNode::INVALID); }
314 return const_iterator(this, SMSNode::INVALID, SMSNode::INVALID);
315 }
316
317 /// Returns true if the set is empty.
318 ///
319 /// This is not the same as BitVector::empty().
320 ///
321 bool empty() const { return size() == 0; }
322
323 /// Returns the number of elements in the set.
324 ///
325 /// This is not the same as BitVector::size() which returns the size of the
326 /// universe.
327 ///
328 size_type size() const {
329 assert(NumFree <= Dense.size() && "Out-of-bounds free entries");
330 return Dense.size() - NumFree;
331 }
332
333 /// Clears the set. This is a very fast constant time operation.
334 ///
335 void clear() {
336 // Sparse does not need to be cleared, see find().
337 Dense.clear();
338 NumFree = 0;
339 FreelistIdx = SMSNode::INVALID;
340 }
341
342 /// Find an element by its index.
343 ///
344 /// @param Idx A valid index to find.
345 /// @returns An iterator to the element identified by key, or end().
346 ///
347 iterator findIndex(unsigned Idx) {
348 assert(Idx < Universe && "Key out of range");
349 const unsigned Stride = std::numeric_limits<SparseT>::max() + 1u;
350 for (unsigned i = Sparse[Idx], e = Dense.size(); i < e; i += Stride) {
351 const unsigned FoundIdx = sparseIndex(Dense[i]);
352 // Check that we're pointing at the correct entry and that it is the head
353 // of a valid list.
354 if (Idx == FoundIdx && Dense[i].isValid() && isHead(Dense[i]))
355 return iterator(this, i, Idx);
356 // Stride is 0 when SparseT >= unsigned. We don't need to loop.
357 if (!Stride)
358 break;
359 }
360 return end();
361 }
362
363 /// Find an element by its key.
364 ///
365 /// @param Key A valid key to find.
366 /// @returns An iterator to the element identified by key, or end().
367 ///
368 iterator find(const KeyT &Key) { return findIndex(KeyIndexOf(Key)); }
369
370 const_iterator find(const KeyT &Key) const {
371 iterator I = const_cast<SparseMultiSet *>(this)->findIndex(KeyIndexOf(Key));
372 return const_iterator(I.SMS, I.Idx, KeyIndexOf(Key));
373 }
374
375 /// Returns the number of elements identified by Key. This will be linear in
376 /// the number of elements of that key.
377 size_type count(const KeyT &Key) const {
378 unsigned Ret = 0;
379 for (const_iterator It = find(Key); It != end(); ++It)
380 ++Ret;
381
382 return Ret;
383 }
384
385 /// Returns true if this set contains an element identified by Key.
386 bool contains(const KeyT &Key) const { return find(Key) != end(); }
387
388 /// Return the head and tail of the subset's list, otherwise returns end().
389 iterator getHead(const KeyT &Key) { return find(Key); }
390 iterator getTail(const KeyT &Key) {
391 iterator I = find(Key);
392 if (I != end())
393 I = iterator(this, I.Prev(), KeyIndexOf(Key));
394 return I;
395 }
396
397 /// The bounds of the range of items sharing Key K. First member is the head
398 /// of the list, and the second member is a decrementable end iterator for
399 /// that key.
400 RangePair equal_range(const KeyT &K) {
401 iterator B = find(K);
402 iterator E = iterator(this, SMSNode::INVALID, B.SparseIdx);
403 return std::make_pair(B, E);
404 }
405
406 /// Insert a new element at the tail of the subset list. Returns an iterator
407 /// to the newly added entry.
408 iterator insert(const ValueT &Val) {
409 unsigned Idx = sparseIndex(Val);
410 iterator I = findIndex(Idx);
411
412 unsigned NodeIdx = addValue(Val, SMSNode::INVALID, SMSNode::INVALID);
413
414 if (I == end()) {
415 // Make a singleton list
416 Sparse[Idx] = NodeIdx;
417 Dense[NodeIdx].Prev = NodeIdx;
418 return iterator(this, NodeIdx, Idx);
419 }
420
421 // Stick it at the end.
422 unsigned HeadIdx = I.Idx;
423 unsigned TailIdx = I.Prev();
424 Dense[TailIdx].Next = NodeIdx;
425 Dense[HeadIdx].Prev = NodeIdx;
426 Dense[NodeIdx].Prev = TailIdx;
427
428 return iterator(this, NodeIdx, Idx);
429 }
430
431 /// Erases an existing element identified by a valid iterator.
432 ///
433 /// This invalidates iterators pointing at the same entry, but erase() returns
434 /// an iterator pointing to the next element in the subset's list. This makes
435 /// it possible to erase selected elements while iterating over the subset:
436 ///
437 /// tie(I, E) = Set.equal_range(Key);
438 /// while (I != E)
439 /// if (test(*I))
440 /// I = Set.erase(I);
441 /// else
442 /// ++I;
443 ///
444 /// Note that if the last element in the subset list is erased, this will
445 /// return an end iterator which can be decremented to get the new tail (if it
446 /// exists):
447 ///
448 /// tie(B, I) = Set.equal_range(Key);
449 /// for (bool isBegin = B == I; !isBegin; /* empty */) {
450 /// isBegin = (--I) == B;
451 /// if (test(I))
452 /// break;
453 /// I = erase(I);
454 /// }
456 assert(I.isKeyed() && !I.isEnd() && !Dense[I.Idx].isTombstone() &&
457 "erasing invalid/end/tombstone iterator");
458
459 // First, unlink the node from its list. Then swap the node out with the
460 // dense vector's last entry
461 iterator NextI = unlink(Dense[I.Idx]);
462
463 // Put in a tombstone.
464 makeTombstone(I.Idx);
465
466 return NextI;
467 }
468
469 /// Erase all elements with the given key. This invalidates all
470 /// iterators of that key.
471 void eraseAll(const KeyT &K) {
472 for (iterator I = find(K); I != end(); /* empty */)
473 I = erase(I);
474 }
475
476private:
477 /// Unlink the node from its list. Returns the next node in the list.
478 iterator unlink(const SMSNode &N) {
479 if (isSingleton(N)) {
480 // Singleton is already unlinked
481 assert(N.Next == SMSNode::INVALID && "Singleton has next?");
482 return iterator(this, SMSNode::INVALID, ValIndexOf(N.Data));
483 }
484
485 if (isHead(N)) {
486 // If we're the head, then update the sparse array and our next.
487 Sparse[sparseIndex(N)] = N.Next;
488 Dense[N.Next].Prev = N.Prev;
489 return iterator(this, N.Next, ValIndexOf(N.Data));
490 }
491
492 if (N.isTail()) {
493 // If we're the tail, then update our head and our previous.
494 findIndex(sparseIndex(N)).setPrev(N.Prev);
495 Dense[N.Prev].Next = N.Next;
496
497 // Give back an end iterator that can be decremented
498 iterator I(this, N.Prev, ValIndexOf(N.Data));
499 return ++I;
500 }
501
502 // Otherwise, just drop us
503 Dense[N.Next].Prev = N.Prev;
504 Dense[N.Prev].Next = N.Next;
505 return iterator(this, N.Next, ValIndexOf(N.Data));
506 }
507};
508
509} // namespace llvm
510
511#endif // LLVM_ADT_SPARSEMULTISET_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define I(x, y, z)
Definition MD5.cpp:58
#define P(N)
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
This file defines the SmallVector class.
This file defines the SparseSet class derived from the version described in Briggs,...
Value * RHS
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Our iterators are iterators over the collection of objects that share a key.
bool operator!=(const iterator_base &RHS) const
std::bidirectional_iterator_tag iterator_category
iterator_base & operator--()
Increment and decrement operators.
bool operator==(const iterator_base &RHS) const
Comparison operators.
iterator find(const KeyT &Key)
Find an element by its key.
bool empty() const
Returns true if the set is empty.
iterator_base< const SparseMultiSet * > const_iterator
bool contains(const KeyT &Key) const
Returns true if this set contains an element identified by Key.
iterator findIndex(unsigned Idx)
Find an element by its index.
void clear()
Clears the set.
iterator erase(iterator I)
Erases an existing element identified by a valid iterator.
size_type size() const
Returns the number of elements in the set.
iterator end()
Returns an iterator past this container.
const_iterator find(const KeyT &Key) const
size_type count(const KeyT &Key) const
Returns the number of elements identified by Key.
iterator insert(const ValueT &Val)
Insert a new element at the tail of the subset list.
SparseMultiSet(const SparseMultiSet &)=delete
const ValueT & const_reference
iterator getHead(const KeyT &Key)
Return the head and tail of the subset's list, otherwise returns end().
const_iterator end() const
iterator getTail(const KeyT &Key)
SparseMultiSet & operator=(const SparseMultiSet &)=delete
void eraseAll(const KeyT &K)
Erase all elements with the given key.
SparseMultiSet()=default
RangePair equal_range(const KeyT &K)
The bounds of the range of items sharing Key K.
const ValueT * const_pointer
iterator_base< SparseMultiSet * > iterator
std::pair< iterator, iterator > RangePair
void setUniverse(unsigned U)
Set the universe size which determines the largest key the set can hold.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ATTRIBUTE_RETURNS_NONNULL void * safe_calloc(size_t Count, size_t Sz)
Definition MemAlloc.h:38
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:189
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
#define N
SparseSetValFunctor - Helper class for selecting SparseSetValTraits.
Definition SparseSet.h:67