LLVM 20.0.0git
SparseMultiSet.h
Go to the documentation of this file.
1//===- llvm/ADT/SparseMultiSet.h - Sparse multiset --------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file defines the SparseMultiSet class, which adds multiset behavior to
11/// the SparseSet.
12///
13/// A sparse multiset holds a small number of objects identified by integer keys
14/// from a moderately sized universe. The sparse multiset uses more memory than
15/// other containers in order to provide faster operations. Any key can map to
16/// multiple values. A SparseMultiSetNode class is provided, which serves as a
17/// convenient base class for the contents of a SparseMultiSet.
18///
19//===----------------------------------------------------------------------===//
20
21#ifndef LLVM_ADT_SPARSEMULTISET_H
22#define LLVM_ADT_SPARSEMULTISET_H
23
24#include "llvm/ADT/identity.h"
26#include "llvm/ADT/SparseSet.h"
27#include <cassert>
28#include <cstdint>
29#include <cstdlib>
30#include <iterator>
31#include <limits>
32#include <utility>
33
34namespace llvm {
35
36/// Fast multiset implementation for objects that can be identified by small
37/// unsigned keys.
38///
39/// SparseMultiSet allocates memory proportional to the size of the key
40/// universe, so it is not recommended for building composite data structures.
41/// It is useful for algorithms that require a single set with fast operations.
42///
43/// Compared to DenseSet and DenseMap, SparseMultiSet provides constant-time
44/// fast clear() as fast as a vector. The find(), insert(), and erase()
45/// operations are all constant time, and typically faster than a hash table.
46/// The iteration order doesn't depend on numerical key values, it only depends
47/// on the order of insert() and erase() operations. Iteration order is the
48/// insertion order. Iteration is only provided over elements of equivalent
49/// keys, but iterators are bidirectional.
50///
51/// Compared to BitVector, SparseMultiSet<unsigned> uses 8x-40x more memory, but
52/// offers constant-time clear() and size() operations as well as fast iteration
53/// independent on the size of the universe.
54///
55/// SparseMultiSet contains a dense vector holding all the objects and a sparse
56/// array holding indexes into the dense vector. Most of the memory is used by
57/// the sparse array which is the size of the key universe. The SparseT template
58/// parameter provides a space/speed tradeoff for sets holding many elements.
59///
60/// When SparseT is uint32_t, find() only touches up to 3 cache lines, but the
61/// sparse array uses 4 x Universe bytes.
62///
63/// When SparseT is uint8_t (the default), find() touches up to 3+[N/256] cache
64/// lines, but the sparse array is 4x smaller. N is the number of elements in
65/// the set.
66///
67/// For sets that may grow to thousands of elements, SparseT should be set to
68/// uint16_t or uint32_t.
69///
70/// Multiset behavior is provided by providing doubly linked lists for values
71/// that are inlined in the dense vector. SparseMultiSet is a good choice when
72/// one desires a growable number of entries per key, as it will retain the
73/// SparseSet algorithmic properties despite being growable. Thus, it is often a
74/// better choice than a SparseSet of growable containers or a vector of
75/// vectors. SparseMultiSet also keeps iterators valid after erasure (provided
76/// the iterators don't point to the element erased), allowing for more
77/// intuitive and fast removal.
78///
79/// @tparam ValueT The type of objects in the set.
80/// @tparam KeyFunctorT A functor that computes an unsigned index from KeyT.
81/// @tparam SparseT An unsigned integer type. See above.
82///
83template<typename ValueT,
84 typename KeyFunctorT = identity<unsigned>,
85 typename SparseT = uint8_t>
87 static_assert(std::is_unsigned_v<SparseT>,
88 "SparseT must be an unsigned integer type");
89
90 /// The actual data that's stored, as a doubly-linked list implemented via
91 /// indices into the DenseVector. The doubly linked list is implemented
92 /// circular in Prev indices, and INVALID-terminated in Next indices. This
93 /// provides efficient access to list tails. These nodes can also be
94 /// tombstones, in which case they are actually nodes in a single-linked
95 /// freelist of recyclable slots.
96 struct SMSNode {
97 static constexpr unsigned INVALID = ~0U;
98
99 ValueT Data;
100 unsigned Prev;
101 unsigned Next;
102
103 SMSNode(ValueT D, unsigned P, unsigned N) : Data(D), Prev(P), Next(N) {}
104
105 /// List tails have invalid Nexts.
106 bool isTail() const {
107 return Next == INVALID;
108 }
109
110 /// Whether this node is a tombstone node, and thus is in our freelist.
111 bool isTombstone() const {
112 return Prev == INVALID;
113 }
114
115 /// Since the list is circular in Prev, all non-tombstone nodes have a valid
116 /// Prev.
117 bool isValid() const { return Prev != INVALID; }
118 };
119
120 using KeyT = typename KeyFunctorT::argument_type;
122 DenseT Dense;
123 SparseT *Sparse = nullptr;
124 unsigned Universe = 0;
125 KeyFunctorT KeyIndexOf;
127
128 /// We have a built-in recycler for reusing tombstone slots. This recycler
129 /// puts a singly-linked free list into tombstone slots, allowing us quick
130 /// erasure, iterator preservation, and dense size.
131 unsigned FreelistIdx = SMSNode::INVALID;
132 unsigned NumFree = 0;
133
134 unsigned sparseIndex(const ValueT &Val) const {
135 assert(ValIndexOf(Val) < Universe &&
136 "Invalid key in set. Did object mutate?");
137 return ValIndexOf(Val);
138 }
139 unsigned sparseIndex(const SMSNode &N) const { return sparseIndex(N.Data); }
140
141 /// Whether the given entry is the head of the list. List heads's previous
142 /// pointers are to the tail of the list, allowing for efficient access to the
143 /// list tail. D must be a valid entry node.
144 bool isHead(const SMSNode &D) const {
145 assert(D.isValid() && "Invalid node for head");
146 return Dense[D.Prev].isTail();
147 }
148
149 /// Whether the given entry is a singleton entry, i.e. the only entry with
150 /// that key.
151 bool isSingleton(const SMSNode &N) const {
152 assert(N.isValid() && "Invalid node for singleton");
153 // Is N its own predecessor?
154 return &Dense[N.Prev] == &N;
155 }
156
157 /// Add in the given SMSNode. Uses a free entry in our freelist if
158 /// available. Returns the index of the added node.
159 unsigned addValue(const ValueT& V, unsigned Prev, unsigned Next) {
160 if (NumFree == 0) {
161 Dense.push_back(SMSNode(V, Prev, Next));
162 return Dense.size() - 1;
163 }
164
165 // Peel off a free slot
166 unsigned Idx = FreelistIdx;
167 unsigned NextFree = Dense[Idx].Next;
168 assert(Dense[Idx].isTombstone() && "Non-tombstone free?");
169
170 Dense[Idx] = SMSNode(V, Prev, Next);
171 FreelistIdx = NextFree;
172 --NumFree;
173 return Idx;
174 }
175
176 /// Make the current index a new tombstone. Pushes it onto the freelist.
177 void makeTombstone(unsigned Idx) {
178 Dense[Idx].Prev = SMSNode::INVALID;
179 Dense[Idx].Next = FreelistIdx;
180 FreelistIdx = Idx;
181 ++NumFree;
182 }
183
184public:
186 using reference = ValueT &;
187 using const_reference = const ValueT &;
188 using pointer = ValueT *;
189 using const_pointer = const ValueT *;
191
192 SparseMultiSet() = default;
195 ~SparseMultiSet() { free(Sparse); }
196
197 /// Set the universe size which determines the largest key the set can hold.
198 /// The universe must be sized before any elements can be added.
199 ///
200 /// @param U Universe size. All object keys must be less than U.
201 ///
202 void setUniverse(unsigned U) {
203 // It's not hard to resize the universe on a non-empty set, but it doesn't
204 // seem like a likely use case, so we can add that code when we need it.
205 assert(empty() && "Can only resize universe on an empty map");
206 // Hysteresis prevents needless reallocations.
207 if (U >= Universe/4 && U <= Universe)
208 return;
209 free(Sparse);
210 // The Sparse array doesn't actually need to be initialized, so malloc
211 // would be enough here, but that will cause tools like valgrind to
212 // complain about branching on uninitialized data.
213 Sparse = static_cast<SparseT*>(safe_calloc(U, sizeof(SparseT)));
214 Universe = U;
215 }
216
217 /// Our iterators are iterators over the collection of objects that share a
218 /// key.
219 template <typename SMSPtrTy> class iterator_base {
220 friend class SparseMultiSet;
221
222 public:
223 using iterator_category = std::bidirectional_iterator_tag;
225 using difference_type = std::ptrdiff_t;
228
229 private:
230 SMSPtrTy SMS;
231 unsigned Idx;
232 unsigned SparseIdx;
233
234 iterator_base(SMSPtrTy P, unsigned I, unsigned SI)
235 : SMS(P), Idx(I), SparseIdx(SI) {}
236
237 /// Whether our iterator has fallen outside our dense vector.
238 bool isEnd() const {
239 if (Idx == SMSNode::INVALID)
240 return true;
241
242 assert(Idx < SMS->Dense.size() && "Out of range, non-INVALID Idx?");
243 return false;
244 }
245
246 /// Whether our iterator is properly keyed, i.e. the SparseIdx is valid
247 bool isKeyed() const { return SparseIdx < SMS->Universe; }
248
249 unsigned Prev() const { return SMS->Dense[Idx].Prev; }
250 unsigned Next() const { return SMS->Dense[Idx].Next; }
251
252 void setPrev(unsigned P) { SMS->Dense[Idx].Prev = P; }
253 void setNext(unsigned N) { SMS->Dense[Idx].Next = N; }
254
255 public:
257 assert(isKeyed() && SMS->sparseIndex(SMS->Dense[Idx].Data) == SparseIdx &&
258 "Dereferencing iterator of invalid key or index");
259
260 return SMS->Dense[Idx].Data;
261 }
262 pointer operator->() const { return &operator*(); }
263
264 /// Comparison operators
265 bool operator==(const iterator_base &RHS) const {
266 // end compares equal
267 if (SMS == RHS.SMS && Idx == RHS.Idx) {
268 assert((isEnd() || SparseIdx == RHS.SparseIdx) &&
269 "Same dense entry, but different keys?");
270 return true;
271 }
272
273 return false;
274 }
275
276 bool operator!=(const iterator_base &RHS) const {
277 return !operator==(RHS);
278 }
279
280 /// Increment and decrement operators
281 iterator_base &operator--() { // predecrement - Back up
282 assert(isKeyed() && "Decrementing an invalid iterator");
283 assert((isEnd() || !SMS->isHead(SMS->Dense[Idx])) &&
284 "Decrementing head of list");
285
286 // If we're at the end, then issue a new find()
287 if (isEnd())
288 Idx = SMS->findIndex(SparseIdx).Prev();
289 else
290 Idx = Prev();
291
292 return *this;
293 }
294 iterator_base &operator++() { // preincrement - Advance
295 assert(!isEnd() && isKeyed() && "Incrementing an invalid/end iterator");
296 Idx = Next();
297 return *this;
298 }
299 iterator_base operator--(int) { // postdecrement
300 iterator_base I(*this);
301 --*this;
302 return I;
303 }
304 iterator_base operator++(int) { // postincrement
305 iterator_base I(*this);
306 ++*this;
307 return I;
308 }
309 };
310
313
314 // Convenience types
315 using RangePair = std::pair<iterator, iterator>;
316
317 /// Returns an iterator past this container. Note that such an iterator cannot
318 /// be decremented, but will compare equal to other end iterators.
319 iterator end() { return iterator(this, SMSNode::INVALID, SMSNode::INVALID); }
321 return const_iterator(this, SMSNode::INVALID, SMSNode::INVALID);
322 }
323
324 /// Returns true if the set is empty.
325 ///
326 /// This is not the same as BitVector::empty().
327 ///
328 bool empty() const { return size() == 0; }
329
330 /// Returns the number of elements in the set.
331 ///
332 /// This is not the same as BitVector::size() which returns the size of the
333 /// universe.
334 ///
335 size_type size() const {
336 assert(NumFree <= Dense.size() && "Out-of-bounds free entries");
337 return Dense.size() - NumFree;
338 }
339
340 /// Clears the set. This is a very fast constant time operation.
341 ///
342 void clear() {
343 // Sparse does not need to be cleared, see find().
344 Dense.clear();
345 NumFree = 0;
346 FreelistIdx = SMSNode::INVALID;
347 }
348
349 /// Find an element by its index.
350 ///
351 /// @param Idx A valid index to find.
352 /// @returns An iterator to the element identified by key, or end().
353 ///
355 assert(Idx < Universe && "Key out of range");
356 const unsigned Stride = std::numeric_limits<SparseT>::max() + 1u;
357 for (unsigned i = Sparse[Idx], e = Dense.size(); i < e; i += Stride) {
358 const unsigned FoundIdx = sparseIndex(Dense[i]);
359 // Check that we're pointing at the correct entry and that it is the head
360 // of a valid list.
361 if (Idx == FoundIdx && Dense[i].isValid() && isHead(Dense[i]))
362 return iterator(this, i, Idx);
363 // Stride is 0 when SparseT >= unsigned. We don't need to loop.
364 if (!Stride)
365 break;
366 }
367 return end();
368 }
369
370 /// Find an element by its key.
371 ///
372 /// @param Key A valid key to find.
373 /// @returns An iterator to the element identified by key, or end().
374 ///
375 iterator find(const KeyT &Key) {
376 return findIndex(KeyIndexOf(Key));
377 }
378
379 const_iterator find(const KeyT &Key) const {
380 iterator I = const_cast<SparseMultiSet*>(this)->findIndex(KeyIndexOf(Key));
381 return const_iterator(I.SMS, I.Idx, KeyIndexOf(Key));
382 }
383
384 /// Returns the number of elements identified by Key. This will be linear in
385 /// the number of elements of that key.
386 size_type count(const KeyT &Key) const {
387 unsigned Ret = 0;
388 for (const_iterator It = find(Key); It != end(); ++It)
389 ++Ret;
390
391 return Ret;
392 }
393
394 /// Returns true if this set contains an element identified by Key.
395 bool contains(const KeyT &Key) const {
396 return find(Key) != end();
397 }
398
399 /// Return the head and tail of the subset's list, otherwise returns end().
400 iterator getHead(const KeyT &Key) { return find(Key); }
401 iterator getTail(const KeyT &Key) {
402 iterator I = find(Key);
403 if (I != end())
404 I = iterator(this, I.Prev(), KeyIndexOf(Key));
405 return I;
406 }
407
408 /// The bounds of the range of items sharing Key K. First member is the head
409 /// of the list, and the second member is a decrementable end iterator for
410 /// that key.
411 RangePair equal_range(const KeyT &K) {
412 iterator B = find(K);
413 iterator E = iterator(this, SMSNode::INVALID, B.SparseIdx);
414 return std::make_pair(B, E);
415 }
416
417 /// Insert a new element at the tail of the subset list. Returns an iterator
418 /// to the newly added entry.
419 iterator insert(const ValueT &Val) {
420 unsigned Idx = sparseIndex(Val);
422
423 unsigned NodeIdx = addValue(Val, SMSNode::INVALID, SMSNode::INVALID);
424
425 if (I == end()) {
426 // Make a singleton list
427 Sparse[Idx] = NodeIdx;
428 Dense[NodeIdx].Prev = NodeIdx;
429 return iterator(this, NodeIdx, Idx);
430 }
431
432 // Stick it at the end.
433 unsigned HeadIdx = I.Idx;
434 unsigned TailIdx = I.Prev();
435 Dense[TailIdx].Next = NodeIdx;
436 Dense[HeadIdx].Prev = NodeIdx;
437 Dense[NodeIdx].Prev = TailIdx;
438
439 return iterator(this, NodeIdx, Idx);
440 }
441
442 /// Erases an existing element identified by a valid iterator.
443 ///
444 /// This invalidates iterators pointing at the same entry, but erase() returns
445 /// an iterator pointing to the next element in the subset's list. This makes
446 /// it possible to erase selected elements while iterating over the subset:
447 ///
448 /// tie(I, E) = Set.equal_range(Key);
449 /// while (I != E)
450 /// if (test(*I))
451 /// I = Set.erase(I);
452 /// else
453 /// ++I;
454 ///
455 /// Note that if the last element in the subset list is erased, this will
456 /// return an end iterator which can be decremented to get the new tail (if it
457 /// exists):
458 ///
459 /// tie(B, I) = Set.equal_range(Key);
460 /// for (bool isBegin = B == I; !isBegin; /* empty */) {
461 /// isBegin = (--I) == B;
462 /// if (test(I))
463 /// break;
464 /// I = erase(I);
465 /// }
467 assert(I.isKeyed() && !I.isEnd() && !Dense[I.Idx].isTombstone() &&
468 "erasing invalid/end/tombstone iterator");
469
470 // First, unlink the node from its list. Then swap the node out with the
471 // dense vector's last entry
472 iterator NextI = unlink(Dense[I.Idx]);
473
474 // Put in a tombstone.
475 makeTombstone(I.Idx);
476
477 return NextI;
478 }
479
480 /// Erase all elements with the given key. This invalidates all
481 /// iterators of that key.
482 void eraseAll(const KeyT &K) {
483 for (iterator I = find(K); I != end(); /* empty */)
484 I = erase(I);
485 }
486
487private:
488 /// Unlink the node from its list. Returns the next node in the list.
489 iterator unlink(const SMSNode &N) {
490 if (isSingleton(N)) {
491 // Singleton is already unlinked
492 assert(N.Next == SMSNode::INVALID && "Singleton has next?");
493 return iterator(this, SMSNode::INVALID, ValIndexOf(N.Data));
494 }
495
496 if (isHead(N)) {
497 // If we're the head, then update the sparse array and our next.
498 Sparse[sparseIndex(N)] = N.Next;
499 Dense[N.Next].Prev = N.Prev;
500 return iterator(this, N.Next, ValIndexOf(N.Data));
501 }
502
503 if (N.isTail()) {
504 // If we're the tail, then update our head and our previous.
505 findIndex(sparseIndex(N)).setPrev(N.Prev);
506 Dense[N.Prev].Next = N.Next;
507
508 // Give back an end iterator that can be decremented
509 iterator I(this, N.Prev, ValIndexOf(N.Data));
510 return ++I;
511 }
512
513 // Otherwise, just drop us
514 Dense[N.Next].Prev = N.Prev;
515 Dense[N.Prev].Next = N.Next;
516 return iterator(this, N.Next, ValIndexOf(N.Data));
517 }
518};
519
520} // end namespace llvm
521
522#endif // LLVM_ADT_SPARSEMULTISET_H
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define I(x, y, z)
Definition: MD5.cpp:58
#define P(N)
StandardInstrumentations SI(Mod->getContext(), Debug, VerifyEach)
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
This file defines the SparseSet class derived from the version described in Briggs,...
Value * RHS
Our iterators are iterators over the collection of objects that share a key.
bool operator!=(const iterator_base &RHS) const
std::bidirectional_iterator_tag iterator_category
iterator_base & operator--()
Increment and decrement operators.
bool operator==(const iterator_base &RHS) const
Comparison operators.
Fast multiset implementation for objects that can be identified by small unsigned keys.
iterator find(const KeyT &Key)
Find an element by its key.
bool empty() const
Returns true if the set is empty.
iterator_base< const SparseMultiSet * > const_iterator
bool contains(const KeyT &Key) const
Returns true if this set contains an element identified by Key.
iterator findIndex(unsigned Idx)
Find an element by its index.
void clear()
Clears the set.
iterator erase(iterator I)
Erases an existing element identified by a valid iterator.
size_type size() const
Returns the number of elements in the set.
iterator end()
Returns an iterator past this container.
const_iterator find(const KeyT &Key) const
size_type count(const KeyT &Key) const
Returns the number of elements identified by Key.
iterator insert(const ValueT &Val)
Insert a new element at the tail of the subset list.
SparseMultiSet(const SparseMultiSet &)=delete
iterator getHead(const KeyT &Key)
Return the head and tail of the subset's list, otherwise returns end().
const_iterator end() const
iterator getTail(const KeyT &Key)
SparseMultiSet & operator=(const SparseMultiSet &)=delete
void eraseAll(const KeyT &K)
Erase all elements with the given key.
SparseMultiSet()=default
RangePair equal_range(const KeyT &K)
The bounds of the range of items sharing Key K.
iterator_base< SparseMultiSet * > iterator
std::pair< iterator, iterator > RangePair
void setUniverse(unsigned U)
Set the universe size which determines the largest key the set can hold.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
LLVM_ATTRIBUTE_RETURNS_NONNULL void * safe_calloc(size_t Count, size_t Sz)
Definition: MemAlloc.h:38
#define N
SparseSetValFunctor - Helper class for selecting SparseSetValTraits.
Definition: SparseSet.h:68