LLVM 22.0.0git
RadixTree.h
Go to the documentation of this file.
1//===-- llvm/ADT/RadixTree.h - Radix Tree implementation --------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//===----------------------------------------------------------------------===//
7//
8// This file implements a Radix Tree.
9//
10//===----------------------------------------------------------------------===//
11
12#ifndef LLVM_ADT_RADIXTREE_H
13#define LLVM_ADT_RADIXTREE_H
14
15#include "llvm/ADT/ADL.h"
16#include "llvm/ADT/STLExtras.h"
17#include "llvm/ADT/iterator.h"
19#include <cassert>
20#include <cstddef>
21#include <iterator>
22#include <limits>
23#include <list>
24#include <utility>
25#include <vector>
26
27namespace llvm {
28
29/// \brief A Radix Tree implementation.
30///
31/// A Radix Tree (also known as a compact prefix tree or radix trie) is a
32/// data structure that stores a dynamic set or associative array where keys
33/// are strings and values are associated with these keys. Unlike a regular
34/// trie, the edges of a radix tree can be labeled with sequences of characters
35/// as well as single characters. This makes radix trees more efficient for
36/// storing sparse data sets, where many nodes in a regular trie would have
37/// only one child.
38///
39/// This implementation supports arbitrary key types that can be iterated over
40/// (e.g., `std::string`, `std::vector<char>`, `ArrayRef<char>`). The key type
41/// must provide `begin()` and `end()` for iteration.
42///
43/// The tree stores `std::pair<const KeyType, T>` as its value type.
44///
45/// Example usage:
46/// \code
47/// llvm::RadixTree<StringRef, int> Tree;
48/// Tree.emplace("apple", 1);
49/// Tree.emplace("grapefruit", 2);
50/// Tree.emplace("grape", 3);
51///
52/// // Find prefixes
53/// for (const auto &[Key, Value] : Tree.find_prefixes("grapefruit juice")) {
54/// // pair will be {"grape", 3}
55/// // pair will be {"grapefruit", 2}
56/// llvm::outs() << Key << ": " << Value << "\n";
57/// }
58///
59/// // Iterate over all elements
60/// for (const auto &[Key, Value] : Tree)
61/// llvm::outs() << Key << ": " << Value << "\n";
62/// \endcode
63///
64/// \note
65/// The `RadixTree` takes ownership of the `KeyType` and `T` objects
66/// inserted into it. When an element is removed or the tree is destroyed,
67/// these objects will be destructed.
68/// However, if `KeyType` is a reference-like type, e.g., StringRef or range,
69/// the user must guarantee that the referenced data has a lifetime longer than
70/// the tree.
71template <typename KeyType, typename T> class RadixTree {
72public:
73 using key_type = KeyType;
74 using mapped_type = T;
75 using value_type = std::pair<const KeyType, mapped_type>;
76
77private:
78 using KeyConstIteratorType =
79 decltype(adl_begin(std::declval<const key_type &>()));
80 using KeyConstIteratorRangeType = iterator_range<KeyConstIteratorType>;
81 using KeyValueType =
83 using ContainerType = std::list<value_type>;
84
85 /// Represents an internal node in the Radix Tree.
86 struct Node {
87 KeyConstIteratorRangeType Key{KeyConstIteratorType{},
88 KeyConstIteratorType{}};
89 std::vector<Node> Children;
90
91 /// An iterator to the value associated with this node.
92 ///
93 /// If this node does not have a value (i.e., it's an internal node that
94 /// only serves as a path to other values), this iterator will be equal
95 /// to default constructed `ContainerType::iterator()`.
96 typename ContainerType::iterator Value;
97
98 /// The first character of the Key. Used for fast child lookup.
99 KeyValueType KeyFront;
100
101 Node() = default;
102 Node(const KeyConstIteratorRangeType &Key)
103 : Key(Key), KeyFront(*Key.begin()) {
104 assert(!Key.empty());
105 }
106
107 Node(Node &&) = default;
108 Node &operator=(Node &&) = default;
109
110 Node(const Node &) = delete;
111 Node &operator=(const Node &) = delete;
112
113 const Node *findChild(const KeyConstIteratorRangeType &Key) const {
114 if (Key.empty())
115 return nullptr;
116 for (const Node &Child : Children) {
117 assert(!Child.Key.empty()); // Only root can be empty.
118 if (Child.KeyFront == *Key.begin())
119 return &Child;
120 }
121 return nullptr;
122 }
123
124 Node *findChild(const KeyConstIteratorRangeType &Query) {
125 const Node *This = this;
126 return const_cast<Node *>(This->findChild(Query));
127 }
128
129 size_t countNodes() const {
130 size_t R = 1;
131 for (const Node &C : Children)
132 R += C.countNodes();
133 return R;
134 }
135
136 ///
137 /// Splits the current node into two.
138 ///
139 /// This function is used when a new key needs to be inserted that shares
140 /// a common prefix with the current node's key, but then diverges.
141 /// The current `Key` is truncated to the common prefix, and a new child
142 /// node is created for the remainder of the original node's `Key`.
143 ///
144 /// \param SplitPoint An iterator pointing to the character in the current
145 /// `Key` where the split should occur.
146 void split(KeyConstIteratorType SplitPoint) {
147 Node Child(make_range(SplitPoint, Key.end()));
148 Key = make_range(Key.begin(), SplitPoint);
149
150 Children.swap(Child.Children);
151 std::swap(Value, Child.Value);
152
153 Children.emplace_back(std::move(Child));
154 }
155 };
156
157 /// Root always corresponds to the empty key, which is the shortest possible
158 /// prefix for everything.
159 Node Root;
160 ContainerType KeyValuePairs;
161
162 /// Finds or creates a new tail or leaf node corresponding to the `Key`.
163 Node &findOrCreate(KeyConstIteratorRangeType Key) {
164 Node *Curr = &Root;
165 if (Key.empty())
166 return *Curr;
167
168 for (;;) {
169 auto [I1, I2] = llvm::mismatch(Key, Curr->Key);
170 Key = make_range(I1, Key.end());
171
172 if (I2 != Curr->Key.end()) {
173 // Match is partial. Either query is too short, or there is mismatching
174 // character. Split either way, and put new node in between of the
175 // current and its children.
176 Curr->split(I2);
177
178 // Split was caused by mismatch, so `findChild` would fail.
179 break;
180 }
181
182 Node *Child = Curr->findChild(Key);
183 if (!Child)
184 break;
185
186 // Move to child with the same first character.
187 Curr = Child;
188 }
189
190 if (Key.empty()) {
191 // The current node completely matches the key, return it.
192 return *Curr;
193 }
194
195 // `Key` is a suffix of original `Key` unmatched by path from the `Root` to
196 // the `Curr`, and we have no candidate in the children to match more.
197 // Create a new one.
198 return Curr->Children.emplace_back(Key);
199 }
200
201 ///
202 /// An iterator for traversing prefixes search results.
203 ///
204 /// This iterator is used by `find_prefixes` to traverse the tree and find
205 /// elements that are prefixes to the given key. It's a forward iterator.
206 ///
207 /// \tparam MappedType The type of the value pointed to by the iterator.
208 /// This will be `value_type` for non-const iterators
209 /// and `const value_type` for const iterators.
210 template <typename MappedType>
211 class IteratorImpl
212 : public iterator_facade_base<IteratorImpl<MappedType>,
213 std::forward_iterator_tag, MappedType> {
214 const Node *Curr = nullptr;
215 KeyConstIteratorRangeType Query{KeyConstIteratorType{},
216 KeyConstIteratorType{}};
217
218 void findNextValid() {
219 while (Curr && Curr->Value == typename ContainerType::iterator())
220 advance();
221 }
222
223 void advance() {
224 assert(Curr);
225 if (Query.empty()) {
226 Curr = nullptr;
227 return;
228 }
229
230 Curr = Curr->findChild(Query);
231 if (!Curr) {
232 Curr = nullptr;
233 return;
234 }
235
236 auto [I1, I2] = llvm::mismatch(Query, Curr->Key);
237 if (I2 != Curr->Key.end()) {
238 Curr = nullptr;
239 return;
240 }
241 Query = make_range(I1, Query.end());
242 }
243
244 friend class RadixTree;
245 IteratorImpl(const Node *C, const KeyConstIteratorRangeType &Q)
246 : Curr(C), Query(Q) {
247 findNextValid();
248 }
249
250 public:
251 IteratorImpl() = default;
252
253 MappedType &operator*() const { return *Curr->Value; }
254
255 IteratorImpl &operator++() {
256 advance();
257 findNextValid();
258 return *this;
259 }
260
261 bool operator==(const IteratorImpl &Other) const {
262 return Curr == Other.Curr;
263 }
264 };
265
266public:
267 RadixTree() = default;
268 RadixTree(RadixTree &&) = default;
270
271 using prefix_iterator = IteratorImpl<value_type>;
272 using const_prefix_iterator = IteratorImpl<const value_type>;
273
274 using iterator = typename ContainerType::iterator;
275 using const_iterator = typename ContainerType::const_iterator;
276
277 /// Returns true if the tree is empty.
278 bool empty() const { return KeyValuePairs.empty(); }
279
280 /// Returns the number of elements in the tree.
281 size_t size() const { return KeyValuePairs.size(); }
282
283 /// Returns the number of nodes in the tree.
284 ///
285 /// This function counts all internal nodes in the tree. It can be useful for
286 /// understanding the memory footprint or complexity of the tree structure.
287 size_t countNodes() const { return Root.countNodes(); }
288
289 /// Returns an iterator to the first element.
290 iterator begin() { return KeyValuePairs.begin(); }
291 const_iterator begin() const { return KeyValuePairs.begin(); }
292
293 /// Returns an iterator to the end of the tree.
294 iterator end() { return KeyValuePairs.end(); }
295 const_iterator end() const { return KeyValuePairs.end(); }
296
297 /// Constructs and inserts a new element into the tree.
298 ///
299 /// This function constructs an element in place within the tree. If an
300 /// element with the same key already exists, the insertion fails and the
301 /// function returns an iterator to the existing element along with `false`.
302 /// Otherwise, the new element is inserted and the function returns an
303 /// iterator to the new element along with `true`.
304 ///
305 /// \param Key The key of the element to construct.
306 /// \param Args Arguments to forward to the constructor of the mapped_type.
307 /// \return A pair consisting of an iterator to the inserted element (or to
308 /// the element that prevented insertion) and a boolean value
309 /// indicating whether the insertion took place.
310 template <typename... Ts>
311 std::pair<iterator, bool> emplace(key_type &&Key, Ts &&...Args) {
312 // We want to make new `Node` to refer key in the container, not the one
313 // from the argument.
314 // FIXME: Determine that we need a new node, before expanding
315 // `KeyValuePairs`.
316 const value_type &NewValue = KeyValuePairs.emplace_front(
317 std::move(Key), T(std::forward<Ts>(Args)...));
318 Node &Node = findOrCreate(NewValue.first);
319 bool HasValue = Node.Value != typename ContainerType::iterator();
320 if (!HasValue)
321 Node.Value = KeyValuePairs.begin();
322 else
323 KeyValuePairs.pop_front();
324 return {Node.Value, !HasValue};
325 }
326
327 ///
328 /// Finds all elements whose keys are prefixes of the given `Key`.
329 ///
330 /// This function returns an iterator range over all elements in the tree
331 /// whose keys are prefixes of the provided `Key`. For example, if the tree
332 /// contains "abcde", "abc", "abcdefgh", and `Key` is "abcde", this function
333 /// would return iterators to "abcde" and "abc".
334 ///
335 /// \param Key The key to search for prefixes of.
336 /// \return An `iterator_range` of `const_prefix_iterator`s, allowing
337 /// iteration over the found prefix elements.
338 /// \note The returned iterators reference the `Key` provided by the caller.
339 /// The caller must ensure that `Key` remains valid for the lifetime
340 /// of the iterators.
342 find_prefixes(const key_type &Key) const {
344 const_prefix_iterator(&Root, KeyConstIteratorRangeType(Key)),
346 }
347};
348
349} // namespace llvm
350
351#endif // LLVM_ADT_RADIXTREE_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
bool operator==(const MergedFunctionsInfo &LHS, const MergedFunctionsInfo &RHS)
#define T
This file contains some templates that are useful if you are working with the STL at all.
iterator end()
Returns an iterator to the end of the tree.
Definition RadixTree.h:294
iterator_range< const_prefix_iterator > find_prefixes(const key_type &Key) const
Finds all elements whose keys are prefixes of the given Key.
Definition RadixTree.h:342
const_iterator end() const
Definition RadixTree.h:295
typename ContainerType::const_iterator const_iterator
Definition RadixTree.h:275
RadixTree(RadixTree &&)=default
std::pair< iterator, bool > emplace(key_type &&Key, Ts &&...Args)
Constructs and inserts a new element into the tree.
Definition RadixTree.h:311
RadixTree & operator=(RadixTree &&)=default
std::pair< const KeyType, mapped_type > value_type
Definition RadixTree.h:75
const_iterator begin() const
Definition RadixTree.h:291
typename ContainerType::iterator iterator
Definition RadixTree.h:274
bool empty() const
Returns true if the tree is empty.
Definition RadixTree.h:278
IteratorImpl< const value_type > const_prefix_iterator
Definition RadixTree.h:272
iterator begin()
Returns an iterator to the first element.
Definition RadixTree.h:290
IteratorImpl< value_type > prefix_iterator
Definition RadixTree.h:271
size_t countNodes() const
Returns the number of nodes in the tree.
Definition RadixTree.h:287
RadixTree()=default
size_t size() const
Returns the number of elements in the tree.
Definition RadixTree.h:281
KeyType key_type
Definition RadixTree.h:73
LLVM Value Representation.
Definition Value.h:75
A range adaptor for a pair of iterators.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
constexpr auto adl_begin(RangeT &&range) -> decltype(adl_detail::begin_impl(std::forward< RangeT >(range)))
Returns the begin iterator to range using std::begin and function found through Argument-Dependent Lo...
Definition ADL.h:78
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto mismatch(R1 &&Range1, R2 &&Range2)
Provide wrappers to std::mismatch which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:2047
detail::ValueMatchesPoly< M > HasValue(M Matcher)
Definition Error.h:221
iterator_range< SplittingIterator > split(StringRef Str, StringRef Separator)
Split the specified string over a separator and return a range-compatible iterable over its partition...
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
@ Other
Any other memory.
Definition ModRef.h:68
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869