clang-tools  7.0.0
Iterator.cpp
Go to the documentation of this file.
1 //===--- Iterator.cpp - Query Symbol Retrieval ------------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "Iterator.h"
11 #include <algorithm>
12 #include <cassert>
13 #include <numeric>
14 
15 namespace clang {
16 namespace clangd {
17 namespace dex {
18 
19 namespace {
20 
21 /// Implements Iterator over a PostingList. DocumentIterator is the most basic
22 /// iterator: it doesn't have any children (hence it is the leaf of iterator
23 /// tree) and is simply a wrapper around PostingList::const_iterator.
24 class DocumentIterator : public Iterator {
25 public:
26  DocumentIterator(PostingListRef Documents)
27  : Documents(Documents), Index(std::begin(Documents)) {}
28 
29  bool reachedEnd() const override { return Index == std::end(Documents); }
30 
31  /// Advances cursor to the next item.
32  void advance() override {
33  assert(!reachedEnd() && "DocumentIterator can't advance at the end.");
34  ++Index;
35  }
36 
37  /// Applies binary search to advance cursor to the next item with DocID equal
38  /// or higher than the given one.
39  void advanceTo(DocID ID) override {
40  assert(!reachedEnd() && "DocumentIterator can't advance at the end.");
41  Index = std::lower_bound(Index, std::end(Documents), ID);
42  }
43 
44  DocID peek() const override {
45  assert(!reachedEnd() && "DocumentIterator can't call peek() at the end.");
46  return *Index;
47  }
48 
49  llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
50  OS << '[';
51  auto Separator = "";
52  for (const auto &ID : Documents) {
53  OS << Separator << ID;
54  Separator = ", ";
55  }
56  OS << ']';
57  return OS;
58  }
59 
60 private:
61  PostingListRef Documents;
62  PostingListRef::const_iterator Index;
63 };
64 
65 /// Implements Iterator over the intersection of other iterators.
66 ///
67 /// AndIterator iterates through common items among all children. It becomes
68 /// exhausted as soon as any child becomes exhausted. After each mutation, the
69 /// iterator restores the invariant: all children must point to the same item.
70 class AndIterator : public Iterator {
71 public:
72  AndIterator(std::vector<std::unique_ptr<Iterator>> AllChildren)
73  : Children(std::move(AllChildren)) {
74  assert(!Children.empty() && "AndIterator should have at least one child.");
75  // Establish invariants.
76  sync();
77  }
78 
79  bool reachedEnd() const override { return ReachedEnd; }
80 
81  /// Advances all children to the next common item.
82  void advance() override {
83  assert(!reachedEnd() && "AndIterator can't call advance() at the end.");
84  Children.front()->advance();
85  sync();
86  }
87 
88  /// Advances all children to the next common item with DocumentID >= ID.
89  void advanceTo(DocID ID) override {
90  assert(!reachedEnd() && "AndIterator can't call advanceTo() at the end.");
91  Children.front()->advanceTo(ID);
92  sync();
93  }
94 
95  DocID peek() const override { return Children.front()->peek(); }
96 
97  llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
98  OS << "(& ";
99  auto Separator = "";
100  for (const auto &Child : Children) {
101  OS << Separator << *Child;
102  Separator = " ";
103  }
104  OS << ')';
105  return OS;
106  }
107 
108 private:
109  /// Restores class invariants: each child will point to the same element after
110  /// sync.
111  void sync() {
112  ReachedEnd |= Children.front()->reachedEnd();
113  if (ReachedEnd)
114  return;
115  auto SyncID = Children.front()->peek();
116  // Indicates whether any child needs to be advanced to new SyncID.
117  bool NeedsAdvance = false;
118  do {
119  NeedsAdvance = false;
120  for (auto &Child : Children) {
121  Child->advanceTo(SyncID);
122  ReachedEnd |= Child->reachedEnd();
123  // If any child reaches end And iterator can not match any other items.
124  // In this case, just terminate the process.
125  if (ReachedEnd)
126  return;
127  // If any child goes beyond given ID (i.e. ID is not the common item),
128  // all children should be advanced to the next common item.
129  // FIXME(kbobyrev): This is not a very optimized version; after costs
130  // are introduced, cycle should break whenever ID exceeds current one
131  // and cheapest children should be advanced over again.
132  if (Child->peek() > SyncID) {
133  SyncID = Child->peek();
134  NeedsAdvance = true;
135  }
136  }
137  } while (NeedsAdvance);
138  }
139 
140  /// AndIterator owns its children and ensures that all of them point to the
141  /// same element. As soon as one child gets exhausted, AndIterator can no
142  /// longer advance and has reached its end.
143  std::vector<std::unique_ptr<Iterator>> Children;
144  /// Indicates whether any child is exhausted. It is cheaper to maintain and
145  /// update the field, rather than traversing the whole subtree in each
146  /// reachedEnd() call.
147  bool ReachedEnd = false;
148 };
149 
150 /// Implements Iterator over the union of other iterators.
151 ///
152 /// OrIterator iterates through all items which can be pointed to by at least
153 /// one child. To preserve the sorted order, this iterator always advances the
154 /// child with smallest Child->peek() value. OrIterator becomes exhausted as
155 /// soon as all of its children are exhausted.
156 class OrIterator : public Iterator {
157 public:
158  OrIterator(std::vector<std::unique_ptr<Iterator>> AllChildren)
159  : Children(std::move(AllChildren)) {
160  assert(Children.size() > 0 && "Or Iterator must have at least one child.");
161  }
162 
163  /// Returns true if all children are exhausted.
164  bool reachedEnd() const override {
165  return std::all_of(begin(Children), end(Children),
166  [](const std::unique_ptr<Iterator> &Child) {
167  return Child->reachedEnd();
168  });
169  }
170 
171  /// Moves each child pointing to the smallest DocID to the next item.
172  void advance() override {
173  assert(!reachedEnd() &&
174  "OrIterator must have at least one child to advance().");
175  const auto SmallestID = peek();
176  for (const auto &Child : Children)
177  if (!Child->reachedEnd() && Child->peek() == SmallestID)
178  Child->advance();
179  }
180 
181  /// Advances each child to the next existing element with DocumentID >= ID.
182  void advanceTo(DocID ID) override {
183  assert(!reachedEnd() && "Can't advance iterator after it reached the end.");
184  for (const auto &Child : Children)
185  if (!Child->reachedEnd())
186  Child->advanceTo(ID);
187  }
188 
189  /// Returns the element under cursor of the child with smallest Child->peek()
190  /// value.
191  DocID peek() const override {
192  assert(!reachedEnd() &&
193  "OrIterator must have at least one child to peek().");
194  DocID Result = std::numeric_limits<DocID>::max();
195 
196  for (const auto &Child : Children)
197  if (!Child->reachedEnd())
198  Result = std::min(Result, Child->peek());
199 
200  return Result;
201  }
202 
203  llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
204  OS << "(| ";
205  auto Separator = "";
206  for (const auto &Child : Children) {
207  OS << Separator << *Child;
208  Separator = " ";
209  }
210  OS << ')';
211  return OS;
212  }
213 
214 private:
215  // FIXME(kbobyrev): Would storing Children in min-heap be faster?
216  std::vector<std::unique_ptr<Iterator>> Children;
217 };
218 
219 } // end namespace
220 
221 std::vector<DocID> consume(Iterator &It) {
222  std::vector<DocID> Result;
223  for (; !It.reachedEnd(); It.advance())
224  Result.push_back(It.peek());
225  return Result;
226 }
227 
228 std::unique_ptr<Iterator> create(PostingListRef Documents) {
229  return llvm::make_unique<DocumentIterator>(Documents);
230 }
231 
232 std::unique_ptr<Iterator>
233 createAnd(std::vector<std::unique_ptr<Iterator>> Children) {
234  return llvm::make_unique<AndIterator>(move(Children));
235 }
236 
237 std::unique_ptr<Iterator>
238 createOr(std::vector<std::unique_ptr<Iterator>> Children) {
239  return llvm::make_unique<OrIterator>(move(Children));
240 }
241 
242 } // namespace dex
243 } // namespace clangd
244 } // namespace clang
std::vector< DocID > consume(Iterator &It)
Exhausts given iterator and returns all processed DocIDs.
Definition: Iterator.cpp:221
llvm::ArrayRef< DocID > PostingListRef
Immutable reference to PostingList object.
Definition: Iterator.h:52
Iterator is the interface for Query Tree node.
Definition: Iterator.h:60
virtual DocID peek() const =0
Returns the current element this iterator points to.
std::unique_ptr< Iterator > create(PostingListRef Documents)
Returns a document iterator over given PostingList.
Definition: Iterator.cpp:228
uint32_t DocID
Symbol position in the list of all index symbols sorted by a pre-computed symbol quality.
Definition: Iterator.h:46
virtual void advance()=0
Moves to next valid DocID.
std::unique_ptr< Iterator > createOr(std::vector< std::unique_ptr< Iterator >> Children)
Returns OR Iterator which performs the union of the PostingLists of its children. ...
Definition: Iterator.cpp:238
std::unique_ptr< Iterator > createAnd(std::vector< std::unique_ptr< Iterator >> Children)
Returns AND Iterator which performs the intersection of the PostingLists of its children.
Definition: Iterator.cpp:233
===-- Representation.cpp - ClangDoc Representation -----------*- C++ -*-===//
virtual bool reachedEnd() const =0
Returns true if all valid DocIDs were processed and hence the iterator is exhausted.