clang-tools  7.0.0
Index.h
Go to the documentation of this file.
1 //===--- Index.h ------------------------------------------------*- C++-*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===---------------------------------------------------------------------===//
9 
10 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H
11 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H
12 
13 #include "clang/Index/IndexSymbol.h"
14 #include "clang/Lex/Lexer.h"
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/DenseSet.h"
17 #include "llvm/ADT/Hashing.h"
18 #include "llvm/ADT/Optional.h"
19 #include "llvm/ADT/StringExtras.h"
20 #include <array>
21 #include <string>
22 
23 namespace clang {
24 namespace clangd {
25 
27  // Specifies a (Line, Column) position of a symbol. Using Line/Column allows us
28  // to build LSP responses without reading the file content.
29  struct Position {
30  uint32_t Line = 0; // 0-based; defaults to 0.
31  // Column is counted in UTF-16 code units.
32  uint32_t Column = 0; // 0-based; defaults to 0.
33  bool operator==(const Position& P) const { // True iff Line and Column both match.
34  return Line == P.Line && Column == P.Column;
35  }
36  };
37 
38  // The URI of the source file where a symbol occurs.
39  llvm::StringRef FileURI;
40 
41  /// The symbol range, using half-open range [Start, End).
44 
45  explicit operator bool() const { return !FileURI.empty(); }
46  bool operator==(const SymbolLocation& Loc) const {
47  return std::tie(FileURI, Start, End) ==
48  std::tie(Loc.FileURI, Loc.Start, Loc.End);
49  }
50 };
51 llvm::raw_ostream &operator<<(llvm::raw_ostream &, const SymbolLocation &);
52 
53 // The class identifies a particular C++ symbol (class, function, method, etc).
54 //
55 // As USRs (Unified Symbol Resolution) could be large, especially for functions
56 // with long type arguments, SymbolID is using 160-bits SHA1(USR) values to
57 // guarantee the uniqueness of symbols while using a relatively small amount of
58 // memory (vs storing USRs directly).
59 //
60 // SymbolID can be used as key in the symbol indexes to lookup the symbol.
61 class SymbolID {
62 public:
63  SymbolID() = default; // NOTE(review): HashValue has no initializer, so `SymbolID X;` leaves it indeterminate.
64  explicit SymbolID(llvm::StringRef USR); // Declared only; body is not part of this listing.
65 
66  bool operator==(const SymbolID &Sym) const { // Equal iff every hash byte matches.
67  return HashValue == Sym.HashValue;
68  }
69  bool operator<(const SymbolID &Sym) const { // std::array ordering: lexicographic over the hash bytes.
70  return HashValue < Sym.HashValue;
71  }
72 
73  // Returns a 40-byte hex-encoded string.
74  std::string str() const;
75 
76 private:
77  static constexpr unsigned HashByteLength = 20; // Size in bytes of the stored hash (20 * 8 = 160 bits).
78 
79  friend llvm::hash_code hash_value(const SymbolID &ID) {
80  // We already have a good hash, just return the first bytes.
81  static_assert(sizeof(size_t) <= HashByteLength, "size_t longer than SHA1!");
82  size_t Result;
83  memcpy(&Result, ID.HashValue.data(), sizeof(size_t)); // Copy the leading sizeof(size_t) bytes of the hash.
84  return llvm::hash_code(Result);
85  }
86  friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
87  const SymbolID &ID);
88  friend void operator>>(llvm::StringRef Str, SymbolID &ID); // Friend so it can fill HashValue directly.
89 
90  std::array<uint8_t, HashByteLength> HashValue; // Raw hash bytes; the only data member.
91 };
92 
93 // Write SymbolID into the given stream. SymbolID is encoded as a 40-bytes
94 // hex string.
95 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const SymbolID &ID);
96 
97 // Construct SymbolID from a hex string.
98 // The HexStr is required to be a 40-bytes hex string, which is encoded from the
99 // "<<" operator.
100 void operator>>(llvm::StringRef HexStr, SymbolID &ID);
101 
102 } // namespace clangd
103 } // namespace clang
104 namespace llvm {
105 // Support SymbolIDs as DenseMap keys.
106 template <> struct DenseMapInfo<clang::clangd::SymbolID> {
108  static clang::clangd::SymbolID EmptyKey("EMPTYKEY");
109  return EmptyKey;
110  }
112  static clang::clangd::SymbolID TombstoneKey("TOMBSTONEKEY");
113  return TombstoneKey;
114  }
115  static unsigned getHashValue(const clang::clangd::SymbolID &Sym) {
116  return hash_value(Sym);
117  }
118  static bool isEqual(const clang::clangd::SymbolID &LHS,
119  const clang::clangd::SymbolID &RHS) {
120  return LHS == RHS;
121  }
122 };
123 } // namespace llvm
124 namespace clang {
125 namespace clangd {
126 
127 // Describes the source of information about a symbol.
128 // Mainly useful for debugging, e.g. understanding code completion results.
129 // This is a bitfield as information can be combined from several sources.
130 enum class SymbolOrigin : uint8_t {
131  Unknown = 0, // No origin bit is set.
132  AST = 1 << 0, // Directly from the AST (indexes should not set this).
133  Dynamic = 1 << 1, // From the dynamic index of opened files.
134  Static = 1 << 2, // From the static, externally-built index.
135  Merge = 1 << 3, // A non-trivial index merge was performed.
136  // Remaining bits (4-7 of the uint8_t) are reserved for index implementations.
137 };
139  return static_cast<SymbolOrigin>(static_cast<uint8_t>(A) |
140  static_cast<uint8_t>(B));
141 }
143  return A = A | B;
144 }
146  return static_cast<SymbolOrigin>(static_cast<uint8_t>(A) &
147  static_cast<uint8_t>(B));
148 }
149 raw_ostream &operator<<(raw_ostream &, SymbolOrigin);
150 
151 // The class represents a C++ symbol, e.g. a class or function.
152 //
153 // WARNING: Symbols do not own much of their underlying data - typically strings
154 // are owned by a SymbolSlab. They should be treated as non-owning references.
155 // Copies are shallow.
156 // When adding new unowned data fields to Symbol, remember to update:
157 // - SymbolSlab::Builder in Index.cpp, to copy them to the slab's storage.
158 // - mergeSymbol in Merge.cpp, to properly combine two Symbols.
159 //
160 // A fully documented symbol can be split as:
161 // size_type std::map<k, t>::count(const K& key) const
162 // | Return | Scope |Name| Signature |
163 // We split up these components to allow display flexibility later.
164 struct Symbol {
165  // The ID of the symbol.
167  // The symbol information, like symbol kind.
169  // The unqualified name of the symbol, e.g. "bar" (for ns::bar).
170  llvm::StringRef Name;
171  // The containing namespace. e.g. "" (global), "ns::" (top-level namespace).
172  llvm::StringRef Scope;
173  // The location of the symbol's definition, if one was found.
174  // This just covers the symbol name (e.g. without class/function body).
176  // The location of the preferred declaration of the symbol.
177  // This just covers the symbol name.
178  // This may be the same as Definition.
179  //
180  // A C++ symbol may have multiple declarations, and we pick one to prefer.
181  // * For classes, the canonical declaration should be the definition.
182  // * For non-inline functions, the canonical declaration typically appears
183  // in the ".h" file corresponding to the definition.
185  // The number of translation units that reference this symbol from their main
186  // file. This number is only meaningful if aggregated in an index.
187  unsigned References = 0;
188  /// Whether or not this symbol is meant to be used for the code completion.
189  /// See also isIndexedForCodeCompletion().
190  bool IsIndexedForCodeCompletion = false;
191  /// Where this symbol came from. Usually an index provides a constant value.
193  /// A brief description of the symbol that can be appended in the completion
194  /// candidate list. For example, "(X x, Y y) const" is a function signature.
195  llvm::StringRef Signature;
196  /// What to insert when completing this symbol, after the symbol name.
197  /// This is in LSP snippet syntax (e.g. "({$0})" for a no-args function).
198  /// (When snippets are disabled, the symbol name alone is used).
199  llvm::StringRef CompletionSnippetSuffix;
200 
201  /// Optional symbol details that are not required to be set. For example, an
202  /// index fuzzy match can return a large number of symbol candidates, and it
203  /// is preferable to send only core symbol information in the batched results
204  /// and have clients resolve full symbol information for a specific candidate
205  /// if needed.
206  struct Details {
207  /// Documentation including comment for the symbol declaration.
208  llvm::StringRef Documentation;
209  /// Type when this symbol is used in an expression. (Short display form).
210  /// e.g. return type of a function, or type of a variable.
211  llvm::StringRef ReturnType;
212  /// This can be either a URI of the header to be #include'd for this symbol,
213  /// or a literal header quoted with <> or "" that is suitable to be included
214  /// directly. When this is a URI, the exact #include path needs to be
215  /// calculated according to the URI scheme.
216  ///
217  /// This is a canonical include for the symbol and can be different from
218  /// FileURI in the CanonicalDeclaration.
219  llvm::StringRef IncludeHeader;
220  };
221 
222  // Optional details of the symbol.
223  const Details *Detail = nullptr;
224 
225  // FIXME: add all occurrences support.
226  // FIXME: add extra fields for index scoring signals.
227 };
228 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Symbol &S);
229 
230 // Computes query-independent quality score for a Symbol.
231 // This currently falls in the range [1, ln(#indexed documents)].
232 // FIXME: this should probably be split into symbol -> signals
233 // and signals -> score, so it can be reused for Sema completions.
234 double quality(const Symbol &S);
235 
236 // An immutable symbol container that stores a set of symbols.
237 // The container will maintain the lifetime of the symbols.
238 class SymbolSlab {
239 public:
240  using const_iterator = std::vector<Symbol>::const_iterator;
242 
243  SymbolSlab() = default; // Creates an empty slab.
244 
245  const_iterator begin() const { return Symbols.begin(); }
246  const_iterator end() const { return Symbols.end(); }
247  const_iterator find(const SymbolID &SymID) const; // Declared only; body is not part of this listing.
248 
249  size_t size() const { return Symbols.size(); } // Number of symbols stored.
250  // Estimates the total memory usage: this object, the arena, and the vector's capacity.
251  size_t bytes() const {
252  return sizeof(*this) + Arena.getTotalMemory() +
253  Symbols.capacity() * sizeof(Symbol); // capacity(), not size(): counts reserved slots too.
254  }
255 
256  // SymbolSlab::Builder is a mutable container that can 'freeze' to SymbolSlab.
257  // The frozen SymbolSlab will use less memory.
258  class Builder {
259  public:
260  // Adds a symbol, overwriting any existing one with the same ID.
261  // This is a deep copy: underlying strings will be owned by the slab.
262  void insert(const Symbol &S);
263 
264  // Returns the symbol with an ID, or nullptr if absent. Valid until next insert().
265  const Symbol *find(const SymbolID &ID) {
266  auto I = SymbolIndex.find(ID); // Maps the ID to a position in Symbols.
267  return I == SymbolIndex.end() ? nullptr : &Symbols[I->second];
268  }
269 
270  // Consumes the builder to finalize the slab.
271  SymbolSlab build() &&; // Rvalue-qualified: callable only on an rvalue Builder.
272 
273  private:
274  llvm::BumpPtrAllocator Arena;
275  // Intern table for strings. Contents are on the arena.
276  llvm::DenseSet<llvm::StringRef> Strings;
277  std::vector<Symbol> Symbols;
278  // Values are indices into Symbols vector.
279  llvm::DenseMap<SymbolID, size_t> SymbolIndex;
280  };
281 
282 private:
283  SymbolSlab(llvm::BumpPtrAllocator Arena, std::vector<Symbol> Symbols) // Takes both arguments by value.
284  : Arena(std::move(Arena)), Symbols(std::move(Symbols)) {} // Moves them into the members.
285 
286  llvm::BumpPtrAllocator Arena; // Owns Symbol data that the Symbols do not.
287  std::vector<Symbol> Symbols; // Sorted by SymbolID to allow lookup.
288 };
289 
291  /// \brief A query string for the fuzzy find. This is matched against symbols'
292  /// un-qualified identifiers and should not contain qualifiers like "::".
293  std::string Query;
294  /// \brief If this is non-empty, symbols must be in at least one of the scopes
295  /// (e.g. namespaces) excluding nested scopes. For example, if a scope "xyz::"
296  /// is provided, the matched symbols must be defined in namespace xyz but not
297  /// namespace xyz::abc.
298  ///
299  /// The global scope is "", a top level scope is "foo::", etc.
300  std::vector<std::string> Scopes;
301  /// \brief The number of top candidates to return. The index may choose to
302  /// return more than this, e.g. if it doesn't know which candidates are best.
303  size_t MaxCandidateCount = UINT_MAX;
304  /// If set to true, only symbols for completion support will be considered.
305  bool RestrictForCodeCompletion = false;
306  /// Contextually relevant files (e.g. the file we're code-completing in).
307  /// Paths should be absolute.
308  std::vector<std::string> ProximityPaths;
309 };
310 
312  llvm::DenseSet<SymbolID> IDs;
313 };
314 
315 /// \brief Interface for symbol indexes that can be used for searching or
316 /// matching symbols among a set of symbols based on names or unique IDs.
317 class SymbolIndex {
318 public:
319  virtual ~SymbolIndex() = default; // Virtual: allows destruction through a SymbolIndex pointer.
320 
321  /// \brief Matches symbols in the index fuzzily and applies \p Callback on
322  /// each matched symbol before returning.
323  /// If returned Symbols are used outside Callback, they must be deep-copied!
324  ///
325  /// Returns true if there may be more results (limited by MaxCandidateCount).
326  virtual bool
327  fuzzyFind(const FuzzyFindRequest &Req,
328  llvm::function_ref<void(const Symbol &)> Callback) const = 0; // Pure virtual.
329 
330  /// Looks up symbols with any of the given symbol IDs and applies \p Callback
331  /// on each matched symbol.
332  /// The returned symbol must be deep-copied if it's used outside Callback.
333  virtual void
334  lookup(const LookupRequest &Req,
335  llvm::function_ref<void(const Symbol &)> Callback) const = 0; // Pure virtual.
336 
337  // FIXME: add interfaces for more index use cases:
338  // - getAllOccurrences(SymbolID);
339 };
340 
341 } // namespace clangd
342 } // namespace clang
343 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H
SourceLocation Loc
'#' location in the include directive
llvm::StringRef Documentation
Documentation including comment for the symbol declaration.
Definition: Index.h:208
size_t bytes() const
Definition: Index.h:251
Optional symbol details that are not required to be set.
Definition: Index.h:206
Some operations such as code completion produce a set of candidates.
const Symbol * find(const SymbolID &ID)
Definition: Index.h:265
llvm::StringRef IncludeHeader
This can be either a URI of the header to be #include'd for this symbol, or a literal header quoted w...
Definition: Index.h:219
friend llvm::hash_code hash_value(const SymbolID &ID)
Definition: Index.h:79
std::vector< Symbol >::const_iterator const_iterator
Definition: Index.h:240
Interface for symbol indexes that can be used for searching or matching symbols among a set of symbol...
Definition: Index.h:317
llvm::DenseSet< SymbolID > IDs
Definition: Index.h:312
SymbolOrigin operator|(SymbolOrigin A, SymbolOrigin B)
Definition: Index.h:138
static clang::clangd::SymbolID getEmptyKey()
Definition: Index.h:107
llvm::unique_function< void(llvm::Expected< T >)> Callback
A Callback<T> is a void function that accepts Expected<T>.
Definition: Function.h:28
llvm::StringRef Scope
Definition: Index.h:172
llvm::StringRef ReturnType
Type when this symbol is used in an expression.
Definition: Index.h:211
std::vector< std::string > Scopes
If this is non-empty, symbols must be in at least one of the scopes (e.g.
Definition: Index.h:300
index::SymbolInfo SymInfo
Definition: Index.h:168
SymbolOrigin operator &(SymbolOrigin A, SymbolOrigin B)
Definition: Index.h:145
SymbolLocation Definition
Definition: Index.h:175
clang::find_all_symbols::SymbolInfo SymbolInfo
llvm::StringRef Signature
A brief description of the symbol that can be appended in the completion candidate list...
Definition: Index.h:195
const_iterator iterator
Definition: Index.h:241
llvm::StringRef FileURI
Definition: Index.h:39
std::string Query
A query string for the fuzzy find.
Definition: Index.h:293
SymbolLocation CanonicalDeclaration
Definition: Index.h:184
bool operator==(const SymbolLocation &Loc) const
Definition: Index.h:46
static bool isEqual(const clang::clangd::SymbolID &LHS, const clang::clangd::SymbolID &RHS)
Definition: Index.h:118
Position Start
The symbol range, using half-open range [Start, End).
Definition: Index.h:42
//===-- Representation.cpp - ClangDoc Representation -----------*- C++ -*-===//
std::vector< std::string > ProximityPaths
Contextually relevant files (e.g.
Definition: Index.h:308
llvm::StringRef Name
Definition: Index.h:170
const_iterator begin() const
Definition: Index.h:245
SymbolOrigin & operator|=(SymbolOrigin &A, SymbolOrigin B)
Definition: Index.h:142
static clang::clangd::SymbolID getTombstoneKey()
Definition: Index.h:111
void operator>>(StringRef Str, SymbolID &ID)
Definition: Index.cpp:41
bool operator<(const SymbolID &Sym) const
Definition: Index.h:69
static unsigned getHashValue(const clang::clangd::SymbolID &Sym)
Definition: Index.h:115
double quality(const Symbol &S)
Definition: Index.cpp:61
llvm::StringRef CompletionSnippetSuffix
What to insert when completing this symbol, after the symbol name.
Definition: Index.h:199
const_iterator end() const
Definition: Index.h:246
size_t size() const
Definition: Index.h:249
std::array< uint8_t, 20 > SymbolID
raw_ostream & operator<<(raw_ostream &OS, const CodeCompletion &C)
bool operator==(const Position &P) const
Definition: Index.h:33
bool operator==(const SymbolID &Sym) const
Definition: Index.h:66