Line data Source code
1 : //===--- Token.h - Symbol Search primitive ----------------------*- C++ -*-===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : ///
10 : /// \file
11 : /// A Token object represents a characteristic of a symbol that can be used
12 : /// to perform efficient search. Tokens are the keys of the inverted index;
13 : /// each Token is mapped to its corresponding posting list.
14 : ///
15 : /// The symbol std::cout might have the tokens:
16 : /// * Scope "std::"
17 : /// * Trigram "cou"
18 : /// * Trigram "out"
19 : /// * Type "std::ostream"
20 : ///
21 : //===----------------------------------------------------------------------===//
22 :
23 : #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_DEX_TOKEN_H
24 : #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_DEX_TOKEN_H
25 :
26 : #include "index/Index.h"
27 : #include "llvm/ADT/DenseMap.h"
28 : #include "llvm/Support/raw_ostream.h"
29 : #include <string>
30 : #include <vector>
31 :
32 : namespace clang {
33 : namespace clangd {
34 : namespace dex {
35 :
36 : /// A Token represents an attribute of a symbol, such as a particular trigram
37 : /// present in the name (used for fuzzy search).
38 : ///
39 : /// Tokens can be used to perform more sophisticated search queries by
40 : /// constructing complex iterator trees.
41 0 : struct Token {
42 : /// Kind specifies Token type which defines semantics for the internal
43 : /// representation. Each Kind has different representation stored in Data
44 : /// field.
45 : // FIXME(kbobyrev): Storing Data hash would be more efficient than storing raw
46 : // strings. For example, PathURI store URIs of each directory and its parents,
47 : // which induces a lot of overhead because these paths tend to be long and
48 : // each parent directory is a prefix.
49 : enum class Kind {
50 : /// Represents trigram used for fuzzy search of unqualified symbol names.
51 : ///
52 : /// Data contains 3 bytes with trigram contents.
53 : Trigram,
54 : /// Scope primitives, e.g. "symbol belongs to namespace foo::bar".
55 : ///
56 : /// Data stroes full scope name, e.g. "foo::bar::baz::" or "" (for global
57 : /// scope).
58 : Scope,
59 : /// Path Proximity URI to symbol declaration.
60 : ///
61 : /// Data stores path URI of symbol declaration file or its parent.
62 : ///
63 : /// Example: "file:///path/to/clang-tools-extra/clangd/index/SymbolIndex.h"
64 : /// and some amount of its parents.
65 : ProximityURI,
66 : /// Internal Token type for invalid/special tokens, e.g. empty tokens for
67 : /// llvm::DenseMap.
68 : Sentinel,
69 : /// FIXME(kbobyrev): Add other Token Kinds
70 : /// * Type with qualified type name or its USR
71 : };
72 :
73 : Token(Kind TokenKind, llvm::StringRef Data)
74 0 : : Data(Data), TokenKind(TokenKind) {}
75 :
76 : bool operator==(const Token &Other) const {
77 0 : return TokenKind == Other.TokenKind && Data == Other.Data;
78 : }
79 :
80 : /// Representation which is unique among Token with the same Kind.
81 : std::string Data;
82 : Kind TokenKind;
83 :
84 0 : friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Token &T) {
85 0 : switch (T.TokenKind) {
86 0 : case Kind::Trigram:
87 0 : OS << "T=";
88 0 : break;
89 0 : case Kind::Scope:
90 0 : OS << "S=";
91 0 : break;
92 0 : case Kind::ProximityURI:
93 0 : OS << "U=";
94 0 : break;
95 0 : case Kind::Sentinel:
96 0 : OS << "?=";
97 0 : break;
98 : }
99 0 : return OS << T.Data;
100 : }
101 :
102 : private:
103 : friend llvm::hash_code hash_value(const Token &Token) {
104 : return llvm::hash_combine(static_cast<int>(Token.TokenKind), Token.Data);
105 : }
106 : };
107 :
108 : } // namespace dex
109 : } // namespace clangd
110 : } // namespace clang
111 :
112 : namespace llvm {
113 :
114 : // Support Tokens as DenseMap keys.
115 : template <> struct DenseMapInfo<clang::clangd::dex::Token> {
116 : static inline clang::clangd::dex::Token getEmptyKey() {
117 : return {clang::clangd::dex::Token::Kind::Sentinel, "EmptyKey"};
118 : }
119 :
120 : static inline clang::clangd::dex::Token getTombstoneKey() {
121 : return {clang::clangd::dex::Token::Kind::Sentinel, "TombstoneKey"};
122 : }
123 :
124 : static unsigned getHashValue(const clang::clangd::dex::Token &Tag) {
125 : return hash_value(Tag);
126 : }
127 :
128 : static bool isEqual(const clang::clangd::dex::Token &LHS,
129 : const clang::clangd::dex::Token &RHS) {
130 : return LHS == RHS;
131 : }
132 : };
133 :
134 : } // namespace llvm
135 :
136 : #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_DEX_TOKEN_H
|