Line data Source code
1 : //===--- Dex.h - Dex Symbol Index Implementation ----------------*- C++ -*-===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : ///
10 : /// \file
11 : /// This defines Dex - a symbol index implementation based on query iterators
12 : /// over symbol tokens, such as fuzzy matching trigrams, scopes, types, etc.
13 : /// While consuming more memory and having longer build stage due to
14 : /// preprocessing, Dex will have substantially lower latency. It will also allow
15 : /// efficient symbol searching which is crucial for operations like code
16 : /// completion, and can be very important for a number of different code
17 : /// transformations which will be eventually supported by Clangd.
18 : ///
19 : //===----------------------------------------------------------------------===//
20 :
21 : #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H
22 : #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H
23 :
24 : #include "Iterator.h"
25 : #include "PostingList.h"
26 : #include "Token.h"
27 : #include "Trigram.h"
28 : #include "index/Index.h"
29 : #include "index/MemIndex.h"
30 : #include "index/SymbolCollector.h"
31 :
32 : namespace clang {
33 : namespace clangd {
34 : namespace dex {
35 :
36 : /// In-memory Dex trigram-based index implementation.
37 : // FIXME(kbobyrev): Introduce serialization and deserialization of the symbol
38 : // index so that it can be loaded from the disk. Since static index is not
39 : // changed frequently, it's safe to assume that it has to be built only once
40 : // (when the clangd process starts). Therefore, it can be easier to store built
41 : // index on disk and then load it if available.
42 : class Dex : public SymbolIndex {
43 : public:
44 : // All data must outlive this index.
45 : template <typename SymbolRange, typename RefsRange>
46 0 : Dex(SymbolRange &&Symbols, RefsRange &&Refs,
47 : llvm::ArrayRef<std::string> Schemes)
48 0 : : Corpus(0), URISchemes(Schemes) {
49 : // If Schemes don't contain any items, fall back to SymbolCollector's
50 : // default URI schemes.
51 0 : if (URISchemes.empty()) {
52 0 : SymbolCollector::Options Opts;
53 0 : URISchemes = Opts.URISchemes;
54 : }
55 : llvm::DenseSet<SymbolID> SeenIDs;
56 0 : for (auto &&Sym : Symbols)
57 0 : if (SeenIDs.insert(Sym.ID).second)
58 0 : this->Symbols.push_back(&Sym);
59 0 : for (auto &&Ref : Refs)
60 0 : this->Refs.try_emplace(Ref.first, Ref.second);
61 0 : buildIndex();
62 0 : }
63 0 : // Symbols and Refs are owned by BackingData, Index takes ownership.
64 : template <typename SymbolRange, typename RefsRange, typename Payload>
65 0 : Dex(SymbolRange &&Symbols, RefsRange &&Refs, Payload &&BackingData,
66 : size_t BackingDataSize, llvm::ArrayRef<std::string> URISchemes)
67 : : Dex(std::forward<SymbolRange>(Symbols), std::forward<RefsRange>(Refs),
68 0 : URISchemes) {
69 0 : KeepAlive = std::shared_ptr<void>(
70 0 : std::make_shared<Payload>(std::move(BackingData)), nullptr);
71 : this->BackingDataSize = BackingDataSize;
72 : }
73 0 :
74 0 : /// Builds an index from slabs. The index takes ownership of the slab.
75 0 : static std::unique_ptr<SymbolIndex>
76 0 : build(SymbolSlab, RefSlab, llvm::ArrayRef<std::string> URISchemes);
77 0 :
78 0 : bool
79 0 : fuzzyFind(const FuzzyFindRequest &Req,
80 0 : llvm::function_ref<void(const Symbol &)> Callback) const override;
81 :
82 0 : void lookup(const LookupRequest &Req,
83 : llvm::function_ref<void(const Symbol &)> Callback) const override;
84 :
85 0 : void refs(const RefsRequest &Req,
86 0 : llvm::function_ref<void(const Ref &)> Callback) const override;
87 0 :
88 : size_t estimateMemoryUsage() const override;
89 :
90 0 : private:
91 0 : void buildIndex();
92 0 : std::unique_ptr<Iterator> iterator(const Token &Tok) const;
93 0 :
94 0 : /// Stores symbols sorted in the descending order of symbol quality..
95 0 : std::vector<const Symbol *> Symbols;
96 0 : /// SymbolQuality[I] is the quality of Symbols[I].
97 : std::vector<float> SymbolQuality;
98 : llvm::DenseMap<SymbolID, const Symbol *> LookupTable;
99 : /// Inverted index is a mapping from the search token to the posting list,
100 : /// which contains all items which can be characterized by such search token.
101 : /// For example, if the search token is scope "std::", the corresponding
102 : /// posting list would contain all indices of symbols defined in namespace
103 : /// std. Inverted index is used to retrieve posting lists which are processed
104 : /// during the fuzzyFind process.
105 : llvm::DenseMap<Token, PostingList> InvertedIndex;
106 : dex::Corpus Corpus;
107 : llvm::DenseMap<SymbolID, llvm::ArrayRef<Ref>> Refs;
108 : std::shared_ptr<void> KeepAlive; // poor man's move-only std::any
109 : // Size of memory retained by KeepAlive.
110 : size_t BackingDataSize = 0;
111 :
112 : std::vector<std::string> URISchemes;
113 : };
114 :
115 : /// Returns Search Token for a number of parent directories of given Path.
116 : /// Should be used within the index build process.
117 : ///
118 : /// This function is exposed for testing only.
119 : std::vector<std::string> generateProximityURIs(llvm::StringRef URIPath);
120 :
121 : } // namespace dex
122 : } // namespace clangd
123 : } // namespace clang
124 :
125 : #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H
|