Line data Source code
1 : //===--- Quality.h - Ranking alternatives for ambiguous queries --*- C++-*-===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : ///
10 : /// Some operations such as code completion produce a set of candidates.
11 : /// Usually the user can choose between them, but we should put the best options
12 : /// at the top (they're easier to select, and more likely to be seen).
13 : ///
14 : /// This file defines building blocks for ranking candidates.
15 : /// It's used by the features directly and also in the implementation of
16 : /// indexes, as indexes also need to heuristically limit their results.
17 : ///
18 : /// The facilities here are:
19 : /// - retrieving scoring signals from e.g. indexes, AST, CodeCompletionString
20 : /// These are structured in a way that they can be debugged, and are fairly
21 : /// consistent regardless of the source.
22 : /// - compute scores from scoring signals. These are suitable for sorting.
23 : /// - sorting utilities like the TopN container.
24 : /// These could be split up further to isolate dependencies if we care.
25 : ///
26 : //===----------------------------------------------------------------------===//
27 :
28 : #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H
29 : #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H
30 :
31 : #include "FileDistance.h"
32 : #include "clang/Sema/CodeCompleteConsumer.h"
33 : #include "llvm/ADT/ArrayRef.h"
34 : #include "llvm/ADT/StringRef.h"
35 : #include <algorithm>
36 : #include <functional>
37 : #include <vector>
38 :
39 : namespace llvm {
40 : class raw_ostream;
41 : }
42 :
43 : namespace clang {
44 : class CodeCompletionResult;
45 :
46 : namespace clangd {
47 :
48 : struct Symbol;
49 : class URIDistance;
50 :
51 : // Signals structs are designed to be aggregated from 0 or more sources.
52 : // A default instance has neutral signals, and sources are merged into it.
53 : // They can be dumped for debugging, and evaluate()d into a score.
54 :
55 : /// Attributes of a symbol that affect how much we like it.
56 : struct SymbolQualitySignals {
57 : bool Deprecated = false;
58 : bool ReservedName = false; // __foo, _Foo are usually implementation details.
59 : // FIXME: make these findable once user types _.
60 : bool ImplementationDetail = false;
61 : unsigned References = 0;
62 :
63 : enum SymbolCategory {
64 : Unknown = 0,
65 : Variable,
66 : Macro,
67 : Type,
68 : Function,
69 : Constructor,
70 : Namespace,
71 : Keyword,
72 : } Category = Unknown;
73 :
74 : void merge(const CodeCompletionResult &SemaCCResult);
75 : void merge(const Symbol &IndexResult);
76 :
77 : // Condense these signals down to a single number, higher is better.
78 : float evaluate() const;
79 : };
80 : llvm::raw_ostream &operator<<(llvm::raw_ostream &,
81 : const SymbolQualitySignals &);
82 :
83 : /// Attributes of a symbol-query pair that affect how much we like it.
84 0 : struct SymbolRelevanceSignals {
85 : /// 0-1+ fuzzy-match score for unqualified name. Must be explicitly assigned.
86 : float NameMatch = 1;
87 : bool Forbidden = false; // Unavailable (e.g const) or inaccessible (private).
88 : /// Whether fixits needs to be applied for that completion or not.
89 : bool NeedsFixIts = false;
90 :
91 : URIDistance *FileProximityMatch = nullptr;
92 : /// These are used to calculate proximity between the index symbol and the
93 : /// query.
94 : llvm::StringRef SymbolURI;
95 : /// FIXME: unify with index proximity score - signals should be
96 : /// source-independent.
97 : /// Proximity between best declaration and the query. [0-1], 1 is closest.
98 : float SemaFileProximityScore = 0;
99 :
100 : // Scope proximity is only considered (both index and sema) when this is set.
101 : ScopeDistance *ScopeProximityMatch = nullptr;
102 : llvm::Optional<llvm::StringRef> SymbolScope;
103 : // A symbol from sema should be accessible from the current scope.
104 : bool SemaSaysInScope = false;
105 :
106 : // An approximate measure of where we expect the symbol to be used.
107 : enum AccessibleScope {
108 : FunctionScope,
109 : ClassScope,
110 : FileScope,
111 : GlobalScope,
112 : } Scope = GlobalScope;
113 :
114 : enum QueryType {
115 : CodeComplete,
116 : Generic,
117 : } Query = Generic;
118 :
119 : CodeCompletionContext::Kind Context = CodeCompletionContext::CCC_Other;
120 :
121 : // Whether symbol is an instance member of a class.
122 : bool IsInstanceMember = false;
123 :
124 : void merge(const CodeCompletionResult &SemaResult);
125 : void merge(const Symbol &IndexResult);
126 :
127 : // Condense these signals down to a single number, higher is better.
128 : float evaluate() const;
129 : };
130 : llvm::raw_ostream &operator<<(llvm::raw_ostream &,
131 : const SymbolRelevanceSignals &);
132 :
133 : /// Combine symbol quality and relevance into a single score.
134 : float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance);
135 :
136 : /// TopN<T> is a lossy container that preserves only the "best" N elements.
137 : template <typename T, typename Compare = std::greater<T>> class TopN {
138 : public:
139 : using value_type = T;
140 : TopN(size_t N, Compare Greater = Compare())
141 : : N(N), Greater(std::move(Greater)) {}
142 :
143 : // Adds a candidate to the set.
144 : // Returns true if a candidate was dropped to get back under N.
145 : bool push(value_type &&V) {
146 : bool Dropped = false;
147 : if (Heap.size() >= N) {
148 : Dropped = true;
149 : if (N > 0 && Greater(V, Heap.front())) {
150 : std::pop_heap(Heap.begin(), Heap.end(), Greater);
151 : Heap.back() = std::move(V);
152 : std::push_heap(Heap.begin(), Heap.end(), Greater);
153 : }
154 : } else {
155 : Heap.push_back(std::move(V));
156 : std::push_heap(Heap.begin(), Heap.end(), Greater);
157 : }
158 : assert(Heap.size() <= N);
159 : assert(std::is_heap(Heap.begin(), Heap.end(), Greater));
160 : return Dropped;
161 : }
162 :
163 : // Returns candidates from best to worst.
164 : std::vector<value_type> items() && {
165 : std::sort_heap(Heap.begin(), Heap.end(), Greater);
166 : assert(Heap.size() <= N);
167 : return std::move(Heap);
168 : }
169 :
170 : private:
171 : const size_t N;
172 : std::vector<value_type> Heap; // Min-heap, comparator is Greater.
173 : Compare Greater;
174 : };
175 :
176 : /// Returns a string that sorts in the same order as (-Score, Tiebreak), for
177 : /// LSP. (The highest score compares smallest so it sorts at the top).
178 : std::string sortText(float Score, llvm::StringRef Tiebreak = "");
179 :
180 : struct SignatureQualitySignals {
181 : uint32_t NumberOfParameters = 0;
182 : uint32_t NumberOfOptionalParameters = 0;
183 : bool ContainsActiveParameter = false;
184 : CodeCompleteConsumer::OverloadCandidate::CandidateKind Kind =
185 : CodeCompleteConsumer::OverloadCandidate::CandidateKind::CK_Function;
186 : };
187 : llvm::raw_ostream &operator<<(llvm::raw_ostream &,
188 : const SignatureQualitySignals &);
189 :
190 : } // namespace clangd
191 : } // namespace clang
192 :
193 : #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H
|