Line data Source code
1 : //===--- FileDistance.h - File proximity scoring -----------------*- C++-*-===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : //
10 : // This library measures the distance between file paths.
11 : // It's used for ranking symbols, e.g. in code completion.
12 : // |foo/bar.h -> foo/bar.h| = 0.
13 : // |foo/bar.h -> foo/baz.h| < |foo/bar.h -> baz.h|.
14 : // This is an edit-distance, where edits go up or down the directory tree.
15 : // It's not symmetrical, the costs of going up and down may not match.
16 : //
17 : // Dealing with multiple sources:
18 : // In practice we care about the distance from a source file, but files near
19 : // its main-header and #included files are considered "close".
20 : // So we start with a set of (anchor, cost) pairs, and call the distance to a
21 : // path the minimum of `cost + |source -> path|`.
22 : //
23 : // We allow each source to limit the number of up-traversals paths may start
24 : // with. Up-traversals may reach things that are not "semantically near".
25 : //
26 : // Symbol URI schemes:
27 : // Symbol locations may be represented by URIs rather than file paths directly.
28 : // In this case we want to perform distance computations in URI space rather
29 : // than in file-space, without performing redundant conversions.
30 : // Therefore we have a lookup structure that accepts URIs, so that intermediate
31 : // calculations for the same scheme can be reused.
32 : //
33 : // Caveats:
34 : // Assuming up and down traversals each have uniform costs is simplistic.
35 : // Often there are "semantic roots" whose children are almost unrelated.
36 : // (e.g. /usr/include/, or / in an umbrella repository). We ignore this.
37 : //
38 : //===----------------------------------------------------------------------===//
39 :
40 : #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_FILEDISTANCE_H
41 : #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_FILEDISTANCE_H
42 :
43 : #include "URI.h"
44 : #include "llvm/ADT/DenseMap.h"
45 : #include "llvm/ADT/DenseMapInfo.h"
46 : #include "llvm/ADT/SmallString.h"
47 : #include "llvm/ADT/StringRef.h"
48 : #include "llvm/Support/Allocator.h"
49 : #include "llvm/Support/Path.h"
50 : #include "llvm/Support/StringSaver.h"
51 : #include <memory>
52 :
53 : namespace clang {
54 : namespace clangd {
55 :
56 0 : struct FileDistanceOptions {
57 : unsigned UpCost = 2; // |foo/bar.h -> foo|
58 : unsigned DownCost = 1; // |foo -> foo/bar.h|
59 : unsigned IncludeCost = 2; // |foo.cc -> included_header.h|
60 : bool AllowDownTraversalFromRoot = true; // | / -> /a |
61 : };
62 :
63 0 : struct SourceParams {
64 : // Base cost for paths starting at this source.
65 : unsigned Cost = 0;
66 : // Limits the number of upwards traversals allowed from this source.
67 : unsigned MaxUpTraversals = std::numeric_limits<unsigned>::max();
68 : };
69 :
70 : // Supports lookups to find the minimum distance to a file from any source.
71 : // This object should be reused, it memoizes intermediate computations.
72 0 : class FileDistance {
73 : public:
74 : static constexpr unsigned Unreachable = std::numeric_limits<unsigned>::max();
75 : static const llvm::hash_code RootHash;
76 :
77 : FileDistance(llvm::StringMap<SourceParams> Sources,
78 : const FileDistanceOptions &Opts = {});
79 :
80 : // Computes the minimum distance from any source to the file path.
81 : unsigned distance(llvm::StringRef Path);
82 :
83 : private:
84 : // Costs computed so far. Always contains sources and their ancestors.
85 : // We store hash codes only. Collisions are rare and consequences aren't dire.
86 : llvm::DenseMap<llvm::hash_code, unsigned> Cache;
87 : FileDistanceOptions Opts;
88 : };
89 :
90 : // Supports lookups like FileDistance, but the lookup keys are URIs.
91 : // We convert each of the sources to the scheme of the URI and do a FileDistance
92 : // comparison on the bodies.
93 : class URIDistance {
94 : public:
95 : // \p Sources must contain absolute paths, not URIs.
96 0 : URIDistance(llvm::StringMap<SourceParams> Sources,
97 : const FileDistanceOptions &Opts = {})
98 0 : : Sources(Sources), Opts(Opts) {}
99 :
100 : // Computes the minimum distance from any source to the URI.
101 : // Only sources that can be mapped into the URI's scheme are considered.
102 : unsigned distance(llvm::StringRef URI);
103 :
104 : private:
105 : // Returns the FileDistance for a URI scheme, creating it if needed.
106 : FileDistance &forScheme(llvm::StringRef Scheme);
107 :
108 : // We cache the results using the original strings so we can skip URI parsing.
109 : llvm::DenseMap<llvm::hash_code, unsigned> Cache;
110 : llvm::StringMap<SourceParams> Sources;
111 : llvm::StringMap<std::unique_ptr<FileDistance>> ByScheme;
112 : FileDistanceOptions Opts;
113 : };
114 :
115 : /// Support lookups like FileDistance, but the lookup keys are symbol scopes.
116 : /// For example, a scope "na::nb::" is converted to "/na/nb".
117 : class ScopeDistance {
118 : public:
119 : /// QueryScopes[0] is the preferred scope.
120 : ScopeDistance(llvm::ArrayRef<std::string> QueryScopes);
121 :
122 : unsigned distance(llvm::StringRef SymbolScope);
123 :
124 : private:
125 : FileDistance Distance;
126 : };
127 :
128 : } // namespace clangd
129 : } // namespace clang
130 :
131 : #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_FILEDISTANCE_H
|