clang-tools  7.0.0
FileDistance.cpp
Go to the documentation of this file.
1 //===--- FileDistance.cpp - File contents container -------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // The FileDistance structure allows calculating the minimum distance to paths
11 // in a single tree.
12 // We simply walk up the path's ancestors until we find a node whose cost is
13 // known, and add the cost of walking back down. Initialization ensures this
14 // gives the correct path to the roots.
15 // We cache the results, so that the runtime is O(|A|), where A is the set of
16 // all distinct ancestors of visited paths.
17 //
18 // Example after initialization with /=2, /bar=0, DownCost = 1:
19 // / = 2
20 // /bar = 0
21 //
22 // After querying /foo/bar and /bar/foo:
23 // / = 2
24 // /bar = 0
25 // /bar/foo = 1
26 // /foo = 3
27 // /foo/bar = 4
28 //
29 // URIDistance creates FileDistance lazily for each URI scheme encountered. In
30 // practice this is a small constant factor.
31 //
32 //===-------------------------------------------------------------------------//
33 
34 #include "FileDistance.h"
35 #include "Logger.h"
36 #include "llvm/ADT/STLExtras.h"
37 #include <queue>
38 
39 namespace clang {
40 namespace clangd {
41 using namespace llvm;
42 
43 // Convert a path into the canonical form.
44 // Canonical form is either "/", or "/segment" * N:
45 // C:\foo\bar --> /c:/foo/bar
46 // /foo/ --> /foo
47 // a/b/c --> /a/b/c
48 static SmallString<128> canonicalize(StringRef Path) {
49  SmallString<128> Result = Path.rtrim('/');
50  native(Result, sys::path::Style::posix);
51  if (Result.empty() || Result.front() != '/')
52  Result.insert(Result.begin(), '/');
53  return Result;
54 }
55 
56 constexpr const unsigned FileDistance::kUnreachable;
57 
59  const FileDistanceOptions &Opts)
60  : Opts(Opts) {
61  llvm::DenseMap<hash_code, SmallVector<hash_code, 4>> DownEdges;
62  // Compute the best distance following only up edges.
63  // Keep track of down edges, in case we can use them to improve on this.
64  for (const auto &S : Sources) {
65  auto Canonical = canonicalize(S.getKey());
66  dlog("Source {0} = {1}, MaxUp = {2}", Canonical, S.second.Cost,
67  S.second.MaxUpTraversals);
68  // Walk up to ancestors of this source, assigning cost.
69  StringRef Rest = Canonical;
70  llvm::hash_code Hash = hash_value(Rest);
71  for (unsigned I = 0; !Rest.empty(); ++I) {
72  Rest = parent_path(Rest, sys::path::Style::posix);
73  auto NextHash = hash_value(Rest);
74  auto &Down = DownEdges[NextHash];
75  if (std::find(Down.begin(), Down.end(), Hash) == Down.end())
76  DownEdges[NextHash].push_back(Hash);
77  // We can't just break after MaxUpTraversals, must still set DownEdges.
78  if (I > S.getValue().MaxUpTraversals) {
79  if (Cache.find(Hash) != Cache.end())
80  break;
81  } else {
82  unsigned Cost = S.getValue().Cost + I * Opts.UpCost;
83  auto R = Cache.try_emplace(Hash, Cost);
84  if (!R.second) {
85  if (Cost < R.first->second) {
86  R.first->second = Cost;
87  } else {
88  // If we're not the best way to get to this path, stop assigning.
89  break;
90  }
91  }
92  }
93  Hash = NextHash;
94  }
95  }
96  // Now propagate scores parent -> child if that's an improvement.
97  // BFS ensures we propagate down chains (must visit parents before children).
98  std::queue<hash_code> Next;
99  for (auto Child : DownEdges.lookup(hash_value(llvm::StringRef(""))))
100  Next.push(Child);
101  while (!Next.empty()) {
102  auto ParentCost = Cache.lookup(Next.front());
103  for (auto Child : DownEdges.lookup(Next.front())) {
104  auto &ChildCost =
105  Cache.try_emplace(Child, kUnreachable).first->getSecond();
106  if (ParentCost + Opts.DownCost < ChildCost)
107  ChildCost = ParentCost + Opts.DownCost;
108  Next.push(Child);
109  }
110  Next.pop();
111  }
112 }
113 
114 unsigned FileDistance::distance(StringRef Path) {
115  auto Canonical = canonicalize(Path);
116  unsigned Cost = kUnreachable;
117  SmallVector<hash_code, 16> Ancestors;
118  // Walk up ancestors until we find a path we know the distance for.
119  for (StringRef Rest = Canonical; !Rest.empty();
120  Rest = parent_path(Rest, sys::path::Style::posix)) {
121  auto Hash = hash_value(Rest);
122  auto It = Cache.find(Hash);
123  if (It != Cache.end()) {
124  Cost = It->second;
125  break;
126  }
127  Ancestors.push_back(Hash);
128  }
129  // Now we know the costs for (known node, queried node].
130  // Fill these in, walking down the directory tree.
131  for (hash_code Hash : reverse(Ancestors)) {
132  if (Cost != kUnreachable)
133  Cost += Opts.DownCost;
134  Cache.try_emplace(Hash, Cost);
135  }
136  dlog("distance({0} = {1})", Path, Cost);
137  return Cost;
138 }
139 
140 unsigned URIDistance::distance(llvm::StringRef URI) {
141  auto R = Cache.try_emplace(llvm::hash_value(URI), FileDistance::kUnreachable);
142  if (!R.second)
143  return R.first->getSecond();
144  if (auto U = clangd::URI::parse(URI)) {
145  dlog("distance({0} = {1})", URI, U->body());
146  R.first->second = forScheme(U->scheme()).distance(U->body());
147  } else {
148  log("URIDistance::distance() of unparseable {0}: {1}", URI, U.takeError());
149  }
150  return R.first->second;
151 }
152 
153 FileDistance &URIDistance::forScheme(llvm::StringRef Scheme) {
154  auto &Delegate = ByScheme[Scheme];
155  if (!Delegate) {
156  llvm::StringMap<SourceParams> SchemeSources;
157  for (const auto &Source : Sources) {
158  if (auto U = clangd::URI::create(Source.getKey(), Scheme))
159  SchemeSources.try_emplace(U->body(), Source.getValue());
160  else
161  consumeError(U.takeError());
162  }
163  dlog("FileDistance for scheme {0}: {1}/{2} sources", Scheme,
164  SchemeSources.size(), Sources.size());
165  Delegate.reset(new FileDistance(std::move(SchemeSources), Opts));
166  }
167  return *Delegate;
168 }
169 
170 } // namespace clangd
171 } // namespace clang
static const char * Scheme
Definition: URI.cpp:33
Some operations such as code completion produce a set of candidates.
void log(const char *Fmt, Ts &&... Vals)
Definition: Logger.h:62
std::string Path
A typedef to represent a file path.
Definition: Path.h:21
#define dlog(...)
Definition: Logger.h:72
FileDistance(llvm::StringMap< SourceParams > Sources, const FileDistanceOptions &Opts={})
unsigned distance(llvm::StringRef URI)
static constexpr unsigned kUnreachable
Definition: FileDistance.h:69
static SmallString< 128 > canonicalize(StringRef Path)
static llvm::Expected< URI > create(llvm::StringRef AbsolutePath, llvm::StringRef Scheme)
Creates a URI for a file in the given scheme.
Definition: URI.cpp:174
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
A URI describes the location of a source file.
Definition: URI.h:29
static llvm::Expected< URI > parse(llvm::StringRef Uri)
Parse a URI string "<scheme>:[//<authority>/]<path>".
Definition: URI.cpp:156
unsigned distance(llvm::StringRef Path)