clang-tools  9.0.0
SourceCode.h
Go to the documentation of this file.
1 //===--- SourceCode.h - Manipulating source code as strings -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Various code that examines C++ source code without using heavy AST machinery
10 // (and often not even the lexer). To be used sparingly!
11 //
12 //===----------------------------------------------------------------------===//
13 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_SOURCECODE_H
14 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_SOURCECODE_H
15 #include "Context.h"
16 #include "Protocol.h"
17 #include "clang/Basic/Diagnostic.h"
18 #include "clang/Basic/LangOptions.h"
19 #include "clang/Basic/SourceLocation.h"
20 #include "clang/Basic/SourceManager.h"
21 #include "clang/Format/Format.h"
22 #include "clang/Tooling/Core/Replacement.h"
23 #include "llvm/ADT/StringRef.h"
24 #include "llvm/ADT/StringSet.h"
25 #include "llvm/Support/SHA1.h"
26 
27 namespace clang {
28 class SourceManager;
29 
30 namespace clangd {
31 
32 // We tend to generate digests for source codes in a lot of different places.
33 // This represents the type for those digests to prevent us hard coding details
34 // of hashing function at every place that needs to store this information.
35 using FileDigest = std::array<uint8_t, 8>;
36 FileDigest digest(StringRef Content);
37 Optional<FileDigest> digestFile(const SourceManager &SM, FileID FID);
38 
39 // This context variable controls the behavior of functions in this file
40 // that convert between LSP offsets and native clang byte offsets.
41 // If not set, defaults to UTF-16 for backwards-compatibility.
43 
44 // Counts the number of UTF-16 code units needed to represent a string (LSP
45 // specifies string lengths in UTF-16 code units).
46 // Use of UTF-16 may be overridden by kCurrentOffsetEncoding.
47 size_t lspLength(StringRef Code);
48 
49 /// Turn a [line, column] pair into an offset in Code.
50 ///
51 /// If P.character exceeds the line length, returns the offset at end-of-line.
52 /// (If !AllowColumnsBeyondLineLength, then returns an error instead).
53 /// If the line number is out of range, returns an error.
54 ///
55 /// The returned value is in the range [0, Code.size()].
56 llvm::Expected<size_t>
57 positionToOffset(llvm::StringRef Code, Position P,
58  bool AllowColumnsBeyondLineLength = true);
59 
60 /// Turn an offset in Code into a [line, column] pair.
61 /// The offset must be in range [0, Code.size()].
62 Position offsetToPosition(llvm::StringRef Code, size_t Offset);
63 
64 /// Turn a SourceLocation into a [line, column] pair.
65 /// FIXME: This should return an error if the location is invalid.
66 Position sourceLocToPosition(const SourceManager &SM, SourceLocation Loc);
67 
68 /// Returns the token range at \p TokLoc.
69 llvm::Optional<Range> getTokenRange(const SourceManager &SM,
70  const LangOptions &LangOpts,
71  SourceLocation TokLoc);
72 
73 /// Return the file location, corresponding to \p P. Note that one should take
74 /// care to avoid comparing the result with expansion locations.
75 llvm::Expected<SourceLocation> sourceLocationInMainFile(const SourceManager &SM,
76  Position P);
77 
78 /// Returns true iff \p Loc is inside the main file. This function handles
79 /// file & macro locations. For macro locations, returns true iff the macro is
80 /// being expanded inside the main file.
81 ///
82 /// The function is usually used to check whether a declaration is inside the
83 /// main file.
84 bool isInsideMainFile(SourceLocation Loc, const SourceManager &SM);
85 
86 /// Turns a token range into a half-open range and checks its correctness.
87 /// The resulting range will have only valid source location on both sides, both
88 /// of which are file locations.
89 ///
90 /// File locations always point to a particular offset in a file, i.e. they
91 /// never refer to a location inside a macro expansion. Turning locations from
92 /// macro expansions into file locations is ambiguous - one can use
93 /// SourceManager::{getExpansion|getFile|getSpelling}Loc. This function
94 /// calls SourceManager::getFileLoc on both ends of \p R to do the conversion.
95 ///
96 /// User input (e.g. cursor position) is expressed as a file location, so this
97 /// function can be viewed as a way to normalize the ranges used in the clang
98 /// AST so that they are comparable with ranges coming from the user input.
99 llvm::Optional<SourceRange> toHalfOpenFileRange(const SourceManager &Mgr,
100  const LangOptions &LangOpts,
101  SourceRange R);
102 
103 /// Returns true iff all of the following conditions hold:
104 /// - start and end locations are valid,
105 /// - start and end locations are file locations from the same file
106 /// (i.e. expansion locations are not taken into account).
107 /// - start offset <= end offset.
108 /// FIXME: introduce a type for source range with this invariant.
109 bool isValidFileRange(const SourceManager &Mgr, SourceRange R);
110 
111 /// Returns true iff \p L is contained in \p R.
112 /// EXPECTS: isValidFileRange(R) == true, L is a file location.
113 bool halfOpenRangeContains(const SourceManager &Mgr, SourceRange R,
114  SourceLocation L);
115 
116 /// Returns true iff \p L is contained in \p R or \p L is equal to the end point
117 /// of \p R.
118 /// EXPECTS: isValidFileRange(R) == true, L is a file location.
119 bool halfOpenRangeTouches(const SourceManager &Mgr, SourceRange R,
120  SourceLocation L);
121 
122 /// Returns the source code covered by the source range.
123 /// EXPECTS: isValidFileRange(R) == true.
124 llvm::StringRef toSourceCode(const SourceManager &SM, SourceRange R);
125 
126 // Converts a half-open clang source range to an LSP range.
127 // Note that clang also uses closed source ranges, which this can't handle!
128 Range halfOpenToRange(const SourceManager &SM, CharSourceRange R);
129 
130 // Converts an offset to a clang line/column (1-based, columns are bytes).
131 // The offset must be in range [0, Code.size()].
132 // Prefer to use SourceManager if one is available.
133 std::pair<size_t, size_t> offsetToClangLineColumn(llvm::StringRef Code,
134  size_t Offset);
135 
136 /// From "a::b::c", return {"a::b::", "c"}. Scope is empty if there's no
137 /// qualifier.
138 std::pair<llvm::StringRef, llvm::StringRef>
139 splitQualifiedName(llvm::StringRef QName);
140 
141 TextEdit replacementToEdit(StringRef Code, const tooling::Replacement &R);
142 
143 std::vector<TextEdit> replacementsToEdits(StringRef Code,
144  const tooling::Replacements &Repls);
145 
146 TextEdit toTextEdit(const FixItHint &FixIt, const SourceManager &M,
147  const LangOptions &L);
148 
149 /// Get the canonical path of \p F. This means:
150 ///
151 /// - Absolute path
152 /// - Symlinks resolved
153 /// - No "." or ".." component
154 /// - No duplicate or trailing directory separator
155 ///
156 /// This function should be used when paths need to be used outside the
157 /// component that generates them, so that paths are normalized as much as
158 /// possible.
159 llvm::Optional<std::string> getCanonicalPath(const FileEntry *F,
160  const SourceManager &SourceMgr);
161 
162 bool isRangeConsecutive(const Range &Left, const Range &Right);
163 
164 /// Choose the clang-format style we should apply to a certain file.
165 /// This will usually use FS to look for .clang-format directories.
166 /// FIXME: should we be caching the .clang-format file search?
167 /// This uses format::DefaultFormatStyle and format::DefaultFallbackStyle,
168 /// though the latter may have been overridden in main()!
169 format::FormatStyle getFormatStyleForFile(llvm::StringRef File,
170  llvm::StringRef Content,
171  llvm::vfs::FileSystem *FS);
172 
173 // Cleanup and format the given replacements.
174 llvm::Expected<tooling::Replacements>
175 cleanupAndFormat(StringRef Code, const tooling::Replacements &Replaces,
176  const format::FormatStyle &Style);
177 
178 /// Collects identifiers with counts in the source code.
179 llvm::StringMap<unsigned> collectIdentifiers(llvm::StringRef Content,
180  const format::FormatStyle &Style);
181 
182 /// Collects words from the source code.
183 /// Unlike collectIdentifiers:
184 /// - also finds text in comments
185 /// - splits text into words
186 /// - drops stopwords like "get" and "for"
187 llvm::StringSet<> collectWords(llvm::StringRef Content);
188 
189 /// Heuristically determine namespaces visible at a point, without parsing Code.
190 /// This considers using-directives and enclosing namespace-declarations that
191 /// are visible (and not obfuscated) in the file itself (not headers).
192 /// Code should be truncated at the point of interest.
193 ///
194 /// The returned vector is always non-empty.
195 /// - The first element is the namespace that encloses the point: a declaration
196 /// near the point would be within this namespace.
197 /// - The elements are the namespaces in scope at the point: an unqualified
198 /// lookup would search within these namespaces.
199 ///
200 /// Using directives are resolved against all enclosing scopes, but no other
201 /// namespace directives.
202 ///
203 /// example:
204 /// using namespace a;
205 /// namespace foo {
206 /// using namespace b;
207 ///
208 /// visibleNamespaces are {"foo::", "", "a::", "b::", "foo::b::"}, not "a::b::".
209 std::vector<std::string> visibleNamespaces(llvm::StringRef Code,
210  const format::FormatStyle &Style);
211 
212 struct DefinedMacro { // Value type carried by the llvm::Optional that locateMacroAt() returns.
213  llvm::StringRef Name; // Non-owning string view. NOTE(review): presumably the macro's identifier text; the backing storage is not visible here — confirm its lifetime.
214  const MacroInfo *Info; // Read-only pointer. NOTE(review): presumably the macro's definition record; ownership is not shown in this header — confirm.
215 };
216 // Gets the macro at a specified \p Loc.
217 llvm::Optional<DefinedMacro> locateMacroAt(SourceLocation Loc,
218  Preprocessor &PP);
219 
220 } // namespace clangd
221 } // namespace clang
222 #endif
SourceLocation Loc
'#' location in the include directive
llvm::StringSet collectWords(llvm::StringRef Content)
Collects words from the source code.
Definition: SourceCode.cpp:758
llvm::Expected< tooling::Replacements > cleanupAndFormat(StringRef Code, const tooling::Replacements &Replaces, const format::FormatStyle &Style)
Definition: SourceCode.cpp:544
size_t lspLength(llvm::StringRef Code)
Definition: SourceCode.cpp:117
const MacroInfo * Info
Definition: SourceCode.h:214
std::array< uint8_t, 8 > FileDigest
Definition: SourceCode.h:35
std::pair< StringRef, StringRef > splitQualifiedName(StringRef QName)
Definition: SourceCode.cpp:436
bool isInsideMainFile(SourceLocation Loc, const SourceManager &SM)
Returns true iff Loc is inside the main file.
Definition: SourceCode.cpp:372
bool halfOpenRangeContains(const SourceManager &Mgr, SourceRange R, SourceLocation L)
Returns true iff L is contained in R.
Definition: SourceCode.cpp:229
Values in a Context are indexed by typed keys.
Definition: Context.h:40
MockFSProvider FS
bool halfOpenRangeTouches(const SourceManager &Mgr, SourceRange R, SourceLocation L)
Returns true iff L is contained in R or L is equal to the end point of R.
Definition: SourceCode.cpp:244
llvm::Expected< SourceLocation > sourceLocationInMainFile(const SourceManager &SM, Position P)
Return the file location, corresponding to P.
Definition: SourceCode.cpp:408
std::vector< std::string > visibleNamespaces(llvm::StringRef Code, const format::FormatStyle &Style)
Heuristically determine namespaces visible at a point, without parsing Code.
Definition: SourceCode.cpp:708
bool isRangeConsecutive(const Range &Left, const Range &Right)
Definition: SourceCode.cpp:507
bool isValidFileRange(const SourceManager &Mgr, SourceRange R)
Returns true iff all of the following conditions hold:
Definition: SourceCode.cpp:214
std::pair< size_t, size_t > offsetToClangLineColumn(llvm::StringRef Code, size_t Offset)
Definition: SourceCode.cpp:426
TextEdit toTextEdit(const FixItHint &FixIt, const SourceManager &M, const LangOptions &L)
Definition: SourceCode.cpp:498
std::string QName
Position offsetToPosition(llvm::StringRef Code, size_t Offset)
Turn an offset in Code into a [line, column] pair.
Definition: SourceCode.cpp:174
llvm::Expected< size_t > positionToOffset(llvm::StringRef Code, Position P, bool AllowColumnsBeyondLineLength)
Turn a [line, column] pair into an offset in Code.
Definition: SourceCode.cpp:141
Key< OffsetEncoding > kCurrentOffsetEncoding
Definition: SourceCode.cpp:110
llvm::Optional< Range > getTokenRange(const SourceManager &SM, const LangOptions &LangOpts, SourceLocation TokLoc)
Returns the token range at TokLoc.
Definition: SourceCode.cpp:203
llvm::Optional< FileDigest > digestFile(const SourceManager &SM, FileID FID)
Definition: SourceCode.cpp:522
Position sourceLocToPosition(const SourceManager &SM, SourceLocation Loc)
Turn a SourceLocation into a [line, column] pair.
Definition: SourceCode.cpp:186
format::FormatStyle getFormatStyleForFile(llvm::StringRef File, llvm::StringRef Content, llvm::vfs::FileSystem *FS)
Choose the clang-format style we should apply to a certain file.
Definition: SourceCode.cpp:530
FileDigest digest(llvm::StringRef Content)
Definition: SourceCode.cpp:512
llvm::Optional< SourceRange > toHalfOpenFileRange(const SourceManager &SM, const LangOptions &LangOpts, SourceRange R)
Turns a token range into a half-open range and checks its correctness.
Definition: SourceCode.cpp:376
llvm::StringRef toSourceCode(const SourceManager &SM, SourceRange R)
Returns the source code covered by the source range.
Definition: SourceCode.cpp:397
size_t Offset
===-- Representation.cpp - ClangDoc Representation -----------*- C++ -*-===//
TextEdit replacementToEdit(llvm::StringRef Code, const tooling::Replacement &R)
Definition: SourceCode.cpp:443
llvm::Optional< std::string > getCanonicalPath(const FileEntry *F, const SourceManager &SourceMgr)
Get the canonical path of F.
Definition: SourceCode.cpp:459
std::vector< TextEdit > replacementsToEdits(llvm::StringRef Code, const tooling::Replacements &Repls)
Definition: SourceCode.cpp:451
llvm::Optional< FixItHint > FixIt
llvm::StringMap< unsigned > collectIdentifiers(llvm::StringRef Content, const format::FormatStyle &Style)
Collects identifiers with counts in the source code.
Definition: SourceCode.cpp:567
llvm::Optional< DefinedMacro > locateMacroAt(SourceLocation Loc, Preprocessor &PP)
Definition: SourceCode.cpp:796
Range halfOpenToRange(const SourceManager &SM, CharSourceRange R)
Definition: SourceCode.cpp:418
static cl::opt< std::string > FormatStyle("format-style", cl::desc(R"( Style for formatting code around applied fixes: - 'none' (default) turns off formatting - 'file' (literally 'file', not a placeholder) uses .clang-format file in the closest parent directory - '{ <json> }' specifies options inline, e.g. -format-style='{BasedOnStyle: llvm, IndentWidth: 8}' - 'llvm', 'google', 'webkit', 'mozilla' See clang-format documentation for the up-to-date information about formatting styles and options. This option overrides the 'FormatStyle` option in .clang-tidy file, if any. )"), cl::init("none"), cl::cat(ClangTidyCategory))