clang  9.0.0
TokenAnnotator.h
Go to the documentation of this file.
1 //===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements a token annotator, i.e. creates
11 /// \c AnnotatedTokens out of \c FormatTokens with required extra information.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
16 #define LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
17 
18 #include "UnwrappedLineParser.h"
19 #include "clang/Format/Format.h"
20 
21 namespace clang {
22 class SourceManager;
23 
24 namespace format {
25 
// NOTE(review): the listing numbers jump from 26 to 29, 31 and 35, so
// enumerators other than the two shown below may have been dropped from this
// view -- confirm against the original header before relying on this list.
26 enum LineType {
29  LT_ObjCDecl, // An @interface, @implementation, or @protocol line.
31  LT_ObjCProperty, // An @property line.
35 };
36 
38 public:
40  : First(Line.Tokens.front().Tok), Level(Line.Level),
48  assert(!Line.Tokens.empty());
49 
50  // Calculate Next and Previous for all tokens. Note that we must overwrite
51  // Next and Previous for every token, as previous formatting runs might have
52  // left them in a different state.
53  First->Previous = nullptr;
54  FormatToken *Current = First;
55  for (std::list<UnwrappedLineNode>::const_iterator I = ++Line.Tokens.begin(),
56  E = Line.Tokens.end();
57  I != E; ++I) {
58  const UnwrappedLineNode &Node = *I;
59  Current->Next = I->Tok;
60  I->Tok->Previous = Current;
61  Current = Current->Next;
62  Current->Children.clear();
63  for (const auto &Child : Node.Children) {
64  Children.push_back(new AnnotatedLine(Child));
65  Current->Children.push_back(Children.back());
66  }
67  }
68  Last = Current;
69  Last->Next = nullptr;
70  }
71 
73  for (unsigned i = 0, e = Children.size(); i != e; ++i) {
74  delete Children[i];
75  }
76  FormatToken *Current = First;
77  while (Current) {
78  Current->Children.clear();
79  Current->Role.reset();
80  Current = Current->Next;
81  }
82  }
83 
84  /// \c true if this line starts with the given tokens in order, ignoring
85  /// comments.
86  template <typename... Ts> bool startsWith(Ts... Tokens) const {
87  return First && First->startsSequence(Tokens...);
88  }
89 
90  /// \c true if this line ends with the given tokens in reversed order,
91  /// ignoring comments.
92  /// For example, given tokens [T1, T2, T3, ...], the function returns true if
93  /// this line is like "... T3 T2 T1".
94  template <typename... Ts> bool endsWith(Ts... Tokens) const {
95  return Last && Last->endsSequence(Tokens...);
96  }
97 
98  /// \c true if this line looks like a function definition instead of a
99  /// function declaration. Asserts MightBeFunctionDecl.
101  assert(MightBeFunctionDecl);
102  // Decide whether a stream of tokens ends a function Definition or a
103  // function Declaration. A line that ends with the ';' of foo(); is a
104  // Declaration; any other line is treated as a Definition. The line could
105  // also end with a comment:
106  // foo(); /* comment */
107  // or
108  // foo(); // comment
109  // or
110  // foo() // comment
111  // endsWith() ignores the comment.
112  return !endsWith(tok::semi);
113  }
114 
115  /// \c true if this line starts a namespace definition.
116  bool startsWithNamespace() const {
117  return startsWith(tok::kw_namespace) ||
118  startsWith(TT_NamespaceMacro) ||
119  startsWith(tok::kw_inline, tok::kw_namespace) ||
120  startsWith(tok::kw_export, tok::kw_namespace);
121  }
122 
125 
127 
129  unsigned Level;
136 
137  /// \c True if this line should be formatted, i.e. intersects directly or
138  /// indirectly with one of the input ranges.
139  bool Affected;
140 
141  /// \c True if the leading empty lines of this line intersect with one of the
142  /// input ranges.
144 
145  /// \c True if one of this line's children intersects with an input range.
147 
149 
150 private:
151  // Disallow copying.
152  AnnotatedLine(const AnnotatedLine &) = delete;
153  void operator=(const AnnotatedLine &) = delete;
154 };
155 
156 /// Determines extra information about the tokens comprising an
157 /// \c UnwrappedLine.
159 public:
161  : Style(Style), Keywords(Keywords) {}
162 
163  /// Adapts the indent levels of comment lines to the indent of the
164  /// subsequent line.
165  // FIXME: Can/should this be done in the UnwrappedLineParser?
166  void setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines);
167 
168  void annotate(AnnotatedLine &Line);
169  void calculateFormattingInformation(AnnotatedLine &Line);
170 
171 private:
172  /// Calculate the penalty for splitting before \c Tok.
173  unsigned splitPenalty(const AnnotatedLine &Line, const FormatToken &Tok,
174  bool InFunctionDecl);
175 
176  bool spaceRequiredBeforeParens(const FormatToken &Right) const;
177 
178  bool spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left,
179  const FormatToken &Right);
180 
181  bool spaceRequiredBefore(const AnnotatedLine &Line, const FormatToken &Right);
182 
183  bool mustBreakBefore(const AnnotatedLine &Line, const FormatToken &Right);
184 
185  bool canBreakBefore(const AnnotatedLine &Line, const FormatToken &Right);
186 
187  bool mustBreakForReturnType(const AnnotatedLine &Line) const;
188 
189  void printDebugInfo(const AnnotatedLine &Line);
190 
191  void calculateUnbreakableTailLengths(AnnotatedLine &Line);
192 
193  const FormatStyle &Style;
194 
195  const AdditionalKeywords &Keywords;
196 };
197 
198 } // end namespace format
199 } // end namespace clang
200 
201 #endif
bool endsSequence(A K1, Ts... Tokens) const
true if this token ends a sequence with the given tokens in order, following the Previous pointers...
Definition: FormatToken.h:348
SmallVector< UnwrappedLine, 0 > Children
bool endsWith(Ts... Tokens) const
true if this line ends with the given tokens in reversed order, ignoring comments.
Token Tok
The Token.
Definition: FormatToken.h:133
std::unique_ptr< TokenRole > Role
A token can have a special role that can carry extra information about the token's formatting...
Definition: FormatToken.h:209
static LLVM_ATTRIBUTE_UNUSED void printDebugInfo(const UnwrappedLine &Line, StringRef Prefix="")
bool LeadingEmptyLinesAffected
True if the leading empty lines of this line intersect with one of the input ranges.
FormatToken * Next
The next token in the unwrapped line.
Definition: FormatToken.h:298
long i
Definition: xmmintrin.h:1456
FormatToken * Previous
The previous token in the unwrapped line.
Definition: FormatToken.h:295
const FormatToken & Tok
bool ChildrenAffected
True if one of this line's children intersects with an input range.
An unwrapped line is a sequence of Token, that we would like to put on a single line if there was no ...
bool mightBeFunctionDefinition() const
true if this line looks like a function definition instead of a function declaration.
Determines extra information about the tokens comprising an UnwrappedLine.
std::list< UnwrappedLineNode > Tokens
The Tokens comprising this UnwrappedLine.
const AnnotatedLine * Line
SmallVector< AnnotatedLine *, 0 > Children
A wrapper around a Token storing information about the whitespace characters preceding it...
Definition: FormatToken.h:129
TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords)
bool Affected
True if this line should be formatted, i.e.
#define false
Definition: stdbool.h:17
Various functions to configurably format source code.
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
Definition: FormatToken.h:674
This file contains the declaration of the UnwrappedLineParser, which turns a stream of tokens into Un...
bool startsWith(Ts... Tokens) const
true if this line starts with the given tokens in order, ignoring comments.
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:49
ast_type_traits::DynTypedNode Node
Dataflow Directional Tag Classes.
AnnotatedLine(const UnwrappedLine &Line)
bool startsWithNamespace() const
true if this line starts a namespace definition.
SmallVector< AnnotatedLine *, 1 > Children
If this token starts a block, this contains all the unwrapped lines in it.
Definition: FormatToken.h:302
bool startsSequence(A K1, Ts... Tokens) const
true if this token starts a sequence with the given tokens in order, following the Next pointers...
Definition: FormatToken.h:341
const FormatStyle & Style