clang  5.0.0
ContinuationIndenter.h
Go to the documentation of this file.
1 //===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements an indenter that manages the indentation of
12 /// continuations.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
17 #define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
18 
19 #include "Encoding.h"
20 #include "FormatToken.h"
21 #include "clang/Format/Format.h"
22 #include "llvm/Support/Regex.h"
23 
24 namespace clang {
25 class SourceManager;
26 
27 namespace format {
28 
29 class AnnotatedLine;
30 struct FormatToken;
31 struct LineState;
32 struct ParenState;
33 class WhitespaceManager;
34 
36 public:
37  /// \brief Constructs a \c ContinuationIndenter to format \p Line starting in
38  /// column \p FirstIndent.
39  ContinuationIndenter(const FormatStyle &Style,
40  const AdditionalKeywords &Keywords,
41  const SourceManager &SourceMgr,
42  WhitespaceManager &Whitespaces,
43  encoding::Encoding Encoding,
44  bool BinPackInconclusiveFunctions);
45 
46  /// \brief Get the initial state, i.e. the state after placing \p Line's
47  /// first token at \p FirstIndent.
48  LineState getInitialState(unsigned FirstIndent, const AnnotatedLine *Line,
49  bool DryRun);
50 
51  // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
52  // better home.
53  /// \brief Returns \c true, if a line break after \p State is allowed.
54  bool canBreak(const LineState &State);
55 
56  /// \brief Returns \c true, if a line break after \p State is mandatory.
57  bool mustBreak(const LineState &State);
58 
59  /// \brief Appends the next token to \p State and updates information
60  /// necessary for indentation.
61  ///
62  /// Puts the token on the current line if \p Newline is \c false and adds a
63  /// line break and necessary indentation otherwise.
64  ///
65  /// If \p DryRun is \c false, also creates and stores the required
66  /// \c Replacement.
67  unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
68  unsigned ExtraSpaces = 0);
69 
70  /// \brief Get the column limit for this line. This is the style's column
71  /// limit, potentially reduced for preprocessor definitions.
72  unsigned getColumnLimit(const LineState &State) const;
73 
74 private:
75  /// \brief Mark the next token as consumed in \p State and modify its stacks
76  /// accordingly.
77  unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
78 
79  /// \brief Update 'State' according to the next token's fake left parentheses.
80  void moveStatePastFakeLParens(LineState &State, bool Newline);
81  /// \brief Update 'State' according to the next token's fake r_parens.
82  void moveStatePastFakeRParens(LineState &State);
83 
84  /// \brief Update 'State' according to the next token being one of "(<{[".
85  void moveStatePastScopeOpener(LineState &State, bool Newline);
86  /// \brief Update 'State' according to the next token being one of ")>}]".
87  void moveStatePastScopeCloser(LineState &State);
88  /// \brief Update 'State' with the next token opening a nested block.
89  void moveStateToNewBlock(LineState &State);
90 
91  /// \brief If the current token sticks out over the end of the line, break
92  /// it if possible.
93  ///
94  /// \returns An extra penalty if a token was broken, otherwise 0.
95  ///
96  /// The returned penalty will cover the cost of the additional line breaks and
97  /// column limit violation in all lines except for the last one. The penalty
98  /// for the column limit violation in the last line (and in single line
99  /// tokens) is handled in \c addNextStateToQueue.
100  unsigned breakProtrudingToken(const FormatToken &Current, LineState &State,
101  bool DryRun);
102 
103  /// \brief Appends the next token to \p State and updates information
104  /// necessary for indentation.
105  ///
106  /// Puts the token on the current line.
107  ///
108  /// If \p DryRun is \c false, also creates and stores the required
109  /// \c Replacement.
110  void addTokenOnCurrentLine(LineState &State, bool DryRun,
111  unsigned ExtraSpaces);
112 
113  /// \brief Appends the next token to \p State and updates information
114  /// necessary for indentation.
115  ///
116  /// Adds a line break and necessary indentation.
117  ///
118  /// If \p DryRun is \c false, also creates and stores the required
119  /// \c Replacement.
120  unsigned addTokenOnNewLine(LineState &State, bool DryRun);
121 
122  /// \brief Calculate the new column for a line wrap before the next token.
123  unsigned getNewLineColumn(const LineState &State);
124 
125  /// \brief Adds a multiline token to the \p State.
126  ///
127  /// \returns Extra penalty for the first line of the literal: last line is
128  /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
129  /// matter, as we don't change them.
130  unsigned addMultilineToken(const FormatToken &Current, LineState &State);
131 
132  /// \brief Returns \c true if the next token starts a multiline string
133  /// literal.
134  ///
135  /// This includes implicitly concatenated strings, strings that will be broken
136  /// by clang-format and string literals with escaped newlines.
137  bool nextIsMultilineString(const LineState &State);
138 
139  FormatStyle Style;
140  const AdditionalKeywords &Keywords;
141  const SourceManager &SourceMgr;
142  WhitespaceManager &Whitespaces;
143  encoding::Encoding Encoding;
144  bool BinPackInconclusiveFunctions;
145  llvm::Regex CommentPragmasRegex;
146 };
147 
148 struct ParenState {
149  ParenState(unsigned Indent, unsigned LastSpace, bool AvoidBinPacking,
150  bool NoLineBreak)
151  : Indent(Indent), LastSpace(LastSpace), NestedBlockIndent(Indent),
152  BreakBeforeClosingBrace(false), AvoidBinPacking(AvoidBinPacking),
153  BreakBeforeParameter(false), NoLineBreak(NoLineBreak),
158 
159  /// \brief The position to which a specific parenthesis level needs to be
160  /// indented.
161  unsigned Indent;
162 
163  /// \brief The position of the last space on each level.
164  ///
165  /// Used e.g. to break like:
166  /// functionCall(Parameter, otherCall(
167  /// OtherParameter));
168  unsigned LastSpace;
169 
170  /// \brief If a block relative to this parenthesis level gets wrapped, indent
171  /// it this much.
173 
174  /// \brief The position of the first "<<" operator encountered on each level.
175  ///
176  /// Used to align "<<" operators. 0 if no such operator has been encountered
177  /// on a level.
178  unsigned FirstLessLess = 0;
179 
180  /// \brief The column of a \c ? in a conditional expression.
181  unsigned QuestionColumn = 0;
182 
183  /// \brief The position of the colon in an ObjC method declaration/call.
184  unsigned ColonPos = 0;
185 
186  /// \brief The start of the most recent function in a builder-type call.
187  unsigned StartOfFunctionCall = 0;
188 
189  /// \brief Contains the start of array subscript expressions, so that they
190  /// can be aligned.
192 
193  /// \brief If a nested name specifier was broken over multiple lines, this
194  /// contains the start column of the second line. Otherwise 0.
196 
197  /// \brief If a call expression was broken over multiple lines, this
198  /// contains the start column of the second line. Otherwise 0.
199  unsigned CallContinuation = 0;
200 
201  /// \brief The column of the first variable name in a variable declaration.
202  ///
203  /// Used to align further variables if necessary.
204  unsigned VariablePos = 0;
205 
206  /// \brief Whether a newline needs to be inserted before the block's closing
207  /// brace.
208  ///
209  /// We only want to insert a newline before the closing brace if there also
210  /// was a newline after the beginning left brace.
212 
213  /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple
214  /// lines, in this context.
215  bool AvoidBinPacking : 1;
216 
217  /// \brief Break after the next comma (or all the commas in this context if
218  /// \c AvoidBinPacking is \c true).
220 
221  /// \brief Line breaking in this context would break a formatting rule.
222  bool NoLineBreak : 1;
223 
224  /// \brief Same as \c NoLineBreak, but is restricted until the end of the
225  /// operand (including the next ",").
227 
228  /// \brief True if the last binary operator on this level was wrapped to the
229  /// next line.
231 
232  /// \brief \c true if this \c ParenState already contains a line-break.
233  ///
234  /// The first line break in a certain \c ParenState causes extra penalty so
235  /// that clang-format prefers similar breaks, i.e. breaks in the same
236  /// parenthesis.
238 
239  /// \brief \c true if this \c ParenState contains multiple segments of a
240  /// builder-type call on one line.
242 
243  /// \brief \c true if the colons of the current ObjC method expression should
244  /// be aligned.
245  ///
246  /// Not considered for memoization as it will always have the same value at
247  /// the same token.
248  bool AlignColons : 1;
249 
250  /// \brief \c true if at least one selector name was found in the current
251  /// ObjC method expression.
252  ///
253  /// Not considered for memoization as it will always have the same value at
254  /// the same token.
256 
257  /// \brief \c true if there are multiple nested blocks inside these parens.
258  ///
259  /// Not considered for memoization as it will always have the same value at
260  /// the same token.
262 
263  /// \brief The start of a nested block (e.g. lambda introducer in C++ or
264  /// "function" in JavaScript) is not wrapped to a new line.
266 
267  bool operator<(const ParenState &Other) const {
268  if (Indent != Other.Indent)
269  return Indent < Other.Indent;
270  if (LastSpace != Other.LastSpace)
271  return LastSpace < Other.LastSpace;
273  return NestedBlockIndent < Other.NestedBlockIndent;
274  if (FirstLessLess != Other.FirstLessLess)
275  return FirstLessLess < Other.FirstLessLess;
278  if (QuestionColumn != Other.QuestionColumn)
279  return QuestionColumn < Other.QuestionColumn;
280  if (AvoidBinPacking != Other.AvoidBinPacking)
281  return AvoidBinPacking;
283  return BreakBeforeParameter;
284  if (NoLineBreak != Other.NoLineBreak)
285  return NoLineBreak;
287  return LastOperatorWrapped;
288  if (ColonPos != Other.ColonPos)
289  return ColonPos < Other.ColonPos;
294  if (CallContinuation != Other.CallContinuation)
295  return CallContinuation < Other.CallContinuation;
296  if (VariablePos != Other.VariablePos)
297  return VariablePos < Other.VariablePos;
299  return ContainsLineBreak;
303  return NestedBlockInlined;
304  return false;
305  }
306 };
307 
308 /// \brief The current state when indenting an unwrapped line.
309 ///
310 /// As the indenting tries different combinations this is copied by value.
311 struct LineState {
312  /// \brief The number of used columns in the current line.
313  unsigned Column;
314 
315  /// \brief The next token that needs to be formatted.
317 
318  /// \brief \c true if this line contains a continued for-loop section.
320 
321  /// \brief The \c NestingLevel at the start of this line.
323 
324  /// \brief The lowest \c NestingLevel on the current line.
326 
327  /// \brief The start column of the string literal, if we're in a string
328  /// literal sequence, 0 otherwise.
330 
331  /// \brief A stack keeping track of properties applying to parenthesis
332  /// levels.
333  std::vector<ParenState> Stack;
334 
335  /// \brief Ignore the stack of \c ParenStates for state comparison.
336  ///
337  /// In long and deeply nested unwrapped lines, the current algorithm can
338  /// be insufficient for finding the best formatting with a reasonable amount
339  /// of time and memory. Setting this flag will effectively lead to the
340  /// algorithm not analyzing some combinations. However, these combinations
341  /// rarely contain the optimal solution: In short, accepting a higher
342  /// penalty early would need to lead to different values in the \c
343  /// ParenState stack (in an otherwise identical state) and these different
344  /// values would need to lead to a significant amount of avoided penalty
345  /// later.
346  ///
347  /// FIXME: Come up with a better algorithm instead.
349 
350  /// \brief The indent of the first token.
351  unsigned FirstIndent;
352 
353  /// \brief The line that is being formatted.
354  ///
355  /// Does not need to be considered for memoization because it doesn't change.
357 
358  /// \brief Comparison operator to be able to use \c LineState in \c map.
359  bool operator<(const LineState &Other) const {
360  if (NextToken != Other.NextToken)
361  return NextToken < Other.NextToken;
362  if (Column != Other.Column)
363  return Column < Other.Column;
367  if (StartOfLineLevel != Other.StartOfLineLevel)
368  return StartOfLineLevel < Other.StartOfLineLevel;
370  return LowestLevelOnLine < Other.LowestLevelOnLine;
374  return false;
375  return Stack < Other.Stack;
376  }
377 };
378 
379 } // end namespace format
380 } // end namespace clang
381 
382 #endif
unsigned LowestLevelOnLine
The lowest NestingLevel on the current line.
bool ContainsLineBreak
true if this ParenState already contains a line-break.
unsigned VariablePos
The column of the first variable name in a variable declaration.
bool BreakBeforeClosingBrace
Whether a newline needs to be inserted before the block's closing brace.
unsigned CallContinuation
If a call expression was broken over multiple lines, this contains the start column of the second lin...
LineState State
Contains functions for text encoding manipulation.
bool AlignColons
true if the colons of the current ObjC method expression should be aligned.
unsigned Column
The number of used columns in the current line.
Manages the whitespaces around tokens and their replacements.
unsigned Indent
The position to which a specific parenthesis level needs to be indented.
bool HasMultipleNestedBlocks
true if there are multiple nested blocks inside these parens.
const AnnotatedLine * Line
The line that is being formatted.
bool NoLineBreakInOperand
Same as NoLineBreak, but is restricted until the end of the operand (including the next "...
bool operator<(const ParenState &Other) const
bool LineContainsContinuedForLoopSection
true if this line contains a continued for-loop section.
bool LastOperatorWrapped
True if the last binary operator on this level was wrapped to the next line.
bool BreakBeforeParameter
Break after the next comma (or all the commas in this context if AvoidBinPacking is true)...
bool ObjCSelectorNameFound
true if at least one selector name was found in the current ObjC method expression.
The current state when indenting an unwrapped line.
unsigned QuestionColumn
The column of a ? in a conditional expression.
unsigned StartOfArraySubscripts
Contains the start of array subscript expressions, so that they can be aligned.
A wrapper around a Token storing information about the whitespace characters preceding it...
Definition: FormatToken.h:119
unsigned NestedNameSpecifierContinuation
If a nested name specifier was broken over multiple lines, this contains the start column of the seco...
std::vector< ParenState > Stack
A stack keeping track of properties applying to parenthesis levels.
bool NoLineBreak
Line breaking in this context would break a formatting rule.
#define false
Definition: stdbool.h:33
AnnotatedLine & Line
Various functions to configurably format source code.
unsigned LastSpace
The position of the last space on each level.
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
Definition: FormatToken.h:621
bool IgnoreStackForComparison
Ignore the stack of ParenStates for state comparison.
unsigned addTokenToState(LineState &State, bool Newline, bool DryRun, unsigned ExtraSpaces=0)
Appends the next token to State and updates information necessary for indentation.
unsigned getColumnLimit(const LineState &State) const
Get the column limit for this line.
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:46
bool operator<(const LineState &Other) const
Comparison operator to be able to use LineState in map.
unsigned FirstIndent
The indent of the first token.
unsigned ColonPos
The position of the colon in an ObjC method declaration/call.
bool canBreak(const LineState &State)
Returns true, if a line break after State is allowed.
bool AvoidBinPacking
Avoid bin packing, i.e.
bool mustBreak(const LineState &State)
Returns true, if a line break after State is mandatory.
ContinuationIndenter(const FormatStyle &Style, const AdditionalKeywords &Keywords, const SourceManager &SourceMgr, WhitespaceManager &Whitespaces, encoding::Encoding Encoding, bool BinPackInconclusiveFunctions)
Constructs a ContinuationIndenter to format Line starting in column FirstIndent.
LineState getInitialState(unsigned FirstIndent, const AnnotatedLine *Line, bool DryRun)
Get the initial state, i.e.
bool ContainsUnwrappedBuilder
true if this ParenState contains multiple segments of a builder-type call on one line.
FormatToken * Current
unsigned NestedBlockIndent
If a block relative to this parenthesis level gets wrapped, indent it this much.
unsigned FirstLessLess
The position of the first "<<" operator encountered on each level.
unsigned StartOfLineLevel
The NestingLevel at the start of this line.
This file contains the declaration of the FormatToken, a wrapper around Token with additional informa...
ParenState(unsigned Indent, unsigned LastSpace, bool AvoidBinPacking, bool NoLineBreak)
unsigned StartOfStringLiteral
The start column of the string literal, if we're in a string literal sequence, 0 otherwise.
FormatToken * NextToken
The next token that needs to be formatted.
#define true
Definition: stdbool.h:32
unsigned StartOfFunctionCall
The start of the most recent function in a builder-type call.
This class handles loading and caching of source files into memory.