clang  5.0.0
TokenAnnotator.cpp
Go to the documentation of this file.
1 //===--- TokenAnnotator.cpp - Format C++ code -----------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements a token annotator, i.e. creates
12 /// \c AnnotatedTokens out of \c FormatTokens with required extra information.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "TokenAnnotator.h"
18 #include "llvm/ADT/SmallPtrSet.h"
19 #include "llvm/Support/Debug.h"
20 
21 #define DEBUG_TYPE "format-token-annotator"
22 
23 namespace clang {
24 namespace format {
25 
26 namespace {
27 
28 /// \brief A parser that gathers additional information about tokens.
29 ///
30 /// The \c TokenAnnotator tries to match parentheses and square brackets and
31 /// store the parenthesis levels. It also tries to resolve matching "<" and ">"
32 /// into template parameter lists.
33 class AnnotatingParser {
34 public:
35  AnnotatingParser(const FormatStyle &Style, AnnotatedLine &Line,
36  const AdditionalKeywords &Keywords)
37  : Style(Style), Line(Line), CurrentToken(Line.First), AutoFound(false),
38  Keywords(Keywords) {
39  Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false));
40  resetTokenMetadata(CurrentToken);
41  }
42 
43 private:
/// \brief Tries to parse the tokens following a '<' as an angle-bracketed
/// (template) list.
///
/// Expects \c CurrentToken to be the token right after the '<'. On success the
/// '<' and the matching '>' are linked through \c MatchingParen, the '>' is
/// typed \c TT_TemplateCloser and \c CurrentToken is left just past it.
///
/// \returns true if a matching '>' was found; false if the '<' is rejected up
/// front, if a token that cannot appear in the list is met, if
/// \c consumeToken() fails, or if the tokens run out.
44  bool parseAngle() {
45  if (!CurrentToken || !CurrentToken->Previous)
46  return false;
    // This '<' was already recorded in NonTemplateLess (consumeToken() adds a
    // '<' there after a failed parseAngle()), so do not try again.
47  if (NonTemplateLess.count(CurrentToken->Previous))
48  return false;
49 
50  const FormatToken& Previous = *CurrentToken->Previous;
51  if (Previous.Previous) {
    // A '<' directly after a literal is not treated as a template opener.
52  if (Previous.Previous->Tok.isLiteral())
53  return false;
    // Neither is a '<' after a ')' in a nested context, unless that ')' closes
    // a paren pair whose '(' is typed TT_OverloadedOperatorLParen.
54  if (Previous.Previous->is(tok::r_paren) && Contexts.size() > 1 &&
55  (!Previous.Previous->MatchingParen ||
56  !Previous.Previous->MatchingParen->is(TT_OverloadedOperatorLParen)))
57  return false;
58  }
59 
    // ParentBracket must be read before the new context is pushed below.
60  FormatToken *Left = CurrentToken->Previous;
61  Left->ParentBracket = Contexts.back().ContextKind;
62  ScopedContextCreator ContextCreator(*this, tok::less, 12);
63 
64  // If this angle is in the context of an expression, we need to be more
65  // hesitant to detect it as opening template parameters.
66  bool InExprContext = Contexts.back().IsExpression;
67 
68  Contexts.back().IsExpression = false;
69  // If there's a template keyword before the opening angle bracket, this is a
70  // template parameter, not an argument.
71  Contexts.back().InTemplateArgument =
72  Left->Previous && Left->Previous->Tok.isNot(tok::kw_template);
73 
    // Java: skip a '?' that directly follows the '<'.
74  if (Style.Language == FormatStyle::LK_Java &&
75  CurrentToken->is(tok::question))
76  next();
77 
78  while (CurrentToken) {
79  if (CurrentToken->is(tok::greater)) {
80  Left->MatchingParen = CurrentToken;
81  CurrentToken->MatchingParen = Left;
82  CurrentToken->Type = TT_TemplateCloser;
83  next();
84  return true;
85  }
86  if (CurrentToken->is(tok::question) &&
87  Style.Language == FormatStyle::LK_Java) {
88  next();
89  continue;
90  }
    // A closing bracket of another kind, or (outside proto languages) a ':' or
    // '?' while the surrounding context was an expression, ends the attempt.
91  if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace) ||
92  (CurrentToken->isOneOf(tok::colon, tok::question) && InExprContext &&
93  Style.Language != FormatStyle::LK_Proto &&
94  Style.Language != FormatStyle::LK_TextProto))
95  return false;
96  // If a && or || is found and interpreted as a binary operator, this set
97  // of angles is likely part of something like "a < b && c > d". If the
98  // angles are inside an expression, the ||/&& might also be a binary
99  // operator that was misinterpreted because we are parsing template
100  // parameters.
101  // FIXME: This is getting out of hand, write a decent parser.
102  if (CurrentToken->Previous->isOneOf(tok::pipepipe, tok::ampamp) &&
103  CurrentToken->Previous->is(TT_BinaryOperator) &&
104  Contexts[Contexts.size() - 2].IsExpression &&
105  !Line.startsWith(tok::kw_template))
106  return false;
107  updateParameterCount(Left, CurrentToken);
    // Proto: the token before a ':' (or before a '{' / '<' that does not follow
    // a ':') is typed as a selector name.
108  if (Style.Language == FormatStyle::LK_Proto) {
109  if (FormatToken *Previous = CurrentToken->getPreviousNonComment()) {
110  if (CurrentToken->is(tok::colon) ||
111  (CurrentToken->isOneOf(tok::l_brace, tok::less) &&
112  Previous->isNot(tok::colon)))
113  Previous->Type = TT_SelectorName;
114  }
115  }
116  if (!consumeToken())
117  return false;
118  }
119  return false;
120  }
121 
/// \brief Parses a parenthesized token sequence up to its matching ')'.
///
/// Expects \c CurrentToken to be the token right after the '('. Decides
/// whether the contents are treated as an expression, types the '(' in some
/// cases (ObjC block, attribute, function type, ObjC method expression), links
/// the two parens through \c MatchingParen and records a \c PackingKind on
/// the '('.
///
/// \param LookForDecls when true (passed by consumeToken() for the condition
/// of 'if' / 'while'), a '*', '&' or '&&' between two identifiers that is not
/// followed by '=' is typed as a binary operator; this is done at most once.
/// \returns true once the matching ')' has been consumed; false if
/// \c CurrentToken is null on entry, a ']' or '}' is met first,
/// \c consumeToken() fails, or the tokens run out.
122  bool parseParens(bool LookForDecls = false) {
123  if (!CurrentToken)
124  return false;
    // ParentBracket must be read before the new context is pushed below.
125  FormatToken *Left = CurrentToken->Previous;
126  Left->ParentBracket = Contexts.back().ContextKind;
127  ScopedContextCreator ContextCreator(*this, tok::l_paren, 1);
128 
129  // FIXME: This is a bit of a hack. Do better.
130  Contexts.back().ColonIsForRangeExpr =
131  Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr;
132 
133  bool StartsObjCMethodExpr = false;
134  if (CurrentToken->is(tok::caret)) {
135  // (^ can start a block type.
136  Left->Type = TT_ObjCBlockLParen;
137  } else if (FormatToken *MaybeSel = Left->Previous) {
138  // @selector( starts a selector.
139  if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Previous &&
140  MaybeSel->Previous->is(tok::at)) {
141  StartsObjCMethodExpr = true;
142  }
143  }
144 
    // Decide whether the contents of the parens are an expression. The first
    // matching branch wins, so the order of this chain matters.
145  if (Left->is(TT_OverloadedOperatorLParen)) {
146  Contexts.back().IsExpression = false;
147  } else if (Style.Language == FormatStyle::LK_JavaScript &&
148  (Line.startsWith(Keywords.kw_type, tok::identifier) ||
149  Line.startsWith(tok::kw_export, Keywords.kw_type,
150  tok::identifier))) {
151  // type X = (...);
152  // export type X = (...);
153  Contexts.back().IsExpression = false;
154  } else if (Left->Previous &&
155  (Left->Previous->isOneOf(tok::kw_static_assert, tok::kw_decltype,
156  tok::kw_if, tok::kw_while, tok::l_paren,
157  tok::comma) ||
158  Left->Previous->endsSequence(tok::kw_constexpr, tok::kw_if) ||
159  Left->Previous->is(TT_BinaryOperator))) {
160  // static_assert, if and while usually contain expressions.
161  Contexts.back().IsExpression = true;
162  } else if (Style.Language == FormatStyle::LK_JavaScript && Left->Previous &&
163  (Left->Previous->is(Keywords.kw_function) ||
164  (Left->Previous->endsSequence(tok::identifier,
165  Keywords.kw_function)))) {
166  // function(...) or function f(...)
167  Contexts.back().IsExpression = false;
168  } else if (Style.Language == FormatStyle::LK_JavaScript && Left->Previous &&
169  Left->Previous->is(TT_JsTypeColon)) {
170  // let x: (SomeType);
171  Contexts.back().IsExpression = false;
172  } else if (Left->Previous && Left->Previous->is(tok::r_square) &&
173  Left->Previous->MatchingParen &&
174  Left->Previous->MatchingParen->is(TT_LambdaLSquare)) {
175  // This is a parameter list of a lambda expression.
176  Contexts.back().IsExpression = false;
177  } else if (Line.InPPDirective &&
178  (!Left->Previous || !Left->Previous->is(tok::identifier))) {
179  Contexts.back().IsExpression = true;
180  } else if (Contexts[Contexts.size() - 2].CaretFound) {
181  // This is the parameter list of an ObjC block.
182  Contexts.back().IsExpression = false;
183  } else if (Left->Previous && Left->Previous->is(tok::kw___attribute)) {
184  Left->Type = TT_AttributeParen;
185  } else if (Left->Previous && Left->Previous->is(TT_ForEachMacro)) {
186  // The first argument to a foreach macro is a declaration.
187  Contexts.back().IsForEachMacro = true;
188  Contexts.back().IsExpression = false;
189  } else if (Left->Previous && Left->Previous->MatchingParen &&
190  Left->Previous->MatchingParen->is(TT_ObjCBlockLParen)) {
191  Contexts.back().IsExpression = false;
192  } else if (!Line.MustBeDeclaration && !Line.InPPDirective) {
193  bool IsForOrCatch =
194  Left->Previous && Left->Previous->isOneOf(tok::kw_for, tok::kw_catch);
195  Contexts.back().IsExpression = !IsForOrCatch;
196  }
197 
198  if (StartsObjCMethodExpr) {
199  Contexts.back().ColonIsObjCMethodExpr = true;
200  Left->Type = TT_ObjCMethodExpr;
201  }
202 
    // Contexts[size - 2] is the context that encloses these parens.
203  bool MightBeFunctionType = !Contexts[Contexts.size() - 2].IsExpression;
204  bool ProbablyFunctionType = CurrentToken->isOneOf(tok::star, tok::amp);
205  bool HasMultipleLines = false;
206  bool HasMultipleParametersOnALine = false;
207  bool MightBeObjCForRangeLoop =
208  Left->Previous && Left->Previous->is(tok::kw_for);
209  while (CurrentToken) {
210  // LookForDecls is set when "if (" has been seen. Check for
211  // 'identifier' '*' 'identifier' followed by not '=' -- this
212  // '*' has to be a binary operator but determineStarAmpUsage() will
213  // categorize it as an unary operator, so set the right type here.
214  if (LookForDecls && CurrentToken->Next) {
215  FormatToken *Prev = CurrentToken->getPreviousNonComment();
216  if (Prev) {
217  FormatToken *PrevPrev = Prev->getPreviousNonComment();
218  FormatToken *Next = CurrentToken->Next;
219  if (PrevPrev && PrevPrev->is(tok::identifier) &&
220  Prev->isOneOf(tok::star, tok::amp, tok::ampamp) &&
221  CurrentToken->is(tok::identifier) && Next->isNot(tok::equal)) {
222  Prev->Type = TT_BinaryOperator;
223  LookForDecls = false;
224  }
225  }
226  }
227 
228  if (CurrentToken->Previous->is(TT_PointerOrReference) &&
229  CurrentToken->Previous->Previous->isOneOf(tok::l_paren,
230  tok::coloncolon))
231  ProbablyFunctionType = true;
232  if (CurrentToken->is(tok::comma))
233  MightBeFunctionType = false;
234  if (CurrentToken->Previous->is(TT_BinaryOperator))
235  Contexts.back().IsExpression = true;
236  if (CurrentToken->is(tok::r_paren)) {
237  if (MightBeFunctionType && ProbablyFunctionType && CurrentToken->Next &&
238  (CurrentToken->Next->is(tok::l_paren) ||
239  (CurrentToken->Next->is(tok::l_square) && Line.MustBeDeclaration)))
240  Left->Type = TT_FunctionTypeLParen;
241  Left->MatchingParen = CurrentToken;
242  CurrentToken->MatchingParen = Left;
243 
244  if (CurrentToken->Next && CurrentToken->Next->is(tok::l_brace) &&
245  Left->Previous && Left->Previous->is(tok::l_paren)) {
246  // Detect the case where macros are used to generate lambdas or
247  // function bodies, e.g.:
248  // auto my_lambda = MACRO((Type *type, int i) { .. body .. });
249  for (FormatToken *Tok = Left; Tok != CurrentToken; Tok = Tok->Next) {
250  if (Tok->is(TT_BinaryOperator) &&
251  Tok->isOneOf(tok::star, tok::amp, tok::ampamp))
252  Tok->Type = TT_PointerOrReference;
253  }
254  }
255 
256  if (StartsObjCMethodExpr) {
257  CurrentToken->Type = TT_ObjCMethodExpr;
258  if (Contexts.back().FirstObjCSelectorName) {
259  Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
260  Contexts.back().LongestObjCSelectorName;
261  }
262  }
263 
    // The ')' takes over the attribute / Java annotation type of its '('.
264  if (Left->is(TT_AttributeParen))
265  CurrentToken->Type = TT_AttributeParen;
266  if (Left->Previous && Left->Previous->is(TT_JavaAnnotation))
267  CurrentToken->Type = TT_JavaAnnotation;
268  if (Left->Previous && Left->Previous->is(TT_LeadingJavaAnnotation))
269  CurrentToken->Type = TT_LeadingJavaAnnotation;
270 
271  if (!HasMultipleLines)
272  Left->PackingKind = PPK_Inconclusive;
273  else if (HasMultipleParametersOnALine)
274  Left->PackingKind = PPK_BinPacked;
275  else
276  Left->PackingKind = PPK_OnePerLine;
277 
278  next();
279  return true;
280  }
281  if (CurrentToken->isOneOf(tok::r_square, tok::r_brace))
282  return false;
283 
284  if (CurrentToken->is(tok::l_brace))
285  Left->Type = TT_Unknown; // Not TT_ObjCBlockLParen
286  if (CurrentToken->is(tok::comma) && CurrentToken->Next &&
287  !CurrentToken->Next->HasUnescapedNewline &&
288  !CurrentToken->Next->isTrailingComment())
289  HasMultipleParametersOnALine = true;
290  if ((CurrentToken->Previous->isOneOf(tok::kw_const, tok::kw_auto) ||
291  CurrentToken->Previous->isSimpleTypeSpecifier()) &&
292  !CurrentToken->is(tok::l_brace))
293  Contexts.back().IsExpression = false;
294  if (CurrentToken->isOneOf(tok::semi, tok::colon))
295  MightBeObjCForRangeLoop = false;
296  if (MightBeObjCForRangeLoop && CurrentToken->is(Keywords.kw_in))
297  CurrentToken->Type = TT_ObjCForIn;
298  // When we discover a 'new', we set CanBeExpression to 'false' in order to
299  // parse the type correctly. Reset that after a comma.
300  if (CurrentToken->is(tok::comma))
301  Contexts.back().CanBeExpression = true;
302 
    // consumeToken() advances CurrentToken, so keep the consumed token for
    // the parameter count update.
303  FormatToken *Tok = CurrentToken;
304  if (!consumeToken())
305  return false;
306  updateParameterCount(Left, Tok);
307  if (CurrentToken && CurrentToken->HasUnescapedNewline)
308  HasMultipleLines = true;
309  }
310  return false;
311  }
312 
/// \brief Parses a '[' ... ']' token sequence and classifies the '['.
///
/// Expects \c CurrentToken to be the token right after the '['. If the '[' is
/// still \c TT_Unknown it is typed as an ObjC method expression, a JavaScript
/// computed property name, a designated initializer, an array subscript or an
/// array initializer. On success both brackets are linked through
/// \c MatchingParen.
///
/// \returns true once the matching ']' has been consumed; false if
/// \c CurrentToken is null on entry, a ')' or '}' is met first,
/// \c consumeToken() fails, or the tokens run out.
313  bool parseSquare() {
314  if (!CurrentToken)
315  return false;
316 
317  // A '[' could be an index subscript (after an identifier or after
318  // ')' or ']'), it could be the start of an Objective-C method
319  // expression, or it could the start of an Objective-C array literal.
320  FormatToken *Left = CurrentToken->Previous;
321  Left->ParentBracket = Contexts.back().ContextKind;
322  FormatToken *Parent = Left->getPreviousNonComment();
323 
324  // Cases where '>' is followed by '['.
325  // In C++, this can happen either in array of templates (foo<int>[10])
326  // or when array is a nested template type (unique_ptr<type1<type2>[]>).
327  bool CppArrayTemplates =
328  Style.isCpp() && Parent &&
329  Parent->is(TT_TemplateCloser) &&
330  (Contexts.back().CanBeExpression || Contexts.back().IsExpression ||
331  Contexts.back().InTemplateArgument);
332 
333  bool StartsObjCMethodExpr =
334  !CppArrayTemplates && Style.isCpp() &&
335  Contexts.back().CanBeExpression && Left->isNot(TT_LambdaLSquare) &&
336  CurrentToken->isNot(tok::l_brace) &&
337  (!Parent ||
338  Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren,
339  tok::kw_return, tok::kw_throw) ||
340  Parent->isUnaryOperator() ||
341  Parent->isOneOf(TT_ObjCForIn, TT_CastRParen) ||
342  getBinOpPrecedence(Parent->Tok.getKind(), true, true) > prec::Unknown);
343  bool ColonFound = false;
344 
    // Only a '[' that has no type yet is classified here; the first matching
    // branch wins.
345  unsigned BindingIncrease = 1;
346  if (Left->is(TT_Unknown)) {
347  if (StartsObjCMethodExpr) {
348  Left->Type = TT_ObjCMethodExpr;
349  } else if (Style.Language == FormatStyle::LK_JavaScript && Parent &&
350  Contexts.back().ContextKind == tok::l_brace &&
351  Parent->isOneOf(tok::l_brace, tok::comma)) {
352  Left->Type = TT_JsComputedPropertyName;
353  } else if (Style.isCpp() && Contexts.back().ContextKind == tok::l_brace &&
354  Parent && Parent->isOneOf(tok::l_brace, tok::comma)) {
355  Left->Type = TT_DesignatedInitializerLSquare;
356  } else if (CurrentToken->is(tok::r_square) && Parent &&
357  Parent->is(TT_TemplateCloser)) {
358  Left->Type = TT_ArraySubscriptLSquare;
359  } else if (Style.Language == FormatStyle::LK_Proto ||
360  (!CppArrayTemplates && Parent &&
361  Parent->isOneOf(TT_BinaryOperator, TT_TemplateCloser, tok::at,
362  tok::comma, tok::l_paren, tok::l_square,
363  tok::question, tok::colon, tok::kw_return,
364  // Should only be relevant to JavaScript:
365  tok::kw_default))) {
366  Left->Type = TT_ArrayInitializerLSquare;
367  } else {
368  BindingIncrease = 10;
369  Left->Type = TT_ArraySubscriptLSquare;
370  }
371  }
372 
    // The context is pushed only now because BindingIncrease depends on the
    // classification above.
373  ScopedContextCreator ContextCreator(*this, tok::l_square, BindingIncrease);
374  Contexts.back().IsExpression = true;
375  Contexts.back().ColonIsObjCMethodExpr = StartsObjCMethodExpr;
376 
377  while (CurrentToken) {
378  if (CurrentToken->is(tok::r_square)) {
379  if (CurrentToken->Next && CurrentToken->Next->is(tok::l_paren) &&
380  Left->is(TT_ObjCMethodExpr)) {
381  // An ObjC method call is rarely followed by an open parenthesis.
382  // FIXME: Do we incorrectly label ":" with this?
383  StartsObjCMethodExpr = false;
384  Left->Type = TT_Unknown;
385  }
    // CurrentToken->Previous == Left means the brackets are empty ("[]").
386  if (StartsObjCMethodExpr && CurrentToken->Previous != Left) {
387  CurrentToken->Type = TT_ObjCMethodExpr;
388  // determineStarAmpUsage() thinks that '*' '[' is allocating an
389  // array of pointers, but if '[' starts a selector then '*' is a
390  // binary operator.
391  if (Parent && Parent->is(TT_PointerOrReference))
392  Parent->Type = TT_BinaryOperator;
393  }
394  Left->MatchingParen = CurrentToken;
395  CurrentToken->MatchingParen = Left;
396  if (Contexts.back().FirstObjCSelectorName) {
397  Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
398  Contexts.back().LongestObjCSelectorName;
    // With more than one block parameter the recorded longest selector name
    // length is reset to 0.
399  if (Left->BlockParameterCount > 1)
400  Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 0;
401  }
402  next();
403  return true;
404  }
405  if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace))
406  return false;
407  if (CurrentToken->is(tok::colon)) {
    // A ':' inside what was taken for a subscript / designated initializer
    // re-types the '[' as an ObjC method expression.
408  if (Left->isOneOf(TT_ArraySubscriptLSquare,
409  TT_DesignatedInitializerLSquare)) {
410  Left->Type = TT_ObjCMethodExpr;
411  StartsObjCMethodExpr = true;
412  Contexts.back().ColonIsObjCMethodExpr = true;
413  if (Parent && Parent->is(tok::r_paren))
414  Parent->Type = TT_CastRParen;
415  }
416  ColonFound = true;
417  }
    // A ',' before any ':' re-types a presumed ObjC method expression as an
    // array initializer.
418  if (CurrentToken->is(tok::comma) && Left->is(TT_ObjCMethodExpr) &&
419  !ColonFound)
420  Left->Type = TT_ArrayInitializerLSquare;
421  FormatToken *Tok = CurrentToken;
422  if (!consumeToken())
423  return false;
424  updateParameterCount(Left, Tok);
425  }
426  return false;
427  }
428 
429  bool parseBrace() {
430  if (CurrentToken) {
431  FormatToken *Left = CurrentToken->Previous;
432  Left->ParentBracket = Contexts.back().ContextKind;
433 
434  if (Contexts.back().CaretFound)
435  Left->Type = TT_ObjCBlockLBrace;
436  Contexts.back().CaretFound = false;
437 
438  ScopedContextCreator ContextCreator(*this, tok::l_brace, 1);
439  Contexts.back().ColonIsDictLiteral = true;
440  if (Left->BlockKind == BK_BracedInit)
441  Contexts.back().IsExpression = true;
442 
443  while (CurrentToken) {
444  if (CurrentToken->is(tok::r_brace)) {
445  Left->MatchingParen = CurrentToken;
446  CurrentToken->MatchingParen = Left;
447  next();
448  return true;
449  }
450  if (CurrentToken->isOneOf(tok::r_paren, tok::r_square))
451  return false;
452  updateParameterCount(Left, CurrentToken);
453  if (CurrentToken->isOneOf(tok::colon, tok::l_brace, tok::less)) {
454  FormatToken *Previous = CurrentToken->getPreviousNonComment();
455  if (((CurrentToken->is(tok::colon) &&
456  (!Contexts.back().ColonIsDictLiteral || !Style.isCpp())) ||
457  Style.Language == FormatStyle::LK_Proto ||
458  Style.Language == FormatStyle::LK_TextProto) &&
459  (Previous->Tok.getIdentifierInfo() ||
460  Previous->is(tok::string_literal)))
461  Previous->Type = TT_SelectorName;
462  if (CurrentToken->is(tok::colon) ||
463  Style.Language == FormatStyle::LK_JavaScript)
464  Left->Type = TT_DictLiteral;
465  }
466  if (CurrentToken->is(tok::comma) &&
467  Style.Language == FormatStyle::LK_JavaScript)
468  Left->Type = TT_DictLiteral;
469  if (!consumeToken())
470  return false;
471  }
472  }
473  return true;
474  }
475 
476  void updateParameterCount(FormatToken *Left, FormatToken *Current) {
477  if (Current->is(tok::l_brace) && Current->BlockKind == BK_Block)
478  ++Left->BlockParameterCount;
479  if (Current->is(tok::comma)) {
480  ++Left->ParameterCount;
481  if (!Left->Role)
482  Left->Role.reset(new CommaSeparatedList(Style));
483  Left->Role->CommaFound(Current);
484  } else if (Left->ParameterCount == 0 && Current->isNot(tok::comment)) {
485  Left->ParameterCount = 1;
486  }
487  }
488 
489  bool parseConditional() {
490  while (CurrentToken) {
491  if (CurrentToken->is(tok::colon)) {
492  CurrentToken->Type = TT_ConditionalExpr;
493  next();
494  return true;
495  }
496  if (!consumeToken())
497  return false;
498  }
499  return false;
500  }
501 
502  bool parseTemplateDeclaration() {
503  if (CurrentToken && CurrentToken->is(tok::less)) {
504  CurrentToken->Type = TT_TemplateOpener;
505  next();
506  if (!parseAngle())
507  return false;
508  if (CurrentToken)
509  CurrentToken->Previous->ClosesTemplateDeclaration = true;
510  return true;
511  }
512  return false;
513  }
514 
515  bool consumeToken() {
516  FormatToken *Tok = CurrentToken;
517  next();
518  switch (Tok->Tok.getKind()) {
519  case tok::plus:
520  case tok::minus:
521  if (!Tok->Previous && Line.MustBeDeclaration)
522  Tok->Type = TT_ObjCMethodSpecifier;
523  break;
524  case tok::colon:
525  if (!Tok->Previous)
526  return false;
527  // Colons from ?: are handled in parseConditional().
528  if (Style.Language == FormatStyle::LK_JavaScript) {
529  if (Contexts.back().ColonIsForRangeExpr || // colon in for loop
530  (Contexts.size() == 1 && // switch/case labels
531  !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) ||
532  Contexts.back().ContextKind == tok::l_paren || // function params
533  Contexts.back().ContextKind == tok::l_square || // array type
534  (Contexts.size() == 1 &&
535  Line.MustBeDeclaration)) { // method/property declaration
536  Tok->Type = TT_JsTypeColon;
537  break;
538  }
539  }
540  if (Contexts.back().ColonIsDictLiteral ||
541  Style.Language == FormatStyle::LK_Proto ||
542  Style.Language == FormatStyle::LK_TextProto) {
543  Tok->Type = TT_DictLiteral;
544  if (Style.Language == FormatStyle::LK_TextProto) {
545  if (FormatToken *Previous = Tok->getPreviousNonComment())
546  Previous->Type = TT_SelectorName;
547  }
548  } else if (Contexts.back().ColonIsObjCMethodExpr ||
549  Line.startsWith(TT_ObjCMethodSpecifier)) {
550  Tok->Type = TT_ObjCMethodExpr;
551  const FormatToken *BeforePrevious = Tok->Previous->Previous;
552  if (!BeforePrevious ||
553  !(BeforePrevious->is(TT_CastRParen) ||
554  (BeforePrevious->is(TT_ObjCMethodExpr) &&
555  BeforePrevious->is(tok::colon))) ||
556  BeforePrevious->is(tok::r_square) ||
557  Contexts.back().LongestObjCSelectorName == 0) {
558  Tok->Previous->Type = TT_SelectorName;
559  if (Tok->Previous->ColumnWidth >
560  Contexts.back().LongestObjCSelectorName)
561  Contexts.back().LongestObjCSelectorName =
562  Tok->Previous->ColumnWidth;
563  if (!Contexts.back().FirstObjCSelectorName)
564  Contexts.back().FirstObjCSelectorName = Tok->Previous;
565  }
566  } else if (Contexts.back().ColonIsForRangeExpr) {
567  Tok->Type = TT_RangeBasedForLoopColon;
568  } else if (CurrentToken && CurrentToken->is(tok::numeric_constant)) {
569  Tok->Type = TT_BitFieldColon;
570  } else if (Contexts.size() == 1 &&
571  !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) {
572  if (Tok->getPreviousNonComment()->isOneOf(tok::r_paren,
573  tok::kw_noexcept))
574  Tok->Type = TT_CtorInitializerColon;
575  else
576  Tok->Type = TT_InheritanceColon;
577  } else if (Tok->Previous->is(tok::identifier) && Tok->Next &&
578  Tok->Next->isOneOf(tok::r_paren, tok::comma)) {
579  // This handles a special macro in ObjC code where selectors including
580  // the colon are passed as macro arguments.
581  Tok->Type = TT_ObjCMethodExpr;
582  } else if (Contexts.back().ContextKind == tok::l_paren) {
583  Tok->Type = TT_InlineASMColon;
584  }
585  break;
586  case tok::pipe:
587  case tok::amp:
588  // | and & in declarations/type expressions represent union and
589  // intersection types, respectively.
590  if (Style.Language == FormatStyle::LK_JavaScript &&
591  !Contexts.back().IsExpression)
592  Tok->Type = TT_JsTypeOperator;
593  break;
594  case tok::kw_if:
595  case tok::kw_while:
596  if (Tok->is(tok::kw_if) && CurrentToken && CurrentToken->is(tok::kw_constexpr))
597  next();
598  if (CurrentToken && CurrentToken->is(tok::l_paren)) {
599  next();
600  if (!parseParens(/*LookForDecls=*/true))
601  return false;
602  }
603  break;
604  case tok::kw_for:
605  if (Style.Language == FormatStyle::LK_JavaScript) {
606  if (Tok->Previous && Tok->Previous->is(tok::period))
607  break;
608  // JS' for await ( ...
609  if (CurrentToken && CurrentToken->is(Keywords.kw_await))
610  next();
611  }
612  Contexts.back().ColonIsForRangeExpr = true;
613  next();
614  if (!parseParens())
615  return false;
616  break;
617  case tok::l_paren:
618  // When faced with 'operator()()', the kw_operator handler incorrectly
619  // marks the first l_paren as a OverloadedOperatorLParen. Here, we make
620  // the first two parens OverloadedOperators and the second l_paren an
621  // OverloadedOperatorLParen.
622  if (Tok->Previous &&
623  Tok->Previous->is(tok::r_paren) &&
624  Tok->Previous->MatchingParen &&
625  Tok->Previous->MatchingParen->is(TT_OverloadedOperatorLParen)) {
626  Tok->Previous->Type = TT_OverloadedOperator;
627  Tok->Previous->MatchingParen->Type = TT_OverloadedOperator;
628  Tok->Type = TT_OverloadedOperatorLParen;
629  }
630 
631  if (!parseParens())
632  return false;
633  if (Line.MustBeDeclaration && Contexts.size() == 1 &&
634  !Contexts.back().IsExpression && !Line.startsWith(TT_ObjCProperty) &&
635  (!Tok->Previous ||
636  !Tok->Previous->isOneOf(tok::kw_decltype, tok::kw___attribute,
637  TT_LeadingJavaAnnotation)))
638  Line.MightBeFunctionDecl = true;
639  break;
640  case tok::l_square:
641  if (!parseSquare())
642  return false;
643  break;
644  case tok::l_brace:
645  if (Style.Language == FormatStyle::LK_TextProto) {
646  FormatToken *Previous =Tok->getPreviousNonComment();
647  if (Previous && Previous->Type != TT_DictLiteral)
648  Previous->Type = TT_SelectorName;
649  }
650  if (!parseBrace())
651  return false;
652  break;
653  case tok::less:
654  if (parseAngle()) {
655  Tok->Type = TT_TemplateOpener;
656  if (Style.Language == FormatStyle::LK_TextProto) {
657  FormatToken *Previous = Tok->getPreviousNonComment();
658  if (Previous && Previous->Type != TT_DictLiteral)
659  Previous->Type = TT_SelectorName;
660  }
661  } else {
662  Tok->Type = TT_BinaryOperator;
663  NonTemplateLess.insert(Tok);
664  CurrentToken = Tok;
665  next();
666  }
667  break;
668  case tok::r_paren:
669  case tok::r_square:
670  return false;
671  case tok::r_brace:
672  // Lines can start with '}'.
673  if (Tok->Previous)
674  return false;
675  break;
676  case tok::greater:
677  Tok->Type = TT_BinaryOperator;
678  break;
679  case tok::kw_operator:
680  while (CurrentToken &&
681  !CurrentToken->isOneOf(tok::l_paren, tok::semi, tok::r_paren)) {
682  if (CurrentToken->isOneOf(tok::star, tok::amp))
683  CurrentToken->Type = TT_PointerOrReference;
684  consumeToken();
685  if (CurrentToken &&
686  CurrentToken->Previous->isOneOf(TT_BinaryOperator, tok::comma))
687  CurrentToken->Previous->Type = TT_OverloadedOperator;
688  }
689  if (CurrentToken) {
690  CurrentToken->Type = TT_OverloadedOperatorLParen;
691  if (CurrentToken->Previous->is(TT_BinaryOperator))
692  CurrentToken->Previous->Type = TT_OverloadedOperator;
693  }
694  break;
695  case tok::question:
696  if (Style.Language == FormatStyle::LK_JavaScript && Tok->Next &&
697  Tok->Next->isOneOf(tok::semi, tok::comma, tok::colon, tok::r_paren,
698  tok::r_brace)) {
699  // Question marks before semicolons, colons, etc. indicate optional
700  // types (fields, parameters), e.g.
701  // function(x?: string, y?) {...}
702  // class X { y?; }
703  Tok->Type = TT_JsTypeOptionalQuestion;
704  break;
705  }
706  // Declarations cannot be conditional expressions, this can only be part
707  // of a type declaration.
708  if (Line.MustBeDeclaration && !Contexts.back().IsExpression &&
709  Style.Language == FormatStyle::LK_JavaScript)
710  break;
711  parseConditional();
712  break;
713  case tok::kw_template:
714  parseTemplateDeclaration();
715  break;
716  case tok::comma:
717  if (Contexts.back().InCtorInitializer)
718  Tok->Type = TT_CtorInitializerComma;
719  else if (Contexts.back().InInheritanceList)
720  Tok->Type = TT_InheritanceComma;
721  else if (Contexts.back().FirstStartOfName &&
722  (Contexts.size() == 1 || Line.startsWith(tok::kw_for))) {
723  Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true;
724  Line.IsMultiVariableDeclStmt = true;
725  }
726  if (Contexts.back().IsForEachMacro)
727  Contexts.back().IsExpression = true;
728  break;
729  case tok::identifier:
730  if (Tok->isOneOf(Keywords.kw___has_include,
731  Keywords.kw___has_include_next)) {
732  parseHasInclude();
733  }
734  break;
735  default:
736  break;
737  }
738  return true;
739  }
740 
741  void parseIncludeDirective() {
742  if (CurrentToken && CurrentToken->is(tok::less)) {
743  next();
744  while (CurrentToken) {
745  // Mark tokens up to the trailing line comments as implicit string
746  // literals.
747  if (CurrentToken->isNot(tok::comment) &&
748  !CurrentToken->TokenText.startswith("//"))
749  CurrentToken->Type = TT_ImplicitStringLiteral;
750  next();
751  }
752  }
753  }
754 
755  void parseWarningOrError() {
756  next();
757  // We still want to format the whitespace left of the first token of the
758  // warning or error.
759  next();
760  while (CurrentToken) {
761  CurrentToken->Type = TT_ImplicitStringLiteral;
762  next();
763  }
764  }
765 
766  void parsePragma() {
767  next(); // Consume "pragma".
768  if (CurrentToken &&
769  CurrentToken->isOneOf(Keywords.kw_mark, Keywords.kw_option)) {
770  bool IsMark = CurrentToken->is(Keywords.kw_mark);
771  next(); // Consume "mark".
772  next(); // Consume first token (so we fix leading whitespace).
773  while (CurrentToken) {
774  if (IsMark || CurrentToken->Previous->is(TT_BinaryOperator))
775  CurrentToken->Type = TT_ImplicitStringLiteral;
776  next();
777  }
778  }
779  }
780 
781  void parseHasInclude() {
782  if (!CurrentToken || !CurrentToken->is(tok::l_paren))
783  return;
784  next(); // '('
785  parseIncludeDirective();
786  next(); // ')'
787  }
788 
789  LineType parsePreprocessorDirective() {
790  bool IsFirstToken = CurrentToken->IsFirst;
792  next();
793  if (!CurrentToken)
794  return Type;
795 
796  if (Style.Language == FormatStyle::LK_JavaScript && IsFirstToken) {
797  // JavaScript files can contain shebang lines of the form:
798  // #!/usr/bin/env node
799  // Treat these like C++ #include directives.
800  while (CurrentToken) {
801  // Tokens cannot be comments here.
802  CurrentToken->Type = TT_ImplicitStringLiteral;
803  next();
804  }
805  return LT_ImportStatement;
806  }
807 
808  if (CurrentToken->Tok.is(tok::numeric_constant)) {
809  CurrentToken->SpacesRequiredBefore = 1;
810  return Type;
811  }
812  // Hashes in the middle of a line can lead to any strange token
813  // sequence.
814  if (!CurrentToken->Tok.getIdentifierInfo())
815  return Type;
816  switch (CurrentToken->Tok.getIdentifierInfo()->getPPKeywordID()) {
817  case tok::pp_include:
818  case tok::pp_include_next:
819  case tok::pp_import:
820  next();
821  parseIncludeDirective();
822  Type = LT_ImportStatement;
823  break;
824  case tok::pp_error:
825  case tok::pp_warning:
826  parseWarningOrError();
827  break;
828  case tok::pp_pragma:
829  parsePragma();
830  break;
831  case tok::pp_if:
832  case tok::pp_elif:
833  Contexts.back().IsExpression = true;
834  parseLine();
835  break;
836  default:
837  break;
838  }
839  while (CurrentToken) {
840  FormatToken *Tok = CurrentToken;
841  next();
842  if (Tok->is(tok::l_paren))
843  parseParens();
844  else if (Tok->isOneOf(Keywords.kw___has_include,
845  Keywords.kw___has_include_next))
846  parseHasInclude();
847  }
848  return Type;
849  }
850 
851 public:
852  LineType parseLine() {
853  NonTemplateLess.clear();
854  if (CurrentToken->is(tok::hash))
855  return parsePreprocessorDirective();
856 
857  // Directly allow to 'import <string-literal>' to support protocol buffer
858  // definitions (code.google.com/p/protobuf) or missing "#" (either way we
859  // should not break the line).
860  IdentifierInfo *Info = CurrentToken->Tok.getIdentifierInfo();
861  if ((Style.Language == FormatStyle::LK_Java &&
862  CurrentToken->is(Keywords.kw_package)) ||
863  (Info && Info->getPPKeywordID() == tok::pp_import &&
864  CurrentToken->Next &&
865  CurrentToken->Next->isOneOf(tok::string_literal, tok::identifier,
866  tok::kw_static))) {
867  next();
868  parseIncludeDirective();
869  return LT_ImportStatement;
870  }
871 
872  // If this line starts and ends in '<' and '>', respectively, it is likely
873  // part of "#define <a/b.h>".
874  if (CurrentToken->is(tok::less) && Line.Last->is(tok::greater)) {
875  parseIncludeDirective();
876  return LT_ImportStatement;
877  }
878 
879  // In .proto files, top-level options are very similar to import statements
880  // and should not be line-wrapped.
881  if (Style.Language == FormatStyle::LK_Proto && Line.Level == 0 &&
882  CurrentToken->is(Keywords.kw_option)) {
883  next();
884  if (CurrentToken && CurrentToken->is(tok::identifier))
885  return LT_ImportStatement;
886  }
887 
888  bool KeywordVirtualFound = false;
889  bool ImportStatement = false;
890 
891  // import {...} from '...';
892  if (Style.Language == FormatStyle::LK_JavaScript &&
893  CurrentToken->is(Keywords.kw_import))
894  ImportStatement = true;
895 
896  while (CurrentToken) {
897  if (CurrentToken->is(tok::kw_virtual))
898  KeywordVirtualFound = true;
899  if (Style.Language == FormatStyle::LK_JavaScript) {
900  // export {...} from '...';
901  // An export followed by "from 'some string';" is a re-export from
902  // another module identified by a URI and is treated as a
903  // LT_ImportStatement (i.e. prevent wraps on it for long URIs).
904  // Just "export {...};" or "export class ..." should not be treated as
905  // an import in this sense.
906  if (Line.First->is(tok::kw_export) &&
907  CurrentToken->is(Keywords.kw_from) && CurrentToken->Next &&
908  CurrentToken->Next->isStringLiteral())
909  ImportStatement = true;
910  if (isClosureImportStatement(*CurrentToken))
911  ImportStatement = true;
912  }
913  if (!consumeToken())
914  return LT_Invalid;
915  }
916  if (KeywordVirtualFound)
917  return LT_VirtualFunctionDecl;
918  if (ImportStatement)
919  return LT_ImportStatement;
920 
921  if (Line.startsWith(TT_ObjCMethodSpecifier)) {
922  if (Contexts.back().FirstObjCSelectorName)
923  Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
924  Contexts.back().LongestObjCSelectorName;
925  return LT_ObjCMethodDecl;
926  }
927 
928  return LT_Other;
929  }
930 
931 private:
932  bool isClosureImportStatement(const FormatToken &Tok) {
933  // FIXME: Closure-library specific stuff should not be hard-coded but be
934  // configurable.
935  return Tok.TokenText == "goog" && Tok.Next && Tok.Next->is(tok::period) &&
936  Tok.Next->Next && (Tok.Next->Next->TokenText == "module" ||
937  Tok.Next->Next->TokenText == "provide" ||
938  Tok.Next->Next->TokenText == "require" ||
939  Tok.Next->Next->TokenText == "setTestOnly" ||
940  Tok.Next->Next->TokenText == "forwardDeclare") &&
941  Tok.Next->Next->Next && Tok.Next->Next->Next->is(tok::l_paren);
942  }
943 
944  void resetTokenMetadata(FormatToken *Token) {
945  if (!Token)
946  return;
947 
948  // Reset token type in case we have already looked at it and then
949  // recovered from an error (e.g. failure to find the matching >).
950  if (!CurrentToken->isOneOf(TT_LambdaLSquare, TT_ForEachMacro,
951  TT_FunctionLBrace, TT_ImplicitStringLiteral,
952  TT_InlineASMBrace, TT_JsFatArrow, TT_LambdaArrow,
953  TT_OverloadedOperator, TT_RegexLiteral,
954  TT_TemplateString, TT_ObjCStringLiteral))
955  CurrentToken->Type = TT_Unknown;
956  CurrentToken->Role.reset();
957  CurrentToken->MatchingParen = nullptr;
958  CurrentToken->FakeLParens.clear();
959  CurrentToken->FakeRParens = 0;
960  }
961 
962  void next() {
963  if (CurrentToken) {
964  CurrentToken->NestingLevel = Contexts.size() - 1;
965  CurrentToken->BindingStrength = Contexts.back().BindingStrength;
966  modifyContext(*CurrentToken);
967  determineTokenType(*CurrentToken);
968  CurrentToken = CurrentToken->Next;
969  }
970 
971  resetTokenMetadata(CurrentToken);
972  }
973 
974  /// \brief A struct to hold information valid in a specific context, e.g.
975  /// a pair of parentheses.
      ///
      /// NOTE(review): in this listing the first line of the constructor and
      /// the declarations of some members that the constructor initializes are
      /// not visible.
976  struct Context {
978  bool IsExpression)
979  : ContextKind(ContextKind), BindingStrength(BindingStrength),
980  IsExpression(IsExpression) {}
981 
      // Copied onto every token consumed while this context is innermost (see
      // next()); ScopedContextCreator derives it from the enclosing context's
      // value plus an increase.
983  unsigned BindingStrength;
986  bool ColonIsForRangeExpr = false;
987  bool ColonIsDictLiteral = false;
988  bool ColonIsObjCMethodExpr = false;
      // parseLine() stores LongestObjCSelectorName on this token for lines
      // that start with TT_ObjCMethodSpecifier.
989  FormatToken *FirstObjCSelectorName = nullptr;
      // Set by determineTokenType() to the token it tags as TT_StartOfName.
990  FormatToken *FirstStartOfName = nullptr;
      // Cleared by modifyContext() on 'new'; combined with IsExpression when
      // '*', '&' and '&&' are classified.
991  bool CanBeExpression = true;
      // Forwarded to determineStarAmpUsage() when classifying '*', '&', '&&'.
992  bool InTemplateArgument = false;
      // Set by modifyContext() for the token after a TT_CtorInitializerColon.
993  bool InCtorInitializer = false;
      // Set by modifyContext() for the token after a TT_InheritanceColon.
994  bool InInheritanceList = false;
      // Set by determineTokenType() once a unary '^' has been seen.
995  bool CaretFound = false;
996  bool IsForEachMacro = false;
997  };
998 
999  /// \brief Puts a new \c Context onto the stack \c Contexts for the lifetime
1000  /// of each instance.
1001  struct ScopedContextCreator {
1002  AnnotatingParser &P;
1003 
1004  ScopedContextCreator(AnnotatingParser &P, tok::TokenKind ContextKind,
1005  unsigned Increase)
1006  : P(P) {
1007  P.Contexts.push_back(Context(ContextKind,
1008  P.Contexts.back().BindingStrength + Increase,
1009  P.Contexts.back().IsExpression));
1010  }
1011 
1012  ~ScopedContextCreator() { P.Contexts.pop_back(); }
1013  };
1014 
1015  void modifyContext(const FormatToken &Current) {
1016  if (Current.getPrecedence() == prec::Assignment &&
1017  !Line.First->isOneOf(tok::kw_template, tok::kw_using, tok::kw_return) &&
1018  // Type aliases use `type X = ...;` in TypeScript and can be exported
1019  // using `export type ...`.
1020  !(Style.Language == FormatStyle::LK_JavaScript &&
1021  (Line.startsWith(Keywords.kw_type, tok::identifier) ||
1022  Line.startsWith(tok::kw_export, Keywords.kw_type,
1023  tok::identifier))) &&
1024  (!Current.Previous || Current.Previous->isNot(tok::kw_operator))) {
1025  Contexts.back().IsExpression = true;
1026  if (!Line.startsWith(TT_UnaryOperator)) {
1027  for (FormatToken *Previous = Current.Previous;
1028  Previous && Previous->Previous &&
1029  !Previous->Previous->isOneOf(tok::comma, tok::semi);
1030  Previous = Previous->Previous) {
1031  if (Previous->isOneOf(tok::r_square, tok::r_paren)) {
1032  Previous = Previous->MatchingParen;
1033  if (!Previous)
1034  break;
1035  }
1036  if (Previous->opensScope())
1037  break;
1038  if (Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator) &&
1039  Previous->isOneOf(tok::star, tok::amp, tok::ampamp) &&
1040  Previous->Previous && Previous->Previous->isNot(tok::equal))
1041  Previous->Type = TT_PointerOrReference;
1042  }
1043  }
1044  } else if (Current.is(tok::lessless) &&
1045  (!Current.Previous || !Current.Previous->is(tok::kw_operator))) {
1046  Contexts.back().IsExpression = true;
1047  } else if (Current.isOneOf(tok::kw_return, tok::kw_throw)) {
1048  Contexts.back().IsExpression = true;
1049  } else if (Current.is(TT_TrailingReturnArrow)) {
1050  Contexts.back().IsExpression = false;
1051  } else if (Current.is(TT_LambdaArrow) || Current.is(Keywords.kw_assert)) {
1052  Contexts.back().IsExpression = Style.Language == FormatStyle::LK_Java;
1053  } else if (Current.Previous &&
1054  Current.Previous->is(TT_CtorInitializerColon)) {
1055  Contexts.back().IsExpression = true;
1056  Contexts.back().InCtorInitializer = true;
1057  } else if (Current.Previous &&
1058  Current.Previous->is(TT_InheritanceColon)) {
1059  Contexts.back().InInheritanceList = true;
1060  } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) {
1061  for (FormatToken *Previous = Current.Previous;
1062  Previous && Previous->isOneOf(tok::star, tok::amp);
1063  Previous = Previous->Previous)
1064  Previous->Type = TT_PointerOrReference;
1065  if (Line.MustBeDeclaration && !Contexts.front().InCtorInitializer)
1066  Contexts.back().IsExpression = false;
1067  } else if (Current.is(tok::kw_new)) {
1068  Contexts.back().CanBeExpression = false;
1069  } else if (Current.isOneOf(tok::semi, tok::exclaim)) {
1070  // This should be the condition or increment in a for-loop.
1071  Contexts.back().IsExpression = true;
1072  }
1073  }
1074 
/// \brief Assigns a token type to \p Current unless it already has one.
///
/// Tokens whose type is not TT_Unknown are left untouched. After the
/// JavaScript-only non-null-assertion check, the rules form one if/else-if
/// chain, so the first matching rule decides. Apart from setting
/// \c Current.Type, some rules record state: AutoFound,
/// Contexts.back().FirstStartOfName and Contexts.back().CaretFound. An
/// unterminated block comment has its token kind changed to tok::unknown.
1075  void determineTokenType(FormatToken &Current) {
1076  if (!Current.is(TT_Unknown))
1077  // The token type is already known.
1078  return;
1079 
1080  if (Style.Language == FormatStyle::LK_JavaScript) {
      // In JavaScript/TypeScript a '!' that follows an operand, or that
      // precedes a binary operator or 'as', is a non-null assertion.
1081  if (Current.is(tok::exclaim)) {
1082  if (Current.Previous &&
1083  (Current.Previous->isOneOf(tok::identifier, tok::kw_namespace,
1084  tok::r_paren, tok::r_square,
1085  tok::r_brace) ||
1086  Current.Previous->Tok.isLiteral())) {
1087  Current.Type = TT_JsNonNullAssertion;
1088  return;
1089  }
1090  if (Current.Next &&
1091  Current.Next->isOneOf(TT_BinaryOperator, Keywords.kw_as)) {
1092  Current.Type = TT_JsNonNullAssertion;
1093  return;
1094  }
1095  }
1096  }
1097 
1098  // Line.MightBeFunctionDecl can only be true after the parentheses of a
1099  // function declaration have been found. In this case, 'Current' is a
1100  // trailing token of this declaration and thus cannot be a name.
      // (This explains the MightBeFunctionDecl test in the isStartOfName()
      // branch below; the 'instanceof' branch comes first.)
1101  if (Current.is(Keywords.kw_instanceof)) {
1102  Current.Type = TT_BinaryOperator;
1103  } else if (isStartOfName(Current) &&
1104  (!Line.MightBeFunctionDecl || Current.NestingLevel != 0)) {
1105  Contexts.back().FirstStartOfName = &Current;
1106  Current.Type = TT_StartOfName;
1107  } else if (Current.isOneOf(tok::kw_auto, tok::kw___auto_type)) {
      // Remembered so that a later '->' can be tagged as a trailing return
      // arrow; the token itself keeps TT_Unknown.
1108  AutoFound = true;
1109  } else if (Current.is(tok::arrow) &&
1110  Style.Language == FormatStyle::LK_Java) {
1111  Current.Type = TT_LambdaArrow;
1112  } else if (Current.is(tok::arrow) && AutoFound && Line.MustBeDeclaration &&
1113  Current.NestingLevel == 0) {
1114  Current.Type = TT_TrailingReturnArrow;
1115  } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) {
1116  Current.Type =
1117  determineStarAmpUsage(Current, Contexts.back().CanBeExpression &&
1118  Contexts.back().IsExpression,
1119  Contexts.back().InTemplateArgument);
1120  } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) {
1121  Current.Type = determinePlusMinusCaretUsage(Current);
1122  if (Current.is(TT_UnaryOperator) && Current.is(tok::caret))
1123  Contexts.back().CaretFound = true;
1124  } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) {
1125  Current.Type = determineIncrementUsage(Current);
1126  } else if (Current.isOneOf(tok::exclaim, tok::tilde)) {
1127  Current.Type = TT_UnaryOperator;
1128  } else if (Current.is(tok::question)) {
1129  if (Style.Language == FormatStyle::LK_JavaScript &&
1130  Line.MustBeDeclaration && !Contexts.back().IsExpression) {
1131  // In JavaScript, `interface X { foo?(): bar; }` is an optional method
1132  // on the interface, not a ternary expression.
1133  Current.Type = TT_JsTypeOptionalQuestion;
1134  } else {
1135  Current.Type = TT_ConditionalExpr;
1136  }
1137  } else if (Current.isBinaryOperator() &&
1138  (!Current.Previous || Current.Previous->isNot(tok::l_square))) {
1139  Current.Type = TT_BinaryOperator;
1140  } else if (Current.is(tok::comment)) {
1141  if (Current.TokenText.startswith("/*")) {
1142  if (Current.TokenText.endswith("*/"))
1143  Current.Type = TT_BlockComment;
1144  else
1145  // The lexer has for some reason determined a comment here. But we
1146  // cannot really handle it, if it isn't properly terminated.
1147  Current.Tok.setKind(tok::unknown);
1148  } else {
1149  Current.Type = TT_LineComment;
1150  }
1151  } else if (Current.is(tok::r_paren)) {
1152  if (rParenEndsCast(Current))
1153  Current.Type = TT_CastRParen;
      // The check below may overwrite TT_CastRParen: an all-uppercase
      // identifier directly before the parentheses, at the start of the line
      // or right after a template declaration, marks a function annotation.
1154  if (Current.MatchingParen && Current.Next &&
1155  !Current.Next->isBinaryOperator() &&
1156  !Current.Next->isOneOf(tok::semi, tok::colon, tok::l_brace,
1157  tok::comma, tok::period, tok::arrow,
1158  tok::coloncolon))
1159  if (FormatToken *AfterParen = Current.MatchingParen->Next) {
1160  // Make sure this isn't the return type of an Obj-C block declaration
1161  if (AfterParen->Tok.isNot(tok::caret)) {
1162  if (FormatToken *BeforeParen = Current.MatchingParen->Previous)
1163  if (BeforeParen->is(tok::identifier) &&
1164  BeforeParen->TokenText == BeforeParen->TokenText.upper() &&
1165  (!BeforeParen->Previous ||
1166  BeforeParen->Previous->ClosesTemplateDeclaration))
1167  Current.Type = TT_FunctionAnnotationRParen;
1168  }
1169  }
1170  } else if (Current.is(tok::at) && Current.Next &&
1171  Style.Language != FormatStyle::LK_JavaScript &&
1172  Style.Language != FormatStyle::LK_Java) {
1173  // In Java & JavaScript, "@..." is a decorator or annotation. In ObjC, it
1174  // marks declarations and properties that need special formatting.
1175  switch (Current.Next->Tok.getObjCKeywordID()) {
1176  case tok::objc_interface:
1177  case tok::objc_implementation:
1178  case tok::objc_protocol:
1179  Current.Type = TT_ObjCDecl;
1180  break;
1181  case tok::objc_property:
1182  Current.Type = TT_ObjCProperty;
1183  break;
1184  default:
1185  break;
1186  }
1187  } else if (Current.is(tok::period)) {
1188  FormatToken *PreviousNoComment = Current.getPreviousNonComment();
1189  if (PreviousNoComment &&
1190  PreviousNoComment->isOneOf(tok::comma, tok::l_brace))
1191  Current.Type = TT_DesignatedInitializerPeriod;
1192  else if (Style.Language == FormatStyle::LK_Java && Current.Previous &&
1193  Current.Previous->isOneOf(TT_JavaAnnotation,
1194  TT_LeadingJavaAnnotation)) {
      // A '.' inside a qualified annotation name inherits the annotation
      // type of the preceding token.
1195  Current.Type = Current.Previous->Type;
1196  }
1197  } else if (Current.isOneOf(tok::identifier, tok::kw_const) &&
1198  Current.Previous &&
1199  !Current.Previous->isOneOf(tok::equal, tok::at) &&
1200  Line.MightBeFunctionDecl && Contexts.size() == 1) {
1201  // Line.MightBeFunctionDecl can only be true after the parentheses of a
1202  // function declaration have been found.
1203  Current.Type = TT_TrailingAnnotation;
1204  } else if ((Style.Language == FormatStyle::LK_Java ||
1205  Style.Language == FormatStyle::LK_JavaScript) &&
1206  Current.Previous) {
1207  if (Current.Previous->is(tok::at) &&
1208  Current.isNot(Keywords.kw_interface)) {
      // "@Name": leading if nothing but other leading annotations (and
      // comments) precede the '@', a regular annotation otherwise.
1209  const FormatToken &AtToken = *Current.Previous;
1210  const FormatToken *Previous = AtToken.getPreviousNonComment();
1211  if (!Previous || Previous->is(TT_LeadingJavaAnnotation))
1212  Current.Type = TT_LeadingJavaAnnotation;
1213  else
1214  Current.Type = TT_JavaAnnotation;
1215  } else if (Current.Previous->is(tok::period) &&
1216  Current.Previous->isOneOf(TT_JavaAnnotation,
1217  TT_LeadingJavaAnnotation)) {
1218  Current.Type = Current.Previous->Type;
1219  }
1220  }
1221  }
1222 
1223  /// \brief Take a guess at whether \p Tok starts a name of a function or
1224  /// variable declaration.
1225  ///
1226  /// This is a heuristic based on whether \p Tok is an identifier following
1227  /// something that is likely a type.
1228  bool isStartOfName(const FormatToken &Tok) {
1229  if (Tok.isNot(tok::identifier) || !Tok.Previous)
1230  return false;
1231 
1232  if (Tok.Previous->isOneOf(TT_LeadingJavaAnnotation, Keywords.kw_instanceof,
1233  Keywords.kw_as))
1234  return false;
1235  if (Style.Language == FormatStyle::LK_JavaScript &&
1236  Tok.Previous->is(Keywords.kw_in))
1237  return false;
1238 
1239  // Skip "const" as it does not have an influence on whether this is a name.
1240  FormatToken *PreviousNotConst = Tok.getPreviousNonComment();
1241  while (PreviousNotConst && PreviousNotConst->is(tok::kw_const))
1242  PreviousNotConst = PreviousNotConst->getPreviousNonComment();
1243 
1244  if (!PreviousNotConst)
1245  return false;
1246 
1247  bool IsPPKeyword = PreviousNotConst->is(tok::identifier) &&
1248  PreviousNotConst->Previous &&
1249  PreviousNotConst->Previous->is(tok::hash);
1250 
1251  if (PreviousNotConst->is(TT_TemplateCloser))
1252  return PreviousNotConst && PreviousNotConst->MatchingParen &&
1253  PreviousNotConst->MatchingParen->Previous &&
1254  PreviousNotConst->MatchingParen->Previous->isNot(tok::period) &&
1255  PreviousNotConst->MatchingParen->Previous->isNot(tok::kw_template);
1256 
1257  if (PreviousNotConst->is(tok::r_paren) && PreviousNotConst->MatchingParen &&
1258  PreviousNotConst->MatchingParen->Previous &&
1259  PreviousNotConst->MatchingParen->Previous->is(tok::kw_decltype))
1260  return true;
1261 
1262  return (!IsPPKeyword &&
1263  PreviousNotConst->isOneOf(tok::identifier, tok::kw_auto)) ||
1264  PreviousNotConst->is(TT_PointerOrReference) ||
1265  PreviousNotConst->isSimpleTypeSpecifier();
1266  }
1267 
1268  /// \brief Determine whether ')' is ending a cast.
      ///
      /// \p Tok is the closing parenthesis to examine. The result is a
      /// heuristic: the checks below are applied in order and the first one
      /// that reaches a verdict decides. Always false for languages other
      /// than C++ and Java, for empty or unmatched parentheses, and when
      /// nothing follows the parenthesis.
1269  bool rParenEndsCast(const FormatToken &Tok) {
1270  // C-style casts are only used in C++ and Java.
1271  if (!Style.isCpp() && Style.Language != FormatStyle::LK_Java)
1272  return false;
1273 
1274  // Empty parens aren't casts and there are no casts at the end of the line.
1275  if (Tok.Previous == Tok.MatchingParen || !Tok.Next || !Tok.MatchingParen)
1276  return false;
1277 
1278  FormatToken *LeftOfParens = Tok.MatchingParen->getPreviousNonComment();
1279  if (LeftOfParens) {
1280  // If there is a closing parenthesis left of the current parentheses,
1281  // look past it as these might be chained casts.
1282  if (LeftOfParens->is(tok::r_paren)) {
1283  if (!LeftOfParens->MatchingParen ||
1284  !LeftOfParens->MatchingParen->Previous)
1285  return false;
1286  LeftOfParens = LeftOfParens->MatchingParen->Previous;
1287  }
1288 
1289  // If there is an identifier (or with a few exceptions a keyword) right
1290  // before the parentheses, this is unlikely to be a cast.
1291  if (LeftOfParens->Tok.getIdentifierInfo() &&
1292  !LeftOfParens->isOneOf(Keywords.kw_in, tok::kw_return, tok::kw_case,
1293  tok::kw_delete))
1294  return false;
1295 
1296  // Certain other tokens right before the parentheses are also signals that
1297  // this cannot be a cast.
1298  if (LeftOfParens->isOneOf(tok::at, tok::r_square, TT_OverloadedOperator,
1299  TT_TemplateCloser, tok::ellipsis))
1300  return false;
1301  }
1302 
      // "(...) ?" is the condition of a conditional expression, not a cast.
1303  if (Tok.Next->is(tok::question))
1304  return false;
1305 
1306  // As Java has no function types, a "(" after the ")" likely means that this
1307  // is a cast.
1308  if (Style.Language == FormatStyle::LK_Java && Tok.Next->is(tok::l_paren))
1309  return true;
1310 
1311  // If a (non-string) literal follows, this is likely a cast.
1312  if (Tok.Next->isNot(tok::string_literal) &&
1313  (Tok.Next->Tok.isLiteral() ||
1314  Tok.Next->isOneOf(tok::kw_sizeof, tok::kw_alignof)))
1315  return true;
1316 
1317  // Heuristically try to determine whether the parentheses contain a type.
1318  bool ParensAreType =
1319  !Tok.Previous ||
1320  Tok.Previous->isOneOf(TT_PointerOrReference, TT_TemplateCloser) ||
1321  Tok.Previous->isSimpleTypeSpecifier();
1322  bool ParensCouldEndDecl =
1323  Tok.Next->isOneOf(tok::equal, tok::semi, tok::l_brace, tok::greater);
1324  if (ParensAreType && !ParensCouldEndDecl)
1325  return true;
1326 
1327  // At this point, we heuristically assume that there are no casts at the
1328  // start of the line. We assume that we have found most cases where there
1329  // are by the logic above, e.g. "(void)x;".
1330  if (!LeftOfParens)
1331  return false;
1332 
1333  // Certain token types inside the parentheses mean that this can't be a
1334  // cast.
1335  for (const FormatToken *Token = Tok.MatchingParen->Next; Token != &Tok;
1336  Token = Token->Next)
1337  if (Token->is(TT_BinaryOperator))
1338  return false;
1339 
1340  // If the following token is an identifier or 'this', this is a cast. All
1341  // cases where this can be something else are handled above.
1342  if (Tok.Next->isOneOf(tok::identifier, tok::kw_this))
1343  return true;
1344 
      // The remaining check looks two tokens ahead, so both must exist.
1345  if (!Tok.Next->Next)
1346  return false;
1347 
1348  // If the next token after the parenthesis is a unary operator, assume
1349  // that this is cast, unless there are unexpected tokens inside the
1350  // parenthesis.
1351  bool NextIsUnary =
1352  Tok.Next->isUnaryOperator() || Tok.Next->isOneOf(tok::amp, tok::star);
1353  if (!NextIsUnary || Tok.Next->is(tok::plus) ||
1354  !Tok.Next->Next->isOneOf(tok::identifier, tok::numeric_constant))
1355  return false;
1356  // Search for unexpected tokens.
      // Only 'const', identifiers and '::' may appear between the parentheses.
1357  for (FormatToken *Prev = Tok.Previous; Prev != Tok.MatchingParen;
1358  Prev = Prev->Previous) {
1359  if (!Prev->isOneOf(tok::kw_const, tok::identifier, tok::coloncolon))
1360  return false;
1361  }
1362  return true;
1363  }
1364 
1365  /// \brief Return the type of the given token assuming it is * or &.
1366  TokenType determineStarAmpUsage(const FormatToken &Tok, bool IsExpression,
1367  bool InTemplateArgument) {
1368  if (Style.Language == FormatStyle::LK_JavaScript)
1369  return TT_BinaryOperator;
1370 
1371  const FormatToken *PrevToken = Tok.getPreviousNonComment();
1372  if (!PrevToken)
1373  return TT_UnaryOperator;
1374 
1375  const FormatToken *NextToken = Tok.getNextNonComment();
1376  if (!NextToken ||
1377  NextToken->isOneOf(tok::arrow, tok::equal, tok::kw_const) ||
1378  (NextToken->is(tok::l_brace) && !NextToken->getNextNonComment()))
1379  return TT_PointerOrReference;
1380 
1381  if (PrevToken->is(tok::coloncolon))
1382  return TT_PointerOrReference;
1383 
1384  if (PrevToken->isOneOf(tok::l_paren, tok::l_square, tok::l_brace,
1385  tok::comma, tok::semi, tok::kw_return, tok::colon,
1386  tok::equal, tok::kw_delete, tok::kw_sizeof,
1387  tok::kw_throw) ||
1388  PrevToken->isOneOf(TT_BinaryOperator, TT_ConditionalExpr,
1389  TT_UnaryOperator, TT_CastRParen))
1390  return TT_UnaryOperator;
1391 
1392  if (NextToken->is(tok::l_square) && NextToken->isNot(TT_LambdaLSquare))
1393  return TT_PointerOrReference;
1394  if (NextToken->is(tok::kw_operator) && !IsExpression)
1395  return TT_PointerOrReference;
1396  if (NextToken->isOneOf(tok::comma, tok::semi))
1397  return TT_PointerOrReference;
1398 
1399  if (PrevToken->is(tok::r_paren) && PrevToken->MatchingParen &&
1400  PrevToken->MatchingParen->Previous &&
1401  PrevToken->MatchingParen->Previous->isOneOf(tok::kw_typeof,
1402  tok::kw_decltype))
1403  return TT_PointerOrReference;
1404 
1405  if (PrevToken->Tok.isLiteral() ||
1406  PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::kw_true,
1407  tok::kw_false, tok::r_brace) ||
1408  NextToken->Tok.isLiteral() ||
1409  NextToken->isOneOf(tok::kw_true, tok::kw_false) ||
1410  NextToken->isUnaryOperator() ||
1411  // If we know we're in a template argument, there are no named
1412  // declarations. Thus, having an identifier on the right-hand side
1413  // indicates a binary operator.
1414  (InTemplateArgument && NextToken->Tok.isAnyIdentifier()))
1415  return TT_BinaryOperator;
1416 
1417  // "&&(" is quite unlikely to be two successive unary "&".
1418  if (Tok.is(tok::ampamp) && NextToken && NextToken->is(tok::l_paren))
1419  return TT_BinaryOperator;
1420 
1421  // This catches some cases where evaluation order is used as control flow:
1422  // aaa && aaa->f();
1423  const FormatToken *NextNextToken = NextToken->getNextNonComment();
1424  if (NextNextToken && NextNextToken->is(tok::arrow))
1425  return TT_BinaryOperator;
1426 
1427  // It is very unlikely that we are going to find a pointer or reference type
1428  // definition on the RHS of an assignment.
1429  if (IsExpression && !Contexts.back().CaretFound)
1430  return TT_BinaryOperator;
1431 
1432  return TT_PointerOrReference;
1433  }
1434 
1435  TokenType determinePlusMinusCaretUsage(const FormatToken &Tok) {
1436  const FormatToken *PrevToken = Tok.getPreviousNonComment();
1437  if (!PrevToken)
1438  return TT_UnaryOperator;
1439 
1440  if (PrevToken->isOneOf(TT_CastRParen, TT_UnaryOperator) &&
1441  !PrevToken->is(tok::exclaim))
1442  // There aren't any trailing unary operators except for TypeScript's
1443  // non-null operator (!). Thus, this must be squence of leading operators.
1444  return TT_UnaryOperator;
1445 
1446  // Use heuristics to recognize unary operators.
1447  if (PrevToken->isOneOf(tok::equal, tok::l_paren, tok::comma, tok::l_square,
1448  tok::question, tok::colon, tok::kw_return,
1449  tok::kw_case, tok::at, tok::l_brace))
1450  return TT_UnaryOperator;
1451 
1452  // There can't be two consecutive binary operators.
1453  if (PrevToken->is(TT_BinaryOperator))
1454  return TT_UnaryOperator;
1455 
1456  // Fall back to marking the token as binary operator.
1457  return TT_BinaryOperator;
1458  }
1459 
1460  /// \brief Determine whether ++/-- are pre- or post-increments/-decrements.
1461  TokenType determineIncrementUsage(const FormatToken &Tok) {
1462  const FormatToken *PrevToken = Tok.getPreviousNonComment();
1463  if (!PrevToken || PrevToken->is(TT_CastRParen))
1464  return TT_UnaryOperator;
1465  if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier))
1466  return TT_TrailingUnaryOperator;
1467 
1468  return TT_UnaryOperator;
1469  }
1470 
1471  SmallVector<Context, 8> Contexts;
1472 
1473  const FormatStyle &Style;
1474  AnnotatedLine &Line;
1475  FormatToken *CurrentToken;
1477  const AdditionalKeywords &Keywords;
1478 
1479  // Set of "<" tokens that do not open a template parameter list. If parseAngle
1480  // determines that a specific token can't be a template opener, it will make
1481  // the same decision irrespective of the decisions for tokens leading up to
1482  // it. Store this information to prevent this from causing exponential runtime.
1483  llvm::SmallPtrSet<FormatToken *, 16> NonTemplateLess;
1484 };
1485 
// Pseudo precedence levels used by ExpressionParser in addition to the
// prec::Level values. Both rank above prec::PointerToMember; "." and "->"
// rank above unary operators.
1486 static const int PrecedenceUnaryOperator = prec::PointerToMember + 1;
1487 static const int PrecedenceArrowAndPeriod = prec::PointerToMember + 2;
1488 
1489 /// \brief Parses binary expressions by inserting fake parentheses based on
1490 /// operator precedence.
     ///
     /// The parser walks the tokens of one line starting at \c Line.First. It
     /// records its findings on the tokens themselves: FakeLParens,
     /// FakeRParens, StartsBinaryExpression, EndsBinaryExpression,
     /// NextOperator and OperatorIndex.
1491 class ExpressionParser {
1492 public:
1493  ExpressionParser(const FormatStyle &Style, const AdditionalKeywords &Keywords,
1494  AnnotatedLine &Line)
1495  : Style(Style), Keywords(Keywords), Current(Line.First) {}
1496 
1497  /// \brief Parse expressions with the given operator precedence.
      ///
      /// Recurses with \p Precedence + 1 to consume tighter-binding operators
      /// first. prec::Conditional and PrecedenceUnaryOperator are delegated to
      /// parseConditionalExpr() and parseUnaryOperator(). Returns immediately
      /// when there is no current token or \p Precedence exceeds
      /// PrecedenceArrowAndPeriod.
1498  void parse(int Precedence = 0) {
1499  // Skip 'return' and ObjC selector colons as they are not part of a binary
1500  // expression.
1501  while (Current && (Current->is(tok::kw_return) ||
1502  (Current->is(tok::colon) &&
1503  Current->isOneOf(TT_ObjCMethodExpr, TT_DictLiteral))))
1504  next();
1505 
1506  if (!Current || Precedence > PrecedenceArrowAndPeriod)
1507  return;
1508 
1509  // Conditional expressions need to be parsed separately for proper nesting.
1510  if (Precedence == prec::Conditional) {
1511  parseConditionalExpr();
1512  return;
1513  }
1514 
1515  // Parse unary operators, which all have a higher precedence than binary
1516  // operators.
1517  if (Precedence == PrecedenceUnaryOperator) {
1518  parseUnaryOperator();
1519  return;
1520  }
1521 
      // Start: first token of the operand sequence at this precedence level.
      // LatestOperator: most recent operator found at exactly this level.
      // OperatorIndex: running index assigned to those operators.
1522  FormatToken *Start = Current;
1523  FormatToken *LatestOperator = nullptr;
1524  unsigned OperatorIndex = 0;
1525 
1526  while (Current) {
1527  // Consume operators with higher precedence.
1528  parse(Precedence + 1);
1529 
1530  int CurrentPrecedence = getCurrentPrecedence();
1531 
      // A selector name at this level starts a new operand sequence; close
      // the previous one first if it contained an operator.
1532  if (Current && Current->is(TT_SelectorName) &&
1533  Precedence == CurrentPrecedence) {
1534  if (LatestOperator)
1535  addFakeParenthesis(Start, prec::Level(Precedence));
1536  Start = Current;
1537  }
1538 
1539  // At the end of the line or when an operator with lower precedence is
1540  // found, insert fake parenthesis and return.
1541  if (!Current ||
1542  (Current->closesScope() &&
1543  (Current->MatchingParen || Current->is(TT_TemplateString))) ||
1544  (CurrentPrecedence != -1 && CurrentPrecedence < Precedence) ||
1545  (CurrentPrecedence == prec::Conditional &&
1546  Precedence == prec::Assignment && Current->is(tok::colon))) {
1547  break;
1548  }
1549 
1550  // Consume scopes: (), [], <> and {}
1551  if (Current->opensScope()) {
1552  // A fragment of a JavaScript template string can look like '}..${' and
1553  // thus close a scope and open a new one at the same time.
1554  while (Current && (!Current->closesScope() || Current->opensScope())) {
1555  next();
1556  parse();
1557  }
1558  next();
1559  } else {
1560  // Operator found.
1561  if (CurrentPrecedence == Precedence) {
      // Chain the operators of this level together and number them.
1562  if (LatestOperator)
1563  LatestOperator->NextOperator = Current;
1564  LatestOperator = Current;
1565  Current->OperatorIndex = OperatorIndex;
1566  ++OperatorIndex;
1567  }
1568  next(/*SkipPastLeadingComments=*/Precedence > 0);
1569  }
1570  }
1571 
1572  if (LatestOperator && (Current || Precedence > 0)) {
1573  // LatestOperator->LastOperator = true;
1574  if (Precedence == PrecedenceArrowAndPeriod) {
1575  // Call expressions don't have a binary operator precedence.
1576  addFakeParenthesis(Start, prec::Unknown);
1577  } else {
1578  addFakeParenthesis(Start, prec::Level(Precedence));
1579  }
1580  }
1581  }
1582 
1583 private:
1584  /// \brief Gets the precedence (+1) of the given token for binary operators
1585  /// and other tokens that we treat like binary operators.
      ///
      /// Returns -1 when there is no current token or when none of the rules
      /// below applies to it.
1586  int getCurrentPrecedence() {
1587  if (Current) {
1588  const FormatToken *NextNonComment = Current->getNextNonComment();
1589  if (Current->is(TT_ConditionalExpr))
1590  return prec::Conditional;
1591  if (NextNonComment && Current->is(TT_SelectorName) &&
1592  (NextNonComment->is(TT_DictLiteral) ||
1593  ((Style.Language == FormatStyle::LK_Proto ||
1594  Style.Language == FormatStyle::LK_TextProto) &&
1595  NextNonComment->is(tok::less))))
1596  return prec::Assignment;
1597  if (Current->is(TT_JsComputedPropertyName))
1598  return prec::Assignment;
1599  if (Current->is(TT_LambdaArrow))
1600  return prec::Comma;
1601  if (Current->is(TT_JsFatArrow))
1602  return prec::Assignment;
1603  if (Current->isOneOf(tok::semi, TT_InlineASMColon, TT_SelectorName) ||
1604  (Current->is(tok::comment) && NextNonComment &&
1605  NextNonComment->is(TT_SelectorName)))
1606  return 0;
1607  if (Current->is(TT_RangeBasedForLoopColon))
1608  return prec::Comma;
1609  if ((Style.Language == FormatStyle::LK_Java ||
1610  Style.Language == FormatStyle::LK_JavaScript) &&
1611  Current->is(Keywords.kw_instanceof))
1612  return prec::Relational;
1613  if (Style.Language == FormatStyle::LK_JavaScript &&
1614  Current->isOneOf(Keywords.kw_in, Keywords.kw_as))
1615  return prec::Relational;
1616  if (Current->is(TT_BinaryOperator) || Current->is(tok::comma))
1617  return Current->getPrecedence();
1618  if (Current->isOneOf(tok::period, tok::arrow))
1619  return PrecedenceArrowAndPeriod;
1620  if ((Style.Language == FormatStyle::LK_Java ||
1621  Style.Language == FormatStyle::LK_JavaScript) &&
1622  Current->isOneOf(Keywords.kw_extends, Keywords.kw_implements,
1623  Keywords.kw_throws))
1624  return 0;
1625  }
1626  return -1;
1627  }
1628 
      /// \brief Records a fake opening parenthesis of \p Precedence on
      /// \p Start and, if there is a current token, a matching fake closing
      /// parenthesis on the token before it (trailing comments are skipped
      /// backwards). For precedences above prec::Unknown the two tokens are
      /// also marked as starting/ending a binary expression.
1629  void addFakeParenthesis(FormatToken *Start, prec::Level Precedence) {
1630  Start->FakeLParens.push_back(Precedence);
1631  if (Precedence > prec::Unknown)
1632  Start->StartsBinaryExpression = true;
1633  if (Current) {
1634  FormatToken *Previous = Current->Previous;
1635  while (Previous->is(tok::comment) && Previous->Previous)
1636  Previous = Previous->Previous;
1637  ++Previous->FakeRParens;
1638  if (Precedence > prec::Unknown)
1639  Previous->EndsBinaryExpression = true;
1640  }
1641  }
1642 
1643  /// \brief Parse unary operator expressions and surround them with fake
1644  /// parentheses if appropriate.
1645  void parseUnaryOperator() {
      // Not a unary operator: continue with the tightest-binding level.
1646  if (!Current || Current->isNot(TT_UnaryOperator)) {
1647  parse(PrecedenceArrowAndPeriod);
1648  return;
1649  }
1650 
1651  FormatToken *Start = Current;
1652  next();
1653  parseUnaryOperator();
1654 
1655  // The actual precedence doesn't matter.
1656  addFakeParenthesis(Start, prec::Unknown);
1657  }
1658 
      /// \brief Parses "cond ? a : b". Fake parentheses are only added when
      /// both the '?' and the token typed TT_ConditionalExpr that follows the
      /// first branch are found.
1659  void parseConditionalExpr() {
1660  while (Current && Current->isTrailingComment()) {
1661  next();
1662  }
1663  FormatToken *Start = Current;
1664  parse(prec::LogicalOr);
1665  if (!Current || !Current->is(tok::question))
1666  return;
1667  next();
1668  parse(prec::Assignment);
1669  if (!Current || Current->isNot(TT_ConditionalExpr))
1670  return;
1671  next();
1672  parse(prec::Assignment);
1673  addFakeParenthesis(Start, prec::Conditional);
1674  }
1675 
      /// \brief Advances to the next token and then skips trailing comments.
      /// With \p SkipPastLeadingComments set to false, a trailing comment that
      /// has newlines before it is not skipped.
1676  void next(bool SkipPastLeadingComments = true) {
1677  if (Current)
1678  Current = Current->Next;
1679  while (Current &&
1680  (Current->NewlinesBefore == 0 || SkipPastLeadingComments) &&
1681  Current->isTrailingComment())
1682  Current = Current->Next;
1683  }
1684 
1685  const FormatStyle &Style;
1686  const AdditionalKeywords &Keywords;
      // Token currently being looked at; null once the line is exhausted.
1687  FormatToken *Current;
1688 };
1689 
1690 } // end anonymous namespace
1691 
1694  const AnnotatedLine *NextNonCommentLine = nullptr;
1696  E = Lines.rend();
1697  I != E; ++I) {
1698  bool CommentLine = true;
1699  for (const FormatToken *Tok = (*I)->First; Tok; Tok = Tok->Next) {
1700  if (!Tok->is(tok::comment)) {
1701  CommentLine = false;
1702  break;
1703  }
1704  }
1705 
1706  if (NextNonCommentLine && CommentLine) {
1707  // If the comment is currently aligned with the line immediately following
1708  // it, that's probably intentional and we should keep it.
1709  bool AlignedWithNextLine =
1710  NextNonCommentLine->First->NewlinesBefore <= 1 &&
1711  NextNonCommentLine->First->OriginalColumn ==
1712  (*I)->First->OriginalColumn;
1713  if (AlignedWithNextLine)
1714  (*I)->Level = NextNonCommentLine->Level;
1715  } else {
1716  NextNonCommentLine = (*I)->First->isNot(tok::r_brace) ? (*I) : nullptr;
1717  }
1718 
1719  setCommentLineLevels((*I)->Children);
1720  }
1721 }
1722 
1723 static unsigned maxNestingDepth(const AnnotatedLine &Line) {
1724  unsigned Result = 0;
1725  for (const auto* Tok = Line.First; Tok != nullptr; Tok = Tok->Next)
1726  Result = std::max(Result, Tok->NestingLevel);
1727  return Result;
1728 }
1729 
1732  E = Line.Children.end();
1733  I != E; ++I) {
1734  annotate(**I);
1735  }
1736  AnnotatingParser Parser(Style, Line, Keywords);
1737  Line.Type = Parser.parseLine();
1738 
1739  // With very deep nesting, ExpressionParser uses lots of stack and the
1740  // formatting algorithm is very slow. We're not going to do a good job here
1741  // anyway - it's probably generated code being formatted by mistake.
1742  // Just skip the whole line.
1743  if (maxNestingDepth(Line) > 50)
1744  Line.Type = LT_Invalid;
1745 
1746  if (Line.Type == LT_Invalid)
1747  return;
1748 
1749  ExpressionParser ExprParser(Style, Keywords, Line);
1750  ExprParser.parse();
1751 
1752  if (Line.startsWith(TT_ObjCMethodSpecifier))
1753  Line.Type = LT_ObjCMethodDecl;
1754  else if (Line.startsWith(TT_ObjCDecl))
1755  Line.Type = LT_ObjCDecl;
1756  else if (Line.startsWith(TT_ObjCProperty))
1757  Line.Type = LT_ObjCProperty;
1758 
1759  Line.First->SpacesRequiredBefore = 1;
1760  Line.First->CanBreakBefore = Line.First->MustBreakBefore;
1761 }
1762 
1763 // This function heuristically determines whether 'Current' starts the name of a
1764 // function declaration.
1765 static bool isFunctionDeclarationName(const FormatToken &Current,
1766  const AnnotatedLine &Line) {
1767  auto skipOperatorName = [](const FormatToken* Next) -> const FormatToken* {
1768  for (; Next; Next = Next->Next) {
1769  if (Next->is(TT_OverloadedOperatorLParen))
1770  return Next;
1771  if (Next->is(TT_OverloadedOperator))
1772  continue;
1773  if (Next->isOneOf(tok::kw_new, tok::kw_delete)) {
1774  // For 'new[]' and 'delete[]'.
1775  if (Next->Next && Next->Next->is(tok::l_square) &&
1776  Next->Next->Next && Next->Next->Next->is(tok::r_square))
1777  Next = Next->Next->Next;
1778  continue;
1779  }
1780 
1781  break;
1782  }
1783  return nullptr;
1784  };
1785 
1786  // Find parentheses of parameter list.
1787  const FormatToken *Next = Current.Next;
1788  if (Current.is(tok::kw_operator)) {
1789  if (Current.Previous && Current.Previous->is(tok::coloncolon))
1790  return false;
1791  Next = skipOperatorName(Next);
1792  } else {
1793  if (!Current.is(TT_StartOfName) || Current.NestingLevel != 0)
1794  return false;
1795  for (; Next; Next = Next->Next) {
1796  if (Next->is(TT_TemplateOpener)) {
1797  Next = Next->MatchingParen;
1798  } else if (Next->is(tok::coloncolon)) {
1799  Next = Next->Next;
1800  if (!Next)
1801  return false;
1802  if (Next->is(tok::kw_operator)) {
1803  Next = skipOperatorName(Next->Next);
1804  break;
1805  }
1806  if (!Next->is(tok::identifier))
1807  return false;
1808  } else if (Next->is(tok::l_paren)) {
1809  break;
1810  } else {
1811  return false;
1812  }
1813  }
1814  }
1815 
1816  // Check whether parameter list can belong to a function declaration.
1817  if (!Next || !Next->is(tok::l_paren) || !Next->MatchingParen)
1818  return false;
1819  // If the lines ends with "{", this is likely an function definition.
1820  if (Line.Last->is(tok::l_brace))
1821  return true;
1822  if (Next->Next == Next->MatchingParen)
1823  return true; // Empty parentheses.
1824  // If there is an &/&& after the r_paren, this is likely a function.
1825  if (Next->MatchingParen->Next &&
1826  Next->MatchingParen->Next->is(TT_PointerOrReference))
1827  return true;
1828  for (const FormatToken *Tok = Next->Next; Tok && Tok != Next->MatchingParen;
1829  Tok = Tok->Next) {
1830  if (Tok->is(tok::l_paren) && Tok->MatchingParen) {
1831  Tok = Tok->MatchingParen;
1832  continue;
1833  }
1834  if (Tok->is(tok::kw_const) || Tok->isSimpleTypeSpecifier() ||
1835  Tok->isOneOf(TT_PointerOrReference, TT_StartOfName, tok::ellipsis))
1836  return true;
1837  if (Tok->isOneOf(tok::l_brace, tok::string_literal, TT_ObjCMethodExpr) ||
1838  Tok->Tok.isLiteral())
1839  return false;
1840  }
1841  return false;
1842 }
1843 
// Decides, based on Style.AlwaysBreakAfterReturnType, whether a line break is
// required for the return type of the function declaration in 'Line'.
// 'Line.MightBeFunctionDecl' is asserted to be set.
//
// NOTE(review): this listing is missing several lines of the function (the
// embedded numbering skips 1849, 1854, 1857 and 1859-1860): the second
// enumerator of the first condition and some 'case' labels of the switch are
// not visible here. Confirm against the complete source before editing.
1844 bool TokenAnnotator::mustBreakForReturnType(const AnnotatedLine &Line) const {
1845  assert(Line.MightBeFunctionDecl);
1846 
// For the styles tested here, lines with Level > 0 never get a forced break.
1847  if ((Style.AlwaysBreakAfterReturnType == FormatStyle::RTBS_TopLevel ||
1848  Style.AlwaysBreakAfterReturnType ==
1850  Line.Level > 0)
1851  return false;
1852 
1853  switch (Style.AlwaysBreakAfterReturnType) {
1855  return false;
1856  case FormatStyle::RTBS_All:
1858  return true;
1861  return Line.mightBeFunctionDefinition();
1862  }
1863 
1864  return false;
1865 }
1866 
1869  E = Line.Children.end();
1870  I != E; ++I) {
1872  }
1873 
1874  Line.First->TotalLength =
1875  Line.First->IsMultiline ? Style.ColumnLimit : Line.First->ColumnWidth;
1876  FormatToken *Current = Line.First->Next;
1877  bool InFunctionDecl = Line.MightBeFunctionDecl;
1878  while (Current) {
1879  if (isFunctionDeclarationName(*Current, Line))
1880  Current->Type = TT_FunctionDeclarationName;
1881  if (Current->is(TT_LineComment)) {
1882  if (Current->Previous->BlockKind == BK_BracedInit &&
1883  Current->Previous->opensScope())
1884  Current->SpacesRequiredBefore = Style.Cpp11BracedListStyle ? 0 : 1;
1885  else
1886  Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments;
1887 
1888  // If we find a trailing comment, iterate backwards to determine whether
1889  // it seems to relate to a specific parameter. If so, break before that
1890  // parameter to avoid changing the comment's meaning. E.g. don't move 'b'
1891  // to the previous line in:
1892  // SomeFunction(a,
1893  // b, // comment
1894  // c);
1895  if (!Current->HasUnescapedNewline) {
1896  for (FormatToken *Parameter = Current->Previous; Parameter;
1897  Parameter = Parameter->Previous) {
1898  if (Parameter->isOneOf(tok::comment, tok::r_brace))
1899  break;
1900  if (Parameter->Previous && Parameter->Previous->is(tok::comma)) {
1901  if (!Parameter->Previous->is(TT_CtorInitializerComma) &&
1902  Parameter->HasUnescapedNewline)
1903  Parameter->MustBreakBefore = true;
1904  break;
1905  }
1906  }
1907  }
1908  } else if (Current->SpacesRequiredBefore == 0 &&
1909  spaceRequiredBefore(Line, *Current)) {
1910  Current->SpacesRequiredBefore = 1;
1911  }
1912 
1913  Current->MustBreakBefore =
1914  Current->MustBreakBefore || mustBreakBefore(Line, *Current);
1915 
1916  if (!Current->MustBreakBefore && InFunctionDecl &&
1917  Current->is(TT_FunctionDeclarationName))
1918  Current->MustBreakBefore = mustBreakForReturnType(Line);
1919 
1920  Current->CanBreakBefore =
1921  Current->MustBreakBefore || canBreakBefore(Line, *Current);
1922  unsigned ChildSize = 0;
1923  if (Current->Previous->Children.size() == 1) {
1924  FormatToken &LastOfChild = *Current->Previous->Children[0]->Last;
1925  ChildSize = LastOfChild.isTrailingComment() ? Style.ColumnLimit
1926  : LastOfChild.TotalLength + 1;
1927  }
1928  const FormatToken *Prev = Current->Previous;
1929  if (Current->MustBreakBefore || Prev->Children.size() > 1 ||
1930  (Prev->Children.size() == 1 &&
1931  Prev->Children[0]->First->MustBreakBefore) ||
1932  Current->IsMultiline)
1933  Current->TotalLength = Prev->TotalLength + Style.ColumnLimit;
1934  else
1935  Current->TotalLength = Prev->TotalLength + Current->ColumnWidth +
1936  ChildSize + Current->SpacesRequiredBefore;
1937 
1938  if (Current->is(TT_CtorInitializerColon))
1939  InFunctionDecl = false;
1940 
1941  // FIXME: Only calculate this if CanBreakBefore is true once static
1942  // initializers etc. are sorted out.
1943  // FIXME: Move magic numbers to a better place.
1944  Current->SplitPenalty = 20 * Current->BindingStrength +
1945  splitPenalty(Line, *Current, InFunctionDecl);
1946 
1947  Current = Current->Next;
1948  }
1949 
1950  calculateUnbreakableTailLengths(Line);
1951  unsigned IndentLevel = Line.Level;
1952  for (Current = Line.First; Current != nullptr; Current = Current->Next) {
1953  if (Current->Role)
1954  Current->Role->precomputeFormattingInfos(Current);
1955  if (Current->MatchingParen &&
1956  Current->MatchingParen->opensBlockOrBlockTypeList(Style)) {
1957  assert(IndentLevel > 0);
1958  --IndentLevel;
1959  }
1960  Current->IndentLevel = IndentLevel;
1961  if (Current->opensBlockOrBlockTypeList(Style))
1962  ++IndentLevel;
1963  }
1964 
1965  DEBUG({ printDebugInfo(Line); });
1966 }
1967 
1968 void TokenAnnotator::calculateUnbreakableTailLengths(AnnotatedLine &Line) {
1969  unsigned UnbreakableTailLength = 0;
1970  FormatToken *Current = Line.Last;
1971  while (Current) {
1972  Current->UnbreakableTailLength = UnbreakableTailLength;
1973  if (Current->CanBreakBefore ||
1974  Current->isOneOf(tok::comment, tok::string_literal)) {
1975  UnbreakableTailLength = 0;
1976  } else {
1977  UnbreakableTailLength +=
1978  Current->ColumnWidth + Current->SpacesRequiredBefore;
1979  }
1980  Current = Current->Previous;
1981  }
1982 }
1983 
// Computes the penalty for placing a line break between 'Tok.Previous'
// (called 'Left' below) and 'Tok' (called 'Right' below).
//
// The body is an ordered chain of rules: the first rule that matches returns
// its penalty, so the order of the checks is significant. If no rule matches,
// 3 is returned.
//
// 'Tok.Previous' is dereferenced unconditionally, so 'Tok' must have a
// predecessor. 'InFunctionDecl' enables the penalties that are specific to
// function declarations (return type on its own line, break after '(' or '=').
1984 unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
1985  const FormatToken &Tok,
1986  bool InFunctionDecl) {
1987  const FormatToken &Left = *Tok.Previous;
1988  const FormatToken &Right = Tok;
1989 
1990  if (Left.is(tok::semi))
1991  return 0;
1992 
// Penalties that only apply to Java or JavaScript.
1993  if (Style.Language == FormatStyle::LK_Java) {
1994  if (Right.isOneOf(Keywords.kw_extends, Keywords.kw_throws))
1995  return 1;
1996  if (Right.is(Keywords.kw_implements))
1997  return 2;
1998  if (Left.is(tok::comma) && Left.NestingLevel == 0)
1999  return 3;
2000  } else if (Style.Language == FormatStyle::LK_JavaScript) {
2001  if (Right.is(Keywords.kw_function) && Left.isNot(tok::comma))
2002  return 100;
2003  if (Left.is(TT_JsTypeColon))
2004  return 35;
2005  if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) ||
2006  (Right.is(TT_TemplateString) && Right.TokenText.startswith("}")))
2007  return 100;
2008  }
2009 
2010  if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral))
2011  return 1;
2012  if (Right.is(tok::l_square)) {
2013  if (Style.Language == FormatStyle::LK_Proto)
2014  return 1;
2015  if (Left.is(tok::r_square))
2016  return 200;
2017  // Slightly prefer formatting local lambda definitions like functions.
2018  if (Right.is(TT_LambdaLSquare) && Left.is(tok::equal))
2019  return 35;
2020  if (!Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare,
2021  TT_ArrayInitializerLSquare,
2022  TT_DesignatedInitializerLSquare))
2023  return 500;
2024  }
2025 
2026  if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) ||
2027  Right.is(tok::kw_operator)) {
2028  if (Line.startsWith(tok::kw_for) && Right.PartOfMultiVariableDeclStmt)
2029  return 3;
2030  if (Left.is(TT_StartOfName))
2031  return 110;
2032  if (InFunctionDecl && Right.NestingLevel == 0)
2033  return Style.PenaltyReturnTypeOnItsOwnLine;
2034  return 200;
2035  }
2036  if (Right.is(TT_PointerOrReference))
2037  return 190;
2038  if (Right.is(TT_LambdaArrow))
2039  return 110;
2040  if (Left.is(tok::equal) && Right.is(tok::l_brace))
2041  return 160;
2042  if (Left.is(TT_CastRParen))
2043  return 100;
2044  if (Left.is(tok::coloncolon) ||
2045  (Right.is(tok::period) && Style.Language == FormatStyle::LK_Proto))
2046  return 500;
2047  if (Left.isOneOf(tok::kw_class, tok::kw_struct))
2048  return 5000;
2049  if (Left.is(tok::comment))
2050  return 1000;
2051 
2052  if (Left.isOneOf(TT_RangeBasedForLoopColon, TT_InheritanceColon, TT_CtorInitializerColon))
2053  return 2;
2054 
2055  if (Right.isMemberAccess()) {
2056  // Breaking before the "./->" of a chained call/member access is reasonably
2057  // cheap, as formatting those with one call per line is generally
2058  // desirable. In particular, it should be cheaper to break before the call
2059  // than it is to break inside a call's parameters, which could lead to weird
2060  // "hanging" indents. The exception is the very last "./->" to support this
2061  // frequent pattern:
2062  //
2063  // aaaaaaaa.aaaaaaaa.bbbbbbb().ccccccccccccccccccccc(
2064  // dddddddd);
2065  //
2066  // which might otherwise be blown up onto many lines. Here, clang-format
2067  // won't produce "hanging" indents anyway as there is no other trailing
2068  // call.
2069  //
2070  // Also apply a higher penalty if this is not a call, as that might lead to
2071  // a wrapping like:
2072  //
2073  // aaaaaaa
2074  // .aaaaaaaaa.bbbbbbbb(cccccccc);
2075  return !Right.NextOperator || !Right.NextOperator->Previous->closesScope()
2076  ? 150
2077  : 35;
2078  }
2079 
2080  if (Right.is(TT_TrailingAnnotation) &&
2081  (!Right.Next || Right.Next->isNot(tok::l_paren))) {
2082  // Moving trailing annotations to the next line is fine for ObjC method
2083  // declarations.
2084  if (Line.startsWith(TT_ObjCMethodSpecifier))
2085  return 10;
2086  // Generally, breaking before a trailing annotation is bad unless it is
2087  // function-like. It seems to be especially preferable to keep standard
2088  // annotations (i.e. "const", "final" and "override") on the same line.
2089  // Use a slightly higher penalty after ")" so that annotations like
2090  // "const override" are kept together.
2091  bool is_short_annotation = Right.TokenText.size() < 10;
2092  return (Left.is(tok::r_paren) ? 100 : 120) + (is_short_annotation ? 50 : 0);
2093  }
2094 
2095  // In for-loops, prefer breaking at ',' and ';'.
2096  if (Line.startsWith(tok::kw_for) && Left.is(tok::equal))
2097  return 4;
2098 
2099  // In Objective-C method expressions, prefer breaking before "param:" over
2100  // breaking after it.
2101  if (Right.is(TT_SelectorName))
2102  return 0;
2103  if (Left.is(tok::colon) && Left.is(TT_ObjCMethodExpr))
2104  return Line.MightBeFunctionDecl ? 50 : 500;
2105 
2106  if (Left.is(tok::l_paren) && InFunctionDecl &&
2107  Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign)
2108  return 100;
2109  if (Left.is(tok::l_paren) && Left.Previous &&
2110  (Left.Previous->isOneOf(tok::kw_if, tok::kw_for)
2111  || Left.Previous->endsSequence(tok::kw_constexpr, tok::kw_if)))
2112  return 1000;
2113  if (Left.is(tok::equal) && InFunctionDecl)
2114  return 110;
2115  if (Right.is(tok::r_brace))
2116  return 1;
2117  if (Left.is(TT_TemplateOpener))
2118  return 100;
2119  if (Left.opensScope()) {
2120  if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign)
2121  return 0;
2122  return Left.ParameterCount > 1 ? Style.PenaltyBreakBeforeFirstCallParameter
2123  : 19;
2124  }
2125  if (Left.is(TT_JavaAnnotation))
2126  return 50;
2127 
2128  if (Left.isOneOf(tok::plus, tok::comma) && Left.Previous &&
2129  Left.Previous->isLabelString() &&
2130  (Left.NextOperator || Left.OperatorIndex != 0))
2131  return 45;
2132  if (Right.is(tok::plus) && Left.isLabelString() &&
2133  (Right.NextOperator || Right.OperatorIndex != 0))
2134  return 25;
2135  if (Left.is(tok::comma))
2136  return 1;
2137  if (Right.is(tok::lessless) && Left.isLabelString() &&
2138  (Right.NextOperator || Right.OperatorIndex != 1))
2139  return 25;
2140  if (Right.is(tok::lessless)) {
2141  // Breaking at a << is really cheap.
2142  if (!Left.is(tok::r_paren) || Right.OperatorIndex > 0)
2143  // Slightly prefer to break before the first one in log-like statements.
2144  return 2;
2145  return 1;
2146  }
// From here on the numeric value of a precedence level is used directly as the
// penalty, except for assignments, which use Style.PenaltyBreakAssignment.
2147  if (Left.is(TT_ConditionalExpr))
2148  return prec::Conditional;
2149  prec::Level Level = Left.getPrecedence();
2150  if (Level == prec::Unknown)
2151  Level = Right.getPrecedence();
2152  if (Level == prec::Assignment)
2153  return Style.PenaltyBreakAssignment;
2154  if (Level != prec::Unknown)
2155  return Level;
2156 
2157  return 3;
2158 }
2159 
// Decides whether a space is required between the two tokens 'Left' and
// 'Right' of 'Line'.
//
// The body is an ordered chain of token-pair rules: the first rule that
// matches determines the result, so the order of the checks is significant.
// Several rules return a value taken from 'Style' (e.g. SpacesInParentheses,
// SpaceBeforeParens, PointerAlignment). If no rule matches, true is returned.
2160 bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
2161  const FormatToken &Left,
2162  const FormatToken &Right) {
2163  if (Left.is(tok::kw_return) && Right.isNot(tok::semi))
2164  return true;
2165  if (Style.ObjCSpaceAfterProperty && Line.Type == LT_ObjCProperty &&
2166  Left.Tok.getObjCKeywordID() == tok::objc_property)
2167  return true;
2168  if (Right.is(tok::hashhash))
2169  return Left.is(tok::hash);
2170  if (Left.isOneOf(tok::hashhash, tok::hash))
2171  return Right.is(tok::hash);
2172  if (Left.is(tok::l_paren) && Right.is(tok::r_paren))
2173  return Style.SpaceInEmptyParentheses;
// Inside parentheses: cast parentheses and ordinary parentheses have separate
// style options.
2174  if (Left.is(tok::l_paren) || Right.is(tok::r_paren))
2175  return (Right.is(TT_CastRParen) ||
2176  (Left.MatchingParen && Left.MatchingParen->is(TT_CastRParen)))
2177  ? Style.SpacesInCStyleCastParentheses
2178  : Style.SpacesInParentheses;
2179  if (Right.isOneOf(tok::semi, tok::comma))
2180  return false;
2181  if (Right.is(tok::less) &&
2182  Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList)
2183  return true;
2184  if (Right.is(tok::less) && Left.is(tok::kw_template))
2185  return Style.SpaceAfterTemplateKeyword;
2186  if (Left.isOneOf(tok::exclaim, tok::tilde))
2187  return false;
2188  if (Left.is(tok::at) &&
2189  Right.isOneOf(tok::identifier, tok::string_literal, tok::char_constant,
2190  tok::numeric_constant, tok::l_paren, tok::l_brace,
2191  tok::kw_true, tok::kw_false))
2192  return false;
2193  if (Left.is(tok::colon))
2194  return !Left.is(TT_ObjCMethodExpr);
2195  if (Left.is(tok::coloncolon))
2196  return false;
2197  if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less))
2198  return false;
2199  if (Right.is(tok::ellipsis))
2200  return Left.Tok.isLiteral() || (Left.is(tok::identifier) && Left.Previous &&
2201  Left.Previous->is(tok::kw_case));
2202  if (Left.is(tok::l_square) && Right.is(tok::amp))
2203  return false;
// Space before a pointer/reference token.
2204  if (Right.is(TT_PointerOrReference))
2205  return (Left.is(tok::r_paren) && Line.MightBeFunctionDecl) ||
2206  (Left.Tok.isLiteral() || (Left.is(tok::kw_const) && Left.Previous &&
2207  Left.Previous->is(tok::r_paren)) ||
2208  (!Left.isOneOf(TT_PointerOrReference, tok::l_paren) &&
2209  (Style.PointerAlignment != FormatStyle::PAS_Left ||
2210  (Line.IsMultiVariableDeclStmt &&
2211  (Left.NestingLevel == 0 ||
2212  (Left.NestingLevel == 1 && Line.First->is(tok::kw_for)))))));
2213  if (Right.is(TT_FunctionTypeLParen) && Left.isNot(tok::l_paren) &&
2214  (!Left.is(TT_PointerOrReference) ||
2215  (Style.PointerAlignment != FormatStyle::PAS_Right &&
2216  !Line.IsMultiVariableDeclStmt)))
2217  return true;
// Space after a pointer/reference token.
2218  if (Left.is(TT_PointerOrReference))
2219  return Right.Tok.isLiteral() || Right.is(TT_BlockComment) ||
2220  (Right.isOneOf(Keywords.kw_override, Keywords.kw_final) &&
2221  !Right.is(TT_StartOfName)) ||
2222  (Right.is(tok::l_brace) && Right.BlockKind == BK_Block) ||
2223  (!Right.isOneOf(TT_PointerOrReference, TT_ArraySubscriptLSquare,
2224  tok::l_paren) &&
2225  (Style.PointerAlignment != FormatStyle::PAS_Right &&
2226  !Line.IsMultiVariableDeclStmt) &&
2227  Left.Previous &&
2228  !Left.Previous->isOneOf(tok::l_paren, tok::coloncolon));
2229  if (Right.is(tok::star) && Left.is(tok::l_paren))
2230  return false;
2231  if (Left.is(tok::l_square))
2232  return (Left.is(TT_ArrayInitializerLSquare) &&
2233  Style.SpacesInContainerLiterals && Right.isNot(tok::r_square)) ||
2234  (Left.is(TT_ArraySubscriptLSquare) && Style.SpacesInSquareBrackets &&
2235  Right.isNot(tok::r_square));
2236  if (Right.is(tok::r_square))
2237  return Right.MatchingParen &&
2238  ((Style.SpacesInContainerLiterals &&
2239  Right.MatchingParen->is(TT_ArrayInitializerLSquare)) ||
2240  (Style.SpacesInSquareBrackets &&
2241  Right.MatchingParen->is(TT_ArraySubscriptLSquare)));
2242  if (Right.is(tok::l_square) &&
2243  !Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare,
2244  TT_DesignatedInitializerLSquare) &&
2245  !Left.isOneOf(tok::numeric_constant, TT_DictLiteral))
2246  return false;
2247  if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
2248  return !Left.Children.empty(); // No spaces in "{}".
2249  if ((Left.is(tok::l_brace) && Left.BlockKind != BK_Block) ||
2250  (Right.is(tok::r_brace) && Right.MatchingParen &&
2251  Right.MatchingParen->BlockKind != BK_Block))
2252  return !Style.Cpp11BracedListStyle;
2253  if (Left.is(TT_BlockComment))
2254  return !Left.TokenText.endswith("=*/");
// Space before an opening parenthesis, governed by Style.SpaceBeforeParens.
2255  if (Right.is(tok::l_paren)) {
2256  if (Left.is(tok::r_paren) && Left.is(TT_AttributeParen))
2257  return true;
2258  return Line.Type == LT_ObjCDecl || Left.is(tok::semi) ||
2259  (Style.SpaceBeforeParens != FormatStyle::SBPO_Never &&
2260  (Left.isOneOf(tok::kw_if, tok::pp_elif, tok::kw_for, tok::kw_while,
2261  tok::kw_switch, tok::kw_case, TT_ForEachMacro,
2262  TT_ObjCForIn) ||
2263  Left.endsSequence(tok::kw_constexpr, tok::kw_if) ||
2264  (Left.isOneOf(tok::kw_try, Keywords.kw___except, tok::kw_catch,
2265  tok::kw_new, tok::kw_delete) &&
2266  (!Left.Previous || Left.Previous->isNot(tok::period))))) ||
2267  (Style.SpaceBeforeParens == FormatStyle::SBPO_Always &&
2268  (Left.is(tok::identifier) || Left.isFunctionLikeKeyword() ||
2269  Left.is(tok::r_paren)) &&
2270  Line.Type != LT_PreprocessorDirective);
2271  }
2272  if (Left.is(tok::at) && Right.Tok.getObjCKeywordID() != tok::objc_not_keyword)
2273  return false;
2274  if (Right.is(TT_UnaryOperator))
2275  return !Left.isOneOf(tok::l_paren, tok::l_square, tok::at) &&
2276  (Left.isNot(tok::colon) || Left.isNot(TT_ObjCMethodExpr));
2277  if ((Left.isOneOf(tok::identifier, tok::greater, tok::r_square,
2278  tok::r_paren) ||
2279  Left.isSimpleTypeSpecifier()) &&
2280  Right.is(tok::l_brace) && Right.getNextNonComment() &&
2281  Right.BlockKind != BK_Block)
2282  return false;
2283  if (Left.is(tok::period) || Right.is(tok::period))
2284  return false;
2285  if (Right.is(tok::hash) && Left.is(tok::identifier) && Left.TokenText == "L")
2286  return false;
2287  if (Left.is(TT_TemplateCloser) && Left.MatchingParen &&
2288  Left.MatchingParen->Previous &&
2289  Left.MatchingParen->Previous->is(tok::period))
2290  // A.<B>DoSomething();
2291  return false;
2292  if (Left.is(TT_TemplateCloser) && Right.is(tok::l_square))
2293  return false;
// Default: separate the two tokens with a space.
2294  return true;
2295 }
2296 
// Decides whether a space is required before 'Right', i.e. between
// 'Right.Previous' (called 'Left' below) and 'Right'.
//
// 'Right.Previous' is dereferenced unconditionally, so 'Right' must have a
// predecessor. The rules are evaluated in order and the first match wins:
//   1. two tokens that both carry identifier info always get a space;
//   2. language-specific rules selected via Style.isCpp() / Style.Language;
//   3. rules that apply regardless of language;
//   4. otherwise the decision is delegated to spaceRequiredBetween().
2297 bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
2298  const FormatToken &Right) {
2299  const FormatToken &Left = *Right.Previous;
2300  if (Right.Tok.getIdentifierInfo() && Left.Tok.getIdentifierInfo())
2301  return true; // Never ever merge two identifiers.
2302  if (Style.isCpp()) {
2303  if (Left.is(tok::kw_operator))
2304  return Right.is(tok::coloncolon);
2305  } else if (Style.Language == FormatStyle::LK_Proto ||
2306  Style.Language == FormatStyle::LK_TextProto) {
2307  if (Right.is(tok::period) &&
2308  Left.isOneOf(Keywords.kw_optional, Keywords.kw_required,
2309  Keywords.kw_repeated, Keywords.kw_extend))
2310  return true;
2311  if (Right.is(tok::l_paren) &&
2312  Left.isOneOf(Keywords.kw_returns, Keywords.kw_option))
2313  return true;
2314  if (Right.isOneOf(tok::l_brace, tok::less) && Left.is(TT_SelectorName))
2315  return true;
2316  } else if (Style.Language == FormatStyle::LK_JavaScript) {
2317  if (Left.is(TT_JsFatArrow))
2318  return true;
2319  // for await ( ...
2320  if (Right.is(tok::l_paren) && Left.is(Keywords.kw_await) &&
2321  Left.Previous && Left.Previous->is(tok::kw_for))
2322  return true;
2323  if (Left.is(Keywords.kw_async) && Right.is(tok::l_paren) &&
2324  Right.MatchingParen) {
2325  const FormatToken *Next = Right.MatchingParen->getNextNonComment();
2326  // An async arrow function, for example: `x = async () => foo();`,
2327  // as opposed to calling a function called async: `x = async();`
2328  if (Next && Next->is(TT_JsFatArrow))
2329  return true;
2330  }
2331  if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) ||
2332  (Right.is(TT_TemplateString) && Right.TokenText.startswith("}")))
2333  return false;
2334  // In tagged template literals ("html`bar baz`"), there is no space between
2335  // the tag identifier and the template string. getIdentifierInfo makes sure
2336  // that the identifier is not a pseudo keyword like `yield`, either.
// NOTE(review): the condition below calls Keywords.IsJavaScriptIdentifier(),
// not getIdentifierInfo(); the comment above may be out of date - confirm.
2337  if (Left.is(tok::identifier) && Keywords.IsJavaScriptIdentifier(Left) &&
2338  Right.is(TT_TemplateString))
2339  return false;
2340  if (Right.is(tok::star) &&
2341  Left.isOneOf(Keywords.kw_function, Keywords.kw_yield))
2342  return false;
2343  if (Right.isOneOf(tok::l_brace, tok::l_square) &&
2344  Left.isOneOf(Keywords.kw_function, Keywords.kw_yield))
2345  return true;
2346  // JS methods can use some keywords as names (e.g. `delete()`).
2347  if (Right.is(tok::l_paren) && Line.MustBeDeclaration &&
2348  Left.Tok.getIdentifierInfo())
2349  return false;
2350  if ((Left.isOneOf(Keywords.kw_let, Keywords.kw_var, Keywords.kw_in,
2351  tok::kw_const) ||
2352  // "of" is only a keyword if it appears after another identifier
2353  // (e.g. as "const x of y" in a for loop).
2354  (Left.is(Keywords.kw_of) && Left.Previous &&
2355  Left.Previous->Tok.getIdentifierInfo())) &&
2356  (!Left.Previous || !Left.Previous->is(tok::period)))
2357  return true;
2358  if (Left.isOneOf(tok::kw_for, Keywords.kw_as) && Left.Previous &&
2359  Left.Previous->is(tok::period) && Right.is(tok::l_paren))
2360  return false;
2361  if (Left.is(Keywords.kw_as) &&
2362  Right.isOneOf(tok::l_square, tok::l_brace, tok::l_paren))
2363  return true;
2364  if (Left.is(tok::kw_default) && Left.Previous &&
2365  Left.Previous->is(tok::kw_export))
2366  return true;
2367  if (Left.is(Keywords.kw_is) && Right.is(tok::l_brace))
2368  return true;
2369  if (Right.isOneOf(TT_JsTypeColon, TT_JsTypeOptionalQuestion))
2370  return false;
2371  if (Left.is(TT_JsTypeOperator) || Right.is(TT_JsTypeOperator))
2372  return false;
2373  if ((Left.is(tok::l_brace) || Right.is(tok::r_brace)) &&
2374  Line.First->isOneOf(Keywords.kw_import, tok::kw_export))
2375  return false;
2376  if (Left.is(tok::ellipsis))
2377  return false;
2378  if (Left.is(TT_TemplateCloser) &&
2379  !Right.isOneOf(tok::equal, tok::l_brace, tok::comma, tok::l_square,
2380  Keywords.kw_implements, Keywords.kw_extends))
2381  // Type assertions ('<type>expr') are not followed by whitespace. Other
2382  // locations that should have whitespace following are identified by the
2383  // above set of follower tokens.
2384  return false;
2385  if (Right.is(TT_JsNonNullAssertion))
2386  return false;
2387  if (Left.is(TT_JsNonNullAssertion) && Right.is(Keywords.kw_as))
2388  return true; // "x! as string"
2389  } else if (Style.Language == FormatStyle::LK_Java) {
2390  if (Left.is(tok::r_square) && Right.is(tok::l_brace))
2391  return true;
2392  if (Left.is(Keywords.kw_synchronized) && Right.is(tok::l_paren))
2393  return Style.SpaceBeforeParens != FormatStyle::SBPO_Never;
2394  if ((Left.isOneOf(tok::kw_static, tok::kw_public, tok::kw_private,
2395  tok::kw_protected) ||
2396  Left.isOneOf(Keywords.kw_final, Keywords.kw_abstract,
2397  Keywords.kw_native)) &&
2398  Right.is(TT_TemplateOpener))
2399  return true;
2400  }
// Rules below apply to all languages unless a language-specific rule above
// already returned.
// After an implicit string literal, a space is required exactly when the
// whitespace range before 'Right' is non-empty.
2401  if (Left.is(TT_ImplicitStringLiteral))
2402  return Right.WhitespaceRange.getBegin() != Right.WhitespaceRange.getEnd();
2403  if (Line.Type == LT_ObjCMethodDecl) {
2404  if (Left.is(TT_ObjCMethodSpecifier))
2405  return true;
2406  if (Left.is(tok::r_paren) && Right.is(tok::identifier))
2407  // Don't space between ')' and <id>
2408  return false;
2409  }
2410  if (Line.Type == LT_ObjCProperty &&
2411  (Right.is(tok::equal) || Left.is(tok::equal)))
2412  return false;
2413 
2414  if (Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow) ||
2415  Left.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow))
2416  return true;
2417  if (Right.is(TT_OverloadedOperatorLParen))
2418  return Style.SpaceBeforeParens == FormatStyle::SBPO_Always;
2419  if (Left.is(tok::comma))
2420  return true;
2421  if (Right.is(tok::comma))
2422  return false;
2423  if (Right.isOneOf(TT_CtorInitializerColon, TT_ObjCBlockLParen))
2424  return true;
2425  if (Right.is(tok::colon)) {
2426  if (Line.First->isOneOf(tok::kw_case, tok::kw_default) ||
2427  !Right.getNextNonComment() || Right.getNextNonComment()->is(tok::semi))
2428  return false;
2429  if (Right.is(TT_ObjCMethodExpr))
2430  return false;
2431  if (Left.is(tok::question))
2432  return false;
2433  if (Right.is(TT_InlineASMColon) && Left.is(tok::coloncolon))
2434  return false;
2435  if (Right.is(TT_DictLiteral))
2436  return Style.SpacesInContainerLiterals;
2437  return true;
2438  }
2439  if (Left.is(TT_UnaryOperator))
2440  return Right.is(TT_BinaryOperator);
2441 
2442  // If the next token is a binary operator or a selector name, we have
2443  // incorrectly classified the parenthesis as a cast. FIXME: Detect correctly.
2444  if (Left.is(TT_CastRParen))
2445  return Style.SpaceAfterCStyleCast ||
2446  Right.isOneOf(TT_BinaryOperator, TT_SelectorName);
2447 
2448  if (Left.is(tok::greater) && Right.is(tok::greater))
2449  return Right.is(TT_TemplateCloser) && Left.is(TT_TemplateCloser) &&
2450  (Style.Standard != FormatStyle::LS_Cpp11 || Style.SpacesInAngles);
2451  if (Right.isOneOf(tok::arrow, tok::arrowstar, tok::periodstar) ||
2452  Left.isOneOf(tok::arrow, tok::period, tok::arrowstar, tok::periodstar) ||
2453  (Right.is(tok::period) && Right.isNot(TT_DesignatedInitializerPeriod)))
2454  return false;
2455  if (!Style.SpaceBeforeAssignmentOperators &&
2456  Right.getPrecedence() == prec::Assignment)
2457  return false;
2458  if (Right.is(tok::coloncolon) && Left.is(tok::identifier))
2459  // Generally don't remove existing spaces between an identifier and "::".
2460  // The identifier might actually be a macro name such as ALWAYS_INLINE. If
2461  // this turns out to be too lenient, add analysis of the identifier itself.
2462  return Right.WhitespaceRange.getBegin() != Right.WhitespaceRange.getEnd();
2463  if (Right.is(tok::coloncolon) && !Left.isOneOf(tok::l_brace, tok::comment))
2464  return (Left.is(TT_TemplateOpener) &&
2465  Style.Standard == FormatStyle::LS_Cpp03) ||
2466  !(Left.isOneOf(tok::l_paren, tok::r_paren, tok::l_square,
2467  tok::kw___super, TT_TemplateCloser, TT_TemplateOpener));
2468  if ((Left.is(TT_TemplateOpener)) != (Right.is(TT_TemplateCloser)))
2469  return Style.SpacesInAngles;
2470  if ((Right.is(TT_BinaryOperator) && !Left.is(tok::l_paren)) ||
2471  (Left.isOneOf(TT_BinaryOperator, TT_ConditionalExpr) &&
2472  !Right.is(tok::r_paren)))
2473  return true;
2474  if (Left.is(TT_TemplateCloser) && Right.is(tok::l_paren) &&
2475  Right.isNot(TT_FunctionTypeLParen))
2476  return Style.SpaceBeforeParens == FormatStyle::SBPO_Always;
2477  if (Right.is(TT_TemplateOpener) && Left.is(tok::r_paren) &&
2478  Left.MatchingParen && Left.MatchingParen->is(TT_OverloadedOperatorLParen))
2479  return false;
2480  if (Right.is(tok::less) && Left.isNot(tok::l_paren) &&
2481  Line.startsWith(tok::hash))
2482  return true;
2483  if (Right.is(TT_TrailingUnaryOperator))
2484  return false;
2485  if (Left.is(TT_RegexLiteral))
2486  return false;
// No rule above matched: fall back to the generic token-pair rules.
2487  return spaceRequiredBetween(Line, Left, Right);
2488 }
2489 
2490 // Returns 'true' if 'Tok' is a brace we'd want to break before in Allman style.
2491 static bool isAllmanBrace(const FormatToken &Tok) {
2492  return Tok.is(tok::l_brace) && Tok.BlockKind == BK_Block &&
2493  !Tok.isOneOf(TT_ObjCBlockLBrace, TT_DictLiteral);
2494 }
2495 
2496 bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
2497  const FormatToken &Right) {
2498  const FormatToken &Left = *Right.Previous;
2499  if (Right.NewlinesBefore > 1 && Style.MaxEmptyLinesToKeep > 0)
2500  return true;
2501 
2502  if (Style.Language == FormatStyle::LK_JavaScript) {
2503  // FIXME: This might apply to other languages and token kinds.
2504  if (Right.is(tok::string_literal) && Left.is(tok::plus) && Left.Previous &&
2505  Left.Previous->is(tok::string_literal))
2506  return true;
2507  if (Left.is(TT_DictLiteral) && Left.is(tok::l_brace) && Line.Level == 0 &&
2508  Left.Previous && Left.Previous->is(tok::equal) &&
2509  Line.First->isOneOf(tok::identifier, Keywords.kw_import, tok::kw_export,
2510  tok::kw_const) &&
2511  // kw_var/kw_let are pseudo-tokens that are tok::identifier, so match
2512  // above.
2513  !Line.First->isOneOf(Keywords.kw_var, Keywords.kw_let))
2514  // Object literals on the top level of a file are treated as "enum-style".
2515  // Each key/value pair is put on a separate line, instead of bin-packing.
2516  return true;
2517  if (Left.is(tok::l_brace) && Line.Level == 0 &&
2518  (Line.startsWith(tok::kw_enum) ||
2519  Line.startsWith(tok::kw_export, tok::kw_enum)))
2520  // JavaScript top-level enum key/value pairs are put on separate lines
2521  // instead of bin-packing.
2522  return true;
2523  if (Right.is(tok::r_brace) && Left.is(tok::l_brace) &&
2524  !Left.Children.empty())
2525  // Support AllowShortFunctionsOnASingleLine for JavaScript.
2526  return Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_None ||
2527  Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Empty ||
2528  (Left.NestingLevel == 0 && Line.Level == 0 &&
2529  Style.AllowShortFunctionsOnASingleLine &
2531  } else if (Style.Language == FormatStyle::LK_Java) {
2532  if (Right.is(tok::plus) && Left.is(tok::string_literal) && Right.Next &&
2533  Right.Next->is(tok::string_literal))
2534  return true;
2535  } else if (Style.Language == FormatStyle::LK_Cpp ||
2536  Style.Language == FormatStyle::LK_ObjC ||
2537  Style.Language == FormatStyle::LK_Proto) {
2538  if (Left.isStringLiteral() && Right.isStringLiteral())
2539  return true;
2540  }
2541 
2542  // If the last token before a '}', ']', or ')' is a comma or a trailing
2543  // comment, the intention is to insert a line break after it in order to make
2544  // shuffling around entries easier. Import statements, especially in
2545  // JavaScript, can be an exception to this rule.
2546  if (Style.JavaScriptWrapImports || Line.Type != LT_ImportStatement) {
2547  const FormatToken *BeforeClosingBrace = nullptr;
2548  if ((Left.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||
2549  (Style.Language == FormatStyle::LK_JavaScript &&
2550  Left.is(tok::l_paren))) &&
2551  Left.BlockKind != BK_Block && Left.MatchingParen)
2552  BeforeClosingBrace = Left.MatchingParen->Previous;
2553  else if (Right.MatchingParen &&
2554  (Right.MatchingParen->isOneOf(tok::l_brace,
2555  TT_ArrayInitializerLSquare) ||
2556  (Style.Language == FormatStyle::LK_JavaScript &&
2557  Right.MatchingParen->is(tok::l_paren))))
2558  BeforeClosingBrace = &Left;
2559  if (BeforeClosingBrace && (BeforeClosingBrace->is(tok::comma) ||
2560  BeforeClosingBrace->isTrailingComment()))
2561  return true;
2562  }
2563 
2564  if (Right.is(tok::comment))
2565  return Left.BlockKind != BK_BracedInit &&
2566  Left.isNot(TT_CtorInitializerColon) &&
2567  (Right.NewlinesBefore > 0 && Right.HasUnescapedNewline);
2568  if (Left.isTrailingComment())
2569  return true;
2570  if (Right.Previous->IsUnterminatedLiteral)
2571  return true;
2572  if (Right.is(tok::lessless) && Right.Next &&
2573  Right.Previous->is(tok::string_literal) &&
2574  Right.Next->is(tok::string_literal))
2575  return true;
2576  if (Right.Previous->ClosesTemplateDeclaration &&
2577  Right.Previous->MatchingParen &&
2578  Right.Previous->MatchingParen->NestingLevel == 0 &&
2579  Style.AlwaysBreakTemplateDeclarations)
2580  return true;
2581  if (Right.is(TT_CtorInitializerComma) &&
2582  Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma &&
2583  !Style.ConstructorInitializerAllOnOneLineOrOnePerLine)
2584  return true;
2585  if (Right.is(TT_CtorInitializerColon) &&
2586  Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma &&
2587  !Style.ConstructorInitializerAllOnOneLineOrOnePerLine)
2588  return true;
2589  // Break only if we have multiple inheritance.
2590  if (Style.BreakBeforeInheritanceComma &&
2591  Right.is(TT_InheritanceComma))
2592  return true;
2593  if (Right.is(tok::string_literal) && Right.TokenText.startswith("R\""))
2594  // Raw string literals are special wrt. line breaks. The author has made a
2595  // deliberate choice and might have aligned the contents of the string
2596  // literal accordingly. Thus, we try keep existing line breaks.
2597  return Right.NewlinesBefore > 0;
2598  if ((Right.Previous->is(tok::l_brace) ||
2599  (Right.Previous->is(tok::less) &&
2600  Right.Previous->Previous &&
2601  Right.Previous->Previous->is(tok::equal))
2602  ) &&
2603  Right.NestingLevel == 1 && Style.Language == FormatStyle::LK_Proto) {
2604  // Don't put enums or option definitions onto single lines in protocol
2605  // buffers.
2606  return true;
2607  }
2608  if (Right.is(TT_InlineASMBrace))
2609  return Right.HasUnescapedNewline;
2610  if (isAllmanBrace(Left) || isAllmanBrace(Right))
2611  return (Line.startsWith(tok::kw_enum) && Style.BraceWrapping.AfterEnum) ||
2612  (Line.startsWith(tok::kw_class) && Style.BraceWrapping.AfterClass) ||
2613  (Line.startsWith(tok::kw_struct) && Style.BraceWrapping.AfterStruct);
2614  if (Left.is(TT_ObjCBlockLBrace) && !Style.AllowShortBlocksOnASingleLine)
2615  return true;
2616 
2617  if ((Style.Language == FormatStyle::LK_Java ||
2618  Style.Language == FormatStyle::LK_JavaScript) &&
2619  Left.is(TT_LeadingJavaAnnotation) &&
2620  Right.isNot(TT_LeadingJavaAnnotation) && Right.isNot(tok::l_paren) &&
2621  (Line.Last->is(tok::l_brace) || Style.BreakAfterJavaFieldAnnotations))
2622  return true;
2623 
2624  return false;
2625 }
2626 
2627 bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
2628  const FormatToken &Right) {
2629  const FormatToken &Left = *Right.Previous;
2630 
2631  // Language-specific stuff.
2632  if (Style.Language == FormatStyle::LK_Java) {
2633  if (Left.isOneOf(Keywords.kw_throws, Keywords.kw_extends,
2634  Keywords.kw_implements))
2635  return false;
2636  if (Right.isOneOf(Keywords.kw_throws, Keywords.kw_extends,
2637  Keywords.kw_implements))
2638  return true;
2639  } else if (Style.Language == FormatStyle::LK_JavaScript) {
2640  const FormatToken *NonComment = Right.getPreviousNonComment();
2641  if (NonComment &&
2642  NonComment->isOneOf(tok::kw_return, tok::kw_continue, tok::kw_break,
2643  tok::kw_throw, Keywords.kw_interface,
2644  Keywords.kw_type, tok::kw_static, tok::kw_public,
2645  tok::kw_private, tok::kw_protected,
2646  Keywords.kw_readonly, Keywords.kw_abstract,
2647  Keywords.kw_get, Keywords.kw_set))
2648  return false; // Otherwise automatic semicolon insertion would trigger.
2649  if (Left.is(TT_JsFatArrow) && Right.is(tok::l_brace))
2650  return false;
2651  if (Left.is(TT_JsTypeColon))
2652  return true;
2653  if (Right.NestingLevel == 0 && Right.is(Keywords.kw_is))
2654  return false;
2655  if (Left.is(Keywords.kw_in))
2656  return Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None;
2657  if (Right.is(Keywords.kw_in))
2658  return Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None;
2659  if (Right.is(Keywords.kw_as))
2660  return false; // must not break before as in 'x as type' casts
2661  if (Left.is(Keywords.kw_as))
2662  return true;
2663  if (Left.is(TT_JsNonNullAssertion))
2664  return true;
2665  if (Left.is(Keywords.kw_declare) &&
2666  Right.isOneOf(Keywords.kw_module, tok::kw_namespace,
2667  Keywords.kw_function, tok::kw_class, tok::kw_enum,
2668  Keywords.kw_interface, Keywords.kw_type, Keywords.kw_var,
2669  Keywords.kw_let, tok::kw_const))
2670  // See grammar for 'declare' statements at:
2671  // https://github.com/Microsoft/TypeScript/blob/master/doc/spec.md#A.10
2672  return false;
2673  if (Left.isOneOf(Keywords.kw_module, tok::kw_namespace) &&
2674  Right.isOneOf(tok::identifier, tok::string_literal))
2675  return false; // must not break in "module foo { ...}"
2676  if (Right.is(TT_TemplateString) && Right.closesScope())
2677  return false;
2678  if (Left.is(TT_TemplateString) && Left.opensScope())
2679  return true;
2680  }
2681 
2682  if (Left.is(tok::at))
2683  return false;
2684  if (Left.Tok.getObjCKeywordID() == tok::objc_interface)
2685  return false;
2686  if (Left.isOneOf(TT_JavaAnnotation, TT_LeadingJavaAnnotation))
2687  return !Right.is(tok::l_paren);
2688  if (Right.is(TT_PointerOrReference))
2689  return Line.IsMultiVariableDeclStmt ||
2690  (Style.PointerAlignment == FormatStyle::PAS_Right &&
2691  (!Right.Next || Right.Next->isNot(TT_FunctionDeclarationName)));
2692  if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) ||
2693  Right.is(tok::kw_operator))
2694  return true;
2695  if (Left.is(TT_PointerOrReference))
2696  return false;
2697  if (Right.isTrailingComment())
2698  // We rely on MustBreakBefore being set correctly here as we should not
2699  // change the "binding" behavior of a comment.
2700  // The first comment in a braced lists is always interpreted as belonging to
2701  // the first list element. Otherwise, it should be placed outside of the
2702  // list.
2703  return Left.BlockKind == BK_BracedInit ||
2704  (Left.is(TT_CtorInitializerColon) &&
2705  Style.BreakConstructorInitializers ==
2707  if (Left.is(tok::question) && Right.is(tok::colon))
2708  return false;
2709  if (Right.is(TT_ConditionalExpr) || Right.is(tok::question))
2710  return Style.BreakBeforeTernaryOperators;
2711  if (Left.is(TT_ConditionalExpr) || Left.is(tok::question))
2712  return !Style.BreakBeforeTernaryOperators;
2713  if (Right.is(TT_InheritanceColon))
2714  return true;
2715  if (Right.is(TT_ObjCMethodExpr) && !Right.is(tok::r_square) &&
2716  Left.isNot(TT_SelectorName))
2717  return true;
2718  if (Right.is(tok::colon) &&
2719  !Right.isOneOf(TT_CtorInitializerColon, TT_InlineASMColon))
2720  return false;
2721  if (Left.is(tok::colon) && Left.isOneOf(TT_DictLiteral, TT_ObjCMethodExpr))
2722  return true;
2723  if (Right.is(TT_SelectorName) || (Right.is(tok::identifier) && Right.Next &&
2724  Right.Next->is(TT_ObjCMethodExpr)))
2725  return Left.isNot(tok::period); // FIXME: Properly parse ObjC calls.
2726  if (Left.is(tok::r_paren) && Line.Type == LT_ObjCProperty)
2727  return true;
2728  if (Left.ClosesTemplateDeclaration || Left.is(TT_FunctionAnnotationRParen))
2729  return true;
2730  if (Right.isOneOf(TT_RangeBasedForLoopColon, TT_OverloadedOperatorLParen,
2731  TT_OverloadedOperator))
2732  return false;
2733  if (Left.is(TT_RangeBasedForLoopColon))
2734  return true;
2735  if (Right.is(TT_RangeBasedForLoopColon))
2736  return false;
2737  if (Left.is(TT_TemplateCloser) && Right.is(TT_TemplateOpener))
2738  return true;
2739  if (Left.isOneOf(TT_TemplateCloser, TT_UnaryOperator) ||
2740  Left.is(tok::kw_operator))
2741  return false;
2742  if (Left.is(tok::equal) && !Right.isOneOf(tok::kw_default, tok::kw_delete) &&
2743  Line.Type == LT_VirtualFunctionDecl && Left.NestingLevel == 0)
2744  return false;
2745  if (Left.is(tok::l_paren) && Left.is(TT_AttributeParen))
2746  return false;
2747  if (Left.is(tok::l_paren) && Left.Previous &&
2748  (Left.Previous->isOneOf(TT_BinaryOperator, TT_CastRParen)))
2749  return false;
2750  if (Right.is(TT_ImplicitStringLiteral))
2751  return false;
2752 
2753  if (Right.is(tok::r_paren) || Right.is(TT_TemplateCloser))
2754  return false;
2755  if (Right.is(tok::r_square) && Right.MatchingParen &&
2756  Right.MatchingParen->is(TT_LambdaLSquare))
2757  return false;
2758 
2759  // We only break before r_brace if there was a corresponding break before
2760  // the l_brace, which is tracked by BreakBeforeClosingBrace.
2761  if (Right.is(tok::r_brace))
2762  return Right.MatchingParen && Right.MatchingParen->BlockKind == BK_Block;
2763 
2764  // Allow breaking after a trailing annotation, e.g. after a method
2765  // declaration.
2766  if (Left.is(TT_TrailingAnnotation))
2767  return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal, tok::l_paren,
2768  tok::less, tok::coloncolon);
2769 
2770  if (Right.is(tok::kw___attribute))
2771  return true;
2772 
2773  if (Left.is(tok::identifier) && Right.is(tok::string_literal))
2774  return true;
2775 
2776  if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral))
2777  return true;
2778 
2779  if (Left.is(TT_CtorInitializerColon))
2780  return Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon;
2781  if (Right.is(TT_CtorInitializerColon))
2782  return Style.BreakConstructorInitializers != FormatStyle::BCIS_AfterColon;
2783  if (Left.is(TT_CtorInitializerComma) &&
2784  Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma)
2785  return false;
2786  if (Right.is(TT_CtorInitializerComma) &&
2787  Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma)
2788  return true;
2789  if (Left.is(TT_InheritanceComma) && Style.BreakBeforeInheritanceComma)
2790  return false;
2791  if (Right.is(TT_InheritanceComma) && Style.BreakBeforeInheritanceComma)
2792  return true;
2793  if ((Left.is(tok::greater) && Right.is(tok::greater)) ||
2794  (Left.is(tok::less) && Right.is(tok::less)))
2795  return false;
2796  if (Right.is(TT_BinaryOperator) &&
2797  Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None &&
2798  (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_All ||
2799  Right.getPrecedence() != prec::Assignment))
2800  return true;
2801  if (Left.is(TT_ArrayInitializerLSquare))
2802  return true;
2803  if (Right.is(tok::kw_typename) && Left.isNot(tok::kw_const))
2804  return true;
2805  if ((Left.isBinaryOperator() || Left.is(TT_BinaryOperator)) &&
2806  !Left.isOneOf(tok::arrowstar, tok::lessless) &&
2807  Style.BreakBeforeBinaryOperators != FormatStyle::BOS_All &&
2808  (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None ||
2809  Left.getPrecedence() == prec::Assignment))
2810  return true;
2811  return Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace,
2812  tok::kw_class, tok::kw_struct, tok::comment) ||
2813  Right.isMemberAccess() ||
2814  Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow, tok::lessless,
2815  tok::colon, tok::l_square, tok::at) ||
2816  (Left.is(tok::r_paren) &&
2817  Right.isOneOf(tok::identifier, tok::kw_const)) ||
2818  (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) ||
2819  (Left.is(TT_TemplateOpener) && !Right.is(TT_TemplateCloser));
2820 }
2821 
2822 void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) {
2823  llvm::errs() << "AnnotatedTokens:\n";
2824  const FormatToken *Tok = Line.First;
2825  while (Tok) {
2826  llvm::errs() << " M=" << Tok->MustBreakBefore
2827  << " C=" << Tok->CanBreakBefore
2828  << " T=" << getTokenTypeName(Tok->Type)
2829  << " S=" << Tok->SpacesRequiredBefore
2830  << " B=" << Tok->BlockParameterCount
2831  << " BK=" << Tok->BlockKind
2832  << " P=" << Tok->SplitPenalty << " Name=" << Tok->Tok.getName()
2833  << " L=" << Tok->TotalLength << " PPK=" << Tok->PackingKind
2834  << " FakeLParens=";
2835  for (unsigned i = 0, e = Tok->FakeLParens.size(); i != e; ++i)
2836  llvm::errs() << Tok->FakeLParens[i] << "/";
2837  llvm::errs() << " FakeRParens=" << Tok->FakeRParens;
2838  llvm::errs() << " Text='" << Tok->TokenText << "'\n";
2839  if (!Tok->Next)
2840  assert(Tok == Line.Last);
2841  Tok = Tok->Next;
2842  }
2843  llvm::errs() << "----\n";
2844 }
2845 
2846 } // namespace format
2847 } // namespace clang
Always break after the return type of top-level definitions.
Definition: Format.h:321
unsigned NestingLevel
The nesting level of this token, i.e.
Definition: FormatToken.h:227
Should be used for Protocol Buffers (https://developers.google.com/protocol-buffers/).
Definition: Format.h:1121
bool AutoFound
Defines the SourceManager interface.
std::unique_ptr< TokenRole > Role
A token can have a special role that can carry extra information about the token's formatting...
Definition: FormatToken.h:203
Break constructor initializers before the colon and commas, and align the commas with the colon...
Definition: Format.h:781
unsigned OriginalColumn
The original 0-based column of this token, including expanded tabs.
Definition: FormatToken.h:214
bool isOneOf(A K1, B K2) const
Definition: FormatToken.h:305
Parser - This implements a parser for the C family of languages.
Definition: Parser.h:57
Align pointer to the left.
Definition: Format.h:1260
FormatToken * CurrentToken
Should be used for C, C++.
Definition: Format.h:1112
bool IsMultiline
Whether the token text contains newlines (escaped or not).
Definition: FormatToken.h:152
bool isNot(T Kind) const
Definition: FormatToken.h:312
void setCommentLineLevels(SmallVectorImpl< AnnotatedLine * > &Lines)
Adapts the indent levels of comment lines to the indent of the subsequent line.
unsigned TotalLength
The total length of the unwrapped line up to and including this token.
Definition: FormatToken.h:210
unsigned NewlinesBefore
The number of newlines immediately before the Token.
Definition: FormatToken.h:129
FormatToken * Next
The next token in the unwrapped line.
Definition: FormatToken.h:282
Break after operators.
Definition: Format.h:399
bool CanBeExpression
tok::TokenKind ContextKind
unsigned UnbreakableTailLength
The length of following tokens until the next natural split point, or the next token that can be brok...
Definition: FormatToken.h:218
Always break after the return type.
Definition: Format.h:281
unsigned SplitPenalty
Penalty for inserting a line break before this token.
Definition: FormatToken.h:233
bool ColonIsForRangeExpr
bool CanBreakBefore
true if it is allowed to break before this token.
Definition: FormatToken.h:181
Should be used for Java.
Definition: Format.h:1114
FormatToken * Previous
The previous token in the unwrapped line.
Definition: FormatToken.h:279
Always break after the return type of function definitions.
Definition: Format.h:309
Only merge functions defined inside a class.
Definition: Format.h:200
This file implements a token annotator, i.e.
unsigned SpacesRequiredBefore
The number of spaces that should be inserted before this token.
Definition: FormatToken.h:178
bool closesScope() const
Returns whether Tok is )]} or a template closing >.
Definition: FormatToken.h:357
bool InInheritanceList
llvm::SmallPtrSet< FormatToken *, 16 > NonTemplateLess
detail::InMemoryDirectory::const_iterator I
Always put a space before opening parentheses, except when it's prohibited by the syntax rules (in fu...
Definition: Format.h:1360
Never merge functions into a single line.
Definition: Format.h:186
AnnotatingParser & P
ASTContext * Context
Break after return type automatically.
Definition: Format.h:265
Only merge empty functions.
Definition: Format.h:208
Should be used for JavaScript.
Definition: Format.h:1116
StateNode * Previous
SmallVector< AnnotatedLine *, 0 > Children
A wrapper around a Token storing information about the whitespace characters preceding it...
Definition: FormatToken.h:119
FormatToken * Token
static unsigned maxNestingDepth(const AnnotatedLine &Line)
FormatToken * FirstStartOfName
Keywords(IdentTable)
void annotate(AnnotatedLine &Line)
bool startsWith(Ts...Tokens) const
true if this line starts with the given tokens in order, ignoring comments.
Don't align, instead use ContinuationIndentWidth, e.g.
Definition: Format.h:63
const char * getTokenTypeName(TokenType Type)
Determines the name of a token type.
Definition: FormatToken.cpp:25
bool isTrailingComment() const
Definition: FormatToken.h:392
SmallVectorImpl< AnnotatedLine * >::const_iterator Next
static bool isAllmanBrace(const FormatToken &Tok)
AnnotatedLine & Line
Break constructor initializers after the colon and commas.
Definition: Format.h:788
bool opensBlockOrBlockTypeList(const FormatStyle &Style) const
Returns true if this token starts a block-type list, i.e.
Definition: FormatToken.h:463
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:25
Never put a space before opening parentheses.
Definition: Format.h:1338
unsigned IndentLevel
The indent level of this token. Copied from the surrounding line.
Definition: FormatToken.h:230
if(T->getSizeExpr()) TRY_TO(TraverseStmt(T-> getSizeExpr()))
bool ColonIsObjCMethodExpr
unsigned ColumnWidth
The width of the non-whitespace parts of the token (or its first line for multi-line tokens) in colum...
Definition: FormatToken.h:145
Use C++03-compatible syntax.
Definition: Format.h:1438
Always break after the return types of top-level functions.
Definition: Format.h:294
bool IsExpression
Use features of C++11, C++14 and C++1z (e.g.
Definition: Format.h:1441
detail::InMemoryDirectory::const_iterator E
static bool isFunctionDeclarationName(const FormatToken &Current, const AnnotatedLine &Line)
SmallVector< Context, 8 > Contexts
bool opensScope() const
Returns whether Tok is ([{ or a template opening <.
Definition: FormatToken.h:350
void calculateFormattingInformation(AnnotatedLine &Line)
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:296
Should be used for Objective-C, Objective-C++.
Definition: Format.h:1118
FormatToken * NextOperator
If this is an operator (or "."/"->") in a sequence of operators with the same precedence, points to the next operator.
Definition: FormatToken.h:263
char __ovld __cnfn max(char x, char y)
Returns y if x < y, otherwise it returns x.
unsigned BindingStrength
FormatToken * MatchingParen
If this is a bracket, this points to the matching one.
Definition: FormatToken.h:276
SmallVector< AnnotatedLine *, 1 > Children
If this token starts a block, this contains all the unwrapped lines in it.
Definition: FormatToken.h:286
FormatToken * Current
FormatToken * FirstObjCSelectorName
bool InTemplateArgument
bool ColonIsDictLiteral
The parameter type of a method or function.
Break before operators.
Definition: Format.h:423
unsigned LongestObjCSelectorName
bool MustBreakBefore
Whether there must be a line break before this token.
Definition: FormatToken.h:161
Should be used for Protocol Buffer messages in text format (https://developers.google.com/protocol-buffers/).
Definition: Format.h:1126
prec::Level getBinOpPrecedence(tok::TokenKind Kind, bool GreaterThanIsOperator, bool CPlusPlus11)
Return the precedence of the specified binary operator token.
Align pointer to the right.
Definition: Format.h:1265
unsigned BindingStrength
The binding strength of a token.
Definition: FormatToken.h:223
bool CaretFound
bool InCtorInitializer
bool HasUnescapedNewline
Whether there is at least one unescaped newline before the Token.
Definition: FormatToken.h:133
BraceBlockKind BlockKind
Contains the kind of block if this token is a brace.
Definition: FormatToken.h:173
const NamedDecl * Result
Definition: USRFinder.cpp:70
bool IsForEachMacro