clang  5.0.0
UnwrappedLineParser.cpp
Go to the documentation of this file.
1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file contains the implementation of the UnwrappedLineParser,
12 /// which turns a stream of tokens into UnwrappedLines.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "UnwrappedLineParser.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #define DEBUG_TYPE "format-parser"
22 
23 namespace clang {
24 namespace format {
25 
27 public:
28  virtual ~FormatTokenSource() {}
29  virtual FormatToken *getNextToken() = 0;
30 
31  virtual unsigned getPosition() = 0;
32  virtual FormatToken *setPosition(unsigned Position) = 0;
33 };
34 
35 namespace {
36 
37 class ScopedDeclarationState {
38 public:
39  ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
40  bool MustBeDeclaration)
41  : Line(Line), Stack(Stack) {
42  Line.MustBeDeclaration = MustBeDeclaration;
43  Stack.push_back(MustBeDeclaration);
44  }
45  ~ScopedDeclarationState() {
46  Stack.pop_back();
47  if (!Stack.empty())
48  Line.MustBeDeclaration = Stack.back();
49  else
50  Line.MustBeDeclaration = true;
51  }
52 
53 private:
54  UnwrappedLine &Line;
55  std::vector<bool> &Stack;
56 };
57 
58 static bool isLineComment(const FormatToken &FormatTok) {
59  return FormatTok.is(tok::comment) &&
60  FormatTok.TokenText.startswith("//");
61 }
62 
63 // Checks if \p FormatTok is a line comment that continues the line comment
64 // \p Previous. The original column of \p MinColumnToken is used to determine
65 // whether \p FormatTok is indented enough to the right to continue \p Previous.
66 static bool continuesLineComment(const FormatToken &FormatTok,
67  const FormatToken *Previous,
68  const FormatToken *MinColumnToken) {
69  if (!Previous || !MinColumnToken)
70  return false;
71  unsigned MinContinueColumn =
72  MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
73  return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
74  isLineComment(*Previous) &&
75  FormatTok.OriginalColumn >= MinContinueColumn;
76 }
77 
78 class ScopedMacroState : public FormatTokenSource {
79 public:
80  ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
81  FormatToken *&ResetToken)
82  : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
83  PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
84  Token(nullptr), PreviousToken(nullptr) {
85  TokenSource = this;
86  Line.Level = 0;
87  Line.InPPDirective = true;
88  }
89 
90  ~ScopedMacroState() override {
92  ResetToken = Token;
93  Line.InPPDirective = false;
94  Line.Level = PreviousLineLevel;
95  }
96 
97  FormatToken *getNextToken() override {
98  // The \c UnwrappedLineParser guards against this by never calling
99  // \c getNextToken() after it has encountered the first eof token.
100  assert(!eof());
102  Token = PreviousTokenSource->getNextToken();
103  if (eof())
104  return getFakeEOF();
105  return Token;
106  }
107 
108  unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
109 
110  FormatToken *setPosition(unsigned Position) override {
111  PreviousToken = nullptr;
112  Token = PreviousTokenSource->setPosition(Position);
113  return Token;
114  }
115 
116 private:
117  bool eof() {
118  return Token && Token->HasUnescapedNewline &&
119  !continuesLineComment(*Token, PreviousToken,
120  /*MinColumnToken=*/PreviousToken);
121  }
122 
123  FormatToken *getFakeEOF() {
124  static bool EOFInitialized = false;
125  static FormatToken FormatTok;
126  if (!EOFInitialized) {
127  FormatTok.Tok.startToken();
128  FormatTok.Tok.setKind(tok::eof);
129  EOFInitialized = true;
130  }
131  return &FormatTok;
132  }
133 
134  UnwrappedLine &Line;
135  FormatTokenSource *&TokenSource;
136  FormatToken *&ResetToken;
138  FormatTokenSource *PreviousTokenSource;
139 
140  FormatToken *Token;
141  FormatToken *PreviousToken;
142 };
143 
144 } // end anonymous namespace
145 
147 public:
149  bool SwitchToPreprocessorLines = false)
150  : Parser(Parser), OriginalLines(Parser.CurrentLines) {
151  if (SwitchToPreprocessorLines)
152  Parser.CurrentLines = &Parser.PreprocessorDirectives;
153  else if (!Parser.Line->Tokens.empty())
154  Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
155  PreBlockLine = std::move(Parser.Line);
156  Parser.Line = llvm::make_unique<UnwrappedLine>();
157  Parser.Line->Level = PreBlockLine->Level;
158  Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
159  }
160 
162  if (!Parser.Line->Tokens.empty()) {
163  Parser.addUnwrappedLine();
164  }
165  assert(Parser.Line->Tokens.empty());
166  Parser.Line = std::move(PreBlockLine);
167  if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
168  Parser.MustBreakBeforeNextToken = true;
169  Parser.CurrentLines = OriginalLines;
170  }
171 
172 private:
174 
175  std::unique_ptr<UnwrappedLine> PreBlockLine;
176  SmallVectorImpl<UnwrappedLine> *OriginalLines;
177 };
178 
180 public:
182  const FormatStyle &Style, unsigned &LineLevel)
183  : LineLevel(LineLevel), OldLineLevel(LineLevel) {
185  Parser->addUnwrappedLine();
186  if (Style.BraceWrapping.IndentBraces)
187  ++LineLevel;
188  }
189  ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
190 
191 private:
192  unsigned &LineLevel;
193  unsigned OldLineLevel;
194 };
195 
196 namespace {
197 
198 class IndexedTokenSource : public FormatTokenSource {
199 public:
200  IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
201  : Tokens(Tokens), Position(-1) {}
202 
203  FormatToken *getNextToken() override {
204  ++Position;
205  return Tokens[Position];
206  }
207 
208  unsigned getPosition() override {
209  assert(Position >= 0);
210  return Position;
211  }
212 
213  FormatToken *setPosition(unsigned P) override {
214  Position = P;
215  return Tokens[Position];
216  }
217 
218  void reset() { Position = -1; }
219 
220 private:
221  ArrayRef<FormatToken *> Tokens;
222  int Position;
223 };
224 
225 } // end anonymous namespace
226 
231  : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
232  CurrentLines(&Lines), Style(Style), Keywords(Keywords),
233  CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
234  Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {}
235 
236 void UnwrappedLineParser::reset() {
237  PPBranchLevel = -1;
238  Line.reset(new UnwrappedLine);
239  CommentsBeforeNextToken.clear();
240  FormatTok = nullptr;
241  MustBreakBeforeNextToken = false;
242  PreprocessorDirectives.clear();
243  CurrentLines = &Lines;
244  DeclarationScopeStack.clear();
245  PPStack.clear();
246 }
247 
249  IndexedTokenSource TokenSource(AllTokens);
250  do {
251  DEBUG(llvm::dbgs() << "----\n");
252  reset();
253  Tokens = &TokenSource;
254  TokenSource.reset();
255 
256  readToken();
257  parseFile();
258  // Create line with eof token.
259  pushToken(FormatTok);
260  addUnwrappedLine();
261 
262  for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
263  E = Lines.end();
264  I != E; ++I) {
265  Callback.consumeUnwrappedLine(*I);
266  }
267  Callback.finishRun();
268  Lines.clear();
269  while (!PPLevelBranchIndex.empty() &&
270  PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
271  PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
272  PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
273  }
274  if (!PPLevelBranchIndex.empty()) {
275  ++PPLevelBranchIndex.back();
276  assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
277  assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
278  }
279  } while (!PPLevelBranchIndex.empty());
280 }
281 
282 void UnwrappedLineParser::parseFile() {
283  // The top-level context in a file always has declarations, except for pre-
284  // processor directives and JavaScript files.
285  bool MustBeDeclaration =
286  !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
287  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
288  MustBeDeclaration);
289  if (Style.Language == FormatStyle::LK_TextProto)
290  parseBracedList();
291  else
292  parseLevel(/*HasOpeningBrace=*/false);
293  // Make sure to format the remaining tokens.
294  flushComments(true);
295  addUnwrappedLine();
296 }
297 
298 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
299  bool SwitchLabelEncountered = false;
300  do {
301  tok::TokenKind kind = FormatTok->Tok.getKind();
302  if (FormatTok->Type == TT_MacroBlockBegin) {
303  kind = tok::l_brace;
304  } else if (FormatTok->Type == TT_MacroBlockEnd) {
305  kind = tok::r_brace;
306  }
307 
308  switch (kind) {
309  case tok::comment:
310  nextToken();
311  addUnwrappedLine();
312  break;
313  case tok::l_brace:
314  // FIXME: Add parameter whether this can happen - if this happens, we must
315  // be in a non-declaration context.
316  if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
317  continue;
318  parseBlock(/*MustBeDeclaration=*/false);
319  addUnwrappedLine();
320  break;
321  case tok::r_brace:
322  if (HasOpeningBrace)
323  return;
324  nextToken();
325  addUnwrappedLine();
326  break;
327  case tok::kw_default:
328  case tok::kw_case:
329  if (!SwitchLabelEncountered &&
330  (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
331  ++Line->Level;
332  SwitchLabelEncountered = true;
333  parseStructuralElement();
334  break;
335  default:
336  parseStructuralElement();
337  break;
338  }
339  } while (!eof());
340 }
341 
342 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
343  // We'll parse forward through the tokens until we hit
344  // a closing brace or eof - note that getNextToken() will
345  // parse macros, so this will magically work inside macro
346  // definitions, too.
347  unsigned StoredPosition = Tokens->getPosition();
348  FormatToken *Tok = FormatTok;
349  const FormatToken *PrevTok = getPreviousToken();
350  // Keep a stack of positions of lbrace tokens. We will
351  // update information about whether an lbrace starts a
352  // braced init list or a different block during the loop.
353  SmallVector<FormatToken *, 8> LBraceStack;
354  assert(Tok->Tok.is(tok::l_brace));
355  do {
356  // Get next non-comment token.
357  FormatToken *NextTok;
358  unsigned ReadTokens = 0;
359  do {
360  NextTok = Tokens->getNextToken();
361  ++ReadTokens;
362  } while (NextTok->is(tok::comment));
363 
364  switch (Tok->Tok.getKind()) {
365  case tok::l_brace:
366  if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
367  if (PrevTok->is(tok::colon))
368  // A colon indicates this code is in a type, or a braced list
369  // following a label in an object literal ({a: {b: 1}}). The code
370  // below could be confused by semicolons between the individual
371  // members in a type member list, which would normally trigger
372  // BK_Block. In both cases, this must be parsed as an inline braced
373  // init.
374  Tok->BlockKind = BK_BracedInit;
375  else if (PrevTok->is(tok::r_paren))
376  // `) { }` can only occur in function or method declarations in JS.
377  Tok->BlockKind = BK_Block;
378  } else {
379  Tok->BlockKind = BK_Unknown;
380  }
381  LBraceStack.push_back(Tok);
382  break;
383  case tok::r_brace:
384  if (LBraceStack.empty())
385  break;
386  if (LBraceStack.back()->BlockKind == BK_Unknown) {
387  bool ProbablyBracedList = false;
388  if (Style.Language == FormatStyle::LK_Proto) {
389  ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
390  } else {
391  // Using OriginalColumn to distinguish between ObjC methods and
392  // binary operators is a bit hacky.
393  bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
394  NextTok->OriginalColumn == 0;
395 
396  // If there is a comma, semicolon or right paren after the closing
397  // brace, we assume this is a braced initializer list. Note that
398  // regardless how we mark inner braces here, we will overwrite the
399  // BlockKind later if we parse a braced list (where all blocks
400  // inside are by default braced lists), or when we explicitly detect
401  // blocks (for example while parsing lambdas).
402  // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
403  // braced list in JS.
404  ProbablyBracedList =
406  NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
407  Keywords.kw_as)) ||
408  (Style.isCpp() && NextTok->is(tok::l_paren)) ||
409  NextTok->isOneOf(tok::comma, tok::period, tok::colon,
410  tok::r_paren, tok::r_square, tok::l_brace,
411  tok::l_square, tok::ellipsis) ||
412  (NextTok->is(tok::identifier) &&
413  !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
414  (NextTok->is(tok::semi) &&
415  (!ExpectClassBody || LBraceStack.size() != 1)) ||
416  (NextTok->isBinaryOperator() && !NextIsObjCMethod);
417  }
418  if (ProbablyBracedList) {
419  Tok->BlockKind = BK_BracedInit;
420  LBraceStack.back()->BlockKind = BK_BracedInit;
421  } else {
422  Tok->BlockKind = BK_Block;
423  LBraceStack.back()->BlockKind = BK_Block;
424  }
425  }
426  LBraceStack.pop_back();
427  break;
428  case tok::at:
429  case tok::semi:
430  case tok::kw_if:
431  case tok::kw_while:
432  case tok::kw_for:
433  case tok::kw_switch:
434  case tok::kw_try:
435  case tok::kw___try:
436  if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
437  LBraceStack.back()->BlockKind = BK_Block;
438  break;
439  default:
440  break;
441  }
442  PrevTok = Tok;
443  Tok = NextTok;
444  } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
445 
446  // Assume other blocks for all unclosed opening braces.
447  for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
448  if (LBraceStack[i]->BlockKind == BK_Unknown)
449  LBraceStack[i]->BlockKind = BK_Block;
450  }
451 
452  FormatTok = Tokens->setPosition(StoredPosition);
453 }
454 
455 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
456  bool MunchSemi) {
457  assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
458  "'{' or macro block token expected");
459  const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
460  FormatTok->BlockKind = BK_Block;
461 
462  unsigned InitialLevel = Line->Level;
463  nextToken();
464 
465  if (MacroBlock && FormatTok->is(tok::l_paren))
466  parseParens();
467 
468  addUnwrappedLine();
469  size_t OpeningLineIndex = CurrentLines->empty()
471  : (CurrentLines->size() - 1);
472 
473  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
474  MustBeDeclaration);
475  if (AddLevel)
476  ++Line->Level;
477  parseLevel(/*HasOpeningBrace=*/true);
478 
479  if (eof())
480  return;
481 
482  if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
483  : !FormatTok->is(tok::r_brace)) {
484  Line->Level = InitialLevel;
485  FormatTok->BlockKind = BK_Block;
486  return;
487  }
488 
489  nextToken(); // Munch the closing brace.
490 
491  if (MacroBlock && FormatTok->is(tok::l_paren))
492  parseParens();
493 
494  if (MunchSemi && FormatTok->Tok.is(tok::semi))
495  nextToken();
496  Line->Level = InitialLevel;
497  Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
498  if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
499  // Update the opening line to add the forward reference as well
500  (*CurrentLines)[OpeningLineIndex].MatchingOpeningBlockLineIndex =
501  CurrentLines->size() - 1;
502  }
503 }
504 
505 static bool isGoogScope(const UnwrappedLine &Line) {
506  // FIXME: Closure-library specific stuff should not be hard-coded but be
507  // configurable.
508  if (Line.Tokens.size() < 4)
509  return false;
510  auto I = Line.Tokens.begin();
511  if (I->Tok->TokenText != "goog")
512  return false;
513  ++I;
514  if (I->Tok->isNot(tok::period))
515  return false;
516  ++I;
517  if (I->Tok->TokenText != "scope")
518  return false;
519  ++I;
520  return I->Tok->is(tok::l_paren);
521 }
522 
523 static bool isIIFE(const UnwrappedLine &Line,
524  const AdditionalKeywords &Keywords) {
525  // Look for the start of an immediately invoked anonymous function.
526  // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
527  // This is commonly done in JavaScript to create a new, anonymous scope.
528  // Example: (function() { ... })()
529  if (Line.Tokens.size() < 3)
530  return false;
531  auto I = Line.Tokens.begin();
532  if (I->Tok->isNot(tok::l_paren))
533  return false;
534  ++I;
535  if (I->Tok->isNot(Keywords.kw_function))
536  return false;
537  ++I;
538  return I->Tok->is(tok::l_paren);
539 }
540 
542  const FormatToken &InitialToken) {
543  if (InitialToken.is(tok::kw_namespace))
544  return Style.BraceWrapping.AfterNamespace;
545  if (InitialToken.is(tok::kw_class))
546  return Style.BraceWrapping.AfterClass;
547  if (InitialToken.is(tok::kw_union))
548  return Style.BraceWrapping.AfterUnion;
549  if (InitialToken.is(tok::kw_struct))
550  return Style.BraceWrapping.AfterStruct;
551  return false;
552 }
553 
554 void UnwrappedLineParser::parseChildBlock() {
555  FormatTok->BlockKind = BK_Block;
556  nextToken();
557  {
558  bool SkipIndent =
560  (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
561  ScopedLineState LineState(*this);
562  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
563  /*MustBeDeclaration=*/false);
564  Line->Level += SkipIndent ? 0 : 1;
565  parseLevel(/*HasOpeningBrace=*/true);
566  flushComments(isOnNewLine(*FormatTok));
567  Line->Level -= SkipIndent ? 0 : 1;
568  }
569  nextToken();
570 }
571 
572 void UnwrappedLineParser::parsePPDirective() {
573  assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
574  ScopedMacroState MacroState(*Line, Tokens, FormatTok);
575  nextToken();
576 
577  if (!FormatTok->Tok.getIdentifierInfo()) {
578  parsePPUnknown();
579  return;
580  }
581 
582  switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
583  case tok::pp_define:
584  parsePPDefine();
585  return;
586  case tok::pp_if:
587  parsePPIf(/*IfDef=*/false);
588  break;
589  case tok::pp_ifdef:
590  case tok::pp_ifndef:
591  parsePPIf(/*IfDef=*/true);
592  break;
593  case tok::pp_else:
594  parsePPElse();
595  break;
596  case tok::pp_elif:
597  parsePPElIf();
598  break;
599  case tok::pp_endif:
600  parsePPEndIf();
601  break;
602  default:
603  parsePPUnknown();
604  break;
605  }
606 }
607 
608 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
609  if (Unreachable || (!PPStack.empty() && PPStack.back() == PP_Unreachable))
610  PPStack.push_back(PP_Unreachable);
611  else
612  PPStack.push_back(PP_Conditional);
613 }
614 
615 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
616  ++PPBranchLevel;
617  assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
618  if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
619  PPLevelBranchIndex.push_back(0);
620  PPLevelBranchCount.push_back(0);
621  }
622  PPChainBranchIndex.push(0);
623  bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
624  conditionalCompilationCondition(Unreachable || Skip);
625 }
626 
627 void UnwrappedLineParser::conditionalCompilationAlternative() {
628  if (!PPStack.empty())
629  PPStack.pop_back();
630  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
631  if (!PPChainBranchIndex.empty())
632  ++PPChainBranchIndex.top();
633  conditionalCompilationCondition(
634  PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
635  PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
636 }
637 
638 void UnwrappedLineParser::conditionalCompilationEnd() {
639  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
640  if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
641  if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
642  PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
643  }
644  }
645  // Guard against #endif's without #if.
646  if (PPBranchLevel > 0)
647  --PPBranchLevel;
648  if (!PPChainBranchIndex.empty())
649  PPChainBranchIndex.pop();
650  if (!PPStack.empty())
651  PPStack.pop_back();
652 }
653 
654 void UnwrappedLineParser::parsePPIf(bool IfDef) {
655  bool IfNDef = FormatTok->is(tok::pp_ifndef);
656  nextToken();
657  bool Unreachable = false;
658  if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
659  Unreachable = true;
660  if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
661  Unreachable = true;
662  conditionalCompilationStart(Unreachable);
663  parsePPUnknown();
664 }
665 
666 void UnwrappedLineParser::parsePPElse() {
667  conditionalCompilationAlternative();
668  parsePPUnknown();
669 }
670 
671 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
672 
673 void UnwrappedLineParser::parsePPEndIf() {
674  conditionalCompilationEnd();
675  parsePPUnknown();
676 }
677 
678 void UnwrappedLineParser::parsePPDefine() {
679  nextToken();
680 
681  if (FormatTok->Tok.getKind() != tok::identifier) {
682  parsePPUnknown();
683  return;
684  }
685  nextToken();
686  if (FormatTok->Tok.getKind() == tok::l_paren &&
687  FormatTok->WhitespaceRange.getBegin() ==
688  FormatTok->WhitespaceRange.getEnd()) {
689  parseParens();
690  }
691  addUnwrappedLine();
692  Line->Level = 1;
693 
694  // Errors during a preprocessor directive can only affect the layout of the
695  // preprocessor directive, and thus we ignore them. An alternative approach
696  // would be to use the same approach we use on the file level (no
697  // re-indentation if there was a structural error) within the macro
698  // definition.
699  parseFile();
700 }
701 
702 void UnwrappedLineParser::parsePPUnknown() {
703  do {
704  nextToken();
705  } while (!eof());
706  addUnwrappedLine();
707 }
708 
709 // Here we blacklist certain tokens that are not usually the first token in an
710 // unwrapped line. This is used in attempt to distinguish macro calls without
711 // trailing semicolons from other constructs split to several lines.
712 static bool tokenCanStartNewLine(const clang::Token &Tok) {
713  // Semicolon can be a null-statement, l_square can be a start of a macro or
714  // a C++11 attribute, but this doesn't seem to be common.
715  return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
716  Tok.isNot(tok::l_square) &&
717  // Tokens that can only be used as binary operators and a part of
718  // overloaded operator names.
719  Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
720  Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
721  Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
722  Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
723  Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
724  Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
725  Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
726  Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
727  Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
728  Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
729  Tok.isNot(tok::lesslessequal) &&
730  // Colon is used in labels, base class lists, initializer lists,
731  // range-based for loops, ternary operator, but should never be the
732  // first token in an unwrapped line.
733  Tok.isNot(tok::colon) &&
734  // 'noexcept' is a trailing annotation.
735  Tok.isNot(tok::kw_noexcept);
736 }
737 
739  const FormatToken *FormatTok) {
740  // FIXME: This returns true for C/C++ keywords like 'struct'.
741  return FormatTok->is(tok::identifier) &&
742  (FormatTok->Tok.getIdentifierInfo() == nullptr ||
743  !FormatTok->isOneOf(
744  Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
745  Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
746  Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
747  Keywords.kw_let, Keywords.kw_var, tok::kw_const,
748  Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
749  Keywords.kw_instanceof, Keywords.kw_interface,
750  Keywords.kw_throws, Keywords.kw_from));
751 }
752 
754  const FormatToken *FormatTok) {
755  return FormatTok->Tok.isLiteral() ||
756  FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
757  mustBeJSIdent(Keywords, FormatTok);
758 }
759 
760 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
761 // when encountered after a value (see mustBeJSIdentOrValue).
763  const FormatToken *FormatTok) {
764  return FormatTok->isOneOf(
765  tok::kw_return, Keywords.kw_yield,
766  // conditionals
767  tok::kw_if, tok::kw_else,
768  // loops
769  tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
770  // switch/case
771  tok::kw_switch, tok::kw_case,
772  // exceptions
773  tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
774  // declaration
775  tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
776  Keywords.kw_async, Keywords.kw_function,
777  // import/export
778  Keywords.kw_import, tok::kw_export);
779 }
780 
781 // readTokenWithJavaScriptASI reads the next token and terminates the current
782 // line if JavaScript Automatic Semicolon Insertion must
783 // happen between the current token and the next token.
784 //
785 // This method is conservative - it cannot cover all edge cases of JavaScript,
786 // but only aims to correctly handle certain well known cases. It *must not*
787 // return true in speculative cases.
788 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
789  FormatToken *Previous = FormatTok;
790  readToken();
791  FormatToken *Next = FormatTok;
792 
793  bool IsOnSameLine =
794  CommentsBeforeNextToken.empty()
795  ? Next->NewlinesBefore == 0
796  : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
797  if (IsOnSameLine)
798  return;
799 
800  bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
801  bool PreviousStartsTemplateExpr =
802  Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
803  if (PreviousMustBeValue && Line && Line->Tokens.size() > 1) {
804  // If the token before the previous one is an '@', the previous token is an
805  // annotation and can precede another identifier/value.
806  const FormatToken *PrePrevious = std::prev(Line->Tokens.end(), 2)->Tok;
807  if (PrePrevious->is(tok::at))
808  return;
809  }
810  if (Next->is(tok::exclaim) && PreviousMustBeValue)
811  return addUnwrappedLine();
812  bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
813  bool NextEndsTemplateExpr =
814  Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
815  if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
816  (PreviousMustBeValue ||
817  Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
818  tok::minusminus)))
819  return addUnwrappedLine();
820  if (PreviousMustBeValue && isJSDeclOrStmt(Keywords, Next))
821  return addUnwrappedLine();
822 }
823 
824 void UnwrappedLineParser::parseStructuralElement() {
825  assert(!FormatTok->is(tok::l_brace));
826  if (Style.Language == FormatStyle::LK_TableGen &&
827  FormatTok->is(tok::pp_include)) {
828  nextToken();
829  if (FormatTok->is(tok::string_literal))
830  nextToken();
831  addUnwrappedLine();
832  return;
833  }
834  switch (FormatTok->Tok.getKind()) {
835  case tok::at:
836  nextToken();
837  if (FormatTok->Tok.is(tok::l_brace)) {
838  nextToken();
839  parseBracedList();
840  break;
841  }
842  switch (FormatTok->Tok.getObjCKeywordID()) {
843  case tok::objc_public:
844  case tok::objc_protected:
845  case tok::objc_package:
846  case tok::objc_private:
847  return parseAccessSpecifier();
848  case tok::objc_interface:
849  case tok::objc_implementation:
850  return parseObjCInterfaceOrImplementation();
851  case tok::objc_protocol:
852  return parseObjCProtocol();
853  case tok::objc_end:
854  return; // Handled by the caller.
855  case tok::objc_optional:
856  case tok::objc_required:
857  nextToken();
858  addUnwrappedLine();
859  return;
860  case tok::objc_autoreleasepool:
861  nextToken();
862  if (FormatTok->Tok.is(tok::l_brace)) {
864  addUnwrappedLine();
865  parseBlock(/*MustBeDeclaration=*/false);
866  }
867  addUnwrappedLine();
868  return;
869  case tok::objc_try:
870  // This branch isn't strictly necessary (the kw_try case below would
871  // do this too after the tok::at is parsed above). But be explicit.
872  parseTryCatch();
873  return;
874  default:
875  break;
876  }
877  break;
878  case tok::kw_asm:
879  nextToken();
880  if (FormatTok->is(tok::l_brace)) {
881  FormatTok->Type = TT_InlineASMBrace;
882  nextToken();
883  while (FormatTok && FormatTok->isNot(tok::eof)) {
884  if (FormatTok->is(tok::r_brace)) {
885  FormatTok->Type = TT_InlineASMBrace;
886  nextToken();
887  addUnwrappedLine();
888  break;
889  }
890  FormatTok->Finalized = true;
891  nextToken();
892  }
893  }
894  break;
895  case tok::kw_namespace:
896  parseNamespace();
897  return;
898  case tok::kw_inline:
899  nextToken();
900  if (FormatTok->Tok.is(tok::kw_namespace)) {
901  parseNamespace();
902  return;
903  }
904  break;
905  case tok::kw_public:
906  case tok::kw_protected:
907  case tok::kw_private:
908  if (Style.Language == FormatStyle::LK_Java ||
910  nextToken();
911  else
912  parseAccessSpecifier();
913  return;
914  case tok::kw_if:
915  parseIfThenElse();
916  return;
917  case tok::kw_for:
918  case tok::kw_while:
919  parseForOrWhileLoop();
920  return;
921  case tok::kw_do:
922  parseDoWhile();
923  return;
924  case tok::kw_switch:
925  parseSwitch();
926  return;
927  case tok::kw_default:
928  nextToken();
929  parseLabel();
930  return;
931  case tok::kw_case:
932  parseCaseLabel();
933  return;
934  case tok::kw_try:
935  case tok::kw___try:
936  parseTryCatch();
937  return;
938  case tok::kw_extern:
939  nextToken();
940  if (FormatTok->Tok.is(tok::string_literal)) {
941  nextToken();
942  if (FormatTok->Tok.is(tok::l_brace)) {
943  parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
944  addUnwrappedLine();
945  return;
946  }
947  }
948  break;
949  case tok::kw_export:
950  if (Style.Language == FormatStyle::LK_JavaScript) {
951  parseJavaScriptEs6ImportExport();
952  return;
953  }
954  break;
955  case tok::identifier:
956  if (FormatTok->is(TT_ForEachMacro)) {
957  parseForOrWhileLoop();
958  return;
959  }
960  if (FormatTok->is(TT_MacroBlockBegin)) {
961  parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
962  /*MunchSemi=*/false);
963  return;
964  }
965  if (FormatTok->is(Keywords.kw_import)) {
966  if (Style.Language == FormatStyle::LK_JavaScript) {
967  parseJavaScriptEs6ImportExport();
968  return;
969  }
970  if (Style.Language == FormatStyle::LK_Proto) {
971  nextToken();
972  if (FormatTok->is(tok::kw_public))
973  nextToken();
974  if (!FormatTok->is(tok::string_literal))
975  return;
976  nextToken();
977  if (FormatTok->is(tok::semi))
978  nextToken();
979  addUnwrappedLine();
980  return;
981  }
982  }
983  if (Style.isCpp() &&
984  FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
985  Keywords.kw_slots, Keywords.kw_qslots)) {
986  nextToken();
987  if (FormatTok->is(tok::colon)) {
988  nextToken();
989  addUnwrappedLine();
990  return;
991  }
992  }
993  // In all other cases, parse the declaration.
994  break;
995  default:
996  break;
997  }
998  do {
999  const FormatToken *Previous = getPreviousToken();
1000  switch (FormatTok->Tok.getKind()) {
1001  case tok::at:
1002  nextToken();
1003  if (FormatTok->Tok.is(tok::l_brace)) {
1004  nextToken();
1005  parseBracedList();
1006  }
1007  break;
1008  case tok::kw_enum:
1009  // Ignore if this is part of "template <enum ...".
1010  if (Previous && Previous->is(tok::less)) {
1011  nextToken();
1012  break;
1013  }
1014 
1015  // parseEnum falls through and does not yet add an unwrapped line as an
1016  // enum definition can start a structural element.
1017  if (!parseEnum())
1018  break;
1019  // This only applies for C++.
1020  if (!Style.isCpp()) {
1021  addUnwrappedLine();
1022  return;
1023  }
1024  break;
1025  case tok::kw_typedef:
1026  nextToken();
1027  if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1028  Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
1029  parseEnum();
1030  break;
1031  case tok::kw_struct:
1032  case tok::kw_union:
1033  case tok::kw_class:
1034  // parseRecord falls through and does not yet add an unwrapped line as a
1035  // record declaration or definition can start a structural element.
1036  parseRecord();
1037  // This does not apply for Java and JavaScript.
1038  if (Style.Language == FormatStyle::LK_Java ||
1040  if (FormatTok->is(tok::semi))
1041  nextToken();
1042  addUnwrappedLine();
1043  return;
1044  }
1045  break;
1046  case tok::period:
1047  nextToken();
1048  // In Java, classes have an implicit static member "class".
1049  if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1050  FormatTok->is(tok::kw_class))
1051  nextToken();
1052  if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
1053  FormatTok->Tok.getIdentifierInfo())
1054  // JavaScript only has pseudo keywords, all keywords are allowed to
1055  // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1056  nextToken();
1057  break;
1058  case tok::semi:
1059  nextToken();
1060  addUnwrappedLine();
1061  return;
1062  case tok::r_brace:
1063  addUnwrappedLine();
1064  return;
1065  case tok::l_paren:
1066  parseParens();
1067  break;
1068  case tok::kw_operator:
1069  nextToken();
1070  if (FormatTok->isBinaryOperator())
1071  nextToken();
1072  break;
1073  case tok::caret:
1074  nextToken();
1075  if (FormatTok->Tok.isAnyIdentifier() ||
1076  FormatTok->isSimpleTypeSpecifier())
1077  nextToken();
1078  if (FormatTok->is(tok::l_paren))
1079  parseParens();
1080  if (FormatTok->is(tok::l_brace))
1081  parseChildBlock();
1082  break;
1083  case tok::l_brace:
1084  if (!tryToParseBracedList()) {
1085  // A block outside of parentheses must be the last part of a
1086  // structural element.
1087  // FIXME: Figure out cases where this is not true, and add projections
1088  // for them (the one we know is missing are lambdas).
1089  if (Style.BraceWrapping.AfterFunction)
1090  addUnwrappedLine();
1091  FormatTok->Type = TT_FunctionLBrace;
1092  parseBlock(/*MustBeDeclaration=*/false);
1093  addUnwrappedLine();
1094  return;
1095  }
1096  // Otherwise this was a braced init list, and the structural
1097  // element continues.
1098  break;
1099  case tok::kw_try:
1100  // We arrive here when parsing function-try blocks.
1101  parseTryCatch();
1102  return;
1103  case tok::identifier: {
1104  if (FormatTok->is(TT_MacroBlockEnd)) {
1105  addUnwrappedLine();
1106  return;
1107  }
1108 
1109  // Function declarations (as opposed to function expressions) are parsed
1110  // on their own unwrapped line by continuing this loop. Function
1111  // expressions (functions that are not on their own line) must not create
1112  // a new unwrapped line, so they are special cased below.
1113  size_t TokenCount = Line->Tokens.size();
1114  if (Style.Language == FormatStyle::LK_JavaScript &&
1115  FormatTok->is(Keywords.kw_function) &&
1116  (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
1117  Keywords.kw_async)))) {
1118  tryToParseJSFunction();
1119  break;
1120  }
1121  if ((Style.Language == FormatStyle::LK_JavaScript ||
1122  Style.Language == FormatStyle::LK_Java) &&
1123  FormatTok->is(Keywords.kw_interface)) {
1124  if (Style.Language == FormatStyle::LK_JavaScript) {
1125  // In JavaScript/TypeScript, "interface" can be used as a standalone
1126  // identifier, e.g. in `var interface = 1;`. If "interface" is
1127  // followed by another identifier, it is very likely to be an actual
1128  // interface declaration.
1129  unsigned StoredPosition = Tokens->getPosition();
1130  FormatToken *Next = Tokens->getNextToken();
1131  FormatTok = Tokens->setPosition(StoredPosition);
1132  if (Next && !mustBeJSIdent(Keywords, Next)) {
1133  nextToken();
1134  break;
1135  }
1136  }
1137  parseRecord();
1138  addUnwrappedLine();
1139  return;
1140  }
1141 
1142  // See if the following token should start a new unwrapped line.
1143  StringRef Text = FormatTok->TokenText;
1144  nextToken();
1145  if (Line->Tokens.size() == 1 &&
1146  // JS doesn't have macros, and within classes colons indicate fields,
1147  // not labels.
1149  if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
1150  Line->Tokens.begin()->Tok->MustBreakBefore = true;
1151  parseLabel();
1152  return;
1153  }
1154  // Recognize function-like macro usages without trailing semicolon as
1155  // well as free-standing macros like Q_OBJECT.
1156  bool FunctionLike = FormatTok->is(tok::l_paren);
1157  if (FunctionLike)
1158  parseParens();
1159 
1160  bool FollowedByNewline =
1161  CommentsBeforeNextToken.empty()
1162  ? FormatTok->NewlinesBefore > 0
1163  : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1164 
1165  if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1166  tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
1167  addUnwrappedLine();
1168  return;
1169  }
1170  }
1171  break;
1172  }
1173  case tok::equal:
1174  // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
1175  // TT_JsFatArrow. They always start an expression or a child block if
1176  // followed by a curly.
1177  if (FormatTok->is(TT_JsFatArrow)) {
1178  nextToken();
1179  if (FormatTok->is(tok::l_brace))
1180  parseChildBlock();
1181  break;
1182  }
1183 
1184  nextToken();
1185  if (FormatTok->Tok.is(tok::l_brace)) {
1186  nextToken();
1187  parseBracedList();
1188  } else if (Style.Language == FormatStyle::LK_Proto &&
1189  FormatTok->Tok.is(tok::less)) {
1190  nextToken();
1191  parseBracedList(/*ContinueOnSemicolons=*/false,
1192  /*ClosingBraceKind=*/tok::greater);
1193  }
1194  break;
1195  case tok::l_square:
1196  parseSquare();
1197  break;
1198  case tok::kw_new:
1199  parseNew();
1200  break;
1201  default:
1202  nextToken();
1203  break;
1204  }
1205  } while (!eof());
1206 }
1207 
1208 bool UnwrappedLineParser::tryToParseLambda() {
1209  if (!Style.isCpp()) {
1210  nextToken();
1211  return false;
1212  }
1213  const FormatToken* Previous = getPreviousToken();
1214  if (Previous &&
1215  (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1216  tok::kw_delete) ||
1217  Previous->closesScope() || Previous->isSimpleTypeSpecifier())) {
1218  nextToken();
1219  return false;
1220  }
1221  assert(FormatTok->is(tok::l_square));
1222  FormatToken &LSquare = *FormatTok;
1223  if (!tryToParseLambdaIntroducer())
1224  return false;
1225 
1226  while (FormatTok->isNot(tok::l_brace)) {
1227  if (FormatTok->isSimpleTypeSpecifier()) {
1228  nextToken();
1229  continue;
1230  }
1231  switch (FormatTok->Tok.getKind()) {
1232  case tok::l_brace:
1233  break;
1234  case tok::l_paren:
1235  parseParens();
1236  break;
1237  case tok::amp:
1238  case tok::star:
1239  case tok::kw_const:
1240  case tok::comma:
1241  case tok::less:
1242  case tok::greater:
1243  case tok::identifier:
1244  case tok::numeric_constant:
1245  case tok::coloncolon:
1246  case tok::kw_mutable:
1247  nextToken();
1248  break;
1249  case tok::arrow:
1250  FormatTok->Type = TT_LambdaArrow;
1251  nextToken();
1252  break;
1253  default:
1254  return true;
1255  }
1256  }
1257  LSquare.Type = TT_LambdaLSquare;
1258  parseChildBlock();
1259  return true;
1260 }
1261 
1262 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1263  nextToken();
1264  if (FormatTok->is(tok::equal)) {
1265  nextToken();
1266  if (FormatTok->is(tok::r_square)) {
1267  nextToken();
1268  return true;
1269  }
1270  if (FormatTok->isNot(tok::comma))
1271  return false;
1272  nextToken();
1273  } else if (FormatTok->is(tok::amp)) {
1274  nextToken();
1275  if (FormatTok->is(tok::r_square)) {
1276  nextToken();
1277  return true;
1278  }
1279  if (!FormatTok->isOneOf(tok::comma, tok::identifier)) {
1280  return false;
1281  }
1282  if (FormatTok->is(tok::comma))
1283  nextToken();
1284  } else if (FormatTok->is(tok::r_square)) {
1285  nextToken();
1286  return true;
1287  }
1288  do {
1289  if (FormatTok->is(tok::amp))
1290  nextToken();
1291  if (!FormatTok->isOneOf(tok::identifier, tok::kw_this))
1292  return false;
1293  nextToken();
1294  if (FormatTok->is(tok::ellipsis))
1295  nextToken();
1296  if (FormatTok->is(tok::comma)) {
1297  nextToken();
1298  } else if (FormatTok->is(tok::r_square)) {
1299  nextToken();
1300  return true;
1301  } else {
1302  return false;
1303  }
1304  } while (!eof());
1305  return false;
1306 }
1307 
1308 void UnwrappedLineParser::tryToParseJSFunction() {
1309  assert(FormatTok->is(Keywords.kw_function) ||
1310  FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1311  if (FormatTok->is(Keywords.kw_async))
1312  nextToken();
1313  // Consume "function".
1314  nextToken();
1315 
1316  // Consume * (generator function). Treat it like C++'s overloaded operators.
1317  if (FormatTok->is(tok::star)) {
1318  FormatTok->Type = TT_OverloadedOperator;
1319  nextToken();
1320  }
1321 
1322  // Consume function name.
1323  if (FormatTok->is(tok::identifier))
1324  nextToken();
1325 
1326  if (FormatTok->isNot(tok::l_paren))
1327  return;
1328 
1329  // Parse formal parameter list.
1330  parseParens();
1331 
1332  if (FormatTok->is(tok::colon)) {
1333  // Parse a type definition.
1334  nextToken();
1335 
1336  // Eat the type declaration. For braced inline object types, balance braces,
1337  // otherwise just parse until finding an l_brace for the function body.
1338  if (FormatTok->is(tok::l_brace))
1339  tryToParseBracedList();
1340  else
1341  while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1342  nextToken();
1343  }
1344 
1345  if (FormatTok->is(tok::semi))
1346  return;
1347 
1348  parseChildBlock();
1349 }
1350 
1351 bool UnwrappedLineParser::tryToParseBracedList() {
1352  if (FormatTok->BlockKind == BK_Unknown)
1353  calculateBraceTypes();
1354  assert(FormatTok->BlockKind != BK_Unknown);
1355  if (FormatTok->BlockKind == BK_Block)
1356  return false;
1357  nextToken();
1358  parseBracedList();
1359  return true;
1360 }
1361 
1362 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
1363  tok::TokenKind ClosingBraceKind) {
1364  bool HasError = false;
1365 
1366  // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1367  // replace this by using parseAssigmentExpression() inside.
1368  do {
1369  if (Style.Language == FormatStyle::LK_JavaScript) {
1370  if (FormatTok->is(Keywords.kw_function) ||
1371  FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1372  tryToParseJSFunction();
1373  continue;
1374  }
1375  if (FormatTok->is(TT_JsFatArrow)) {
1376  nextToken();
1377  // Fat arrows can be followed by simple expressions or by child blocks
1378  // in curly braces.
1379  if (FormatTok->is(tok::l_brace)) {
1380  parseChildBlock();
1381  continue;
1382  }
1383  }
1384  if (FormatTok->is(tok::l_brace)) {
1385  // Could be a method inside of a braced list `{a() { return 1; }}`.
1386  if (tryToParseBracedList())
1387  continue;
1388  parseChildBlock();
1389  }
1390  }
1391  if (FormatTok->Tok.getKind() == ClosingBraceKind) {
1392  nextToken();
1393  return !HasError;
1394  }
1395  switch (FormatTok->Tok.getKind()) {
1396  case tok::caret:
1397  nextToken();
1398  if (FormatTok->is(tok::l_brace)) {
1399  parseChildBlock();
1400  }
1401  break;
1402  case tok::l_square:
1403  tryToParseLambda();
1404  break;
1405  case tok::l_paren:
1406  parseParens();
1407  // JavaScript can just have free standing methods and getters/setters in
1408  // object literals. Detect them by a "{" following ")".
1409  if (Style.Language == FormatStyle::LK_JavaScript) {
1410  if (FormatTok->is(tok::l_brace))
1411  parseChildBlock();
1412  break;
1413  }
1414  break;
1415  case tok::l_brace:
1416  // Assume there are no blocks inside a braced init list apart
1417  // from the ones we explicitly parse out (like lambdas).
1418  FormatTok->BlockKind = BK_BracedInit;
1419  nextToken();
1420  parseBracedList();
1421  break;
1422  case tok::semi:
1423  // JavaScript (or more precisely TypeScript) can have semicolons in braced
1424  // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1425  // used for error recovery if we have otherwise determined that this is
1426  // a braced list.
1427  if (Style.Language == FormatStyle::LK_JavaScript) {
1428  nextToken();
1429  break;
1430  }
1431  HasError = true;
1432  if (!ContinueOnSemicolons)
1433  return !HasError;
1434  nextToken();
1435  break;
1436  case tok::comma:
1437  nextToken();
1438  break;
1439  default:
1440  nextToken();
1441  break;
1442  }
1443  } while (!eof());
1444  return false;
1445 }
1446 
1447 void UnwrappedLineParser::parseParens() {
1448  assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1449  nextToken();
1450  do {
1451  switch (FormatTok->Tok.getKind()) {
1452  case tok::l_paren:
1453  parseParens();
1454  if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1455  parseChildBlock();
1456  break;
1457  case tok::r_paren:
1458  nextToken();
1459  return;
1460  case tok::r_brace:
1461  // A "}" inside parenthesis is an error if there wasn't a matching "{".
1462  return;
1463  case tok::l_square:
1464  tryToParseLambda();
1465  break;
1466  case tok::l_brace:
1467  if (!tryToParseBracedList())
1468  parseChildBlock();
1469  break;
1470  case tok::at:
1471  nextToken();
1472  if (FormatTok->Tok.is(tok::l_brace)) {
1473  nextToken();
1474  parseBracedList();
1475  }
1476  break;
1477  case tok::kw_class:
1478  if (Style.Language == FormatStyle::LK_JavaScript)
1479  parseRecord(/*ParseAsExpr=*/true);
1480  else
1481  nextToken();
1482  break;
1483  case tok::identifier:
1484  if (Style.Language == FormatStyle::LK_JavaScript &&
1485  (FormatTok->is(Keywords.kw_function) ||
1486  FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1487  tryToParseJSFunction();
1488  else
1489  nextToken();
1490  break;
1491  default:
1492  nextToken();
1493  break;
1494  }
1495  } while (!eof());
1496 }
1497 
1498 void UnwrappedLineParser::parseSquare() {
1499  assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1500  if (tryToParseLambda())
1501  return;
1502  do {
1503  switch (FormatTok->Tok.getKind()) {
1504  case tok::l_paren:
1505  parseParens();
1506  break;
1507  case tok::r_square:
1508  nextToken();
1509  return;
1510  case tok::r_brace:
1511  // A "}" inside parenthesis is an error if there wasn't a matching "{".
1512  return;
1513  case tok::l_square:
1514  parseSquare();
1515  break;
1516  case tok::l_brace: {
1517  if (!tryToParseBracedList())
1518  parseChildBlock();
1519  break;
1520  }
1521  case tok::at:
1522  nextToken();
1523  if (FormatTok->Tok.is(tok::l_brace)) {
1524  nextToken();
1525  parseBracedList();
1526  }
1527  break;
1528  default:
1529  nextToken();
1530  break;
1531  }
1532  } while (!eof());
1533 }
1534 
1535 void UnwrappedLineParser::parseIfThenElse() {
1536  assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1537  nextToken();
1538  if (FormatTok->Tok.is(tok::kw_constexpr))
1539  nextToken();
1540  if (FormatTok->Tok.is(tok::l_paren))
1541  parseParens();
1542  bool NeedsUnwrappedLine = false;
1543  if (FormatTok->Tok.is(tok::l_brace)) {
1544  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1545  parseBlock(/*MustBeDeclaration=*/false);
1546  if (Style.BraceWrapping.BeforeElse)
1547  addUnwrappedLine();
1548  else
1549  NeedsUnwrappedLine = true;
1550  } else {
1551  addUnwrappedLine();
1552  ++Line->Level;
1553  parseStructuralElement();
1554  --Line->Level;
1555  }
1556  if (FormatTok->Tok.is(tok::kw_else)) {
1557  nextToken();
1558  if (FormatTok->Tok.is(tok::l_brace)) {
1559  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1560  parseBlock(/*MustBeDeclaration=*/false);
1561  addUnwrappedLine();
1562  } else if (FormatTok->Tok.is(tok::kw_if)) {
1563  parseIfThenElse();
1564  } else {
1565  addUnwrappedLine();
1566  ++Line->Level;
1567  parseStructuralElement();
1568  if (FormatTok->is(tok::eof))
1569  addUnwrappedLine();
1570  --Line->Level;
1571  }
1572  } else if (NeedsUnwrappedLine) {
1573  addUnwrappedLine();
1574  }
1575 }
1576 
1577 void UnwrappedLineParser::parseTryCatch() {
1578  assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1579  nextToken();
1580  bool NeedsUnwrappedLine = false;
1581  if (FormatTok->is(tok::colon)) {
1582  // We are in a function try block, what comes is an initializer list.
1583  nextToken();
1584  while (FormatTok->is(tok::identifier)) {
1585  nextToken();
1586  if (FormatTok->is(tok::l_paren))
1587  parseParens();
1588  if (FormatTok->is(tok::comma))
1589  nextToken();
1590  }
1591  }
1592  // Parse try with resource.
1593  if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1594  parseParens();
1595  }
1596  if (FormatTok->is(tok::l_brace)) {
1597  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1598  parseBlock(/*MustBeDeclaration=*/false);
1599  if (Style.BraceWrapping.BeforeCatch) {
1600  addUnwrappedLine();
1601  } else {
1602  NeedsUnwrappedLine = true;
1603  }
1604  } else if (!FormatTok->is(tok::kw_catch)) {
1605  // The C++ standard requires a compound-statement after a try.
1606  // If there's none, we try to assume there's a structuralElement
1607  // and try to continue.
1608  addUnwrappedLine();
1609  ++Line->Level;
1610  parseStructuralElement();
1611  --Line->Level;
1612  }
1613  while (1) {
1614  if (FormatTok->is(tok::at))
1615  nextToken();
1616  if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1617  tok::kw___finally) ||
1618  ((Style.Language == FormatStyle::LK_Java ||
1620  FormatTok->is(Keywords.kw_finally)) ||
1621  (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1622  FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1623  break;
1624  nextToken();
1625  while (FormatTok->isNot(tok::l_brace)) {
1626  if (FormatTok->is(tok::l_paren)) {
1627  parseParens();
1628  continue;
1629  }
1630  if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1631  return;
1632  nextToken();
1633  }
1634  NeedsUnwrappedLine = false;
1635  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1636  parseBlock(/*MustBeDeclaration=*/false);
1637  if (Style.BraceWrapping.BeforeCatch)
1638  addUnwrappedLine();
1639  else
1640  NeedsUnwrappedLine = true;
1641  }
1642  if (NeedsUnwrappedLine)
1643  addUnwrappedLine();
1644 }
1645 
1646 void UnwrappedLineParser::parseNamespace() {
1647  assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
1648 
1649  const FormatToken &InitialToken = *FormatTok;
1650  nextToken();
1651  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon))
1652  nextToken();
1653  if (FormatTok->Tok.is(tok::l_brace)) {
1654  if (ShouldBreakBeforeBrace(Style, InitialToken))
1655  addUnwrappedLine();
1656 
1657  bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1659  DeclarationScopeStack.size() > 1);
1660  parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1661  // Munch the semicolon after a namespace. This is more common than one would
1662  // think. Puttin the semicolon into its own line is very ugly.
1663  if (FormatTok->Tok.is(tok::semi))
1664  nextToken();
1665  addUnwrappedLine();
1666  }
1667  // FIXME: Add error handling.
1668 }
1669 
1670 void UnwrappedLineParser::parseNew() {
1671  assert(FormatTok->is(tok::kw_new) && "'new' expected");
1672  nextToken();
1673  if (Style.Language != FormatStyle::LK_Java)
1674  return;
1675 
1676  // In Java, we can parse everything up to the parens, which aren't optional.
1677  do {
1678  // There should not be a ;, { or } before the new's open paren.
1679  if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1680  return;
1681 
1682  // Consume the parens.
1683  if (FormatTok->is(tok::l_paren)) {
1684  parseParens();
1685 
1686  // If there is a class body of an anonymous class, consume that as child.
1687  if (FormatTok->is(tok::l_brace))
1688  parseChildBlock();
1689  return;
1690  }
1691  nextToken();
1692  } while (!eof());
1693 }
1694 
1695 void UnwrappedLineParser::parseForOrWhileLoop() {
1696  assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
1697  "'for', 'while' or foreach macro expected");
1698  nextToken();
1699  // JS' for await ( ...
1700  if (Style.Language == FormatStyle::LK_JavaScript &&
1701  FormatTok->is(Keywords.kw_await))
1702  nextToken();
1703  if (FormatTok->Tok.is(tok::l_paren))
1704  parseParens();
1705  if (FormatTok->Tok.is(tok::l_brace)) {
1706  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1707  parseBlock(/*MustBeDeclaration=*/false);
1708  addUnwrappedLine();
1709  } else {
1710  addUnwrappedLine();
1711  ++Line->Level;
1712  parseStructuralElement();
1713  --Line->Level;
1714  }
1715 }
1716 
1717 void UnwrappedLineParser::parseDoWhile() {
1718  assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1719  nextToken();
1720  if (FormatTok->Tok.is(tok::l_brace)) {
1721  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1722  parseBlock(/*MustBeDeclaration=*/false);
1723  if (Style.BraceWrapping.IndentBraces)
1724  addUnwrappedLine();
1725  } else {
1726  addUnwrappedLine();
1727  ++Line->Level;
1728  parseStructuralElement();
1729  --Line->Level;
1730  }
1731 
1732  // FIXME: Add error handling.
1733  if (!FormatTok->Tok.is(tok::kw_while)) {
1734  addUnwrappedLine();
1735  return;
1736  }
1737 
1738  nextToken();
1739  parseStructuralElement();
1740 }
1741 
1742 void UnwrappedLineParser::parseLabel() {
1743  nextToken();
1744  unsigned OldLineLevel = Line->Level;
1745  if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1746  --Line->Level;
1747  if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1748  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1749  parseBlock(/*MustBeDeclaration=*/false);
1750  if (FormatTok->Tok.is(tok::kw_break)) {
1752  addUnwrappedLine();
1753  parseStructuralElement();
1754  }
1755  addUnwrappedLine();
1756  } else {
1757  if (FormatTok->is(tok::semi))
1758  nextToken();
1759  addUnwrappedLine();
1760  }
1761  Line->Level = OldLineLevel;
1762  if (FormatTok->isNot(tok::l_brace)) {
1763  parseStructuralElement();
1764  addUnwrappedLine();
1765  }
1766 }
1767 
1768 void UnwrappedLineParser::parseCaseLabel() {
1769  assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1770  // FIXME: fix handling of complex expressions here.
1771  do {
1772  nextToken();
1773  } while (!eof() && !FormatTok->Tok.is(tok::colon));
1774  parseLabel();
1775 }
1776 
1777 void UnwrappedLineParser::parseSwitch() {
1778  assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
1779  nextToken();
1780  if (FormatTok->Tok.is(tok::l_paren))
1781  parseParens();
1782  if (FormatTok->Tok.is(tok::l_brace)) {
1783  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1784  parseBlock(/*MustBeDeclaration=*/false);
1785  addUnwrappedLine();
1786  } else {
1787  addUnwrappedLine();
1788  ++Line->Level;
1789  parseStructuralElement();
1790  --Line->Level;
1791  }
1792 }
1793 
1794 void UnwrappedLineParser::parseAccessSpecifier() {
1795  nextToken();
1796  // Understand Qt's slots.
1797  if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
1798  nextToken();
1799  // Otherwise, we don't know what it is, and we'd better keep the next token.
1800  if (FormatTok->Tok.is(tok::colon))
1801  nextToken();
1802  addUnwrappedLine();
1803 }
1804 
1805 bool UnwrappedLineParser::parseEnum() {
1806  // Won't be 'enum' for NS_ENUMs.
1807  if (FormatTok->Tok.is(tok::kw_enum))
1808  nextToken();
1809 
1810  // In TypeScript, "enum" can also be used as property name, e.g. in interface
1811  // declarations. An "enum" keyword followed by a colon would be a syntax
1812  // error and thus assume it is just an identifier.
1813  if (Style.Language == FormatStyle::LK_JavaScript &&
1814  FormatTok->isOneOf(tok::colon, tok::question))
1815  return false;
1816 
1817  // Eat up enum class ...
1818  if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
1819  nextToken();
1820 
1821  while (FormatTok->Tok.getIdentifierInfo() ||
1822  FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
1823  tok::greater, tok::comma, tok::question)) {
1824  nextToken();
1825  // We can have macros or attributes in between 'enum' and the enum name.
1826  if (FormatTok->is(tok::l_paren))
1827  parseParens();
1828  if (FormatTok->is(tok::identifier)) {
1829  nextToken();
1830  // If there are two identifiers in a row, this is likely an elaborate
1831  // return type. In Java, this can be "implements", etc.
1832  if (Style.isCpp() && FormatTok->is(tok::identifier))
1833  return false;
1834  }
1835  }
1836 
1837  // Just a declaration or something is wrong.
1838  if (FormatTok->isNot(tok::l_brace))
1839  return true;
1840  FormatTok->BlockKind = BK_Block;
1841 
1842  if (Style.Language == FormatStyle::LK_Java) {
1843  // Java enums are different.
1844  parseJavaEnumBody();
1845  return true;
1846  }
1847  if (Style.Language == FormatStyle::LK_Proto) {
1848  parseBlock(/*MustBeDeclaration=*/true);
1849  return true;
1850  }
1851 
1852  // Parse enum body.
1853  nextToken();
1854  bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
1855  if (HasError) {
1856  if (FormatTok->is(tok::semi))
1857  nextToken();
1858  addUnwrappedLine();
1859  }
1860  return true;
1861 
1862  // There is no addUnwrappedLine() here so that we fall through to parsing a
1863  // structural element afterwards. Thus, in "enum A {} n, m;",
1864  // "} n, m;" will end up in one unwrapped line.
1865 }
1866 
1867 void UnwrappedLineParser::parseJavaEnumBody() {
1868  // Determine whether the enum is simple, i.e. does not have a semicolon or
1869  // constants with class bodies. Simple enums can be formatted like braced
1870  // lists, contracted to a single line, etc.
1871  unsigned StoredPosition = Tokens->getPosition();
1872  bool IsSimple = true;
1873  FormatToken *Tok = Tokens->getNextToken();
1874  while (Tok) {
1875  if (Tok->is(tok::r_brace))
1876  break;
1877  if (Tok->isOneOf(tok::l_brace, tok::semi)) {
1878  IsSimple = false;
1879  break;
1880  }
1881  // FIXME: This will also mark enums with braces in the arguments to enum
1882  // constants as "not simple". This is probably fine in practice, though.
1883  Tok = Tokens->getNextToken();
1884  }
1885  FormatTok = Tokens->setPosition(StoredPosition);
1886 
1887  if (IsSimple) {
1888  nextToken();
1889  parseBracedList();
1890  addUnwrappedLine();
1891  return;
1892  }
1893 
1894  // Parse the body of a more complex enum.
1895  // First add a line for everything up to the "{".
1896  nextToken();
1897  addUnwrappedLine();
1898  ++Line->Level;
1899 
1900  // Parse the enum constants.
1901  while (FormatTok) {
1902  if (FormatTok->is(tok::l_brace)) {
1903  // Parse the constant's class body.
1904  parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1905  /*MunchSemi=*/false);
1906  } else if (FormatTok->is(tok::l_paren)) {
1907  parseParens();
1908  } else if (FormatTok->is(tok::comma)) {
1909  nextToken();
1910  addUnwrappedLine();
1911  } else if (FormatTok->is(tok::semi)) {
1912  nextToken();
1913  addUnwrappedLine();
1914  break;
1915  } else if (FormatTok->is(tok::r_brace)) {
1916  addUnwrappedLine();
1917  break;
1918  } else {
1919  nextToken();
1920  }
1921  }
1922 
1923  // Parse the class body after the enum's ";" if any.
1924  parseLevel(/*HasOpeningBrace=*/true);
1925  nextToken();
1926  --Line->Level;
1927  addUnwrappedLine();
1928 }
1929 
1930 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
1931  const FormatToken &InitialToken = *FormatTok;
1932  nextToken();
1933 
1934  // The actual identifier can be a nested name specifier, and in macros
1935  // it is often token-pasted.
1936  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
1937  tok::kw___attribute, tok::kw___declspec,
1938  tok::kw_alignas) ||
1939  ((Style.Language == FormatStyle::LK_Java ||
1941  FormatTok->isOneOf(tok::period, tok::comma))) {
1942  bool IsNonMacroIdentifier =
1943  FormatTok->is(tok::identifier) &&
1944  FormatTok->TokenText != FormatTok->TokenText.upper();
1945  nextToken();
1946  // We can have macros or attributes in between 'class' and the class name.
1947  if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
1948  parseParens();
1949  }
1950 
1951  // Note that parsing away template declarations here leads to incorrectly
1952  // accepting function declarations as record declarations.
1953  // In general, we cannot solve this problem. Consider:
1954  // class A<int> B() {}
1955  // which can be a function definition or a class definition when B() is a
1956  // macro. If we find enough real-world cases where this is a problem, we
1957  // can parse for the 'template' keyword in the beginning of the statement,
1958  // and thus rule out the record production in case there is no template
1959  // (this would still leave us with an ambiguity between template function
1960  // and class declarations).
1961  if (FormatTok->isOneOf(tok::colon, tok::less)) {
1962  while (!eof()) {
1963  if (FormatTok->is(tok::l_brace)) {
1964  calculateBraceTypes(/*ExpectClassBody=*/true);
1965  if (!tryToParseBracedList())
1966  break;
1967  }
1968  if (FormatTok->Tok.is(tok::semi))
1969  return;
1970  nextToken();
1971  }
1972  }
1973  if (FormatTok->Tok.is(tok::l_brace)) {
1974  if (ParseAsExpr) {
1975  parseChildBlock();
1976  } else {
1977  if (ShouldBreakBeforeBrace(Style, InitialToken))
1978  addUnwrappedLine();
1979 
1980  parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1981  /*MunchSemi=*/false);
1982  }
1983  }
1984  // There is no addUnwrappedLine() here so that we fall through to parsing a
1985  // structural element afterwards. Thus, in "class A {} n, m;",
1986  // "} n, m;" will end up in one unwrapped line.
1987 }
1988 
1989 void UnwrappedLineParser::parseObjCProtocolList() {
1990  assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
1991  do
1992  nextToken();
1993  while (!eof() && FormatTok->Tok.isNot(tok::greater));
1994  nextToken(); // Skip '>'.
1995 }
1996 
1997 void UnwrappedLineParser::parseObjCUntilAtEnd() {
1998  do {
1999  if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
2000  nextToken();
2001  addUnwrappedLine();
2002  break;
2003  }
2004  if (FormatTok->is(tok::l_brace)) {
2005  parseBlock(/*MustBeDeclaration=*/false);
2006  // In ObjC interfaces, nothing should be following the "}".
2007  addUnwrappedLine();
2008  } else if (FormatTok->is(tok::r_brace)) {
2009  // Ignore stray "}". parseStructuralElement doesn't consume them.
2010  nextToken();
2011  addUnwrappedLine();
2012  } else {
2013  parseStructuralElement();
2014  }
2015  } while (!eof());
2016 }
2017 
2018 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
2019  nextToken();
2020  nextToken(); // interface name
2021 
2022  // @interface can be followed by either a base class, or a category.
2023  if (FormatTok->Tok.is(tok::colon)) {
2024  nextToken();
2025  nextToken(); // base class name
2026  } else if (FormatTok->Tok.is(tok::l_paren))
2027  // Skip category, if present.
2028  parseParens();
2029 
2030  if (FormatTok->Tok.is(tok::less))
2031  parseObjCProtocolList();
2032 
2033  if (FormatTok->Tok.is(tok::l_brace)) {
2035  addUnwrappedLine();
2036  parseBlock(/*MustBeDeclaration=*/true);
2037  }
2038 
2039  // With instance variables, this puts '}' on its own line. Without instance
2040  // variables, this ends the @interface line.
2041  addUnwrappedLine();
2042 
2043  parseObjCUntilAtEnd();
2044 }
2045 
2046 void UnwrappedLineParser::parseObjCProtocol() {
2047  nextToken();
2048  nextToken(); // protocol name
2049 
2050  if (FormatTok->Tok.is(tok::less))
2051  parseObjCProtocolList();
2052 
2053  // Check for protocol declaration.
2054  if (FormatTok->Tok.is(tok::semi)) {
2055  nextToken();
2056  return addUnwrappedLine();
2057  }
2058 
2059  addUnwrappedLine();
2060  parseObjCUntilAtEnd();
2061 }
2062 
2063 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
2064  bool IsImport = FormatTok->is(Keywords.kw_import);
2065  assert(IsImport || FormatTok->is(tok::kw_export));
2066  nextToken();
2067 
2068  // Consume the "default" in "export default class/function".
2069  if (FormatTok->is(tok::kw_default))
2070  nextToken();
2071 
2072  // Consume "async function", "function" and "default function", so that these
2073  // get parsed as free-standing JS functions, i.e. do not require a trailing
2074  // semicolon.
2075  if (FormatTok->is(Keywords.kw_async))
2076  nextToken();
2077  if (FormatTok->is(Keywords.kw_function)) {
2078  nextToken();
2079  return;
2080  }
2081 
2082  // For imports, `export *`, `export {...}`, consume the rest of the line up
2083  // to the terminating `;`. For everything else, just return and continue
2084  // parsing the structural element, i.e. the declaration or expression for
2085  // `export default`.
2086  if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
2087  !FormatTok->isStringLiteral())
2088  return;
2089 
2090  while (!eof()) {
2091  if (FormatTok->is(tok::semi))
2092  return;
2093  if (Line->Tokens.size() == 0) {
2094  // Common issue: Automatic Semicolon Insertion wrapped the line, so the
2095  // import statement should terminate.
2096  return;
2097  }
2098  if (FormatTok->is(tok::l_brace)) {
2099  FormatTok->BlockKind = BK_Block;
2100  nextToken();
2101  parseBracedList();
2102  } else {
2103  nextToken();
2104  }
2105  }
2106 }
2107 
2108 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
2109  StringRef Prefix = "") {
2110  llvm::dbgs() << Prefix << "Line(" << Line.Level << ")"
2111  << (Line.InPPDirective ? " MACRO" : "") << ": ";
2112  for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2113  E = Line.Tokens.end();
2114  I != E; ++I) {
2115  llvm::dbgs() << I->Tok->Tok.getName() << "["
2116  << "T=" << I->Tok->Type
2117  << ", OC=" << I->Tok->OriginalColumn << "] ";
2118  }
2119  for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2120  E = Line.Tokens.end();
2121  I != E; ++I) {
2122  const UnwrappedLineNode &Node = *I;
2124  I = Node.Children.begin(),
2125  E = Node.Children.end();
2126  I != E; ++I) {
2127  printDebugInfo(*I, "\nChild: ");
2128  }
2129  }
2130  llvm::dbgs() << "\n";
2131 }
2132 
2133 void UnwrappedLineParser::addUnwrappedLine() {
2134  if (Line->Tokens.empty())
2135  return;
2136  DEBUG({
2137  if (CurrentLines == &Lines)
2138  printDebugInfo(*Line);
2139  });
2140  CurrentLines->push_back(std::move(*Line));
2141  Line->Tokens.clear();
2142  Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
2143  if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2144  CurrentLines->append(
2145  std::make_move_iterator(PreprocessorDirectives.begin()),
2146  std::make_move_iterator(PreprocessorDirectives.end()));
2147  PreprocessorDirectives.clear();
2148  }
2149 }
2150 
2151 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
2152 
2153 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
2154  return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
2155  FormatTok.NewlinesBefore > 0;
2156 }
2157 
2158 // Checks if \p FormatTok is a line comment that continues the line comment
2159 // section on \p Line.
2160 static bool continuesLineCommentSection(const FormatToken &FormatTok,
2161  const UnwrappedLine &Line,
2162  llvm::Regex &CommentPragmasRegex) {
2163  if (Line.Tokens.empty())
2164  return false;
2165 
2166  StringRef IndentContent = FormatTok.TokenText;
2167  if (FormatTok.TokenText.startswith("//") ||
2168  FormatTok.TokenText.startswith("/*"))
2169  IndentContent = FormatTok.TokenText.substr(2);
2170  if (CommentPragmasRegex.match(IndentContent))
2171  return false;
2172 
2173  // If Line starts with a line comment, then FormatTok continues the comment
2174  // section if its original column is greater or equal to the original start
2175  // column of the line.
2176  //
2177  // Define the min column token of a line as follows: if a line ends in '{' or
2178  // contains a '{' followed by a line comment, then the min column token is
2179  // that '{'. Otherwise, the min column token of the line is the first token of
2180  // the line.
2181  //
2182  // If Line starts with a token other than a line comment, then FormatTok
2183  // continues the comment section if its original column is greater than the
2184  // original start column of the min column token of the line.
2185  //
2186  // For example, the second line comment continues the first in these cases:
2187  //
2188  // // first line
2189  // // second line
2190  //
2191  // and:
2192  //
2193  // // first line
2194  // // second line
2195  //
2196  // and:
2197  //
2198  // int i; // first line
2199  // // second line
2200  //
2201  // and:
2202  //
2203  // do { // first line
2204  // // second line
2205  // int i;
2206  // } while (true);
2207  //
2208  // and:
2209  //
2210  // enum {
2211  // a, // first line
2212  // // second line
2213  // b
2214  // };
2215  //
2216  // The second line comment doesn't continue the first in these cases:
2217  //
2218  // // first line
2219  // // second line
2220  //
2221  // and:
2222  //
2223  // int i; // first line
2224  // // second line
2225  //
2226  // and:
2227  //
2228  // do { // first line
2229  // // second line
2230  // int i;
2231  // } while (true);
2232  //
2233  // and:
2234  //
2235  // enum {
2236  // a, // first line
2237  // // second line
2238  // };
2239  const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
2240 
2241  // Scan for '{//'. If found, use the column of '{' as a min column for line
2242  // comment section continuation.
2243  const FormatToken *PreviousToken = nullptr;
2244  for (const UnwrappedLineNode &Node : Line.Tokens) {
2245  if (PreviousToken && PreviousToken->is(tok::l_brace) &&
2246  isLineComment(*Node.Tok)) {
2247  MinColumnToken = PreviousToken;
2248  break;
2249  }
2250  PreviousToken = Node.Tok;
2251 
2252  // Grab the last newline preceding a token in this unwrapped line.
2253  if (Node.Tok->NewlinesBefore > 0) {
2254  MinColumnToken = Node.Tok;
2255  }
2256  }
2257  if (PreviousToken && PreviousToken->is(tok::l_brace)) {
2258  MinColumnToken = PreviousToken;
2259  }
2260 
2261  return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
2262  MinColumnToken);
2263 }
2264 
2265 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
2266  bool JustComments = Line->Tokens.empty();
2268  I = CommentsBeforeNextToken.begin(),
2269  E = CommentsBeforeNextToken.end();
2270  I != E; ++I) {
2271  // Line comments that belong to the same line comment section are put on the
2272  // same line since later we might want to reflow content between them.
2273  // Additional fine-grained breaking of line comment sections is controlled
2274  // by the class BreakableLineCommentSection in case it is desirable to keep
2275  // several line comment sections in the same unwrapped line.
2276  //
2277  // FIXME: Consider putting separate line comment sections as children to the
2278  // unwrapped line instead.
2280  continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
2281  if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
2282  addUnwrappedLine();
2283  pushToken(*I);
2284  }
2285  if (NewlineBeforeNext && JustComments)
2286  addUnwrappedLine();
2287  CommentsBeforeNextToken.clear();
2288 }
2289 
2290 void UnwrappedLineParser::nextToken() {
2291  if (eof())
2292  return;
2293  flushComments(isOnNewLine(*FormatTok));
2294  pushToken(FormatTok);
2295  if (Style.Language != FormatStyle::LK_JavaScript)
2296  readToken();
2297  else
2298  readTokenWithJavaScriptASI();
2299 }
2300 
2301 const FormatToken *UnwrappedLineParser::getPreviousToken() {
2302  // FIXME: This is a dirty way to access the previous token. Find a better
2303  // solution.
2304  if (!Line || Line->Tokens.empty())
2305  return nullptr;
2306  return Line->Tokens.back().Tok;
2307 }
2308 
2309 void UnwrappedLineParser::distributeComments(
2310  const SmallVectorImpl<FormatToken *> &Comments,
2311  const FormatToken *NextTok) {
2312  // Whether or not a line comment token continues a line is controlled by
2313  // the method continuesLineCommentSection, with the following caveat:
2314  //
2315  // Define a trail of Comments to be a nonempty proper postfix of Comments such
2316  // that each comment line from the trail is aligned with the next token, if
2317  // the next token exists. If a trail exists, the beginning of the maximal
2318  // trail is marked as a start of a new comment section.
2319  //
2320  // For example in this code:
2321  //
2322  // int a; // line about a
2323  // // line 1 about b
2324  // // line 2 about b
2325  // int b;
2326  //
2327  // the two lines about b form a maximal trail, so there are two sections, the
2328  // first one consisting of the single comment "// line about a" and the
2329  // second one consisting of the next two comments.
2330  if (Comments.empty())
2331  return;
2332  bool ShouldPushCommentsInCurrentLine = true;
2333  bool HasTrailAlignedWithNextToken = false;
2334  unsigned StartOfTrailAlignedWithNextToken = 0;
2335  if (NextTok) {
2336  // We are skipping the first element intentionally.
2337  for (unsigned i = Comments.size() - 1; i > 0; --i) {
2338  if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
2339  HasTrailAlignedWithNextToken = true;
2340  StartOfTrailAlignedWithNextToken = i;
2341  }
2342  }
2343  }
2344  for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
2345  FormatToken *FormatTok = Comments[i];
2346  if (HasTrailAlignedWithNextToken &&
2347  i == StartOfTrailAlignedWithNextToken) {
2348  FormatTok->ContinuesLineCommentSection = false;
2349  } else {
2350  FormatTok->ContinuesLineCommentSection =
2351  continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
2352  }
2353  if (!FormatTok->ContinuesLineCommentSection &&
2354  (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
2355  ShouldPushCommentsInCurrentLine = false;
2356  }
2357  if (ShouldPushCommentsInCurrentLine) {
2358  pushToken(FormatTok);
2359  } else {
2360  CommentsBeforeNextToken.push_back(FormatTok);
2361  }
2362  }
2363 }
2364 
2365 void UnwrappedLineParser::readToken() {
2366  SmallVector<FormatToken *, 1> Comments;
2367  do {
2368  FormatTok = Tokens->getNextToken();
2369  assert(FormatTok);
2370  while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
2371  (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
2372  distributeComments(Comments, FormatTok);
2373  Comments.clear();
2374  // If there is an unfinished unwrapped line, we flush the preprocessor
2375  // directives only after that unwrapped line was finished later.
2376  bool SwitchToPreprocessorLines = !Line->Tokens.empty();
2377  ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
2378  // Comments stored before the preprocessor directive need to be output
2379  // before the preprocessor directive, at the same level as the
2380  // preprocessor directive, as we consider them to apply to the directive.
2381  flushComments(isOnNewLine(*FormatTok));
2382  parsePPDirective();
2383  }
2384  while (FormatTok->Type == TT_ConflictStart ||
2385  FormatTok->Type == TT_ConflictEnd ||
2386  FormatTok->Type == TT_ConflictAlternative) {
2387  if (FormatTok->Type == TT_ConflictStart) {
2388  conditionalCompilationStart(/*Unreachable=*/false);
2389  } else if (FormatTok->Type == TT_ConflictAlternative) {
2390  conditionalCompilationAlternative();
2391  } else if (FormatTok->Type == TT_ConflictEnd) {
2392  conditionalCompilationEnd();
2393  }
2394  FormatTok = Tokens->getNextToken();
2395  FormatTok->MustBreakBefore = true;
2396  }
2397 
2398  if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) &&
2399  !Line->InPPDirective) {
2400  continue;
2401  }
2402 
2403  if (!FormatTok->Tok.is(tok::comment)) {
2404  distributeComments(Comments, FormatTok);
2405  Comments.clear();
2406  return;
2407  }
2408 
2409  Comments.push_back(FormatTok);
2410  } while (!eof());
2411 
2412  distributeComments(Comments, nullptr);
2413  Comments.clear();
2414 }
2415 
2416 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
2417  Line->Tokens.push_back(UnwrappedLineNode(Tok));
2418  if (MustBreakBeforeNextToken) {
2419  Line->Tokens.back().Tok->MustBreakBefore = true;
2420  MustBreakBeforeNextToken = false;
2421  }
2422 }
2423 
2424 } // end namespace format
2425 } // end namespace clang
int Position
static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
SmallVector< UnwrappedLine, 0 > Children
static bool continuesLineCommentSection(const FormatToken &FormatTok, const UnwrappedLine &Line, llvm::Regex &CommentPragmasRegex)
bool AfterUnion
Wrap union definitions.
Definition: Format.h:676
Indent in all namespaces.
Definition: Format.h:1208
static bool isIIFE(const UnwrappedLine &Line, const AdditionalKeywords &Keywords)
Token Tok
The Token.
Definition: FormatToken.h:123
Should be used for Protocol Buffers (https://developers.google.com/protocol-buffers/).
Definition: Format.h:1121
bool IndentCaseLabels
Indent case labels one level from the switch statement.
Definition: Format.h:1025
static LLVM_ATTRIBUTE_UNUSED void printDebugInfo(const UnwrappedLine &Line, StringRef Prefix="")
StringRef P
bool isOneOf(A K1, B K2) const
Definition: FormatToken.h:305
Parser - This implements a parser for the C family of languages.
Definition: Parser.h:57
static bool mustBeJSIdent(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
UnwrappedLineParser(const FormatStyle &Style, const AdditionalKeywords &Keywords, ArrayRef< FormatToken * > Tokens, UnwrappedLineConsumer &Callback)
FormatToken *& ResetToken
virtual void consumeUnwrappedLine(const UnwrappedLine &Line)=0
unsigned NewlinesBefore
The number of newlines immediately before the Token.
Definition: FormatToken.h:129
unsigned Level
The indent level of the UnwrappedLine.
bool AfterObjCDeclaration
Wrap ObjC definitions (@autoreleasepool, interfaces, ..).
Definition: Format.h:648
bool IndentBraces
Indent the wrapped braces themselves.
Definition: Format.h:708
Should be used for Java.
Definition: Format.h:1114
static bool tokenCanStartNewLine(const clang::Token &Tok)
Token - This structure provides full information about a lexed token.
Definition: Token.h:35
NamespaceIndentationKind NamespaceIndentation
The indentation used for namespaces.
Definition: Format.h:1212
static bool isGoogScope(const UnwrappedLine &Line)
virtual FormatToken * getNextToken()=0
An unwrapped line is a sequence of Token, that we would like to put on a single line if there was no ...
detail::InMemoryDirectory::const_iterator I
CompoundStatementIndenter(UnwrappedLineParser *Parser, const FormatStyle &Style, unsigned &LineLevel)
ScopedLineState(UnwrappedLineParser &Parser, bool SwitchToPreprocessorLines=false)
std::vector< bool > & Stack
FormatTokenSource *& TokenSource
std::list< UnwrappedLineNode > Tokens
The Tokens comprising this UnwrappedLine.
Should be used for JavaScript.
Definition: Format.h:1116
ContinuationIndenter * Indenter
MatchFinder::MatchCallback * Callback
StateNode * Previous
static const size_t kInvalidIndex
bool InPPDirective
Whether this UnwrappedLine is part of a preprocessor directive.
BraceWrappingFlags BraceWrapping
Control of individual brace wrapping cases.
Definition: Format.h:749
bool AfterFunction
Wrap function definitions.
Definition: Format.h:630
A wrapper around a Token storing information about the whitespace characters preceding it...
Definition: FormatToken.h:119
FormatToken * Token
bool isNot(tok::TokenKind K) const
Definition: Token.h:96
Keywords(IdentTable)
#define false
Definition: stdbool.h:33
SmallVectorImpl< AnnotatedLine * >::const_iterator Next
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
Definition: FormatToken.h:621
ArrayRef< FormatToken * > Tokens
This file contains the declaration of the UnwrappedLineParser, which turns a stream of tokens into Un...
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:25
StringRef TokenText
The raw text of the token.
Definition: FormatToken.h:167
static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
virtual FormatToken * setPosition(unsigned Position)=0
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:46
static bool ShouldBreakBeforeBrace(const FormatStyle &Style, const FormatToken &InitialToken)
Indent only in inner namespaces (nested in other namespaces).
Definition: Format.h:1198
LanguageKind Language
Language, this format style is targeted at.
Definition: Format.h:1131
ast_type_traits::DynTypedNode Node
if(T->getSizeExpr()) TRY_TO(TraverseStmt(T-> getSizeExpr()))
UnwrappedLine & Line
Should be used for TableGen code.
Definition: Format.h:1123
detail::InMemoryDirectory::const_iterator E
FormatToken * PreviousToken
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Definition: Token.h:113
bool AfterStruct
Wrap struct definitions.
Definition: Format.h:662
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:296
virtual unsigned getPosition()=0
bool AfterControlStatement
Wrap control statements (if/for/while/switch/..).
Definition: Format.h:602
unsigned kind
All of the diagnostics that can be emitted by the frontend.
Definition: DiagnosticIDs.h:44
unsigned PreviousLineLevel
bool MustBreakBefore
Whether there must be a line break before this token.
Definition: FormatToken.h:161
FormatTokenSource * PreviousTokenSource
bool AfterClass
Wrap class definitions.
Definition: Format.h:584
Should be used for Protocol Buffer messages in text format (https://developers.google.com/protocol-buffers/).
Definition: Format.h:1126
StringRef Text
Definition: Format.cpp:1302
bool isCpp() const
Definition: Format.h:1128
bool ContinuesLineCommentSection
Does this line comment continue a line comment section?
Definition: FormatToken.h:273
bool AfterNamespace
Wrap namespace definitions.
Definition: Format.h:646
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:177