Bug Summary

File:clang/lib/Format/TokenAnnotator.cpp
Warning:line 2370, column 13
Access to field 'OriginalColumn' results in a dereference of a null pointer (loaded from field 'First')

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name TokenAnnotator.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -relaxed-aliasing -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20220119111520+da61cb019eb2/build-llvm -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I tools/clang/lib/Format -I /build/llvm-toolchain-snapshot-14~++20220119111520+da61cb019eb2/clang/lib/Format -I /build/llvm-toolchain-snapshot-14~++20220119111520+da61cb019eb2/clang/include -I tools/clang/include -I include -I /build/llvm-toolchain-snapshot-14~++20220119111520+da61cb019eb2/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem 
/usr/lib/llvm-14/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/llvm-toolchain-snapshot-14~++20220119111520+da61cb019eb2/build-llvm=build-llvm -fmacro-prefix-map=/build/llvm-toolchain-snapshot-14~++20220119111520+da61cb019eb2/= -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-14~++20220119111520+da61cb019eb2/build-llvm=build-llvm -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-14~++20220119111520+da61cb019eb2/= -O3 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20220119111520+da61cb019eb2/build-llvm -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20220119111520+da61cb019eb2/build-llvm=build-llvm -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20220119111520+da61cb019eb2/= -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-01-19-134126-35450-1 -x c++ /build/llvm-toolchain-snapshot-14~++20220119111520+da61cb019eb2/clang/lib/Format/TokenAnnotator.cpp
1//===--- TokenAnnotator.cpp - Format C++ code -----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements a token annotator, i.e. creates
11/// \c AnnotatedTokens out of \c FormatTokens with required extra information.
12///
13//===----------------------------------------------------------------------===//
14
15#include "TokenAnnotator.h"
16#include "FormatToken.h"
17#include "clang/Basic/SourceManager.h"
18#include "clang/Basic/TokenKinds.h"
19#include "llvm/ADT/SmallPtrSet.h"
20#include "llvm/Support/Debug.h"
21
// Name used to tag this file's debug output.
#define DEBUG_TYPE "format-token-annotator"
23
24namespace clang {
25namespace format {
26
27namespace {
28
29/// Returns \c true if the token can be used as an identifier in
30/// an Objective-C \c \@selector, \c false otherwise.
31///
32/// Because getFormattingLangOpts() always lexes source code as
33/// Objective-C++, C++ keywords like \c new and \c delete are
34/// lexed as tok::kw_*, not tok::identifier, even for Objective-C.
35///
36/// For Objective-C and Objective-C++, both identifiers and keywords
37/// are valid inside @selector(...) (or a macro which
38/// invokes @selector(...)). So, we allow treat any identifier or
39/// keyword as a potential Objective-C selector component.
40static bool canBeObjCSelectorComponent(const FormatToken &Tok) {
41 return Tok.Tok.getIdentifierInfo() != nullptr;
42}
43
44/// With `Left` being '(', check if we're at either `[...](` or
45/// `[...]<...>(`, where the [ opens a lambda capture list.
46static bool isLambdaParameterList(const FormatToken *Left) {
47 // Skip <...> if present.
48 if (Left->Previous && Left->Previous->is(tok::greater) &&
49 Left->Previous->MatchingParen &&
50 Left->Previous->MatchingParen->is(TT_TemplateOpener))
51 Left = Left->Previous->MatchingParen;
52
53 // Check for `[...]`.
54 return Left->Previous && Left->Previous->is(tok::r_square) &&
55 Left->Previous->MatchingParen &&
56 Left->Previous->MatchingParen->is(TT_LambdaLSquare);
57}
58
59/// Returns \c true if the token is followed by a boolean condition, \c false
60/// otherwise.
61static bool isKeywordWithCondition(const FormatToken &Tok) {
62 return Tok.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, tok::kw_switch,
63 tok::kw_constexpr, tok::kw_catch);
64}
65
66/// A parser that gathers additional information about tokens.
67///
68/// The \c TokenAnnotator tries to match parentheses and square brackets and
69/// store the parenthesis levels. It also tries to resolve matching "<" and ">"
70/// into template parameter lists.
71class AnnotatingParser {
72public:
73 AnnotatingParser(const FormatStyle &Style, AnnotatedLine &Line,
74 const AdditionalKeywords &Keywords)
75 : Style(Style), Line(Line), CurrentToken(Line.First), AutoFound(false),
76 Keywords(Keywords) {
77 Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false));
78 resetTokenMetadata();
79 }
80
81private:
82 bool parseAngle() {
83 if (!CurrentToken || !CurrentToken->Previous)
84 return false;
85 if (NonTemplateLess.count(CurrentToken->Previous))
86 return false;
87
88 const FormatToken &Previous = *CurrentToken->Previous; // The '<'.
89 if (Previous.Previous) {
90 if (Previous.Previous->Tok.isLiteral())
91 return false;
92 if (Previous.Previous->is(tok::r_paren) && Contexts.size() > 1 &&
93 (!Previous.Previous->MatchingParen ||
94 !Previous.Previous->MatchingParen->is(TT_OverloadedOperatorLParen)))
95 return false;
96 }
97
98 FormatToken *Left = CurrentToken->Previous;
99 Left->ParentBracket = Contexts.back().ContextKind;
100 ScopedContextCreator ContextCreator(*this, tok::less, 12);
101
102 // If this angle is in the context of an expression, we need to be more
103 // hesitant to detect it as opening template parameters.
104 bool InExprContext = Contexts.back().IsExpression;
105
106 Contexts.back().IsExpression = false;
107 // If there's a template keyword before the opening angle bracket, this is a
108 // template parameter, not an argument.
109 Contexts.back().InTemplateArgument =
110 Left->Previous && Left->Previous->Tok.isNot(tok::kw_template);
111
112 if (Style.Language == FormatStyle::LK_Java &&
113 CurrentToken->is(tok::question))
114 next();
115
116 while (CurrentToken) {
117 if (CurrentToken->is(tok::greater)) {
118 // Try to do a better job at looking for ">>" within the condition of
119 // a statement. Conservatively insert spaces between consecutive ">"
120 // tokens to prevent splitting right bitshift operators and potentially
121 // altering program semantics. This check is overly conservative and
122 // will prevent spaces from being inserted in select nested template
123 // parameter cases, but should not alter program semantics.
124 if (CurrentToken->Next && CurrentToken->Next->is(tok::greater) &&
125 Left->ParentBracket != tok::less &&
126 (isKeywordWithCondition(*Line.First) ||
127 CurrentToken->getStartOfNonWhitespace() ==
128 CurrentToken->Next->getStartOfNonWhitespace().getLocWithOffset(
129 -1)))
130 return false;
131 Left->MatchingParen = CurrentToken;
132 CurrentToken->MatchingParen = Left;
133 // In TT_Proto, we must distignuish between:
134 // map<key, value>
135 // msg < item: data >
136 // msg: < item: data >
137 // In TT_TextProto, map<key, value> does not occur.
138 if (Style.Language == FormatStyle::LK_TextProto ||
139 (Style.Language == FormatStyle::LK_Proto && Left->Previous &&
140 Left->Previous->isOneOf(TT_SelectorName, TT_DictLiteral)))
141 CurrentToken->setType(TT_DictLiteral);
142 else
143 CurrentToken->setType(TT_TemplateCloser);
144 next();
145 return true;
146 }
147 if (CurrentToken->is(tok::question) &&
148 Style.Language == FormatStyle::LK_Java) {
149 next();
150 continue;
151 }
152 if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace) ||
153 (CurrentToken->isOneOf(tok::colon, tok::question) && InExprContext &&
154 !Style.isCSharp() && Style.Language != FormatStyle::LK_Proto &&
155 Style.Language != FormatStyle::LK_TextProto))
156 return false;
157 // If a && or || is found and interpreted as a binary operator, this set
158 // of angles is likely part of something like "a < b && c > d". If the
159 // angles are inside an expression, the ||/&& might also be a binary
160 // operator that was misinterpreted because we are parsing template
161 // parameters.
162 // FIXME: This is getting out of hand, write a decent parser.
163 if (CurrentToken->Previous->isOneOf(tok::pipepipe, tok::ampamp) &&
164 CurrentToken->Previous->is(TT_BinaryOperator) &&
165 Contexts[Contexts.size() - 2].IsExpression &&
166 !Line.startsWith(tok::kw_template))
167 return false;
168 updateParameterCount(Left, CurrentToken);
169 if (Style.Language == FormatStyle::LK_Proto) {
170 if (FormatToken *Previous = CurrentToken->getPreviousNonComment()) {
171 if (CurrentToken->is(tok::colon) ||
172 (CurrentToken->isOneOf(tok::l_brace, tok::less) &&
173 Previous->isNot(tok::colon)))
174 Previous->setType(TT_SelectorName);
175 }
176 }
177 if (!consumeToken())
178 return false;
179 }
180 return false;
181 }
182
183 bool parseUntouchableParens() {
184 while (CurrentToken) {
185 CurrentToken->Finalized = true;
186 switch (CurrentToken->Tok.getKind()) {
187 case tok::l_paren:
188 next();
189 if (!parseUntouchableParens())
190 return false;
191 continue;
192 case tok::r_paren:
193 next();
194 return true;
195 default:
196 // no-op
197 break;
198 }
199 next();
200 }
201 return false;
202 }
203
204 bool parseParens(bool LookForDecls = false) {
205 if (!CurrentToken)
206 return false;
207 FormatToken *Left = CurrentToken->Previous;
208 assert(Left && "Unknown previous token")(static_cast <bool> (Left && "Unknown previous token"
) ? void (0) : __assert_fail ("Left && \"Unknown previous token\""
, "clang/lib/Format/TokenAnnotator.cpp", 208, __extension__ __PRETTY_FUNCTION__
))
;
209 FormatToken *PrevNonComment = Left->getPreviousNonComment();
210 Left->ParentBracket = Contexts.back().ContextKind;
211 ScopedContextCreator ContextCreator(*this, tok::l_paren, 1);
212
213 // FIXME: This is a bit of a hack. Do better.
214 Contexts.back().ColonIsForRangeExpr =
215 Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr;
216
217 if (Left->Previous && Left->Previous->is(TT_UntouchableMacroFunc)) {
218 Left->Finalized = true;
219 return parseUntouchableParens();
220 }
221
222 bool StartsObjCMethodExpr = false;
223 if (FormatToken *MaybeSel = Left->Previous) {
224 // @selector( starts a selector.
225 if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Previous &&
226 MaybeSel->Previous->is(tok::at)) {
227 StartsObjCMethodExpr = true;
228 }
229 }
230
231 if (Left->is(TT_OverloadedOperatorLParen)) {
232 // Find the previous kw_operator token.
233 FormatToken *Prev = Left;
234 while (!Prev->is(tok::kw_operator)) {
235 Prev = Prev->Previous;
236 assert(Prev && "Expect a kw_operator prior to the OperatorLParen!")(static_cast <bool> (Prev && "Expect a kw_operator prior to the OperatorLParen!"
) ? void (0) : __assert_fail ("Prev && \"Expect a kw_operator prior to the OperatorLParen!\""
, "clang/lib/Format/TokenAnnotator.cpp", 236, __extension__ __PRETTY_FUNCTION__
))
;
237 }
238
239 // If faced with "a.operator*(argument)" or "a->operator*(argument)",
240 // i.e. the operator is called as a member function,
241 // then the argument must be an expression.
242 bool OperatorCalledAsMemberFunction =
243 Prev->Previous && Prev->Previous->isOneOf(tok::period, tok::arrow);
244 Contexts.back().IsExpression = OperatorCalledAsMemberFunction;
245 } else if (Style.isJavaScript() &&
246 (Line.startsWith(Keywords.kw_type, tok::identifier) ||
247 Line.startsWith(tok::kw_export, Keywords.kw_type,
248 tok::identifier))) {
249 // type X = (...);
250 // export type X = (...);
251 Contexts.back().IsExpression = false;
252 } else if (Left->Previous &&
253 (Left->Previous->isOneOf(tok::kw_static_assert, tok::kw_while,
254 tok::l_paren, tok::comma) ||
255 Left->Previous->isIf() ||
256 Left->Previous->is(TT_BinaryOperator))) {
257 // static_assert, if and while usually contain expressions.
258 Contexts.back().IsExpression = true;
259 } else if (Style.isJavaScript() && Left->Previous &&
260 (Left->Previous->is(Keywords.kw_function) ||
261 (Left->Previous->endsSequence(tok::identifier,
262 Keywords.kw_function)))) {
263 // function(...) or function f(...)
264 Contexts.back().IsExpression = false;
265 } else if (Style.isJavaScript() && Left->Previous &&
266 Left->Previous->is(TT_JsTypeColon)) {
267 // let x: (SomeType);
268 Contexts.back().IsExpression = false;
269 } else if (isLambdaParameterList(Left)) {
270 // This is a parameter list of a lambda expression.
271 Contexts.back().IsExpression = false;
272 } else if (Line.InPPDirective &&
273 (!Left->Previous || !Left->Previous->is(tok::identifier))) {
274 Contexts.back().IsExpression = true;
275 } else if (Contexts[Contexts.size() - 2].CaretFound) {
276 // This is the parameter list of an ObjC block.
277 Contexts.back().IsExpression = false;
278 } else if (Left->Previous && Left->Previous->is(TT_ForEachMacro)) {
279 // The first argument to a foreach macro is a declaration.
280 Contexts.back().IsForEachMacro = true;
281 Contexts.back().IsExpression = false;
282 } else if (Left->Previous && Left->Previous->MatchingParen &&
283 Left->Previous->MatchingParen->is(TT_ObjCBlockLParen)) {
284 Contexts.back().IsExpression = false;
285 } else if (!Line.MustBeDeclaration && !Line.InPPDirective) {
286 bool IsForOrCatch =
287 Left->Previous && Left->Previous->isOneOf(tok::kw_for, tok::kw_catch);
288 Contexts.back().IsExpression = !IsForOrCatch;
289 }
290
291 // Infer the role of the l_paren based on the previous token if we haven't
292 // detected one one yet.
293 if (PrevNonComment && Left->is(TT_Unknown)) {
294 if (PrevNonComment->is(tok::kw___attribute)) {
295 Left->setType(TT_AttributeParen);
296 } else if (PrevNonComment->isOneOf(TT_TypenameMacro, tok::kw_decltype,
297 tok::kw_typeof, tok::kw__Atomic,
298 tok::kw___underlying_type)) {
299 Left->setType(TT_TypeDeclarationParen);
300 // decltype() and typeof() usually contain expressions.
301 if (PrevNonComment->isOneOf(tok::kw_decltype, tok::kw_typeof))
302 Contexts.back().IsExpression = true;
303 }
304 }
305
306 if (StartsObjCMethodExpr) {
307 Contexts.back().ColonIsObjCMethodExpr = true;
308 Left->setType(TT_ObjCMethodExpr);
309 }
310
311 // MightBeFunctionType and ProbablyFunctionType are used for
312 // function pointer and reference types as well as Objective-C
313 // block types:
314 //
315 // void (*FunctionPointer)(void);
316 // void (&FunctionReference)(void);
317 // void (&&FunctionReference)(void);
318 // void (^ObjCBlock)(void);
319 bool MightBeFunctionType = !Contexts[Contexts.size() - 2].IsExpression;
320 bool ProbablyFunctionType =
321 CurrentToken->isOneOf(tok::star, tok::amp, tok::ampamp, tok::caret);
322 bool HasMultipleLines = false;
323 bool HasMultipleParametersOnALine = false;
324 bool MightBeObjCForRangeLoop =
325 Left->Previous && Left->Previous->is(tok::kw_for);
326 FormatToken *PossibleObjCForInToken = nullptr;
327 while (CurrentToken) {
328 // LookForDecls is set when "if (" has been seen. Check for
329 // 'identifier' '*' 'identifier' followed by not '=' -- this
330 // '*' has to be a binary operator but determineStarAmpUsage() will
331 // categorize it as an unary operator, so set the right type here.
332 if (LookForDecls && CurrentToken->Next) {
333 FormatToken *Prev = CurrentToken->getPreviousNonComment();
334 if (Prev) {
335 FormatToken *PrevPrev = Prev->getPreviousNonComment();
336 FormatToken *Next = CurrentToken->Next;
337 if (PrevPrev && PrevPrev->is(tok::identifier) &&
338 Prev->isOneOf(tok::star, tok::amp, tok::ampamp) &&
339 CurrentToken->is(tok::identifier) && Next->isNot(tok::equal)) {
340 Prev->setType(TT_BinaryOperator);
341 LookForDecls = false;
342 }
343 }
344 }
345
346 if (CurrentToken->Previous->is(TT_PointerOrReference) &&
347 CurrentToken->Previous->Previous->isOneOf(tok::l_paren,
348 tok::coloncolon))
349 ProbablyFunctionType = true;
350 if (CurrentToken->is(tok::comma))
351 MightBeFunctionType = false;
352 if (CurrentToken->Previous->is(TT_BinaryOperator))
353 Contexts.back().IsExpression = true;
354 if (CurrentToken->is(tok::r_paren)) {
355 if (MightBeFunctionType && ProbablyFunctionType && CurrentToken->Next &&
356 (CurrentToken->Next->is(tok::l_paren) ||
357 (CurrentToken->Next->is(tok::l_square) && Line.MustBeDeclaration)))
358 Left->setType(Left->Next->is(tok::caret) ? TT_ObjCBlockLParen
359 : TT_FunctionTypeLParen);
360 Left->MatchingParen = CurrentToken;
361 CurrentToken->MatchingParen = Left;
362
363 if (CurrentToken->Next && CurrentToken->Next->is(tok::l_brace) &&
364 Left->Previous && Left->Previous->is(tok::l_paren)) {
365 // Detect the case where macros are used to generate lambdas or
366 // function bodies, e.g.:
367 // auto my_lambda = MACRO((Type *type, int i) { .. body .. });
368 for (FormatToken *Tok = Left; Tok != CurrentToken; Tok = Tok->Next) {
369 if (Tok->is(TT_BinaryOperator) &&
370 Tok->isOneOf(tok::star, tok::amp, tok::ampamp))
371 Tok->setType(TT_PointerOrReference);
372 }
373 }
374
375 if (StartsObjCMethodExpr) {
376 CurrentToken->setType(TT_ObjCMethodExpr);
377 if (Contexts.back().FirstObjCSelectorName) {
378 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
379 Contexts.back().LongestObjCSelectorName;
380 }
381 }
382
383 if (Left->is(TT_AttributeParen))
384 CurrentToken->setType(TT_AttributeParen);
385 if (Left->is(TT_TypeDeclarationParen))
386 CurrentToken->setType(TT_TypeDeclarationParen);
387 if (Left->Previous && Left->Previous->is(TT_JavaAnnotation))
388 CurrentToken->setType(TT_JavaAnnotation);
389 if (Left->Previous && Left->Previous->is(TT_LeadingJavaAnnotation))
390 CurrentToken->setType(TT_LeadingJavaAnnotation);
391 if (Left->Previous && Left->Previous->is(TT_AttributeSquare))
392 CurrentToken->setType(TT_AttributeSquare);
393
394 if (!HasMultipleLines)
395 Left->setPackingKind(PPK_Inconclusive);
396 else if (HasMultipleParametersOnALine)
397 Left->setPackingKind(PPK_BinPacked);
398 else
399 Left->setPackingKind(PPK_OnePerLine);
400
401 next();
402 return true;
403 }
404 if (CurrentToken->isOneOf(tok::r_square, tok::r_brace))
405 return false;
406
407 if (CurrentToken->is(tok::l_brace))
408 Left->setType(TT_Unknown); // Not TT_ObjCBlockLParen
409 if (CurrentToken->is(tok::comma) && CurrentToken->Next &&
410 !CurrentToken->Next->HasUnescapedNewline &&
411 !CurrentToken->Next->isTrailingComment())
412 HasMultipleParametersOnALine = true;
413 bool ProbablyFunctionTypeLParen =
414 (CurrentToken->is(tok::l_paren) && CurrentToken->Next &&
415 CurrentToken->Next->isOneOf(tok::star, tok::amp, tok::caret));
416 if ((CurrentToken->Previous->isOneOf(tok::kw_const, tok::kw_auto) ||
417 CurrentToken->Previous->isSimpleTypeSpecifier()) &&
418 !(CurrentToken->is(tok::l_brace) ||
419 (CurrentToken->is(tok::l_paren) && !ProbablyFunctionTypeLParen)))
420 Contexts.back().IsExpression = false;
421 if (CurrentToken->isOneOf(tok::semi, tok::colon)) {
422 MightBeObjCForRangeLoop = false;
423 if (PossibleObjCForInToken) {
424 PossibleObjCForInToken->setType(TT_Unknown);
425 PossibleObjCForInToken = nullptr;
426 }
427 }
428 if (MightBeObjCForRangeLoop && CurrentToken->is(Keywords.kw_in)) {
429 PossibleObjCForInToken = CurrentToken;
430 PossibleObjCForInToken->setType(TT_ObjCForIn);
431 }
432 // When we discover a 'new', we set CanBeExpression to 'false' in order to
433 // parse the type correctly. Reset that after a comma.
434 if (CurrentToken->is(tok::comma))
435 Contexts.back().CanBeExpression = true;
436
437 FormatToken *Tok = CurrentToken;
438 if (!consumeToken())
439 return false;
440 updateParameterCount(Left, Tok);
441 if (CurrentToken && CurrentToken->HasUnescapedNewline)
442 HasMultipleLines = true;
443 }
444 return false;
445 }
446
447 bool isCSharpAttributeSpecifier(const FormatToken &Tok) {
448 if (!Style.isCSharp())
449 return false;
450
451 // `identifier[i]` is not an attribute.
452 if (Tok.Previous && Tok.Previous->is(tok::identifier))
453 return false;
454
455 // Chains of [] in `identifier[i][j][k]` are not attributes.
456 if (Tok.Previous && Tok.Previous->is(tok::r_square)) {
457 auto *MatchingParen = Tok.Previous->MatchingParen;
458 if (!MatchingParen || MatchingParen->is(TT_ArraySubscriptLSquare))
459 return false;
460 }
461
462 const FormatToken *AttrTok = Tok.Next;
463 if (!AttrTok)
464 return false;
465
466 // Just an empty declaration e.g. string [].
467 if (AttrTok->is(tok::r_square))
468 return false;
469
470 // Move along the tokens inbetween the '[' and ']' e.g. [STAThread].
471 while (AttrTok && AttrTok->isNot(tok::r_square)) {
472 AttrTok = AttrTok->Next;
473 }
474
475 if (!AttrTok)
476 return false;
477
478 // Allow an attribute to be the only content of a file.
479 AttrTok = AttrTok->Next;
480 if (!AttrTok)
481 return true;
482
483 // Limit this to being an access modifier that follows.
484 if (AttrTok->isOneOf(tok::kw_public, tok::kw_private, tok::kw_protected,
485 tok::comment, tok::kw_class, tok::kw_static,
486 tok::l_square, Keywords.kw_internal)) {
487 return true;
488 }
489
490 // incase its a [XXX] retval func(....
491 if (AttrTok->Next &&
492 AttrTok->Next->startsSequence(tok::identifier, tok::l_paren))
493 return true;
494
495 return false;
496 }
497
498 bool isCpp11AttributeSpecifier(const FormatToken &Tok) {
499 if (!Style.isCpp() || !Tok.startsSequence(tok::l_square, tok::l_square))
500 return false;
501 // The first square bracket is part of an ObjC array literal
502 if (Tok.Previous && Tok.Previous->is(tok::at)) {
503 return false;
504 }
505 const FormatToken *AttrTok = Tok.Next->Next;
506 if (!AttrTok)
507 return false;
508 // C++17 '[[using ns: foo, bar(baz, blech)]]'
509 // We assume nobody will name an ObjC variable 'using'.
510 if (AttrTok->startsSequence(tok::kw_using, tok::identifier, tok::colon))
511 return true;
512 if (AttrTok->isNot(tok::identifier))
513 return false;
514 while (AttrTok && !AttrTok->startsSequence(tok::r_square, tok::r_square)) {
515 // ObjC message send. We assume nobody will use : in a C++11 attribute
516 // specifier parameter, although this is technically valid:
517 // [[foo(:)]].
518 if (AttrTok->is(tok::colon) ||
519 AttrTok->startsSequence(tok::identifier, tok::identifier) ||
520 AttrTok->startsSequence(tok::r_paren, tok::identifier))
521 return false;
522 if (AttrTok->is(tok::ellipsis))
523 return true;
524 AttrTok = AttrTok->Next;
525 }
526 return AttrTok && AttrTok->startsSequence(tok::r_square, tok::r_square);
527 }
528
529 bool parseSquare() {
530 if (!CurrentToken)
531 return false;
532
533 // A '[' could be an index subscript (after an identifier or after
534 // ')' or ']'), it could be the start of an Objective-C method
535 // expression, it could the start of an Objective-C array literal,
536 // or it could be a C++ attribute specifier [[foo::bar]].
537 FormatToken *Left = CurrentToken->Previous;
538 Left->ParentBracket = Contexts.back().ContextKind;
539 FormatToken *Parent = Left->getPreviousNonComment();
540
541 // Cases where '>' is followed by '['.
542 // In C++, this can happen either in array of templates (foo<int>[10])
543 // or when array is a nested template type (unique_ptr<type1<type2>[]>).
544 bool CppArrayTemplates =
545 Style.isCpp() && Parent && Parent->is(TT_TemplateCloser) &&
546 (Contexts.back().CanBeExpression || Contexts.back().IsExpression ||
547 Contexts.back().InTemplateArgument);
548
549 bool IsCpp11AttributeSpecifier = isCpp11AttributeSpecifier(*Left) ||
550 Contexts.back().InCpp11AttributeSpecifier;
551
552 // Treat C# Attributes [STAThread] much like C++ attributes [[...]].
553 bool IsCSharpAttributeSpecifier =
554 isCSharpAttributeSpecifier(*Left) ||
555 Contexts.back().InCSharpAttributeSpecifier;
556
557 bool InsideInlineASM = Line.startsWith(tok::kw_asm);
558 bool IsCppStructuredBinding = Left->isCppStructuredBinding(Style);
559 bool StartsObjCMethodExpr =
560 !IsCppStructuredBinding && !InsideInlineASM && !CppArrayTemplates &&
561 Style.isCpp() && !IsCpp11AttributeSpecifier &&
562 !IsCSharpAttributeSpecifier && Contexts.back().CanBeExpression &&
563 Left->isNot(TT_LambdaLSquare) &&
564 !CurrentToken->isOneOf(tok::l_brace, tok::r_square) &&
565 (!Parent ||
566 Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren,
567 tok::kw_return, tok::kw_throw) ||
568 Parent->isUnaryOperator() ||
569 // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen.
570 Parent->isOneOf(TT_ObjCForIn, TT_CastRParen) ||
571 (getBinOpPrecedence(Parent->Tok.getKind(), true, true) >
572 prec::Unknown));
573 bool ColonFound = false;
574
575 unsigned BindingIncrease = 1;
576 if (IsCppStructuredBinding) {
577 Left->setType(TT_StructuredBindingLSquare);
578 } else if (Left->is(TT_Unknown)) {
579 if (StartsObjCMethodExpr) {
580 Left->setType(TT_ObjCMethodExpr);
581 } else if (InsideInlineASM) {
582 Left->setType(TT_InlineASMSymbolicNameLSquare);
583 } else if (IsCpp11AttributeSpecifier) {
584 Left->setType(TT_AttributeSquare);
585 } else if (Style.isJavaScript() && Parent &&
586 Contexts.back().ContextKind == tok::l_brace &&
587 Parent->isOneOf(tok::l_brace, tok::comma)) {
588 Left->setType(TT_JsComputedPropertyName);
589 } else if (Style.isCpp() && Contexts.back().ContextKind == tok::l_brace &&
590 Parent && Parent->isOneOf(tok::l_brace, tok::comma)) {
591 Left->setType(TT_DesignatedInitializerLSquare);
592 } else if (IsCSharpAttributeSpecifier) {
593 Left->setType(TT_AttributeSquare);
594 } else if (CurrentToken->is(tok::r_square) && Parent &&
595 Parent->is(TT_TemplateCloser)) {
596 Left->setType(TT_ArraySubscriptLSquare);
597 } else if (Style.Language == FormatStyle::LK_Proto ||
598 Style.Language == FormatStyle::LK_TextProto) {
599 // Square braces in LK_Proto can either be message field attributes:
600 //
601 // optional Aaa aaa = 1 [
602 // (aaa) = aaa
603 // ];
604 //
605 // extensions 123 [
606 // (aaa) = aaa
607 // ];
608 //
609 // or text proto extensions (in options):
610 //
611 // option (Aaa.options) = {
612 // [type.type/type] {
613 // key: value
614 // }
615 // }
616 //
617 // or repeated fields (in options):
618 //
619 // option (Aaa.options) = {
620 // keys: [ 1, 2, 3 ]
621 // }
622 //
623 // In the first and the third case we want to spread the contents inside
624 // the square braces; in the second we want to keep them inline.
625 Left->setType(TT_ArrayInitializerLSquare);
626 if (!Left->endsSequence(tok::l_square, tok::numeric_constant,
627 tok::equal) &&
628 !Left->endsSequence(tok::l_square, tok::numeric_constant,
629 tok::identifier) &&
630 !Left->endsSequence(tok::l_square, tok::colon, TT_SelectorName)) {
631 Left->setType(TT_ProtoExtensionLSquare);
632 BindingIncrease = 10;
633 }
634 } else if (!CppArrayTemplates && Parent &&
635 Parent->isOneOf(TT_BinaryOperator, TT_TemplateCloser, tok::at,
636 tok::comma, tok::l_paren, tok::l_square,
637 tok::question, tok::colon, tok::kw_return,
638 // Should only be relevant to JavaScript:
639 tok::kw_default)) {
640 Left->setType(TT_ArrayInitializerLSquare);
641 } else {
642 BindingIncrease = 10;
643 Left->setType(TT_ArraySubscriptLSquare);
644 }
645 }
646
647 ScopedContextCreator ContextCreator(*this, tok::l_square, BindingIncrease);
648 Contexts.back().IsExpression = true;
649 if (Style.isJavaScript() && Parent && Parent->is(TT_JsTypeColon))
650 Contexts.back().IsExpression = false;
651
652 Contexts.back().ColonIsObjCMethodExpr = StartsObjCMethodExpr;
653 Contexts.back().InCpp11AttributeSpecifier = IsCpp11AttributeSpecifier;
654 Contexts.back().InCSharpAttributeSpecifier = IsCSharpAttributeSpecifier;
655
656 while (CurrentToken) {
657 if (CurrentToken->is(tok::r_square)) {
658 if (IsCpp11AttributeSpecifier)
659 CurrentToken->setType(TT_AttributeSquare);
660 if (IsCSharpAttributeSpecifier)
661 CurrentToken->setType(TT_AttributeSquare);
662 else if (((CurrentToken->Next &&
663 CurrentToken->Next->is(tok::l_paren)) ||
664 (CurrentToken->Previous &&
665 CurrentToken->Previous->Previous == Left)) &&
666 Left->is(TT_ObjCMethodExpr)) {
667 // An ObjC method call is rarely followed by an open parenthesis. It
668 // also can't be composed of just one token, unless it's a macro that
669 // will be expanded to more tokens.
670 // FIXME: Do we incorrectly label ":" with this?
671 StartsObjCMethodExpr = false;
672 Left->setType(TT_Unknown);
673 }
674 if (StartsObjCMethodExpr && CurrentToken->Previous != Left) {
675 CurrentToken->setType(TT_ObjCMethodExpr);
676 // If we haven't seen a colon yet, make sure the last identifier
677 // before the r_square is tagged as a selector name component.
678 if (!ColonFound && CurrentToken->Previous &&
679 CurrentToken->Previous->is(TT_Unknown) &&
680 canBeObjCSelectorComponent(*CurrentToken->Previous))
681 CurrentToken->Previous->setType(TT_SelectorName);
682 // determineStarAmpUsage() thinks that '*' '[' is allocating an
683 // array of pointers, but if '[' starts a selector then '*' is a
684 // binary operator.
685 if (Parent && Parent->is(TT_PointerOrReference))
686 Parent->setType(TT_BinaryOperator);
687 }
688 // An arrow after an ObjC method expression is not a lambda arrow.
689 if (CurrentToken->getType() == TT_ObjCMethodExpr &&
690 CurrentToken->Next && CurrentToken->Next->is(TT_LambdaArrow))
691 CurrentToken->Next->setType(TT_Unknown);
692 Left->MatchingParen = CurrentToken;
693 CurrentToken->MatchingParen = Left;
694 // FirstObjCSelectorName is set when a colon is found. This does
695 // not work, however, when the method has no parameters.
696 // Here, we set FirstObjCSelectorName when the end of the method call is
697 // reached, in case it was not set already.
698 if (!Contexts.back().FirstObjCSelectorName) {
699 FormatToken *Previous = CurrentToken->getPreviousNonComment();
700 if (Previous && Previous->is(TT_SelectorName)) {
701 Previous->ObjCSelectorNameParts = 1;
702 Contexts.back().FirstObjCSelectorName = Previous;
703 }
704 } else {
705 Left->ParameterCount =
706 Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts;
707 }
708 if (Contexts.back().FirstObjCSelectorName) {
709 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
710 Contexts.back().LongestObjCSelectorName;
711 if (Left->BlockParameterCount > 1)
712 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 0;
713 }
714 next();
715 return true;
716 }
717 if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace))
718 return false;
719 if (CurrentToken->is(tok::colon)) {
720 if (IsCpp11AttributeSpecifier &&
721 CurrentToken->endsSequence(tok::colon, tok::identifier,
722 tok::kw_using)) {
723 // Remember that this is a [[using ns: foo]] C++ attribute, so we
724 // don't add a space before the colon (unlike other colons).
725 CurrentToken->setType(TT_AttributeColon);
726 } else if (Left->isOneOf(TT_ArraySubscriptLSquare,
727 TT_DesignatedInitializerLSquare)) {
728 Left->setType(TT_ObjCMethodExpr);
729 StartsObjCMethodExpr = true;
730 Contexts.back().ColonIsObjCMethodExpr = true;
731 if (Parent && Parent->is(tok::r_paren))
732 // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen.
733 Parent->setType(TT_CastRParen);
734 }
735 ColonFound = true;
736 }
737 if (CurrentToken->is(tok::comma) && Left->is(TT_ObjCMethodExpr) &&
738 !ColonFound)
739 Left->setType(TT_ArrayInitializerLSquare);
740 FormatToken *Tok = CurrentToken;
741 if (!consumeToken())
742 return false;
743 updateParameterCount(Left, Tok);
744 }
745 return false;
746 }
747
748 bool couldBeInStructArrayInitializer() const {
749 if (Contexts.size() < 2)
750 return false;
751 // We want to back up no more then 2 context levels i.e.
752 // . { { <-
753 const auto End = std::next(Contexts.rbegin(), 2);
754 auto Last = Contexts.rbegin();
755 unsigned Depth = 0;
756 for (; Last != End; ++Last) {
757 if (Last->ContextKind == tok::l_brace)
758 ++Depth;
759 }
760 return Depth == 2 && Last->ContextKind != tok::l_brace;
761 }
762
763 bool parseBrace() {
764 if (CurrentToken) {
765 FormatToken *Left = CurrentToken->Previous;
766 Left->ParentBracket = Contexts.back().ContextKind;
767
768 if (Contexts.back().CaretFound)
769 Left->setType(TT_ObjCBlockLBrace);
770 Contexts.back().CaretFound = false;
771
772 ScopedContextCreator ContextCreator(*this, tok::l_brace, 1);
773 Contexts.back().ColonIsDictLiteral = true;
774 if (Left->is(BK_BracedInit))
775 Contexts.back().IsExpression = true;
776 if (Style.isJavaScript() && Left->Previous &&
777 Left->Previous->is(TT_JsTypeColon))
778 Contexts.back().IsExpression = false;
779
780 unsigned CommaCount = 0;
781 while (CurrentToken) {
782 if (CurrentToken->is(tok::r_brace)) {
783 assert(Left->Optional == CurrentToken->Optional)(static_cast <bool> (Left->Optional == CurrentToken->
Optional) ? void (0) : __assert_fail ("Left->Optional == CurrentToken->Optional"
, "clang/lib/Format/TokenAnnotator.cpp", 783, __extension__ __PRETTY_FUNCTION__
))
;
784 Left->MatchingParen = CurrentToken;
785 CurrentToken->MatchingParen = Left;
786 if (Style.AlignArrayOfStructures != FormatStyle::AIAS_None) {
787 if (Left->ParentBracket == tok::l_brace &&
788 couldBeInStructArrayInitializer() && CommaCount > 0) {
789 Contexts.back().InStructArrayInitializer = true;
790 }
791 }
792 next();
793 return true;
794 }
795 if (CurrentToken->isOneOf(tok::r_paren, tok::r_square))
796 return false;
797 updateParameterCount(Left, CurrentToken);
798 if (CurrentToken->isOneOf(tok::colon, tok::l_brace, tok::less)) {
799 FormatToken *Previous = CurrentToken->getPreviousNonComment();
800 if (Previous->is(TT_JsTypeOptionalQuestion))
801 Previous = Previous->getPreviousNonComment();
802 if ((CurrentToken->is(tok::colon) &&
803 (!Contexts.back().ColonIsDictLiteral || !Style.isCpp())) ||
804 Style.Language == FormatStyle::LK_Proto ||
805 Style.Language == FormatStyle::LK_TextProto) {
806 Left->setType(TT_DictLiteral);
807 if (Previous->Tok.getIdentifierInfo() ||
808 Previous->is(tok::string_literal))
809 Previous->setType(TT_SelectorName);
810 }
811 if (CurrentToken->is(tok::colon) || Style.isJavaScript())
812 Left->setType(TT_DictLiteral);
813 }
814 if (CurrentToken->is(tok::comma)) {
815 if (Style.isJavaScript())
816 Left->setType(TT_DictLiteral);
817 ++CommaCount;
818 }
819 if (!consumeToken())
820 return false;
821 }
822 }
823 return true;
824 }
825
826 void updateParameterCount(FormatToken *Left, FormatToken *Current) {
827 // For ObjC methods, the number of parameters is calculated differently as
828 // method declarations have a different structure (the parameters are not
829 // inside a bracket scope).
830 if (Current->is(tok::l_brace) && Current->is(BK_Block))
831 ++Left->BlockParameterCount;
832 if (Current->is(tok::comma)) {
833 ++Left->ParameterCount;
834 if (!Left->Role)
835 Left->Role.reset(new CommaSeparatedList(Style));
836 Left->Role->CommaFound(Current);
837 } else if (Left->ParameterCount == 0 && Current->isNot(tok::comment)) {
838 Left->ParameterCount = 1;
839 }
840 }
841
842 bool parseConditional() {
843 while (CurrentToken) {
844 if (CurrentToken->is(tok::colon)) {
845 CurrentToken->setType(TT_ConditionalExpr);
846 next();
847 return true;
848 }
849 if (!consumeToken())
850 return false;
851 }
852 return false;
853 }
854
855 bool parseTemplateDeclaration() {
856 if (CurrentToken && CurrentToken->is(tok::less)) {
857 CurrentToken->setType(TT_TemplateOpener);
858 next();
859 if (!parseAngle())
860 return false;
861 if (CurrentToken)
862 CurrentToken->Previous->ClosesTemplateDeclaration = true;
863 return true;
864 }
865 return false;
866 }
867
868 bool consumeToken() {
869 FormatToken *Tok = CurrentToken;
870 next();
871 switch (Tok->Tok.getKind()) {
872 case tok::plus:
873 case tok::minus:
874 if (!Tok->Previous && Line.MustBeDeclaration)
875 Tok->setType(TT_ObjCMethodSpecifier);
876 break;
877 case tok::colon:
878 if (!Tok->Previous)
879 return false;
880 // Colons from ?: are handled in parseConditional().
881 if (Style.isJavaScript()) {
882 if (Contexts.back().ColonIsForRangeExpr || // colon in for loop
883 (Contexts.size() == 1 && // switch/case labels
884 !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) ||
885 Contexts.back().ContextKind == tok::l_paren || // function params
886 Contexts.back().ContextKind == tok::l_square || // array type
887 (!Contexts.back().IsExpression &&
888 Contexts.back().ContextKind == tok::l_brace) || // object type
889 (Contexts.size() == 1 &&
890 Line.MustBeDeclaration)) { // method/property declaration
891 Contexts.back().IsExpression = false;
892 Tok->setType(TT_JsTypeColon);
893 break;
894 }
895 } else if (Style.isCSharp()) {
896 if (Contexts.back().InCSharpAttributeSpecifier) {
897 Tok->setType(TT_AttributeColon);
898 break;
899 }
900 if (Contexts.back().ContextKind == tok::l_paren) {
901 Tok->setType(TT_CSharpNamedArgumentColon);
902 break;
903 }
904 }
905 if (Line.First->isOneOf(Keywords.kw_module, Keywords.kw_import) ||
906 Line.First->startsSequence(tok::kw_export, Keywords.kw_module) ||
907 Line.First->startsSequence(tok::kw_export, Keywords.kw_import)) {
908 Tok->setType(TT_ModulePartitionColon);
909 } else if (Contexts.back().ColonIsDictLiteral ||
910 Style.Language == FormatStyle::LK_Proto ||
911 Style.Language == FormatStyle::LK_TextProto) {
912 Tok->setType(TT_DictLiteral);
913 if (Style.Language == FormatStyle::LK_TextProto) {
914 if (FormatToken *Previous = Tok->getPreviousNonComment())
915 Previous->setType(TT_SelectorName);
916 }
917 } else if (Contexts.back().ColonIsObjCMethodExpr ||
918 Line.startsWith(TT_ObjCMethodSpecifier)) {
919 Tok->setType(TT_ObjCMethodExpr);
920 const FormatToken *BeforePrevious = Tok->Previous->Previous;
921 // Ensure we tag all identifiers in method declarations as
922 // TT_SelectorName.
923 bool UnknownIdentifierInMethodDeclaration =
924 Line.startsWith(TT_ObjCMethodSpecifier) &&
925 Tok->Previous->is(tok::identifier) && Tok->Previous->is(TT_Unknown);
926 if (!BeforePrevious ||
927 // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen.
928 !(BeforePrevious->is(TT_CastRParen) ||
929 (BeforePrevious->is(TT_ObjCMethodExpr) &&
930 BeforePrevious->is(tok::colon))) ||
931 BeforePrevious->is(tok::r_square) ||
932 Contexts.back().LongestObjCSelectorName == 0 ||
933 UnknownIdentifierInMethodDeclaration) {
934 Tok->Previous->setType(TT_SelectorName);
935 if (!Contexts.back().FirstObjCSelectorName)
936 Contexts.back().FirstObjCSelectorName = Tok->Previous;
937 else if (Tok->Previous->ColumnWidth >
938 Contexts.back().LongestObjCSelectorName)
939 Contexts.back().LongestObjCSelectorName =
940 Tok->Previous->ColumnWidth;
941 Tok->Previous->ParameterIndex =
942 Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts;
943 ++Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts;
944 }
945 } else if (Contexts.back().ColonIsForRangeExpr) {
946 Tok->setType(TT_RangeBasedForLoopColon);
947 } else if (CurrentToken && CurrentToken->is(tok::numeric_constant)) {
948 Tok->setType(TT_BitFieldColon);
949 } else if (Contexts.size() == 1 &&
950 !Line.First->isOneOf(tok::kw_enum, tok::kw_case,
951 tok::kw_default)) {
952 FormatToken *Prev = Tok->getPreviousNonComment();
953 if (!Prev)
954 break;
955 if (Prev->isOneOf(tok::r_paren, tok::kw_noexcept))
956 Tok->setType(TT_CtorInitializerColon);
957 else if (Prev->is(tok::kw_try)) {
958 // Member initializer list within function try block.
959 FormatToken *PrevPrev = Prev->getPreviousNonComment();
960 if (!PrevPrev)
961 break;
962 if (PrevPrev && PrevPrev->isOneOf(tok::r_paren, tok::kw_noexcept))
963 Tok->setType(TT_CtorInitializerColon);
964 } else
965 Tok->setType(TT_InheritanceColon);
966 } else if (canBeObjCSelectorComponent(*Tok->Previous) && Tok->Next &&
967 (Tok->Next->isOneOf(tok::r_paren, tok::comma) ||
968 (canBeObjCSelectorComponent(*Tok->Next) && Tok->Next->Next &&
969 Tok->Next->Next->is(tok::colon)))) {
970 // This handles a special macro in ObjC code where selectors including
971 // the colon are passed as macro arguments.
972 Tok->setType(TT_ObjCMethodExpr);
973 } else if (Contexts.back().ContextKind == tok::l_paren) {
974 Tok->setType(TT_InlineASMColon);
975 }
976 break;
977 case tok::pipe:
978 case tok::amp:
979 // | and & in declarations/type expressions represent union and
980 // intersection types, respectively.
981 if (Style.isJavaScript() && !Contexts.back().IsExpression)
982 Tok->setType(TT_JsTypeOperator);
983 break;
984 case tok::kw_if:
985 case tok::kw_while:
986 if (Tok->is(tok::kw_if) && CurrentToken &&
987 CurrentToken->isOneOf(tok::kw_constexpr, tok::identifier))
988 next();
989 if (CurrentToken && CurrentToken->is(tok::l_paren)) {
990 next();
991 if (!parseParens(/*LookForDecls=*/true))
992 return false;
993 }
994 break;
995 case tok::kw_for:
996 if (Style.isJavaScript()) {
997 // x.for and {for: ...}
998 if ((Tok->Previous && Tok->Previous->is(tok::period)) ||
999 (Tok->Next && Tok->Next->is(tok::colon)))
1000 break;
1001 // JS' for await ( ...
1002 if (CurrentToken && CurrentToken->is(Keywords.kw_await))
1003 next();
1004 }
1005 if (Style.isCpp() && CurrentToken && CurrentToken->is(tok::kw_co_await))
1006 next();
1007 Contexts.back().ColonIsForRangeExpr = true;
1008 next();
1009 if (!parseParens())
1010 return false;
1011 break;
1012 case tok::l_paren:
1013 // When faced with 'operator()()', the kw_operator handler incorrectly
1014 // marks the first l_paren as a OverloadedOperatorLParen. Here, we make
1015 // the first two parens OverloadedOperators and the second l_paren an
1016 // OverloadedOperatorLParen.
1017 if (Tok->Previous && Tok->Previous->is(tok::r_paren) &&
1018 Tok->Previous->MatchingParen &&
1019 Tok->Previous->MatchingParen->is(TT_OverloadedOperatorLParen)) {
1020 Tok->Previous->setType(TT_OverloadedOperator);
1021 Tok->Previous->MatchingParen->setType(TT_OverloadedOperator);
1022 Tok->setType(TT_OverloadedOperatorLParen);
1023 }
1024
1025 if (!parseParens())
1026 return false;
1027 if (Line.MustBeDeclaration && Contexts.size() == 1 &&
1028 !Contexts.back().IsExpression && !Line.startsWith(TT_ObjCProperty) &&
1029 !Tok->is(TT_TypeDeclarationParen) &&
1030 (!Tok->Previous || !Tok->Previous->isOneOf(tok::kw___attribute,
1031 TT_LeadingJavaAnnotation)))
1032 Line.MightBeFunctionDecl = true;
1033 break;
1034 case tok::l_square:
1035 if (!parseSquare())
1036 return false;
1037 break;
1038 case tok::l_brace:
1039 if (Style.Language == FormatStyle::LK_TextProto) {
1040 FormatToken *Previous = Tok->getPreviousNonComment();
1041 if (Previous && Previous->getType() != TT_DictLiteral)
1042 Previous->setType(TT_SelectorName);
1043 }
1044 if (!parseBrace())
1045 return false;
1046 break;
1047 case tok::less:
1048 if (parseAngle()) {
1049 Tok->setType(TT_TemplateOpener);
1050 // In TT_Proto, we must distignuish between:
1051 // map<key, value>
1052 // msg < item: data >
1053 // msg: < item: data >
1054 // In TT_TextProto, map<key, value> does not occur.
1055 if (Style.Language == FormatStyle::LK_TextProto ||
1056 (Style.Language == FormatStyle::LK_Proto && Tok->Previous &&
1057 Tok->Previous->isOneOf(TT_SelectorName, TT_DictLiteral))) {
1058 Tok->setType(TT_DictLiteral);
1059 FormatToken *Previous = Tok->getPreviousNonComment();
1060 if (Previous && Previous->getType() != TT_DictLiteral)
1061 Previous->setType(TT_SelectorName);
1062 }
1063 } else {
1064 Tok->setType(TT_BinaryOperator);
1065 NonTemplateLess.insert(Tok);
1066 CurrentToken = Tok;
1067 next();
1068 }
1069 break;
1070 case tok::r_paren:
1071 case tok::r_square:
1072 return false;
1073 case tok::r_brace:
1074 // Lines can start with '}'.
1075 if (Tok->Previous)
1076 return false;
1077 break;
1078 case tok::greater:
1079 if (Style.Language != FormatStyle::LK_TextProto)
1080 Tok->setType(TT_BinaryOperator);
1081 if (Tok->Previous && Tok->Previous->is(TT_TemplateCloser))
1082 Tok->SpacesRequiredBefore = 1;
1083 break;
1084 case tok::kw_operator:
1085 if (Style.Language == FormatStyle::LK_TextProto ||
1086 Style.Language == FormatStyle::LK_Proto)
1087 break;
1088 while (CurrentToken &&
1089 !CurrentToken->isOneOf(tok::l_paren, tok::semi, tok::r_paren)) {
1090 if (CurrentToken->isOneOf(tok::star, tok::amp))
1091 CurrentToken->setType(TT_PointerOrReference);
1092 consumeToken();
1093 if (CurrentToken && CurrentToken->is(tok::comma) &&
1094 CurrentToken->Previous->isNot(tok::kw_operator))
1095 break;
1096 if (CurrentToken && CurrentToken->Previous->isOneOf(
1097 TT_BinaryOperator, TT_UnaryOperator, tok::comma,
1098 tok::star, tok::arrow, tok::amp, tok::ampamp))
1099 CurrentToken->Previous->setType(TT_OverloadedOperator);
1100 }
1101 if (CurrentToken && CurrentToken->is(tok::l_paren))
1102 CurrentToken->setType(TT_OverloadedOperatorLParen);
1103 if (CurrentToken && CurrentToken->Previous->is(TT_BinaryOperator))
1104 CurrentToken->Previous->setType(TT_OverloadedOperator);
1105 break;
1106 case tok::question:
1107 if (Style.isJavaScript() && Tok->Next &&
1108 Tok->Next->isOneOf(tok::semi, tok::comma, tok::colon, tok::r_paren,
1109 tok::r_brace)) {
1110 // Question marks before semicolons, colons, etc. indicate optional
1111 // types (fields, parameters), e.g.
1112 // function(x?: string, y?) {...}
1113 // class X { y?; }
1114 Tok->setType(TT_JsTypeOptionalQuestion);
1115 break;
1116 }
1117 // Declarations cannot be conditional expressions, this can only be part
1118 // of a type declaration.
1119 if (Line.MustBeDeclaration && !Contexts.back().IsExpression &&
1120 Style.isJavaScript())
1121 break;
1122 if (Style.isCSharp()) {
1123 // `Type?)`, `Type?>`, `Type? name;` and `Type? name =` can only be
1124 // nullable types.
1125 // Line.MustBeDeclaration will be true for `Type? name;`.
1126 if ((!Contexts.back().IsExpression && Line.MustBeDeclaration) ||
1127 (Tok->Next && Tok->Next->isOneOf(tok::r_paren, tok::greater)) ||
1128 (Tok->Next && Tok->Next->is(tok::identifier) && Tok->Next->Next &&
1129 Tok->Next->Next->is(tok::equal))) {
1130 Tok->setType(TT_CSharpNullable);
1131 break;
1132 }
1133 }
1134 parseConditional();
1135 break;
1136 case tok::kw_template:
1137 parseTemplateDeclaration();
1138 break;
1139 case tok::comma:
1140 if (Contexts.back().InCtorInitializer)
1141 Tok->setType(TT_CtorInitializerComma);
1142 else if (Contexts.back().InInheritanceList)
1143 Tok->setType(TT_InheritanceComma);
1144 else if (Contexts.back().FirstStartOfName &&
1145 (Contexts.size() == 1 || Line.startsWith(tok::kw_for))) {
1146 Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true;
1147 Line.IsMultiVariableDeclStmt = true;
1148 }
1149 if (Contexts.back().IsForEachMacro)
1150 Contexts.back().IsExpression = true;
1151 break;
1152 case tok::identifier:
1153 if (Tok->isOneOf(Keywords.kw___has_include,
1154 Keywords.kw___has_include_next)) {
1155 parseHasInclude();
1156 }
1157 if (Style.isCSharp() && Tok->is(Keywords.kw_where) && Tok->Next &&
1158 Tok->Next->isNot(tok::l_paren)) {
1159 Tok->setType(TT_CSharpGenericTypeConstraint);
1160 parseCSharpGenericTypeConstraint();
1161 }
1162 break;
1163 default:
1164 break;
1165 }
1166 return true;
1167 }
1168
1169 void parseCSharpGenericTypeConstraint() {
1170 int OpenAngleBracketsCount = 0;
1171 while (CurrentToken) {
1172 if (CurrentToken->is(tok::less)) {
1173 // parseAngle is too greedy and will consume the whole line.
1174 CurrentToken->setType(TT_TemplateOpener);
1175 ++OpenAngleBracketsCount;
1176 next();
1177 } else if (CurrentToken->is(tok::greater)) {
1178 CurrentToken->setType(TT_TemplateCloser);
1179 --OpenAngleBracketsCount;
1180 next();
1181 } else if (CurrentToken->is(tok::comma) && OpenAngleBracketsCount == 0) {
1182 // We allow line breaks after GenericTypeConstraintComma's
1183 // so do not flag commas in Generics as GenericTypeConstraintComma's.
1184 CurrentToken->setType(TT_CSharpGenericTypeConstraintComma);
1185 next();
1186 } else if (CurrentToken->is(Keywords.kw_where)) {
1187 CurrentToken->setType(TT_CSharpGenericTypeConstraint);
1188 next();
1189 } else if (CurrentToken->is(tok::colon)) {
1190 CurrentToken->setType(TT_CSharpGenericTypeConstraintColon);
1191 next();
1192 } else {
1193 next();
1194 }
1195 }
1196 }
1197
1198 void parseIncludeDirective() {
1199 if (CurrentToken && CurrentToken->is(tok::less)) {
1200 next();
1201 while (CurrentToken) {
1202 // Mark tokens up to the trailing line comments as implicit string
1203 // literals.
1204 if (CurrentToken->isNot(tok::comment) &&
1205 !CurrentToken->TokenText.startswith("//"))
1206 CurrentToken->setType(TT_ImplicitStringLiteral);
1207 next();
1208 }
1209 }
1210 }
1211
1212 void parseWarningOrError() {
1213 next();
1214 // We still want to format the whitespace left of the first token of the
1215 // warning or error.
1216 next();
1217 while (CurrentToken) {
1218 CurrentToken->setType(TT_ImplicitStringLiteral);
1219 next();
1220 }
1221 }
1222
1223 void parsePragma() {
1224 next(); // Consume "pragma".
1225 if (CurrentToken &&
1226 CurrentToken->isOneOf(Keywords.kw_mark, Keywords.kw_option)) {
1227 bool IsMark = CurrentToken->is(Keywords.kw_mark);
1228 next(); // Consume "mark".
1229 next(); // Consume first token (so we fix leading whitespace).
1230 while (CurrentToken) {
1231 if (IsMark || CurrentToken->Previous->is(TT_BinaryOperator))
1232 CurrentToken->setType(TT_ImplicitStringLiteral);
1233 next();
1234 }
1235 }
1236 }
1237
1238 void parseHasInclude() {
1239 if (!CurrentToken || !CurrentToken->is(tok::l_paren))
1240 return;
1241 next(); // '('
1242 parseIncludeDirective();
1243 next(); // ')'
1244 }
1245
1246 LineType parsePreprocessorDirective() {
1247 bool IsFirstToken = CurrentToken->IsFirst;
1248 LineType Type = LT_PreprocessorDirective;
1249 next();
1250 if (!CurrentToken)
1251 return Type;
1252
1253 if (Style.isJavaScript() && IsFirstToken) {
1254 // JavaScript files can contain shebang lines of the form:
1255 // #!/usr/bin/env node
1256 // Treat these like C++ #include directives.
1257 while (CurrentToken) {
1258 // Tokens cannot be comments here.
1259 CurrentToken->setType(TT_ImplicitStringLiteral);
1260 next();
1261 }
1262 return LT_ImportStatement;
1263 }
1264
1265 if (CurrentToken->Tok.is(tok::numeric_constant)) {
1266 CurrentToken->SpacesRequiredBefore = 1;
1267 return Type;
1268 }
1269 // Hashes in the middle of a line can lead to any strange token
1270 // sequence.
1271 if (!CurrentToken->Tok.getIdentifierInfo())
1272 return Type;
1273 switch (CurrentToken->Tok.getIdentifierInfo()->getPPKeywordID()) {
1274 case tok::pp_include:
1275 case tok::pp_include_next:
1276 case tok::pp_import:
1277 next();
1278 parseIncludeDirective();
1279 Type = LT_ImportStatement;
1280 break;
1281 case tok::pp_error:
1282 case tok::pp_warning:
1283 parseWarningOrError();
1284 break;
1285 case tok::pp_pragma:
1286 parsePragma();
1287 break;
1288 case tok::pp_if:
1289 case tok::pp_elif:
1290 Contexts.back().IsExpression = true;
1291 next();
1292 parseLine();
1293 break;
1294 default:
1295 break;
1296 }
1297 while (CurrentToken) {
1298 FormatToken *Tok = CurrentToken;
1299 next();
1300 if (Tok->is(tok::l_paren))
1301 parseParens();
1302 else if (Tok->isOneOf(Keywords.kw___has_include,
1303 Keywords.kw___has_include_next))
1304 parseHasInclude();
1305 }
1306 return Type;
1307 }
1308
1309public:
/// Annotates all tokens of the current line and classifies the line.
///
/// \returns LT_Invalid when there is no current token or consumeToken()
/// fails. Otherwise the first matching result of: the value of
/// parsePreprocessorDirective() for lines starting with '#',
/// LT_ImportStatement for the import-like forms recognized below,
/// LT_VirtualFunctionDecl when a 'virtual' keyword was seen,
/// LT_ObjCMethodDecl, LT_ArrayOfStructInitializer, and finally LT_Other.
1310 LineType parseLine() {
1311 if (!CurrentToken)
1312 return LT_Invalid;
1313 NonTemplateLess.clear();
1314 if (CurrentToken->is(tok::hash))
1315 return parsePreprocessorDirective();
1316
1317 // Directly allow to 'import <string-literal>' to support protocol buffer
1318 // definitions (github.com/google/protobuf) or missing "#" (either way we
1319 // should not break the line).
1320 IdentifierInfo *Info = CurrentToken->Tok.getIdentifierInfo();
1321 if ((Style.Language == FormatStyle::LK_Java &&
1322 CurrentToken->is(Keywords.kw_package)) ||
1323 (Info && Info->getPPKeywordID() == tok::pp_import &&
1324 CurrentToken->Next &&
1325 CurrentToken->Next->isOneOf(tok::string_literal, tok::identifier,
1326 tok::kw_static))) {
1327 next();
1328 parseIncludeDirective();
1329 return LT_ImportStatement;
1330 }
1331
1332 // If this line starts and ends in '<' and '>', respectively, it is likely
1333 // part of "#define <a/b.h>".
1334 if (CurrentToken->is(tok::less) && Line.Last->is(tok::greater)) {
1335 parseIncludeDirective();
1336 return LT_ImportStatement;
1337 }
1338
1339 // In .proto files, top-level options and package statements are very
1340 // similar to import statements and should not be line-wrapped.
1341 if (Style.Language == FormatStyle::LK_Proto && Line.Level == 0 &&
1342 CurrentToken->isOneOf(Keywords.kw_option, Keywords.kw_package)) {
1343 next();
1344 if (CurrentToken && CurrentToken->is(tok::identifier)) {
// Consume the rest of the line without any further annotation.
1345 while (CurrentToken)
1346 next();
1347 return LT_ImportStatement;
1348 }
1349 }
1350
1351 bool KeywordVirtualFound = false;
1352 bool ImportStatement = false;
1353
1354 // import {...} from '...';
1355 if (Style.isJavaScript() && CurrentToken->is(Keywords.kw_import))
1356 ImportStatement = true;
1357
// Main loop: annotate token by token; any consumeToken() failure makes the
// whole line LT_Invalid.
1358 while (CurrentToken) {
1359 if (CurrentToken->is(tok::kw_virtual))
1360 KeywordVirtualFound = true;
1361 if (Style.isJavaScript()) {
1362 // export {...} from '...';
1363 // An export followed by "from 'some string';" is a re-export from
1364 // another module identified by a URI and is treated as a
1365 // LT_ImportStatement (i.e. prevent wraps on it for long URIs).
1366 // Just "export {...};" or "export class ..." should not be treated as
1367 // an import in this sense.
1368 if (Line.First->is(tok::kw_export) &&
1369 CurrentToken->is(Keywords.kw_from) && CurrentToken->Next &&
1370 CurrentToken->Next->isStringLiteral())
1371 ImportStatement = true;
1372 if (isClosureImportStatement(*CurrentToken))
1373 ImportStatement = true;
1374 }
1375 if (!consumeToken())
1376 return LT_Invalid;
1377 }
// 'virtual' takes precedence over an import classification.
1378 if (KeywordVirtualFound)
1379 return LT_VirtualFunctionDecl;
1380 if (ImportStatement)
1381 return LT_ImportStatement;
1382
1383 if (Line.startsWith(TT_ObjCMethodSpecifier)) {
// Store the longest selector name width on the first selector name token.
1384 if (Contexts.back().FirstObjCSelectorName)
1385 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
1386 Contexts.back().LongestObjCSelectorName;
1387 return LT_ObjCMethodDecl;
1388 }
1389
// Any context still flagged as a struct array initializer decides the type.
1390 for (const auto &ctx : Contexts) {
1391 if (ctx.InStructArrayInitializer) {
1392 return LT_ArrayOfStructInitializer;
1393 }
1394 }
1395
1396 return LT_Other;
1397 }
1398
1399private:
1400 bool isClosureImportStatement(const FormatToken &Tok) {
1401 // FIXME: Closure-library specific stuff should not be hard-coded but be
1402 // configurable.
1403 return Tok.TokenText == "goog" && Tok.Next && Tok.Next->is(tok::period) &&
1404 Tok.Next->Next &&
1405 (Tok.Next->Next->TokenText == "module" ||
1406 Tok.Next->Next->TokenText == "provide" ||
1407 Tok.Next->Next->TokenText == "require" ||
1408 Tok.Next->Next->TokenText == "requireType" ||
1409 Tok.Next->Next->TokenText == "forwardDeclare") &&
1410 Tok.Next->Next->Next && Tok.Next->Next->Next->is(tok::l_paren);
1411 }
1412
1413 void resetTokenMetadata() {
1414 if (!CurrentToken)
1415 return;
1416
1417 // Reset token type in case we have already looked at it and then
1418 // recovered from an error (e.g. failure to find the matching >).
1419 if (!CurrentToken->isOneOf(
1420 TT_LambdaLSquare, TT_LambdaLBrace, TT_AttributeMacro, TT_IfMacro,
1421 TT_ForEachMacro, TT_TypenameMacro, TT_FunctionLBrace,
1422 TT_ImplicitStringLiteral, TT_InlineASMBrace, TT_FatArrow,
1423 TT_LambdaArrow, TT_NamespaceMacro, TT_OverloadedOperator,
1424 TT_RegexLiteral, TT_TemplateString, TT_ObjCStringLiteral,
1425 TT_UntouchableMacroFunc, TT_ConstraintJunctions,
1426 TT_StatementAttributeLikeMacro))
1427 CurrentToken->setType(TT_Unknown);
1428 CurrentToken->Role.reset();
1429 CurrentToken->MatchingParen = nullptr;
1430 CurrentToken->FakeLParens.clear();
1431 CurrentToken->FakeRParens = 0;
1432 }
1433
1434 void next() {
1435 if (!CurrentToken)
1436 return;
1437
1438 CurrentToken->NestingLevel = Contexts.size() - 1;
1439 CurrentToken->BindingStrength = Contexts.back().BindingStrength;
1440 modifyContext(*CurrentToken);
1441 determineTokenType(*CurrentToken);
1442 CurrentToken = CurrentToken->Next;
1443
1444 resetTokenMetadata();
1445 }
1446
1447 /// A struct to hold information valid in a specific context, e.g.
1448 /// a pair of parentheses.
1449 struct Context {
// Only the three leading members are set by the constructor; all other
// members start from their in-class defaults.
1450 Context(tok::TokenKind ContextKind, unsigned BindingStrength,
1451 bool IsExpression)
1452 : ContextKind(ContextKind), BindingStrength(BindingStrength),
1453 IsExpression(IsExpression) {}
1454
// Token kind this context was opened with (parseBrace() passes tok::l_brace).
1455 tok::TokenKind ContextKind;
// Copied onto every token consumed in this context by next().
1456 unsigned BindingStrength;
// Initialized from the enclosing context by ScopedContextCreator and
// adjusted afterwards, e.g. by modifyContext().
1457 bool IsExpression;
// Raised by consumeToken() to the ColumnWidth of later selector name tokens.
1458 unsigned LongestObjCSelectorName = 0;
// Set by consumeToken() after 'for'; colons then may become
// TT_RangeBasedForLoopColon.
1459 bool ColonIsForRangeExpr = false;
// Set by parseBrace(); colons then may become TT_DictLiteral.
1460 bool ColonIsDictLiteral = false;
// When set, consumeToken() may tag colons as TT_ObjCMethodExpr.
1461 bool ColonIsObjCMethodExpr = false;
// First token tagged TT_SelectorName in this context, if any.
1462 FormatToken *FirstObjCSelectorName = nullptr;
// Read by consumeToken() on a comma to flag a multi-variable declaration.
1463 FormatToken *FirstStartOfName = nullptr;
// Cleared by modifyContext() when a 'new' keyword is seen.
1464 bool CanBeExpression = true;
1465 bool InTemplateArgument = false;
// Set by modifyContext() for the token after a TT_CtorInitializerColon.
1466 bool InCtorInitializer = false;
// Set by modifyContext() for the token after a TT_InheritanceColon.
1467 bool InInheritanceList = false;
// Read and cleared by parseBrace(); marks the brace as TT_ObjCBlockLBrace.
1468 bool CaretFound = false;
// When set, a comma switches IsExpression to true (see consumeToken()).
1469 bool IsForEachMacro = false;
1470 bool InCpp11AttributeSpecifier = false;
// When set (C# only), consumeToken() tags colons as TT_AttributeColon.
1471 bool InCSharpAttributeSpecifier = false;
// Set by parseBrace() and propagated to the parent context by
// ~ScopedContextCreator(); makes parseLine() return
// LT_ArrayOfStructInitializer.
1472 bool InStructArrayInitializer = false;
1473 };
1474
1475 /// Puts a new \c Context onto the stack \c Contexts for the lifetime
1476 /// of each instance.
1477 struct ScopedContextCreator {
1478 AnnotatingParser &P;
1479
1480 ScopedContextCreator(AnnotatingParser &P, tok::TokenKind ContextKind,
1481 unsigned Increase)
1482 : P(P) {
1483 P.Contexts.push_back(Context(ContextKind,
1484 P.Contexts.back().BindingStrength + Increase,
1485 P.Contexts.back().IsExpression));
1486 }
1487
1488 ~ScopedContextCreator() {
1489 if (P.Style.AlignArrayOfStructures != FormatStyle::AIAS_None) {
1490 if (P.Contexts.back().InStructArrayInitializer) {
1491 P.Contexts.pop_back();
1492 P.Contexts.back().InStructArrayInitializer = true;
1493 return;
1494 }
1495 }
1496 P.Contexts.pop_back();
1497 }
1498 };
1499
/// Updates the innermost context (Contexts.back()) according to the token
/// \p Current. next() calls this for each token just before
/// determineTokenType(). At most one branch of the chain below applies.
1500 void modifyContext(const FormatToken &Current) {
// Assignment-precedence operator, excluding lines that start with
// template/using/return, TypeScript type aliases and 'operator=' style
// tokens: what follows is an expression.
1501 if (Current.getPrecedence() == prec::Assignment &&
1502 !Line.First->isOneOf(tok::kw_template, tok::kw_using, tok::kw_return) &&
1503 // Type aliases use `type X = ...;` in TypeScript and can be exported
1504 // using `export type ...`.
1505 !(Style.isJavaScript() &&
1506 (Line.startsWith(Keywords.kw_type, tok::identifier) ||
1507 Line.startsWith(tok::kw_export, Keywords.kw_type,
1508 tok::identifier))) &&
1509 (!Current.Previous || Current.Previous->isNot(tok::kw_operator))) {
1510 Contexts.back().IsExpression = true;
1511 if (!Line.startsWith(TT_UnaryOperator)) {
// Walk backwards over the tokens before the operator, stopping at a
// comma/semicolon boundary, an unmatched closer or a scope opener, and
// re-tag '*', '&' and '&&' that were typed as operators.
1512 for (FormatToken *Previous = Current.Previous;
1513 Previous && Previous->Previous &&
1514 !Previous->Previous->isOneOf(tok::comma, tok::semi);
1515 Previous = Previous->Previous) {
1516 if (Previous->isOneOf(tok::r_square, tok::r_paren)) {
// Jump over the bracketed range in one step.
1517 Previous = Previous->MatchingParen;
1518 if (!Previous)
1519 break;
1520 }
1521 if (Previous->opensScope())
1522 break;
1523 if (Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator) &&
1524 Previous->isOneOf(tok::star, tok::amp, tok::ampamp) &&
1525 Previous->Previous && Previous->Previous->isNot(tok::equal))
1526 Previous->setType(TT_PointerOrReference);
1527 }
1528 }
1529 } else if (Current.is(tok::lessless) &&
1530 (!Current.Previous || !Current.Previous->is(tok::kw_operator))) {
1531 Contexts.back().IsExpression = true;
1532 } else if (Current.isOneOf(tok::kw_return, tok::kw_throw)) {
1533 Contexts.back().IsExpression = true;
1534 } else if (Current.is(TT_TrailingReturnArrow)) {
1535 Contexts.back().IsExpression = false;
1536 } else if (Current.is(TT_LambdaArrow) || Current.is(Keywords.kw_assert)) {
// Only Java treats what follows as an expression here.
1537 Contexts.back().IsExpression = Style.Language == FormatStyle::LK_Java;
1538 } else if (Current.Previous &&
1539 Current.Previous->is(TT_CtorInitializerColon)) {
1540 Contexts.back().IsExpression = true;
1541 Contexts.back().InCtorInitializer = true;
1542 } else if (Current.Previous && Current.Previous->is(TT_InheritanceColon)) {
1543 Contexts.back().InInheritanceList = true;
1544 } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) {
// A run of '*' / '&' directly before ')', '>' or ',' is tagged as
// pointer/reference.
1545 for (FormatToken *Previous = Current.Previous;
1546 Previous && Previous->isOneOf(tok::star, tok::amp);
1547 Previous = Previous->Previous)
1548 Previous->setType(TT_PointerOrReference);
1549 if (Line.MustBeDeclaration && !Contexts.front().InCtorInitializer)
1550 Contexts.back().IsExpression = false;
1551 } else if (Current.is(tok::kw_new)) {
1552 Contexts.back().CanBeExpression = false;
1553 } else if (Current.is(tok::semi) ||
1554 (Current.is(tok::exclaim) && Current.Previous &&
1555 !Current.Previous->is(tok::kw_operator))) {
1556 // This should be the condition or increment in a for-loop.
1557 // But not operator !() (can't use TT_OverloadedOperator here as its not
1558 // been annotated yet).
1559 Contexts.back().IsExpression = true;
1560 }
1561 }
1562
1563 static FormatToken *untilMatchingParen(FormatToken *Current) {
1564 // Used when `MatchingParen` is not yet established.
1565 int ParenLevel = 0;
1566 while (Current) {
1567 if (Current->is(tok::l_paren))
1568 ++ParenLevel;
1569 if (Current->is(tok::r_paren))
1570 --ParenLevel;
1571 if (ParenLevel < 1)
1572 break;
1573 Current = Current->Next;
1574 }
1575 return Current;
1576 }
1577
1578 static bool isDeductionGuide(FormatToken &Current) {
1579 // Look for a deduction guide template<T> A(...) -> A<...>;
1580 if (Current.Previous && Current.Previous->is(tok::r_paren) &&
1581 Current.startsSequence(tok::arrow, tok::identifier, tok::less)) {
1582 // Find the TemplateCloser.
1583 FormatToken *TemplateCloser = Current.Next->Next;
1584 int NestingLevel = 0;
1585 while (TemplateCloser) {
1586 // Skip over an expressions in parens A<(3 < 2)>;
1587 if (TemplateCloser->is(tok::l_paren)) {
1588 // No Matching Paren yet so skip to matching paren
1589 TemplateCloser = untilMatchingParen(TemplateCloser);
1590 if (!TemplateCloser)
1591 break;
1592 }
1593 if (TemplateCloser->is(tok::less))
1594 ++NestingLevel;
1595 if (TemplateCloser->is(tok::greater))
1596 --NestingLevel;
1597 if (NestingLevel < 1)
1598 break;
1599 TemplateCloser = TemplateCloser->Next;
1600 }
1601 // Assuming we have found the end of the template ensure its followed
1602 // with a semi-colon.
1603 if (TemplateCloser && TemplateCloser->Next &&
1604 TemplateCloser->Next->is(tok::semi) &&
1605 Current.Previous->MatchingParen) {
1606 // Determine if the identifier `A` prior to the A<..>; is the same as
1607 // prior to the A(..)
1608 FormatToken *LeadingIdentifier =
1609 Current.Previous->MatchingParen->Previous;
1610
1611 // Differentiate a deduction guide by seeing the
1612 // > of the template prior to the leading identifier.
1613 if (LeadingIdentifier) {
1614 FormatToken *PriorLeadingIdentifier = LeadingIdentifier->Previous;
1615 // Skip back past explicit decoration
1616 if (PriorLeadingIdentifier &&
1617 PriorLeadingIdentifier->is(tok::kw_explicit))
1618 PriorLeadingIdentifier = PriorLeadingIdentifier->Previous;
1619
1620 return (PriorLeadingIdentifier &&
1621 PriorLeadingIdentifier->is(TT_TemplateCloser) &&
1622 LeadingIdentifier->TokenText == Current.Next->TokenText);
1623 }
1624 }
1625 }
1626 return false;
1627 }
1628
1629 void determineTokenType(FormatToken &Current) {
1630 if (!Current.is(TT_Unknown))
1631 // The token type is already known.
1632 return;
1633
1634 if ((Style.isJavaScript() || Style.isCSharp()) &&
1635 Current.is(tok::exclaim)) {
1636 if (Current.Previous) {
1637 bool IsIdentifier =
1638 Style.isJavaScript()
1639 ? Keywords.IsJavaScriptIdentifier(
1640 *Current.Previous, /* AcceptIdentifierName= */ true)
1641 : Current.Previous->is(tok::identifier);
1642 if (IsIdentifier ||
1643 Current.Previous->isOneOf(
1644 tok::kw_namespace, tok::r_paren, tok::r_square, tok::r_brace,
1645 tok::kw_false, tok::kw_true, Keywords.kw_type, Keywords.kw_get,
1646 Keywords.kw_set) ||
1647 Current.Previous->Tok.isLiteral()) {
1648 Current.setType(TT_NonNullAssertion);
1649 return;
1650 }
1651 }
1652 if (Current.Next &&
1653 Current.Next->isOneOf(TT_BinaryOperator, Keywords.kw_as)) {
1654 Current.setType(TT_NonNullAssertion);
1655 return;
1656 }
1657 }
1658
1659 // Line.MightBeFunctionDecl can only be true after the parentheses of a
1660 // function declaration have been found. In this case, 'Current' is a
1661 // trailing token of this declaration and thus cannot be a name.
1662 if (Current.is(Keywords.kw_instanceof)) {
1663 Current.setType(TT_BinaryOperator);
1664 } else if (isStartOfName(Current) &&
1665 (!Line.MightBeFunctionDecl || Current.NestingLevel != 0)) {
1666 Contexts.back().FirstStartOfName = &Current;
1667 Current.setType(TT_StartOfName);
1668 } else if (Current.is(tok::semi)) {
1669 // Reset FirstStartOfName after finding a semicolon so that a for loop
1670 // with multiple increment statements is not confused with a for loop
1671 // having multiple variable declarations.
1672 Contexts.back().FirstStartOfName = nullptr;
1673 } else if (Current.isOneOf(tok::kw_auto, tok::kw___auto_type)) {
1674 AutoFound = true;
1675 } else if (Current.is(tok::arrow) &&
1676 Style.Language == FormatStyle::LK_Java) {
1677 Current.setType(TT_LambdaArrow);
1678 } else if (Current.is(tok::arrow) && AutoFound && Line.MustBeDeclaration &&
1679 Current.NestingLevel == 0 &&
1680 !Current.Previous->isOneOf(tok::kw_operator, tok::identifier)) {
1681 // not auto operator->() -> xxx;
1682 Current.setType(TT_TrailingReturnArrow);
1683 } else if (Current.is(tok::arrow) && Current.Previous &&
1684 Current.Previous->is(tok::r_brace)) {
1685 // Concept implicit conversion constraint needs to be treated like
1686 // a trailing return type ... } -> <type>.
1687 Current.setType(TT_TrailingReturnArrow);
1688 } else if (isDeductionGuide(Current)) {
1689 // Deduction guides trailing arrow " A(...) -> A<T>;".
1690 Current.setType(TT_TrailingReturnArrow);
1691 } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) {
1692 Current.setType(determineStarAmpUsage(
1693 Current,
1694 Contexts.back().CanBeExpression && Contexts.back().IsExpression,
1695 Contexts.back().InTemplateArgument));
1696 } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) {
1697 Current.setType(determinePlusMinusCaretUsage(Current));
1698 if (Current.is(TT_UnaryOperator) && Current.is(tok::caret))
1699 Contexts.back().CaretFound = true;
1700 } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) {
1701 Current.setType(determineIncrementUsage(Current));
1702 } else if (Current.isOneOf(tok::exclaim, tok::tilde)) {
1703 Current.setType(TT_UnaryOperator);
1704 } else if (Current.is(tok::question)) {
1705 if (Style.isJavaScript() && Line.MustBeDeclaration &&
1706 !Contexts.back().IsExpression) {
1707 // In JavaScript, `interface X { foo?(): bar; }` is an optional method
1708 // on the interface, not a ternary expression.
1709 Current.setType(TT_JsTypeOptionalQuestion);
1710 } else {
1711 Current.setType(TT_ConditionalExpr);
1712 }
1713 } else if (Current.isBinaryOperator() &&
1714 (!Current.Previous || Current.Previous->isNot(tok::l_square)) &&
1715 (!Current.is(tok::greater) &&
1716 Style.Language != FormatStyle::LK_TextProto)) {
1717 Current.setType(TT_BinaryOperator);
1718 } else if (Current.is(tok::comment)) {
1719 if (Current.TokenText.startswith("/*")) {
1720 if (Current.TokenText.endswith("*/"))
1721 Current.setType(TT_BlockComment);
1722 else
1723 // The lexer has for some reason determined a comment here. But we
1724 // cannot really handle it, if it isn't properly terminated.
1725 Current.Tok.setKind(tok::unknown);
1726 } else {
1727 Current.setType(TT_LineComment);
1728 }
1729 } else if (Current.is(tok::r_paren)) {
1730 if (rParenEndsCast(Current))
1731 Current.setType(TT_CastRParen);
1732 if (Current.MatchingParen && Current.Next &&
1733 !Current.Next->isBinaryOperator() &&
1734 !Current.Next->isOneOf(tok::semi, tok::colon, tok::l_brace,
1735 tok::comma, tok::period, tok::arrow,
1736 tok::coloncolon))
1737 if (FormatToken *AfterParen = Current.MatchingParen->Next) {
1738 // Make sure this isn't the return type of an Obj-C block declaration
1739 if (AfterParen->Tok.isNot(tok::caret)) {
1740 if (FormatToken *BeforeParen = Current.MatchingParen->Previous)
1741 if (BeforeParen->is(tok::identifier) &&
1742 !BeforeParen->is(TT_TypenameMacro) &&
1743 BeforeParen->TokenText == BeforeParen->TokenText.upper() &&
1744 (!BeforeParen->Previous ||
1745 BeforeParen->Previous->ClosesTemplateDeclaration))
1746 Current.setType(TT_FunctionAnnotationRParen);
1747 }
1748 }
1749 } else if (Current.is(tok::at) && Current.Next && !Style.isJavaScript() &&
1750 Style.Language != FormatStyle::LK_Java) {
1751 // In Java & JavaScript, "@..." is a decorator or annotation. In ObjC, it
1752 // marks declarations and properties that need special formatting.
1753 switch (Current.Next->Tok.getObjCKeywordID()) {
1754 case tok::objc_interface:
1755 case tok::objc_implementation:
1756 case tok::objc_protocol:
1757 Current.setType(TT_ObjCDecl);
1758 break;
1759 case tok::objc_property:
1760 Current.setType(TT_ObjCProperty);
1761 break;
1762 default:
1763 break;
1764 }
1765 } else if (Current.is(tok::period)) {
1766 FormatToken *PreviousNoComment = Current.getPreviousNonComment();
1767 if (PreviousNoComment &&
1768 PreviousNoComment->isOneOf(tok::comma, tok::l_brace))
1769 Current.setType(TT_DesignatedInitializerPeriod);
1770 else if (Style.Language == FormatStyle::LK_Java && Current.Previous &&
1771 Current.Previous->isOneOf(TT_JavaAnnotation,
1772 TT_LeadingJavaAnnotation)) {
1773 Current.setType(Current.Previous->getType());
1774 }
1775 } else if (canBeObjCSelectorComponent(Current) &&
1776 // FIXME(bug 36976): ObjC return types shouldn't use
1777 // TT_CastRParen.
1778 Current.Previous && Current.Previous->is(TT_CastRParen) &&
1779 Current.Previous->MatchingParen &&
1780 Current.Previous->MatchingParen->Previous &&
1781 Current.Previous->MatchingParen->Previous->is(
1782 TT_ObjCMethodSpecifier)) {
1783 // This is the first part of an Objective-C selector name. (If there's no
1784 // colon after this, this is the only place which annotates the identifier
1785 // as a selector.)
1786 Current.setType(TT_SelectorName);
1787 } else if (Current.isOneOf(tok::identifier, tok::kw_const, tok::kw_noexcept,
1788 tok::kw_requires) &&
1789 Current.Previous &&
1790 !Current.Previous->isOneOf(tok::equal, tok::at) &&
1791 Line.MightBeFunctionDecl && Contexts.size() == 1) {
1792 // Line.MightBeFunctionDecl can only be true after the parentheses of a
1793 // function declaration have been found.
1794 Current.setType(TT_TrailingAnnotation);
1795 } else if ((Style.Language == FormatStyle::LK_Java ||
1796 Style.isJavaScript()) &&
1797 Current.Previous) {
1798 if (Current.Previous->is(tok::at) &&
1799 Current.isNot(Keywords.kw_interface)) {
1800 const FormatToken &AtToken = *Current.Previous;
1801 const FormatToken *Previous = AtToken.getPreviousNonComment();
1802 if (!Previous || Previous->is(TT_LeadingJavaAnnotation))
1803 Current.setType(TT_LeadingJavaAnnotation);
1804 else
1805 Current.setType(TT_JavaAnnotation);
1806 } else if (Current.Previous->is(tok::period) &&
1807 Current.Previous->isOneOf(TT_JavaAnnotation,
1808 TT_LeadingJavaAnnotation)) {
1809 Current.setType(Current.Previous->getType());
1810 }
1811 }
1812 }
1813
1814 /// Take a guess at whether \p Tok starts a name of a function or
1815 /// variable declaration.
1816 ///
1817 /// This is a heuristic based on whether \p Tok is an identifier following
1818 /// something that is likely a type.
1819 bool isStartOfName(const FormatToken &Tok) {
1820 if (Tok.isNot(tok::identifier) || !Tok.Previous)
1821 return false;
1822
1823 if (Tok.Previous->isOneOf(TT_LeadingJavaAnnotation, Keywords.kw_instanceof,
1824 Keywords.kw_as))
1825 return false;
1826 if (Style.isJavaScript() && Tok.Previous->is(Keywords.kw_in))
1827 return false;
1828
1829 // Skip "const" as it does not have an influence on whether this is a name.
1830 FormatToken *PreviousNotConst = Tok.getPreviousNonComment();
1831
1832 // For javascript const can be like "let" or "var"
1833 if (!Style.isJavaScript())
1834 while (PreviousNotConst && PreviousNotConst->is(tok::kw_const))
1835 PreviousNotConst = PreviousNotConst->getPreviousNonComment();
1836
1837 if (!PreviousNotConst)
1838 return false;
1839
1840 bool IsPPKeyword = PreviousNotConst->is(tok::identifier) &&
1841 PreviousNotConst->Previous &&
1842 PreviousNotConst->Previous->is(tok::hash);
1843
1844 if (PreviousNotConst->is(TT_TemplateCloser))
1845 return PreviousNotConst && PreviousNotConst->MatchingParen &&
1846 PreviousNotConst->MatchingParen->Previous &&
1847 PreviousNotConst->MatchingParen->Previous->isNot(tok::period) &&
1848 PreviousNotConst->MatchingParen->Previous->isNot(tok::kw_template);
1849
1850 if (PreviousNotConst->is(tok::r_paren) &&
1851 PreviousNotConst->is(TT_TypeDeclarationParen))
1852 return true;
1853
1854 // If is a preprocess keyword like #define.
1855 if (IsPPKeyword)
1856 return false;
1857
1858 // int a or auto a.
1859 if (PreviousNotConst->isOneOf(tok::identifier, tok::kw_auto))
1860 return true;
1861
1862 // *a or &a or &&a.
1863 if (PreviousNotConst->is(TT_PointerOrReference))
1864 return true;
1865
1866 // MyClass a;
1867 if (PreviousNotConst->isSimpleTypeSpecifier())
1868 return true;
1869
1870 // const a = in JavaScript.
1871 return (Style.isJavaScript() && PreviousNotConst->is(tok::kw_const));
1872 }
1873
1874 /// Determine whether ')' is ending a cast.
1875 bool rParenEndsCast(const FormatToken &Tok) {
1876 // C-style casts are only used in C++, C# and Java.
1877 if (!Style.isCSharp() && !Style.isCpp() &&
1878 Style.Language != FormatStyle::LK_Java)
1879 return false;
1880
1881 // Empty parens aren't casts and there are no casts at the end of the line.
1882 if (Tok.Previous == Tok.MatchingParen || !Tok.Next || !Tok.MatchingParen)
1883 return false;
1884
1885 FormatToken *LeftOfParens = Tok.MatchingParen->getPreviousNonComment();
1886 if (LeftOfParens) {
1887 // If there is a closing parenthesis left of the current
1888 // parentheses, look past it as these might be chained casts.
1889 if (LeftOfParens->is(tok::r_paren) &&
1890 LeftOfParens->isNot(TT_CastRParen)) {
1891 if (!LeftOfParens->MatchingParen ||
1892 !LeftOfParens->MatchingParen->Previous)
1893 return false;
1894 LeftOfParens = LeftOfParens->MatchingParen->Previous;
1895 }
1896
1897 // The Condition directly below this one will see the operator arguments
1898 // as a (void *foo) cast.
1899 // void operator delete(void *foo) ATTRIB;
1900 if (LeftOfParens->Tok.getIdentifierInfo() && LeftOfParens->Previous &&
1901 LeftOfParens->Previous->is(tok::kw_operator))
1902 return false;
1903
1904 // If there is an identifier (or with a few exceptions a keyword) right
1905 // before the parentheses, this is unlikely to be a cast.
1906 if (LeftOfParens->Tok.getIdentifierInfo() &&
1907 !LeftOfParens->isOneOf(Keywords.kw_in, tok::kw_return, tok::kw_case,
1908 tok::kw_delete))
1909 return false;
1910
1911 // Certain other tokens right before the parentheses are also signals that
1912 // this cannot be a cast.
1913 if (LeftOfParens->isOneOf(tok::at, tok::r_square, TT_OverloadedOperator,
1914 TT_TemplateCloser, tok::ellipsis))
1915 return false;
1916 }
1917
1918 if (Tok.Next->is(tok::question))
1919 return false;
1920
1921 // `foreach((A a, B b) in someList)` should not be seen as a cast.
1922 if (Tok.Next->is(Keywords.kw_in) && Style.isCSharp())
1923 return false;
1924
1925 // Functions which end with decorations like volatile, noexcept are unlikely
1926 // to be casts.
1927 if (Tok.Next->isOneOf(tok::kw_noexcept, tok::kw_volatile, tok::kw_const,
1928 tok::kw_requires, tok::kw_throw, tok::arrow,
1929 Keywords.kw_override, Keywords.kw_final) ||
1930 isCpp11AttributeSpecifier(*Tok.Next))
1931 return false;
1932
1933 // As Java has no function types, a "(" after the ")" likely means that this
1934 // is a cast.
1935 if (Style.Language == FormatStyle::LK_Java && Tok.Next->is(tok::l_paren))
1936 return true;
1937
1938 // If a (non-string) literal follows, this is likely a cast.
1939 if (Tok.Next->isNot(tok::string_literal) &&
1940 (Tok.Next->Tok.isLiteral() ||
1941 Tok.Next->isOneOf(tok::kw_sizeof, tok::kw_alignof)))
1942 return true;
1943
1944 // Heuristically try to determine whether the parentheses contain a type.
1945 auto IsQualifiedPointerOrReference = [](FormatToken *T) {
1946 // This is used to handle cases such as x = (foo *const)&y;
1947 assert(!T->isSimpleTypeSpecifier() && "Should have already been checked")(static_cast <bool> (!T->isSimpleTypeSpecifier() &&
"Should have already been checked") ? void (0) : __assert_fail
("!T->isSimpleTypeSpecifier() && \"Should have already been checked\""
, "clang/lib/Format/TokenAnnotator.cpp", 1947, __extension__ __PRETTY_FUNCTION__
))
;
1948 // Strip trailing qualifiers such as const or volatile when checking
1949 // whether the parens could be a cast to a pointer/reference type.
1950 while (T) {
1951 if (T->is(TT_AttributeParen)) {
1952 // Handle `x = (foo *__attribute__((foo)))&v;`:
1953 if (T->MatchingParen && T->MatchingParen->Previous &&
1954 T->MatchingParen->Previous->is(tok::kw___attribute)) {
1955 T = T->MatchingParen->Previous->Previous;
1956 continue;
1957 }
1958 } else if (T->is(TT_AttributeSquare)) {
1959 // Handle `x = (foo *[[clang::foo]])&v;`:
1960 if (T->MatchingParen && T->MatchingParen->Previous) {
1961 T = T->MatchingParen->Previous;
1962 continue;
1963 }
1964 } else if (T->canBePointerOrReferenceQualifier()) {
1965 T = T->Previous;
1966 continue;
1967 }
1968 break;
1969 }
1970 return T && T->is(TT_PointerOrReference);
1971 };
1972 bool ParensAreType =
1973 !Tok.Previous ||
1974 Tok.Previous->isOneOf(TT_TemplateCloser, TT_TypeDeclarationParen) ||
1975 Tok.Previous->isSimpleTypeSpecifier() ||
1976 IsQualifiedPointerOrReference(Tok.Previous);
1977 bool ParensCouldEndDecl =
1978 Tok.Next->isOneOf(tok::equal, tok::semi, tok::l_brace, tok::greater);
1979 if (ParensAreType && !ParensCouldEndDecl)
1980 return true;
1981
1982 // At this point, we heuristically assume that there are no casts at the
1983 // start of the line. We assume that we have found most cases where there
1984 // are by the logic above, e.g. "(void)x;".
1985 if (!LeftOfParens)
1986 return false;
1987
1988 // Certain token types inside the parentheses mean that this can't be a
1989 // cast.
1990 for (const FormatToken *Token = Tok.MatchingParen->Next; Token != &Tok;
1991 Token = Token->Next)
1992 if (Token->is(TT_BinaryOperator))
1993 return false;
1994
1995 // If the following token is an identifier or 'this', this is a cast. All
1996 // cases where this can be something else are handled above.
1997 if (Tok.Next->isOneOf(tok::identifier, tok::kw_this))
1998 return true;
1999
2000 // Look for a cast `( x ) (`.
2001 if (Tok.Next->is(tok::l_paren) && Tok.Previous && Tok.Previous->Previous) {
2002 if (Tok.Previous->is(tok::identifier) &&
2003 Tok.Previous->Previous->is(tok::l_paren))
2004 return true;
2005 }
2006
2007 if (!Tok.Next->Next)
2008 return false;
2009
2010 // If the next token after the parenthesis is a unary operator, assume
2011 // that this is cast, unless there are unexpected tokens inside the
2012 // parenthesis.
2013 bool NextIsUnary =
2014 Tok.Next->isUnaryOperator() || Tok.Next->isOneOf(tok::amp, tok::star);
2015 if (!NextIsUnary || Tok.Next->is(tok::plus) ||
2016 !Tok.Next->Next->isOneOf(tok::identifier, tok::numeric_constant))
2017 return false;
2018 // Search for unexpected tokens.
2019 for (FormatToken *Prev = Tok.Previous; Prev != Tok.MatchingParen;
2020 Prev = Prev->Previous) {
2021 if (!Prev->isOneOf(tok::kw_const, tok::identifier, tok::coloncolon))
2022 return false;
2023 }
2024 return true;
2025 }
2026
2027 /// Return the type of the given token assuming it is * or &.
2028 TokenType determineStarAmpUsage(const FormatToken &Tok, bool IsExpression,
2029 bool InTemplateArgument) {
2030 if (Style.isJavaScript())
2031 return TT_BinaryOperator;
2032
2033 // && in C# must be a binary operator.
2034 if (Style.isCSharp() && Tok.is(tok::ampamp))
2035 return TT_BinaryOperator;
2036
2037 const FormatToken *PrevToken = Tok.getPreviousNonComment();
2038 if (!PrevToken)
2039 return TT_UnaryOperator;
2040
2041 const FormatToken *NextToken = Tok.getNextNonComment();
2042 if (!NextToken ||
2043 NextToken->isOneOf(tok::arrow, tok::equal, tok::kw_noexcept) ||
2044 NextToken->canBePointerOrReferenceQualifier() ||
2045 (NextToken->is(tok::l_brace) && !NextToken->getNextNonComment()))
2046 return TT_PointerOrReference;
2047
2048 if (PrevToken->is(tok::coloncolon))
2049 return TT_PointerOrReference;
2050
2051 if (PrevToken->is(tok::r_paren) && PrevToken->is(TT_TypeDeclarationParen))
2052 return TT_PointerOrReference;
2053
2054 if (PrevToken->isOneOf(tok::l_paren, tok::l_square, tok::l_brace,
2055 tok::comma, tok::semi, tok::kw_return, tok::colon,
2056 tok::kw_co_return, tok::kw_co_await,
2057 tok::kw_co_yield, tok::equal, tok::kw_delete,
2058 tok::kw_sizeof, tok::kw_throw, TT_BinaryOperator,
2059 TT_ConditionalExpr, TT_UnaryOperator, TT_CastRParen))
2060 return TT_UnaryOperator;
2061
2062 if (NextToken->is(tok::l_square) && NextToken->isNot(TT_LambdaLSquare))
2063 return TT_PointerOrReference;
2064 if (NextToken->is(tok::kw_operator) && !IsExpression)
2065 return TT_PointerOrReference;
2066 if (NextToken->isOneOf(tok::comma, tok::semi))
2067 return TT_PointerOrReference;
2068
2069 if (PrevToken->Tok.isLiteral() ||
2070 PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::kw_true,
2071 tok::kw_false, tok::r_brace) ||
2072 NextToken->Tok.isLiteral() ||
2073 NextToken->isOneOf(tok::kw_true, tok::kw_false) ||
2074 NextToken->isUnaryOperator() ||
2075 // If we know we're in a template argument, there are no named
2076 // declarations. Thus, having an identifier on the right-hand side
2077 // indicates a binary operator.
2078 (InTemplateArgument && NextToken->Tok.isAnyIdentifier()))
2079 return TT_BinaryOperator;
2080
2081 // "&&(" is quite unlikely to be two successive unary "&".
2082 if (Tok.is(tok::ampamp) && NextToken->is(tok::l_paren))
2083 return TT_BinaryOperator;
2084
2085 // This catches some cases where evaluation order is used as control flow:
2086 // aaa && aaa->f();
2087 if (NextToken->Tok.isAnyIdentifier()) {
2088 const FormatToken *NextNextToken = NextToken->getNextNonComment();
2089 if (NextNextToken && NextNextToken->is(tok::arrow))
2090 return TT_BinaryOperator;
2091 }
2092
2093 // It is very unlikely that we are going to find a pointer or reference type
2094 // definition on the RHS of an assignment.
2095 if (IsExpression && !Contexts.back().CaretFound)
2096 return TT_BinaryOperator;
2097
2098 return TT_PointerOrReference;
2099 }
2100
2101 TokenType determinePlusMinusCaretUsage(const FormatToken &Tok) {
2102 const FormatToken *PrevToken = Tok.getPreviousNonComment();
2103 if (!PrevToken)
2104 return TT_UnaryOperator;
2105
2106 if (PrevToken->isOneOf(TT_CastRParen, TT_UnaryOperator))
2107 // This must be a sequence of leading unary operators.
2108 return TT_UnaryOperator;
2109
2110 // Use heuristics to recognize unary operators.
2111 if (PrevToken->isOneOf(tok::equal, tok::l_paren, tok::comma, tok::l_square,
2112 tok::question, tok::colon, tok::kw_return,
2113 tok::kw_case, tok::at, tok::l_brace, tok::kw_throw,
2114 tok::kw_co_return, tok::kw_co_yield))
2115 return TT_UnaryOperator;
2116
2117 // There can't be two consecutive binary operators.
2118 if (PrevToken->is(TT_BinaryOperator))
2119 return TT_UnaryOperator;
2120
2121 // Fall back to marking the token as binary operator.
2122 return TT_BinaryOperator;
2123 }
2124
2125 /// Determine whether ++/-- are pre- or post-increments/-decrements.
2126 TokenType determineIncrementUsage(const FormatToken &Tok) {
2127 const FormatToken *PrevToken = Tok.getPreviousNonComment();
2128 if (!PrevToken || PrevToken->is(TT_CastRParen))
2129 return TT_UnaryOperator;
2130 if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier))
2131 return TT_TrailingUnaryOperator;
2132
2133 return TT_UnaryOperator;
2134 }
2135
2136 SmallVector<Context, 8> Contexts;
2137
2138 const FormatStyle &Style;
2139 AnnotatedLine &Line;
2140 FormatToken *CurrentToken;
2141 bool AutoFound;
2142 const AdditionalKeywords &Keywords;
2143
2144 // Set of "<" tokens that do not open a template parameter list. If parseAngle
2145 // determines that a specific token can't be a template opener, it will make
2146 // same decision irrespective of the decisions for tokens leading up to it.
2147 // Store this information to prevent this from causing exponential runtime.
2148 llvm::SmallPtrSet<FormatToken *, 16> NonTemplateLess;
2149};
2150
2151static const int PrecedenceUnaryOperator = prec::PointerToMember + 1;
2152static const int PrecedenceArrowAndPeriod = prec::PointerToMember + 2;
2153
2154/// Parses binary expressions by inserting fake parenthesis based on
2155/// operator precedence.
2156class ExpressionParser {
2157public:
2158 ExpressionParser(const FormatStyle &Style, const AdditionalKeywords &Keywords,
2159 AnnotatedLine &Line)
2160 : Style(Style), Keywords(Keywords), Current(Line.First) {}
2161
2162 /// Parse expressions with the given operator precedence.
2163 void parse(int Precedence = 0) {
2164 // Skip 'return' and ObjC selector colons as they are not part of a binary
2165 // expression.
2166 while (Current && (Current->is(tok::kw_return) ||
2167 (Current->is(tok::colon) &&
2168 Current->isOneOf(TT_ObjCMethodExpr, TT_DictLiteral))))
2169 next();
2170
2171 if (!Current || Precedence > PrecedenceArrowAndPeriod)
2172 return;
2173
2174 // Conditional expressions need to be parsed separately for proper nesting.
2175 if (Precedence == prec::Conditional) {
2176 parseConditionalExpr();
2177 return;
2178 }
2179
2180 // Parse unary operators, which all have a higher precedence than binary
2181 // operators.
2182 if (Precedence == PrecedenceUnaryOperator) {
2183 parseUnaryOperator();
2184 return;
2185 }
2186
2187 FormatToken *Start = Current;
2188 FormatToken *LatestOperator = nullptr;
2189 unsigned OperatorIndex = 0;
2190
2191 while (Current) {
2192 // Consume operators with higher precedence.
2193 parse(Precedence + 1);
2194
2195 int CurrentPrecedence = getCurrentPrecedence();
2196
2197 if (Precedence == CurrentPrecedence && Current &&
2198 Current->is(TT_SelectorName)) {
2199 if (LatestOperator)
2200 addFakeParenthesis(Start, prec::Level(Precedence));
2201 Start = Current;
2202 }
2203
2204 // At the end of the line or when an operator with higher precedence is
2205 // found, insert fake parenthesis and return.
2206 if (!Current ||
2207 (Current->closesScope() &&
2208 (Current->MatchingParen || Current->is(TT_TemplateString))) ||
2209 (CurrentPrecedence != -1 && CurrentPrecedence < Precedence) ||
2210 (CurrentPrecedence == prec::Conditional &&
2211 Precedence == prec::Assignment && Current->is(tok::colon))) {
2212 break;
2213 }
2214
2215 // Consume scopes: (), [], <> and {}
2216 if (Current->opensScope()) {
2217 // In fragment of a JavaScript template string can look like '}..${' and
2218 // thus close a scope and open a new one at the same time.
2219 while (Current && (!Current->closesScope() || Current->opensScope())) {
2220 next();
2221 parse();
2222 }
2223 next();
2224 } else {
2225 // Operator found.
2226 if (CurrentPrecedence == Precedence) {
2227 if (LatestOperator)
2228 LatestOperator->NextOperator = Current;
2229 LatestOperator = Current;
2230 Current->OperatorIndex = OperatorIndex;
2231 ++OperatorIndex;
2232 }
2233 next(/*SkipPastLeadingComments=*/Precedence > 0);
2234 }
2235 }
2236
2237 if (LatestOperator && (Current || Precedence > 0)) {
2238 // LatestOperator->LastOperator = true;
2239 if (Precedence == PrecedenceArrowAndPeriod) {
2240 // Call expressions don't have a binary operator precedence.
2241 addFakeParenthesis(Start, prec::Unknown);
2242 } else {
2243 addFakeParenthesis(Start, prec::Level(Precedence));
2244 }
2245 }
2246 }
2247
2248private:
2249 /// Gets the precedence (+1) of the given token for binary operators
2250 /// and other tokens that we treat like binary operators.
2251 int getCurrentPrecedence() {
2252 if (Current) {
2253 const FormatToken *NextNonComment = Current->getNextNonComment();
2254 if (Current->is(TT_ConditionalExpr))
2255 return prec::Conditional;
2256 if (NextNonComment && Current->is(TT_SelectorName) &&
2257 (NextNonComment->isOneOf(TT_DictLiteral, TT_JsTypeColon) ||
2258 ((Style.Language == FormatStyle::LK_Proto ||
2259 Style.Language == FormatStyle::LK_TextProto) &&
2260 NextNonComment->is(tok::less))))
2261 return prec::Assignment;
2262 if (Current->is(TT_JsComputedPropertyName))
2263 return prec::Assignment;
2264 if (Current->is(TT_LambdaArrow))
2265 return prec::Comma;
2266 if (Current->is(TT_FatArrow))
2267 return prec::Assignment;
2268 if (Current->isOneOf(tok::semi, TT_InlineASMColon, TT_SelectorName) ||
2269 (Current->is(tok::comment) && NextNonComment &&
2270 NextNonComment->is(TT_SelectorName)))
2271 return 0;
2272 if (Current->is(TT_RangeBasedForLoopColon))
2273 return prec::Comma;
2274 if ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2275 Current->is(Keywords.kw_instanceof))
2276 return prec::Relational;
2277 if (Style.isJavaScript() &&
2278 Current->isOneOf(Keywords.kw_in, Keywords.kw_as))
2279 return prec::Relational;
2280 if (Current->is(TT_BinaryOperator) || Current->is(tok::comma))
2281 return Current->getPrecedence();
2282 if (Current->isOneOf(tok::period, tok::arrow))
2283 return PrecedenceArrowAndPeriod;
2284 if ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2285 Current->isOneOf(Keywords.kw_extends, Keywords.kw_implements,
2286 Keywords.kw_throws))
2287 return 0;
2288 }
2289 return -1;
2290 }
2291
2292 void addFakeParenthesis(FormatToken *Start, prec::Level Precedence) {
2293 Start->FakeLParens.push_back(Precedence);
2294 if (Precedence > prec::Unknown)
2295 Start->StartsBinaryExpression = true;
2296 if (Current) {
2297 FormatToken *Previous = Current->Previous;
2298 while (Previous->is(tok::comment) && Previous->Previous)
2299 Previous = Previous->Previous;
2300 ++Previous->FakeRParens;
2301 if (Precedence > prec::Unknown)
2302 Previous->EndsBinaryExpression = true;
2303 }
2304 }
2305
2306 /// Parse unary operator expressions and surround them with fake
2307 /// parentheses if appropriate.
2308 void parseUnaryOperator() {
2309 llvm::SmallVector<FormatToken *, 2> Tokens;
2310 while (Current && Current->is(TT_UnaryOperator)) {
2311 Tokens.push_back(Current);
2312 next();
2313 }
2314 parse(PrecedenceArrowAndPeriod);
2315 for (FormatToken *Token : llvm::reverse(Tokens))
2316 // The actual precedence doesn't matter.
2317 addFakeParenthesis(Token, prec::Unknown);
2318 }
2319
2320 void parseConditionalExpr() {
2321 while (Current && Current->isTrailingComment()) {
2322 next();
2323 }
2324 FormatToken *Start = Current;
2325 parse(prec::LogicalOr);
2326 if (!Current || !Current->is(tok::question))
2327 return;
2328 next();
2329 parse(prec::Assignment);
2330 if (!Current || Current->isNot(TT_ConditionalExpr))
2331 return;
2332 next();
2333 parse(prec::Assignment);
2334 addFakeParenthesis(Start, prec::Conditional);
2335 }
2336
2337 void next(bool SkipPastLeadingComments = true) {
2338 if (Current)
2339 Current = Current->Next;
2340 while (Current &&
2341 (Current->NewlinesBefore == 0 || SkipPastLeadingComments) &&
2342 Current->isTrailingComment())
2343 Current = Current->Next;
2344 }
2345
2346 const FormatStyle &Style;
2347 const AdditionalKeywords &Keywords;
2348 FormatToken *Current;
2349};
2350
2351} // end anonymous namespace
2352
2353void TokenAnnotator::setCommentLineLevels(
2354 SmallVectorImpl<AnnotatedLine *> &Lines) {
2355 const AnnotatedLine *NextNonCommentLine = nullptr;
2356 for (AnnotatedLine *AL : llvm::reverse(Lines)) {
2357 bool CommentLine = true;
2358 for (const FormatToken *Tok = AL->First; Tok; Tok = Tok->Next) {
1
Loop condition is true. Entering loop body
4
Assuming pointer value is null
2359 if (!Tok->is(tok::comment)) {
2
Taking true branch
2360 CommentLine = false;
2361 break;
2362 }
2363 }
2364
2365 // If the comment is currently aligned with the line immediately following
2366 // it, that's probably intentional and we should keep it.
2367 if (NextNonCommentLine
2.1
'NextNonCommentLine' is null
4.1
'NextNonCommentLine' is non-null
&& CommentLine
4.2
'CommentLine' is true
&&
2368 NextNonCommentLine->First->NewlinesBefore <= 1 &&
5
Assuming field 'NewlinesBefore' is <= 1
2369 NextNonCommentLine->First->OriginalColumn ==
2370 AL->First->OriginalColumn) {
6
Access to field 'OriginalColumn' results in a dereference of a null pointer (loaded from field 'First')
2371 // Align comments for preprocessor lines with the # in column 0 if
2372 // preprocessor lines are not indented. Otherwise, align with the next
2373 // line.
2374 AL->Level = (Style.IndentPPDirectives != FormatStyle::PPDIS_BeforeHash &&
2375 (NextNonCommentLine->Type == LT_PreprocessorDirective ||
2376 NextNonCommentLine->Type == LT_ImportStatement))
2377 ? 0
2378 : NextNonCommentLine->Level;
2379 } else {
2380 NextNonCommentLine = AL->First->isNot(tok::r_brace) ? AL : nullptr;
3
'?' condition is true
2381 }
2382
2383 setCommentLineLevels(AL->Children);
2384 }
2385}
2386
2387static unsigned maxNestingDepth(const AnnotatedLine &Line) {
2388 unsigned Result = 0;
2389 for (const auto *Tok = Line.First; Tok != nullptr; Tok = Tok->Next)
2390 Result = std::max(Result, Tok->NestingLevel);
2391 return Result;
2392}
2393
2394void TokenAnnotator::annotate(AnnotatedLine &Line) {
2395 for (auto &Child : Line.Children)
2396 annotate(*Child);
2397
2398 AnnotatingParser Parser(Style, Line, Keywords);
2399 Line.Type = Parser.parseLine();
2400
2401 // With very deep nesting, ExpressionParser uses lots of stack and the
2402 // formatting algorithm is very slow. We're not going to do a good job here
2403 // anyway - it's probably generated code being formatted by mistake.
2404 // Just skip the whole line.
2405 if (maxNestingDepth(Line) > 50)
2406 Line.Type = LT_Invalid;
2407
2408 if (Line.Type == LT_Invalid)
2409 return;
2410
2411 ExpressionParser ExprParser(Style, Keywords, Line);
2412 ExprParser.parse();
2413
2414 if (Line.startsWith(TT_ObjCMethodSpecifier))
2415 Line.Type = LT_ObjCMethodDecl;
2416 else if (Line.startsWith(TT_ObjCDecl))
2417 Line.Type = LT_ObjCDecl;
2418 else if (Line.startsWith(TT_ObjCProperty))
2419 Line.Type = LT_ObjCProperty;
2420
2421 Line.First->SpacesRequiredBefore = 1;
2422 Line.First->CanBreakBefore = Line.First->MustBreakBefore;
2423}
2424
2425// This function heuristically determines whether 'Current' starts the name of a
2426// function declaration.
2427static bool isFunctionDeclarationName(bool IsCpp, const FormatToken &Current,
2428 const AnnotatedLine &Line) {
2429 auto skipOperatorName = [](const FormatToken *Next) -> const FormatToken * {
2430 for (; Next; Next = Next->Next) {
2431 if (Next->is(TT_OverloadedOperatorLParen))
2432 return Next;
2433 if (Next->is(TT_OverloadedOperator))
2434 continue;
2435 if (Next->isOneOf(tok::kw_new, tok::kw_delete)) {
2436 // For 'new[]' and 'delete[]'.
2437 if (Next->Next &&
2438 Next->Next->startsSequence(tok::l_square, tok::r_square))
2439 Next = Next->Next->Next;
2440 continue;
2441 }
2442 if (Next->startsSequence(tok::l_square, tok::r_square)) {
2443 // For operator[]().
2444 Next = Next->Next;
2445 continue;
2446 }
2447 if ((Next->isSimpleTypeSpecifier() || Next->is(tok::identifier)) &&
2448 Next->Next && Next->Next->isOneOf(tok::star, tok::amp, tok::ampamp)) {
2449 // For operator void*(), operator char*(), operator Foo*().
2450 Next = Next->Next;
2451 continue;
2452 }
2453 if (Next->is(TT_TemplateOpener) && Next->MatchingParen) {
2454 Next = Next->MatchingParen;
2455 continue;
2456 }
2457
2458 break;
2459 }
2460 return nullptr;
2461 };
2462
2463 // Find parentheses of parameter list.
2464 const FormatToken *Next = Current.Next;
2465 if (Current.is(tok::kw_operator)) {
2466 if (Current.Previous && Current.Previous->is(tok::coloncolon))
2467 return false;
2468 Next = skipOperatorName(Next);
2469 } else {
2470 if (!Current.is(TT_StartOfName) || Current.NestingLevel != 0)
2471 return false;
2472 for (; Next; Next = Next->Next) {
2473 if (Next->is(TT_TemplateOpener)) {
2474 Next = Next->MatchingParen;
2475 } else if (Next->is(tok::coloncolon)) {
2476 Next = Next->Next;
2477 if (!Next)
2478 return false;
2479 if (Next->is(tok::kw_operator)) {
2480 Next = skipOperatorName(Next->Next);
2481 break;
2482 }
2483 if (!Next->is(tok::identifier))
2484 return false;
2485 } else if (Next->is(tok::l_paren)) {
2486 break;
2487 } else {
2488 return false;
2489 }
2490 }
2491 }
2492
2493 // Check whether parameter list can belong to a function declaration.
2494 if (!Next || !Next->is(tok::l_paren) || !Next->MatchingParen)
2495 return false;
2496 // If the lines ends with "{", this is likely a function definition.
2497 if (Line.Last->is(tok::l_brace))
2498 return true;
2499 if (Next->Next == Next->MatchingParen)
2500 return true; // Empty parentheses.
2501 // If there is an &/&& after the r_paren, this is likely a function.
2502 if (Next->MatchingParen->Next &&
2503 Next->MatchingParen->Next->is(TT_PointerOrReference))
2504 return true;
2505
2506 // Check for K&R C function definitions (and C++ function definitions with
2507 // unnamed parameters), e.g.:
2508 // int f(i)
2509 // {
2510 // return i + 1;
2511 // }
2512 // bool g(size_t = 0, bool b = false)
2513 // {
2514 // return !b;
2515 // }
2516 if (IsCpp && Next->Next && Next->Next->is(tok::identifier) &&
2517 !Line.endsWith(tok::semi))
2518 return true;
2519
2520 for (const FormatToken *Tok = Next->Next; Tok && Tok != Next->MatchingParen;
2521 Tok = Tok->Next) {
2522 if (Tok->is(TT_TypeDeclarationParen))
2523 return true;
2524 if (Tok->isOneOf(tok::l_paren, TT_TemplateOpener) && Tok->MatchingParen) {
2525 Tok = Tok->MatchingParen;
2526 continue;
2527 }
2528 if (Tok->is(tok::kw_const) || Tok->isSimpleTypeSpecifier() ||
2529 Tok->isOneOf(TT_PointerOrReference, TT_StartOfName, tok::ellipsis))
2530 return true;
2531 if (Tok->isOneOf(tok::l_brace, tok::string_literal, TT_ObjCMethodExpr) ||
2532 Tok->Tok.isLiteral())
2533 return false;
2534 }
2535 return false;
2536}
2537
2538bool TokenAnnotator::mustBreakForReturnType(const AnnotatedLine &Line) const {
2539 assert(Line.MightBeFunctionDecl)(static_cast <bool> (Line.MightBeFunctionDecl) ? void (
0) : __assert_fail ("Line.MightBeFunctionDecl", "clang/lib/Format/TokenAnnotator.cpp"
, 2539, __extension__ __PRETTY_FUNCTION__))
;
2540
2541 if ((Style.AlwaysBreakAfterReturnType == FormatStyle::RTBS_TopLevel ||
2542 Style.AlwaysBreakAfterReturnType ==
2543 FormatStyle::RTBS_TopLevelDefinitions) &&
2544 Line.Level > 0)
2545 return false;
2546
2547 switch (Style.AlwaysBreakAfterReturnType) {
2548 case FormatStyle::RTBS_None:
2549 return false;
2550 case FormatStyle::RTBS_All:
2551 case FormatStyle::RTBS_TopLevel:
2552 return true;
2553 case FormatStyle::RTBS_AllDefinitions:
2554 case FormatStyle::RTBS_TopLevelDefinitions:
2555 return Line.mightBeFunctionDefinition();
2556 }
2557
2558 return false;
2559}
2560
// Computes per-token formatting data for Line, after first doing the same for
// every child line (recursively).
//
// For each token after the first, this sets SpacesRequiredBefore,
// MustBreakBefore, CanBreakBefore, TotalLength and SplitPenalty, and marks
// tokens that isFunctionDeclarationName() accepts as
// TT_FunctionDeclarationName. Afterwards it calls
// calculateUnbreakableTailLengths() and assigns IndentLevel to every token.
// Line.First is dereferenced unconditionally, so the line must not be empty.
2561void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
2562 for (AnnotatedLine *ChildLine : Line.Children)
2563 calculateFormattingInformation(*ChildLine);
2564
2565 Line.First->TotalLength =
2566 Line.First->IsMultiline ? Style.ColumnLimit
2567 : Line.FirstStartColumn + Line.First->ColumnWidth;
// The first token was handled above; the loop below starts at the second one,
// so Current->Previous is the token preceding Current in the chain.
2568 FormatToken *Current = Line.First->Next;
2569 bool InFunctionDecl = Line.MightBeFunctionDecl;
2570 bool AlignArrayOfStructures =
2571 (Style.AlignArrayOfStructures != FormatStyle::AIAS_None &&
2572 Line.Type == LT_ArrayOfStructInitializer);
2573 if (AlignArrayOfStructures)
2574 calculateArrayInitializerColumnList(Line);
2575
2576 while (Current) {
2577 if (isFunctionDeclarationName(Style.isCpp(), *Current, Line))
2578 Current->setType(TT_FunctionDeclarationName);
2579 const FormatToken *Prev = Current->Previous;
2580 if (Current->is(TT_LineComment)) {
2581 if (Prev->is(BK_BracedInit) && Prev->opensScope())
2582 Current->SpacesRequiredBefore =
2583 (Style.Cpp11BracedListStyle && !Style.SpacesInParentheses) ? 0 : 1;
2584 else
2585 Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments;
2586
2587 // If we find a trailing comment, iterate backwards to determine whether
2588 // it seems to relate to a specific parameter. If so, break before that
2589 // parameter to avoid changing the comment's meaning. E.g. don't move 'b'
2590 // to the previous line in:
2591 // SomeFunction(a,
2592 // b, // comment
2593 // c);
2594 if (!Current->HasUnescapedNewline) {
2595 for (FormatToken *Parameter = Current->Previous; Parameter;
2596 Parameter = Parameter->Previous) {
2597 if (Parameter->isOneOf(tok::comment, tok::r_brace))
2598 break;
2599 if (Parameter->Previous && Parameter->Previous->is(tok::comma)) {
2600 if (!Parameter->Previous->is(TT_CtorInitializerComma) &&
2601 Parameter->HasUnescapedNewline)
2602 Parameter->MustBreakBefore = true;
2603 break;
2604 }
2605 }
2606 }
2607 } else if (Current->SpacesRequiredBefore == 0 &&
2608 spaceRequiredBefore(Line, *Current)) {
2609 Current->SpacesRequiredBefore = 1;
2610 }
2611
// An already-set MustBreakBefore is never cleared here, only added to.
2612 Current->MustBreakBefore =
2613 Current->MustBreakBefore || mustBreakBefore(Line, *Current);
2614
2615 if (!Current->MustBreakBefore && InFunctionDecl &&
2616 Current->is(TT_FunctionDeclarationName))
2617 Current->MustBreakBefore = mustBreakForReturnType(Line);
2618
2619 Current->CanBreakBefore =
2620 Current->MustBreakBefore || canBreakBefore(Line, *Current);
// A single child line attached to the previous token adds its length to this
// token's TotalLength (or ColumnLimit if the child ends in a trailing comment).
2621 unsigned ChildSize = 0;
2622 if (Prev->Children.size() == 1) {
2623 FormatToken &LastOfChild = *Prev->Children[0]->Last;
2624 ChildSize = LastOfChild.isTrailingComment() ? Style.ColumnLimit
2625 : LastOfChild.TotalLength + 1;
2626 }
// A forced break, several children, a child that must break, or a multiline
// token all add a full ColumnLimit to the running length.
2627 if (Current->MustBreakBefore || Prev->Children.size() > 1 ||
2628 (Prev->Children.size() == 1 &&
2629 Prev->Children[0]->First->MustBreakBefore) ||
2630 Current->IsMultiline)
2631 Current->TotalLength = Prev->TotalLength + Style.ColumnLimit;
2632 else
2633 Current->TotalLength = Prev->TotalLength + Current->ColumnWidth +
2634 ChildSize + Current->SpacesRequiredBefore;
2635
2636 if (Current->is(TT_CtorInitializerColon))
2637 InFunctionDecl = false;
2638
2639 // FIXME: Only calculate this if CanBreakBefore is true once static
2640 // initializers etc. are sorted out.
2641 // FIXME: Move magic numbers to a better place.
2642
2643 // Reduce penalty for aligning ObjC method arguments using the colon
2644 // alignment as this is the canonical way (still prefer fitting everything
2645 // into one line if possible). Trying to fit a whole expression into one
2646 // line should not force other line breaks (e.g. when ObjC method
2647 // expression is a part of other expression).
2648 Current->SplitPenalty = splitPenalty(Line, *Current, InFunctionDecl);
2649 if (Style.Language == FormatStyle::LK_ObjC &&
2650 Current->is(TT_SelectorName) && Current->ParameterIndex > 0) {
2651 if (Current->ParameterIndex == 1)
2652 Current->SplitPenalty += 5 * Current->BindingStrength;
2653 } else {
2654 Current->SplitPenalty += 20 * Current->BindingStrength;
2655 }
2656
2657 Current = Current->Next;
2658 }
2659
2660 calculateUnbreakableTailLengths(Line);
// Second pass over all tokens (including the first): assign IndentLevel. The
// level drops before a token whose MatchingParen opens a block or block-type
// list (never below 0) and rises after a token that opens one.
2661 unsigned IndentLevel = Line.Level;
2662 for (Current = Line.First; Current != nullptr; Current = Current->Next) {
2663 if (Current->Role)
2664 Current->Role->precomputeFormattingInfos(Current);
2665 if (Current->MatchingParen &&
2666 Current->MatchingParen->opensBlockOrBlockTypeList(Style) &&
2667 IndentLevel > 0) {
2668 --IndentLevel;
2669 }
2670 Current->IndentLevel = IndentLevel;
2671 if (Current->opensBlockOrBlockTypeList(Style))
2672 ++IndentLevel;
2673 }
2674
// NOTE(review): the text following LLVM_DEBUG({...}) below appears to be the
// macro's expansion as rendered by the analyzer report - confirm against the
// real source file.
2675 LLVM_DEBUG({ printDebugInfo(Line); })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("format-token-annotator")) { { printDebugInfo(Line); }; } } while
(false)
;
2676}
2677
2678void TokenAnnotator::calculateUnbreakableTailLengths(AnnotatedLine &Line) {
2679 unsigned UnbreakableTailLength = 0;
2680 FormatToken *Current = Line.Last;
2681 while (Current) {
2682 Current->UnbreakableTailLength = UnbreakableTailLength;
2683 if (Current->CanBreakBefore ||
2684 Current->isOneOf(tok::comment, tok::string_literal)) {
2685 UnbreakableTailLength = 0;
2686 } else {
2687 UnbreakableTailLength +=
2688 Current->ColumnWidth + Current->SpacesRequiredBefore;
2689 }
2690 Current = Current->Previous;
2691 }
2692}
2693
2694void TokenAnnotator::calculateArrayInitializerColumnList(AnnotatedLine &Line) {
2695 if (Line.First == Line.Last) {
2696 return;
2697 }
2698 auto *CurrentToken = Line.First;
2699 CurrentToken->ArrayInitializerLineStart = true;
2700 unsigned Depth = 0;
2701 while (CurrentToken != nullptr && CurrentToken != Line.Last) {
2702 if (CurrentToken->is(tok::l_brace)) {
2703 CurrentToken->IsArrayInitializer = true;
2704 if (CurrentToken->Next != nullptr)
2705 CurrentToken->Next->MustBreakBefore = true;
2706 CurrentToken =
2707 calculateInitializerColumnList(Line, CurrentToken->Next, Depth + 1);
2708 } else {
2709 CurrentToken = CurrentToken->Next;
2710 }
2711 }
2712}
2713
2714FormatToken *TokenAnnotator::calculateInitializerColumnList(
2715 AnnotatedLine &Line, FormatToken *CurrentToken, unsigned Depth) {
2716 while (CurrentToken != nullptr && CurrentToken != Line.Last) {
2717 if (CurrentToken->is(tok::l_brace))
2718 ++Depth;
2719 else if (CurrentToken->is(tok::r_brace))
2720 --Depth;
2721 if (Depth == 2 && CurrentToken->isOneOf(tok::l_brace, tok::comma)) {
2722 CurrentToken = CurrentToken->Next;
2723 if (CurrentToken == nullptr)
2724 break;
2725 CurrentToken->StartsColumn = true;
2726 CurrentToken = CurrentToken->Previous;
2727 }
2728 CurrentToken = CurrentToken->Next;
2729 }
2730 return CurrentToken;
2731}
2732
// Returns the penalty for breaking the line between Tok.Previous ("Left") and
// Tok ("Right").
//
// The checks below are evaluated strictly in order and the first one that
// matches determines the result, so their relative order is significant.
// Tok.Previous is dereferenced unconditionally; Tok must not be the first
// token of its line. InFunctionDecl selects the function-declaration specific
// penalties. If nothing more specific matches, the operator precedence of
// Left (or of Right when Left has none) is used, and finally the value 3.
2733unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
2734 const FormatToken &Tok,
2735 bool InFunctionDecl) {
2736 const FormatToken &Left = *Tok.Previous;
2737 const FormatToken &Right = Tok;
2738
2739 if (Left.is(tok::semi))
2740 return 0;
2741
// Language-specific penalties for Java and JavaScript come first.
2742 if (Style.Language == FormatStyle::LK_Java) {
2743 if (Right.isOneOf(Keywords.kw_extends, Keywords.kw_throws))
2744 return 1;
2745 if (Right.is(Keywords.kw_implements))
2746 return 2;
2747 if (Left.is(tok::comma) && Left.NestingLevel == 0)
2748 return 3;
2749 } else if (Style.isJavaScript()) {
2750 if (Right.is(Keywords.kw_function) && Left.isNot(tok::comma))
2751 return 100;
2752 if (Left.is(TT_JsTypeColon))
2753 return 35;
2754 if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) ||
2755 (Right.is(TT_TemplateString) && Right.TokenText.startswith("}")))
2756 return 100;
2757 // Prefer breaking call chains (".foo") over empty "{}", "[]" or "()".
2758 if (Left.opensScope() && Right.closesScope())
2759 return 200;
2760 }
2761
2762 if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral))
2763 return 1;
2764 if (Right.is(tok::l_square)) {
2765 if (Style.Language == FormatStyle::LK_Proto)
2766 return 1;
2767 if (Left.is(tok::r_square))
2768 return 200;
2769 // Slightly prefer formatting local lambda definitions like functions.
2770 if (Right.is(TT_LambdaLSquare) && Left.is(tok::equal))
2771 return 35;
2772 if (!Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare,
2773 TT_ArrayInitializerLSquare,
2774 TT_DesignatedInitializerLSquare, TT_AttributeSquare))
2775 return 500;
2776 }
2777
2778 if (Left.is(tok::coloncolon) ||
2779 (Right.is(tok::period) && Style.Language == FormatStyle::LK_Proto))
2780 return 500;
2781 if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) ||
2782 Right.is(tok::kw_operator)) {
2783 if (Line.startsWith(tok::kw_for) && Right.PartOfMultiVariableDeclStmt)
2784 return 3;
2785 if (Left.is(TT_StartOfName))
2786 return 110;
2787 if (InFunctionDecl && Right.NestingLevel == 0)
2788 return Style.PenaltyReturnTypeOnItsOwnLine;
2789 return 200;
2790 }
2791 if (Right.is(TT_PointerOrReference))
2792 return 190;
2793 if (Right.is(TT_LambdaArrow))
2794 return 110;
2795 if (Left.is(tok::equal) && Right.is(tok::l_brace))
2796 return 160;
2797 if (Left.is(TT_CastRParen))
2798 return 100;
2799 if (Left.isOneOf(tok::kw_class, tok::kw_struct))
2800 return 5000;
2801 if (Left.is(tok::comment))
2802 return 1000;
2803
2804 if (Left.isOneOf(TT_RangeBasedForLoopColon, TT_InheritanceColon,
2805 TT_CtorInitializerColon))
2806 return 2;
2807
2808 if (Right.isMemberAccess()) {
2809 // Breaking before the "./->" of a chained call/member access is reasonably
2810 // cheap, as formatting those with one call per line is generally
2811 // desirable. In particular, it should be cheaper to break before the call
2812 // than it is to break inside a call's parameters, which could lead to weird
2813 // "hanging" indents. The exception is the very last "./->" to support this
2814 // frequent pattern:
2815 //
2816 // aaaaaaaa.aaaaaaaa.bbbbbbb().ccccccccccccccccccccc(
2817 // dddddddd);
2818 //
2819 // which might otherwise be blown up onto many lines. Here, clang-format
2820 // won't produce "hanging" indents anyway as there is no other trailing
2821 // call.
2822 //
2823 // Also apply a higher penalty if it is not a call, as that might lead to a
2824 // wrapping like:
2825 //
2826 // aaaaaaa
2827 // .aaaaaaaaa.bbbbbbbb(cccccccc);
2828 return !Right.NextOperator || !Right.NextOperator->Previous->closesScope()
2829 ? 150
2830 : 35;
2831 }
2832
2833 if (Right.is(TT_TrailingAnnotation) &&
2834 (!Right.Next || Right.Next->isNot(tok::l_paren))) {
2835 // Moving trailing annotations to the next line is fine for ObjC method
2836 // declarations.
2837 if (Line.startsWith(TT_ObjCMethodSpecifier))
2838 return 10;
2839 // Generally, breaking before a trailing annotation is bad unless it is
2840 // function-like. It seems to be especially preferable to keep standard
2841 // annotations (i.e. "const", "final" and "override") on the same line.
2842 // Use a slightly higher penalty after ")" so that annotations like
2843 // "const override" are kept together.
2844 bool is_short_annotation = Right.TokenText.size() < 10;
2845 return (Left.is(tok::r_paren) ? 100 : 120) + (is_short_annotation ? 50 : 0);
2846 }
2847
2848 // In for-loops, prefer breaking at ',' and ';'.
2849 if (Line.startsWith(tok::kw_for) && Left.is(tok::equal))
2850 return 4;
2851
2852 // In Objective-C method expressions, prefer breaking before "param:" over
2853 // breaking after it.
2854 if (Right.is(TT_SelectorName))
2855 return 0;
2856 if (Left.is(tok::colon) && Left.is(TT_ObjCMethodExpr))
2857 return Line.MightBeFunctionDecl ? 50 : 500;
2858
2859 // In Objective-C type declarations, avoid breaking after the category's
2860 // open paren (we'll prefer breaking after the protocol list's opening
2861 // angle bracket, if present).
2862 if (Line.Type == LT_ObjCDecl && Left.is(tok::l_paren) && Left.Previous &&
2863 Left.Previous->isOneOf(tok::identifier, tok::greater))
2864 return 500;
2865
2866 if (Left.is(tok::l_paren) && Style.PenaltyBreakOpenParenthesis != 0)
2867 return Style.PenaltyBreakOpenParenthesis;
2868 if (Left.is(tok::l_paren) && InFunctionDecl &&
2869 Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign)
2870 return 100;
2871 if (Left.is(tok::l_paren) && Left.Previous &&
2872 (Left.Previous->is(tok::kw_for) || Left.Previous->isIf()))
2873 return 1000;
2874 if (Left.is(tok::equal) && InFunctionDecl)
2875 return 110;
2876 if (Right.is(tok::r_brace))
2877 return 1;
2878 if (Left.is(TT_TemplateOpener))
2879 return 100;
2880 if (Left.opensScope()) {
2881 // If we aren't aligning after opening parens/braces we can always break
2882 // here unless the style does not want us to place all arguments on the
2883 // next line.
2884 if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign &&
2885 (Left.ParameterCount <= 1 || Style.AllowAllArgumentsOnNextLine))
2886 return 0;
2887 if (Left.is(tok::l_brace) && !Style.Cpp11BracedListStyle)
2888 return 19;
2889 return Left.ParameterCount > 1 ? Style.PenaltyBreakBeforeFirstCallParameter
2890 : 19;
2891 }
2892 if (Left.is(TT_JavaAnnotation))
2893 return 50;
2894
2895 if (Left.is(TT_UnaryOperator))
2896 return 60;
2897 if (Left.isOneOf(tok::plus, tok::comma) && Left.Previous &&
2898 Left.Previous->isLabelString() &&
2899 (Left.NextOperator || Left.OperatorIndex != 0))
2900 return 50;
2901 if (Right.is(tok::plus) && Left.isLabelString() &&
2902 (Right.NextOperator || Right.OperatorIndex != 0))
2903 return 25;
2904 if (Left.is(tok::comma))
2905 return 1;
2906 if (Right.is(tok::lessless) && Left.isLabelString() &&
2907 (Right.NextOperator || Right.OperatorIndex != 1))
2908 return 25;
2909 if (Right.is(tok::lessless)) {
2910 // Breaking at a << is really cheap.
2911 if (!Left.is(tok::r_paren) || Right.OperatorIndex > 0)
2912 // Slightly prefer to break before the first one in log-like statements.
2913 return 2;
2914 return 1;
2915 }
2916 if (Left.ClosesTemplateDeclaration)
2917 return Style.PenaltyBreakTemplateDeclaration;
2918 if (Left.is(TT_ConditionalExpr))
2919 return prec::Conditional;
// Fall back to operator precedence: Left's if it has one, otherwise Right's.
2920 prec::Level Level = Left.getPrecedence();
2921 if (Level == prec::Unknown)
2922 Level = Right.getPrecedence();
2923 if (Level == prec::Assignment)
2924 return Style.PenaltyBreakAssignment;
2925 if (Level != prec::Unknown)
2926 return Level;
2927
2928 return 3;
2929}
2930
2931bool TokenAnnotator::spaceRequiredBeforeParens(const FormatToken &Right) const {
2932 if (Style.SpaceBeforeParens == FormatStyle::SBPO_Always)
2933 return true;
2934 if (Right.is(TT_OverloadedOperatorLParen) &&
2935 Style.SpaceBeforeParensOptions.AfterOverloadedOperator)
2936 return true;
2937 if (Style.SpaceBeforeParensOptions.BeforeNonEmptyParentheses &&
2938 Right.ParameterCount > 0)
2939 return true;
2940 return false;
2941}
2942
2943bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
2944 const FormatToken &Left,
2945 const FormatToken &Right) {
2946 if (Left.is(tok::kw_return) && Right.isNot(tok::semi))
2947 return true;
2948 if (Style.isJson() && Left.is(tok::string_literal) && Right.is(tok::colon))
2949 return false;
2950 if (Left.is(Keywords.kw_assert) && Style.Language == FormatStyle::LK_Java)
2951 return true;
2952 if (Style.ObjCSpaceAfterProperty && Line.Type == LT_ObjCProperty &&
2953 Left.Tok.getObjCKeywordID() == tok::objc_property)
2954 return true;
2955 if (Right.is(tok::hashhash))
2956 return Left.is(tok::hash);
2957 if (Left.isOneOf(tok::hashhash, tok::hash))
2958 return Right.is(tok::hash);
2959 if ((Left.is(tok::l_paren) && Right.is(tok::r_paren)) ||
2960 (Left.is(tok::l_brace) && Left.isNot(BK_Block) &&
2961 Right.is(tok::r_brace) && Right.isNot(BK_Block)))
2962 return Style.SpaceInEmptyParentheses;
2963 if (Style.SpacesInConditionalStatement) {
2964 if (Left.is(tok::l_paren) && Left.Previous &&
2965 isKeywordWithCondition(*Left.Previous))
2966 return true;
2967 if (Right.is(tok::r_paren) && Right.MatchingParen &&
2968 Right.MatchingParen->Previous &&
2969 isKeywordWithCondition(*Right.MatchingParen->Previous))
2970 return true;
2971 }
2972
2973 // auto{x} auto(x)
2974 if (Left.is(tok::kw_auto) && Right.isOneOf(tok::l_paren, tok::l_brace))
2975 return false;
2976
2977 // operator co_await(x)
2978 if (Right.is(tok::l_paren) && Left.is(tok::kw_co_await) && Left.Previous &&
2979 Left.Previous->is(tok::kw_operator))
2980 return false;
2981 // co_await (x), co_yield (x), co_return (x)
2982 if (Left.isOneOf(tok::kw_co_await, tok::kw_co_yield, tok::kw_co_return) &&
2983 Right.isNot(tok::semi))
2984 return true;
2985 // requires clause Concept1<T> && Concept2<T>
2986 if (Left.is(TT_ConstraintJunctions) && Right.is(tok::identifier))
2987 return true;
2988
2989 if (Left.is(tok::l_paren) || Right.is(tok::r_paren))
2990 return (Right.is(TT_CastRParen) ||
2991 (Left.MatchingParen && Left.MatchingParen->is(TT_CastRParen)))
2992 ? Style.SpacesInCStyleCastParentheses
2993 : Style.SpacesInParentheses;
2994 if (Right.isOneOf(tok::semi, tok::comma))
2995 return false;
2996 if (Right.is(tok::less) && Line.Type == LT_ObjCDecl) {
2997 bool IsLightweightGeneric = Right.MatchingParen &&
2998 Right.MatchingParen->Next &&
2999 Right.MatchingParen->Next->is(tok::colon);
3000 return !IsLightweightGeneric && Style.ObjCSpaceBeforeProtocolList;
3001 }
3002 if (Right.is(tok::less) && Left.is(tok::kw_template))
3003 return Style.SpaceAfterTemplateKeyword;
3004 if (Left.isOneOf(tok::exclaim, tok::tilde))
3005 return false;
3006 if (Left.is(tok::at) &&
3007 Right.isOneOf(tok::identifier, tok::string_literal, tok::char_constant,
3008 tok::numeric_constant, tok::l_paren, tok::l_brace,
3009 tok::kw_true, tok::kw_false))
3010 return false;
3011 if (Left.is(tok::colon))
3012 return !Left.is(TT_ObjCMethodExpr);
3013 if (Left.is(tok::coloncolon))
3014 return false;
3015 if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less)) {
3016 if (Style.Language == FormatStyle::LK_TextProto ||
3017 (Style.Language == FormatStyle::LK_Proto &&
3018 (Left.is(TT_DictLiteral) || Right.is(TT_DictLiteral)))) {
3019 // Format empty list as `<>`.
3020 if (Left.is(tok::less) && Right.is(tok::greater))
3021 return false;
3022 return !Style.Cpp11BracedListStyle;
3023 }
3024 return false;
3025 }
3026 if (Right.is(tok::ellipsis))
3027 return Left.Tok.isLiteral() || (Left.is(tok::identifier) && Left.Previous &&
3028 Left.Previous->is(tok::kw_case));
3029 if (Left.is(tok::l_square) && Right.is(tok::amp))
3030 return Style.SpacesInSquareBrackets;
3031 if (Right.is(TT_PointerOrReference)) {
3032 if (Left.is(tok::r_paren) && Line.MightBeFunctionDecl) {
3033 if (!Left.MatchingParen)
3034 return true;
3035 FormatToken *TokenBeforeMatchingParen =
3036 Left.MatchingParen->getPreviousNonComment();
3037 if (!TokenBeforeMatchingParen || !Left.is(TT_TypeDeclarationParen))
3038 return true;
3039 }
3040 // Add a space if the previous token is a pointer qualifier or the closing
3041 // parenthesis of __attribute__(()) expression and the style requires spaces
3042 // after pointer qualifiers.
3043 if ((Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_After ||
3044 Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_Both) &&
3045 (Left.is(TT_AttributeParen) || Left.canBePointerOrReferenceQualifier()))
3046 return true;
3047 if (Left.Tok.isLiteral())
3048 return true;
3049 // for (auto a = 0, b = 0; const auto & c : {1, 2, 3})
3050 if (Left.isTypeOrIdentifier() && Right.Next && Right.Next->Next &&
3051 Right.Next->Next->is(TT_RangeBasedForLoopColon))
3052 return getTokenPointerOrReferenceAlignment(Right) !=
3053 FormatStyle::PAS_Left;
3054 return (
3055 (!Left.isOneOf(TT_PointerOrReference, tok::l_paren) &&
3056 (getTokenPointerOrReferenceAlignment(Right) != FormatStyle::PAS_Left ||
3057 (Line.IsMultiVariableDeclStmt &&
3058 (Left.NestingLevel == 0 ||
3059 (Left.NestingLevel == 1 && Line.First->is(tok::kw_for)))))));
3060 }
3061 if (Right.is(TT_FunctionTypeLParen) && Left.isNot(tok::l_paren) &&
3062 (!Left.is(TT_PointerOrReference) ||
3063 (getTokenPointerOrReferenceAlignment(Left) != FormatStyle::PAS_Right &&
3064 !Line.IsMultiVariableDeclStmt)))
3065 return true;
3066 if (Left.is(TT_PointerOrReference)) {
3067 // Add a space if the next token is a pointer qualifier and the style
3068 // requires spaces before pointer qualifiers.
3069 if ((Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_Before ||
3070 Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_Both) &&
3071 Right.canBePointerOrReferenceQualifier())
3072 return true;
3073 // & 1
3074 if (Right.Tok.isLiteral())
3075 return true;
3076 // & /* comment
3077 if (Right.is(TT_BlockComment))
3078 return true;
3079 // foo() -> const Bar * override/final
3080 if (Right.isOneOf(Keywords.kw_override, Keywords.kw_final) &&
3081 !Right.is(TT_StartOfName))
3082 return true;
3083 // & {
3084 if (Right.is(tok::l_brace) && Right.is(BK_Block))
3085 return true;
3086 // for (auto a = 0, b = 0; const auto& c : {1, 2, 3})
3087 if (Left.Previous && Left.Previous->isTypeOrIdentifier() && Right.Next &&
3088 Right.Next->is(TT_RangeBasedForLoopColon))
3089 return getTokenPointerOrReferenceAlignment(Left) !=
3090 FormatStyle::PAS_Right;
3091 return !Right.isOneOf(TT_PointerOrReference, TT_ArraySubscriptLSquare,
3092 tok::l_paren) &&
3093 (getTokenPointerOrReferenceAlignment(Left) !=
3094 FormatStyle::PAS_Right &&
3095 !Line.IsMultiVariableDeclStmt) &&
3096 Left.Previous &&
3097 !Left.Previous->isOneOf(tok::l_paren, tok::coloncolon,
3098 tok::l_square);
3099 }
3100 // Ensure right pointer alignment with ellipsis e.g. int *...P
3101 if (Left.is(tok::ellipsis) && Left.Previous &&
3102 Left.Previous->isOneOf(tok::star, tok::amp, tok::ampamp))
3103 return Style.PointerAlignment != FormatStyle::PAS_Right;
3104
3105 if (Right.is(tok::star) && Left.is(tok::l_paren))
3106 return false;
3107 if (Left.is(tok::star) && Right.isOneOf(tok::star, tok::amp, tok::ampamp))
3108 return false;
3109 if (Right.isOneOf(tok::star, tok::amp, tok::ampamp)) {
3110 const FormatToken *Previous = &Left;
3111 while (Previous && !Previous->is(tok::kw_operator)) {
3112 if (Previous->is(tok::identifier) || Previous->isSimpleTypeSpecifier()) {
3113 Previous = Previous->getPreviousNonComment();
3114 continue;
3115 }
3116 if (Previous->is(TT_TemplateCloser) && Previous->MatchingParen) {
3117 Previous = Previous->MatchingParen->getPreviousNonComment();
3118 continue;
3119 }
3120 if (Previous->is(tok::coloncolon)) {
3121 Previous = Previous->getPreviousNonComment();
3122 continue;
3123 }
3124 break;
3125 }
3126 // Space between the type and the * in:
3127 // operator void*()
3128 // operator char*()
3129 // operator void const*()
3130 // operator void volatile*()
3131 // operator /*comment*/ const char*()
3132 // operator volatile /*comment*/ char*()
3133 // operator Foo*()
3134 // operator C<T>*()
3135 // operator std::Foo*()
3136 // operator C<T>::D<U>*()
3137 // dependent on PointerAlignment style.
3138 if (Previous) {
3139 if (Previous->endsSequence(tok::kw_operator))
3140 return (Style.PointerAlignment != FormatStyle::PAS_Left);
3141 if (Previous->is(tok::kw_const) || Previous->is(tok::kw_volatile))
3142 return (Style.PointerAlignment != FormatStyle::PAS_Left) ||
3143 (Style.SpaceAroundPointerQualifiers ==
3144 FormatStyle::SAPQ_After) ||
3145 (Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_Both);
3146 }
3147 }
3148 const auto SpaceRequiredForArrayInitializerLSquare =
3149 [](const FormatToken &LSquareTok, const FormatStyle &Style) {
3150 return Style.SpacesInContainerLiterals ||
3151 ((Style.Language == FormatStyle::LK_Proto ||
3152 Style.Language == FormatStyle::LK_TextProto) &&
3153 !Style.Cpp11BracedListStyle &&
3154 LSquareTok.endsSequence(tok::l_square, tok::colon,
3155 TT_SelectorName));
3156 };
3157 if (Left.is(tok::l_square))
3158 return (Left.is(TT_ArrayInitializerLSquare) && Right.isNot(tok::r_square) &&
3159 SpaceRequiredForArrayInitializerLSquare(Left, Style)) ||
3160 (Left.isOneOf(TT_ArraySubscriptLSquare, TT_StructuredBindingLSquare,
3161 TT_LambdaLSquare) &&
3162 Style.SpacesInSquareBrackets && Right.isNot(tok::r_square));
3163 if (Right.is(tok::r_square))
3164 return Right.MatchingParen &&
3165 ((Right.MatchingParen->is(TT_ArrayInitializerLSquare) &&
3166 SpaceRequiredForArrayInitializerLSquare(*Right.MatchingParen,
3167 Style)) ||
3168 (Style.SpacesInSquareBrackets &&
3169 Right.MatchingParen->isOneOf(TT_ArraySubscriptLSquare,
3170 TT_StructuredBindingLSquare,
3171 TT_LambdaLSquare)) ||
3172 Right.MatchingParen->is(TT_AttributeParen));
3173 if (Right.is(tok::l_square) &&
3174 !Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare,
3175 TT_DesignatedInitializerLSquare,
3176 TT_StructuredBindingLSquare, TT_AttributeSquare) &&
3177 !Left.isOneOf(tok::numeric_constant, TT_DictLiteral) &&
3178 !(!Left.is(tok::r_square) && Style.SpaceBeforeSquareBrackets &&
3179 Right.is(TT_ArraySubscriptLSquare)))
3180 return false;
3181 if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
3182 return !Left.Children.empty(); // No spaces in "{}".
3183 if ((Left.is(tok::l_brace) && Left.isNot(BK_Block)) ||
3184 (Right.is(tok::r_brace) && Right.MatchingParen &&
3185 Right.MatchingParen->isNot(BK_Block)))
3186 return Style.Cpp11BracedListStyle ? Style.SpacesInParentheses : true;
3187 if (Left.is(TT_BlockComment))
3188 // No whitespace in x(/*foo=*/1), except for JavaScript.
3189 return Style.isJavaScript() || !Left.TokenText.endswith("=*/");
3190
3191 // Space between template and attribute.
3192 // e.g. template <typename T> [[nodiscard]] ...
3193 if (Left.is(TT_TemplateCloser) && Right.is(TT_AttributeSquare))
3194 return true;
3195 // Space before parentheses common for all languages
3196 if (Right.is(tok::l_paren)) {
3197 if (Left.is(TT_TemplateCloser) && Right.isNot(TT_FunctionTypeLParen))
3198 return spaceRequiredBeforeParens(Right);
3199 if (Left.is(tok::kw_requires))
3200 return spaceRequiredBeforeParens(Right);
3201 if ((Left.is(tok::r_paren) && Left.is(TT_AttributeParen)) ||
3202 (Left.is(tok::r_square) && Left.is(TT_AttributeSquare)))
3203 return true;
3204 if (Left.is(TT_ForEachMacro))
3205 return (Style.SpaceBeforeParensOptions.AfterForeachMacros ||
3206 spaceRequiredBeforeParens(Right));
3207 if (Left.is(TT_IfMacro))
3208 return (Style.SpaceBeforeParensOptions.AfterIfMacros ||
3209 spaceRequiredBeforeParens(Right));
3210 if (Line.Type == LT_ObjCDecl)
3211 return true;
3212 if (Left.is(tok::semi))
3213 return true;
3214 if (Left.isOneOf(tok::pp_elif, tok::kw_for, tok::kw_while, tok::kw_switch,
3215 tok::kw_case, TT_ForEachMacro, TT_ObjCForIn))
3216 return Style.SpaceBeforeParensOptions.AfterControlStatements ||
3217 spaceRequiredBeforeParens(Right);
3218 if (Left.isIf(Line.Type != LT_PreprocessorDirective))
3219 return Style.SpaceBeforeParensOptions.AfterControlStatements ||
3220 spaceRequiredBeforeParens(Right);
3221
3222 // TODO add Operator overloading specific Options to
3223 // SpaceBeforeParensOptions
3224 if (Right.is(TT_OverloadedOperatorLParen))
3225 return spaceRequiredBeforeParens(Right);
3226 // Function declaration or definition
3227 if (Line.MightBeFunctionDecl && (Left.is(TT_FunctionDeclarationName))) {
3228 if (Line.mightBeFunctionDefinition())
3229 return Style.SpaceBeforeParensOptions.AfterFunctionDefinitionName ||
3230 spaceRequiredBeforeParens(Right);
3231 else
3232 return Style.SpaceBeforeParensOptions.AfterFunctionDeclarationName ||
3233 spaceRequiredBeforeParens(Right);
3234 }
3235 // Lambda
3236 if (Line.Type != LT_PreprocessorDirective && Left.is(tok::r_square) &&
3237 Left.MatchingParen && Left.MatchingParen->is(TT_LambdaLSquare))
3238 return Style.SpaceBeforeParensOptions.AfterFunctionDefinitionName ||
3239 spaceRequiredBeforeParens(Right);
3240 if (!Left.Previous || Left.Previous->isNot(tok::period)) {
3241 if (Left.isOneOf(tok::kw_try, Keywords.kw___except, tok::kw_catch))
3242 return Style.SpaceBeforeParensOptions.AfterControlStatements ||
3243 spaceRequiredBeforeParens(Right);
3244 if (Left.isOneOf(tok::kw_new, tok::kw_delete))
3245 return Style.SpaceBeforeParens != FormatStyle::SBPO_Never ||
3246 spaceRequiredBeforeParens(Right);
3247 }
3248 if (Line.Type != LT_PreprocessorDirective &&
3249 (Left.is(tok::identifier) || Left.isFunctionLikeKeyword() ||
3250 Left.is(tok::r_paren) || Left.isSimpleTypeSpecifier()))
3251 return spaceRequiredBeforeParens(Right);
3252 return false;
3253 }
3254 if (Left.is(tok::at) && Right.Tok.getObjCKeywordID() != tok::objc_not_keyword)
3255 return false;
3256 if (Right.is(TT_UnaryOperator))
3257 return !Left.isOneOf(tok::l_paren, tok::l_square, tok::at) &&
3258 (Left.isNot(tok::colon) || Left.isNot(TT_ObjCMethodExpr));
3259 if ((Left.isOneOf(tok::identifier, tok::greater, tok::r_square,
3260 tok::r_paren) ||
3261 Left.isSimpleTypeSpecifier()) &&
3262 Right.is(tok::l_brace) && Right.getNextNonComment() &&
3263 Right.isNot(BK_Block))
3264 return false;
3265 if (Left.is(tok::period) || Right.is(tok::period))
3266 return false;
3267 // u#str, U#str, L#str, u8#str
3268 // uR#str, UR#str, LR#str, u8R#str
3269 if (Right.is(tok::hash) && Left.is(tok::identifier) &&
3270 (Left.TokenText == "L" || Left.TokenText == "u" ||
3271 Left.TokenText == "U" || Left.TokenText == "u8" ||
3272 Left.TokenText == "LR" || Left.TokenText == "uR" ||
3273 Left.TokenText == "UR" || Left.TokenText == "u8R"))
3274 return false;
3275 if (Left.is(TT_TemplateCloser) && Left.MatchingParen &&
3276 Left.MatchingParen->Previous &&
3277 (Left.MatchingParen->Previous->is(tok::period) ||
3278 Left.MatchingParen->Previous->is(tok::coloncolon)))
3279 // Java call to generic function with explicit type:
3280 // A.<B<C<...>>>DoSomething();
3281 // A::<B<C<...>>>DoSomething(); // With a Java 8 method reference.
3282 return false;
3283 if (Left.is(TT_TemplateCloser) && Right.is(tok::l_square))
3284 return false;
3285 if (Left.is(tok::l_brace) && Left.endsSequence(TT_DictLiteral, tok::at))
3286 // Objective-C dictionary literal -> no space after opening brace.
3287 return false;
3288 if (Right.is(tok::r_brace) && Right.MatchingParen &&
3289 Right.MatchingParen->endsSequence(TT_DictLiteral, tok::at))
3290 // Objective-C dictionary literal -> no space before closing brace.
3291 return false;
3292 if (Right.getType() == TT_TrailingAnnotation &&
3293 Right.isOneOf(tok::amp, tok::ampamp) &&
3294 Left.isOneOf(tok::kw_const, tok::kw_volatile) &&
3295 (!Right.Next || Right.Next->is(tok::semi)))
3296 // Match const and volatile ref-qualifiers without any additional
3297 // qualifiers such as
3298 // void Fn() const &;
3299 return getTokenReferenceAlignment(Right) != FormatStyle::PAS_Left;
3300
3301 return true;
3302}
3303
// Decides whether whitespace is required in front of 'Right', i.e. between
// 'Right' and the token preceding it ('Right.Previous') on 'Line'.
//
// The rules are tried in order and the first one that matches returns:
// finalized tokens and a few universal rules first, then the rules of the
// language selected by 'Style', then language-independent rules. When nothing
// matches, the decision is delegated to spaceRequiredBetween().
//
// 'Right.Previous' is dereferenced unconditionally, so callers must pass a
// token that has a predecessor.
3304bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
3305 const FormatToken &Right) {
3306 const FormatToken &Left = *Right.Previous;
// True if the whitespace range recorded for 'Right' is non-empty.
3307 auto HasExistingWhitespace = [&Right]() {
3308 return Right.WhitespaceRange.getBegin() != Right.WhitespaceRange.getEnd();
3309 };
3310
3311 // If the token is finalized don't touch it (as it could be in a
3312 // clang-format-off section).
3313 if (Left.Finalized)
3314 return HasExistingWhitespace();
3315
3316 if (Right.Tok.getIdentifierInfo() && Left.Tok.getIdentifierInfo())
3317 return true; // Never ever merge two identifiers.
3318
3319 // Leave a space between * and /* to avoid C4138 `comment end` found outside
3320 // of comment.
3321 if (Left.is(tok::star) && Right.is(tok::comment))
3322 return true;
3323
3324 if (Style.isCpp()) {
// C++-only rules: import/module lines, 'operator', braced init lists.
3325 // Space between import <iostream>.
3326 // or import .....;
3327 if (Left.is(Keywords.kw_import) && Right.isOneOf(tok::less, tok::ellipsis))
3328 return true;
3329 // Space between module : (and import :).
3330 if (Left.isOneOf(Keywords.kw_module, Keywords.kw_import) &&
3331 Right.is(TT_ModulePartitionColon))
3332 return true;
3333 // No space between import foo:bar but keep a space between import :bar;
3334 if (Left.is(tok::identifier) && Right.is(TT_ModulePartitionColon))
3335 return false;
3336 // No space between :bar;
3337 if (Left.is(TT_ModulePartitionColon) &&
3338 Right.isOneOf(tok::identifier, tok::kw_private))
3339 return false;
// No space between '...' and an identifier on a line whose first token is
// 'import'.
// NOTE(review): Line.First is dereferenced without a null check here and in
// the JavaScript and case/default-colon rules below - presumably every
// annotated line has a first token; confirm.
3340 if (Left.is(tok::ellipsis) && Right.is(tok::identifier) &&
3341 Line.First->is(Keywords.kw_import))
3342 return false;
3343
// After 'operator', only a following '::' gets a space.
3344 if (Left.is(tok::kw_operator))
3345 return Right.is(tok::coloncolon);
// Space before a braced init list when SpaceBeforeCpp11BracedList is set,
// unless 'Left' opens a scope.
3346 if (Right.is(tok::l_brace) && Right.is(BK_BracedInit) &&
3347 !Left.opensScope() && Style.SpaceBeforeCpp11BracedList)
3348 return true;
3349 } else if (Style.Language == FormatStyle::LK_Proto ||
3350 Style.Language == FormatStyle::LK_TextProto) {
// Protocol buffer and text proto rules.
3351 if (Right.is(tok::period) &&
3352 Left.isOneOf(Keywords.kw_optional, Keywords.kw_required,
3353 Keywords.kw_repeated, Keywords.kw_extend))
3354 return true;
3355 if (Right.is(tok::l_paren) &&
3356 Left.isOneOf(Keywords.kw_returns, Keywords.kw_option))
3357 return true;
3358 if (Right.isOneOf(tok::l_brace, tok::less) && Left.is(TT_SelectorName))
3359 return true;
3360 // Slashes occur in text protocol extension syntax: [type/type] { ... }.
3361 if (Left.is(tok::slash) || Right.is(tok::slash))
3362 return false;
// '{' or '<' after a token whose matching paren is a proto extension '[':
// space unless Cpp11BracedListStyle is set.
3363 if (Left.MatchingParen &&
3364 Left.MatchingParen->is(TT_ProtoExtensionLSquare) &&
3365 Right.isOneOf(tok::l_brace, tok::less))
3366 return !Style.Cpp11BracedListStyle;
3367 // A percent is probably part of a formatting specification, such as %lld.
3368 if (Left.is(tok::percent))
3369 return false;
3370 // Preserve the existence of a space before a percent for cases like 0x%04x
3371 // and "%d %d"
3372 if (Left.is(tok::numeric_constant) && Right.is(tok::percent))
3373 return HasExistingWhitespace();
3374 } else if (Style.isJson()) {
// JSON: never a space before ':'.
3375 if (Right.is(tok::colon))
3376 return false;
3377 } else if (Style.isCSharp()) {
3378 // Require spaces around '{' and before '}' unless they appear in
3379 // interpolated strings. Interpolated strings are merged into a single token
3380 // so cannot have spaces inserted by this function.
3381
3382 // No space between 'this' and '['
3383 if (Left.is(tok::kw_this) && Right.is(tok::l_square))
3384 return false;
3385
3386 // No space between 'new' and '('
3387 if (Left.is(tok::kw_new) && Right.is(tok::l_paren))
3388 return false;
3389
3390 // Space before { (including space within '{ {').
3391 if (Right.is(tok::l_brace))
3392 return true;
3393
3394 // Spaces inside braces.
3395 if (Left.is(tok::l_brace) && Right.isNot(tok::r_brace))
3396 return true;
3397
3398 if (Left.isNot(tok::l_brace) && Right.is(tok::r_brace))
3399 return true;
3400
3401 // Spaces around '=>'.
3402 if (Left.is(TT_FatArrow) || Right.is(TT_FatArrow))
3403 return true;
3404
3405 // No spaces around attribute target colons
3406 if (Left.is(TT_AttributeColon) || Right.is(TT_AttributeColon))
3407 return false;
3408
3409 // space between type and variable e.g. Dictionary<string,string> foo;
3410 if (Left.is(TT_TemplateCloser) && Right.is(TT_StartOfName))
3411 return true;
3412
3413 // spaces inside square brackets.
3414 if (Left.is(tok::l_square) || Right.is(tok::r_square))
3415 return Style.SpacesInSquareBrackets;
3416
3417 // No space before ? in nullable types.
3418 if (Right.is(TT_CSharpNullable))
3419 return false;
3420
3421 // No space before null forgiving '!'.
3422 if (Right.is(TT_NonNullAssertion))
3423 return false;
3424
3425 // No space between consecutive commas '[,,]'.
3426 if (Left.is(tok::comma) && Right.is(tok::comma))
3427 return false;
3428
3429 // space after var in `var (key, value)`
3430 if (Left.is(Keywords.kw_var) && Right.is(tok::l_paren))
3431 return true;
3432
3433 // space between keywords and paren e.g. "using ("
3434 if (Right.is(tok::l_paren))
3435 if (Left.isOneOf(tok::kw_using, Keywords.kw_async, Keywords.kw_when,
3436 Keywords.kw_lock))
3437 return Style.SpaceBeforeParensOptions.AfterControlStatements ||
3438 spaceRequiredBeforeParens(Right);
3439
3440 // space between method modifier and opening parenthesis of a tuple return
3441 // type
3442 if (Left.isOneOf(tok::kw_public, tok::kw_private, tok::kw_protected,
3443 tok::kw_virtual, tok::kw_extern, tok::kw_static,
3444 Keywords.kw_internal, Keywords.kw_abstract,
3445 Keywords.kw_sealed, Keywords.kw_override,
3446 Keywords.kw_async, Keywords.kw_unsafe) &&
3447 Right.is(tok::l_paren))
3448 return true;
3449 } else if (Style.isJavaScript()) {
// JavaScript rules.
// Space after '=>'.
3450 if (Left.is(TT_FatArrow))
3451 return true;
3452 // for await ( ...
3453 if (Right.is(tok::l_paren) && Left.is(Keywords.kw_await) && Left.Previous &&
3454 Left.Previous->is(tok::kw_for))
3455 return true;
3456 if (Left.is(Keywords.kw_async) && Right.is(tok::l_paren) &&
3457 Right.MatchingParen) {
3458 const FormatToken *Next = Right.MatchingParen->getNextNonComment();
3459 // An async arrow function, for example: `x = async () => foo();`,
3460 // as opposed to calling a function called async: `x = async();`
3461 if (Next && Next->is(TT_FatArrow))
3462 return true;
3463 }
// No space after a template string piece ending in "${" nor before one
// starting with "}".
3464 if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) ||
3465 (Right.is(TT_TemplateString) && Right.TokenText.startswith("}")))
3466 return false;
3467 // In tagged template literals ("html`bar baz`"), there is no space between
3468 // the tag identifier and the template string.
3469 if (Keywords.IsJavaScriptIdentifier(Left,
3470 /* AcceptIdentifierName= */ false) &&
3471 Right.is(TT_TemplateString))
3472 return false;
// No space between 'function' or 'yield' and '*'.
3473 if (Right.is(tok::star) &&
3474 Left.isOneOf(Keywords.kw_function, Keywords.kw_yield))
3475 return false;
// Space between 'function', 'yield', 'extends' or 'implements' and a
// following '{' or '['.
3476 if (Right.isOneOf(tok::l_brace, tok::l_square) &&
3477 Left.isOneOf(Keywords.kw_function, Keywords.kw_yield,
3478 Keywords.kw_extends, Keywords.kw_implements))
3479 return true;
3480 if (Right.is(tok::l_paren)) {
3481 // JS methods can use some keywords as names (e.g. `delete()`).
3482 if (Line.MustBeDeclaration && Left.Tok.getIdentifierInfo())
3483 return false;
3484 // Valid JS method names can include keywords, e.g. `foo.delete()` or
3485 // `bar.instanceof()`. Recognize call positions by preceding period.
3486 if (Left.Previous && Left.Previous->is(tok::period) &&
3487 Left.Tok.getIdentifierInfo())
3488 return false;
3489 // Additional unary JavaScript operators that need a space after.
3490 if (Left.isOneOf(tok::kw_throw, Keywords.kw_await, Keywords.kw_typeof,
3491 tok::kw_void))
3492 return true;
3493 }
3494 // `foo as const;` casts into a const type.
3495 if (Left.endsSequence(tok::kw_const, Keywords.kw_as)) {
3496 return false;
3497 }
// Space after 'let', 'var', 'in', 'const' (and 'of' in keyword position),
// unless the keyword itself follows a '.'.
3498 if ((Left.isOneOf(Keywords.kw_let, Keywords.kw_var, Keywords.kw_in,
3499 tok::kw_const) ||
3500 // "of" is only a keyword if it appears after another identifier
3501 // (e.g. as "const x of y" in a for loop), or after a destructuring
3502 // operation (const [x, y] of z, const {a, b} of c).
3503 (Left.is(Keywords.kw_of) && Left.Previous &&
3504 (Left.Previous->Tok.is(tok::identifier) ||
3505 Left.Previous->isOneOf(tok::r_square, tok::r_brace)))) &&
3506 (!Left.Previous || !Left.Previous->is(tok::period)))
3507 return true;
// No space between 'for' or 'as' and '(' when the keyword follows a '.'.
3508 if (Left.isOneOf(tok::kw_for, Keywords.kw_as) && Left.Previous &&
3509 Left.Previous->is(tok::period) && Right.is(tok::l_paren))
3510 return false;
// Space between 'as' and an opening '[', '{' or '('.
3511 if (Left.is(Keywords.kw_as) &&
3512 Right.isOneOf(tok::l_square, tok::l_brace, tok::l_paren))
3513 return true;
// Space after the 'default' of 'export default'.
3514 if (Left.is(tok::kw_default) && Left.Previous &&
3515 Left.Previous->is(tok::kw_export))
3516 return true;
3517 if (Left.is(Keywords.kw_is) && Right.is(tok::l_brace))
3518 return true;
3519 if (Right.isOneOf(TT_JsTypeColon, TT_JsTypeOptionalQuestion))
3520 return false;
3521 if (Left.is(TT_JsTypeOperator) || Right.is(TT_JsTypeOperator))
3522 return false;
// No space after '{' or before '}' on lines whose first token is 'import' or
// 'export'.
3523 if ((Left.is(tok::l_brace) || Right.is(tok::r_brace)) &&
3524 Line.First->isOneOf(Keywords.kw_import, tok::kw_export))
3525 return false;
// No space after '...'.
3526 if (Left.is(tok::ellipsis))
3527 return false;
3528 if (Left.is(TT_TemplateCloser) &&
3529 !Right.isOneOf(tok::equal, tok::l_brace, tok::comma, tok::l_square,
3530 Keywords.kw_implements, Keywords.kw_extends))
3531 // Type assertions ('<type>expr') are not followed by whitespace. Other
3532 // locations that should have whitespace following are identified by the
3533 // above set of follower tokens.
3534 return false;
3535 if (Right.is(TT_NonNullAssertion))
3536 return false;
3537 if (Left.is(TT_NonNullAssertion) &&
3538 Right.isOneOf(Keywords.kw_as, Keywords.kw_in))
3539 return true; // "x! as string", "x! in y"
3540 } else if (Style.Language == FormatStyle::LK_Java) {
// Java rules.
// Space between ']' and '{'.
3541 if (Left.is(tok::r_square) && Right.is(tok::l_brace))
3542 return true;
3543 if (Left.is(Keywords.kw_synchronized) && Right.is(tok::l_paren))
3544 return Style.SpaceBeforeParensOptions.AfterControlStatements ||
3545 spaceRequiredBeforeParens(Right);
// Space between a modifier keyword and a following template opener.
3546 if ((Left.isOneOf(tok::kw_static, tok::kw_public, tok::kw_private,
3547 tok::kw_protected) ||
3548 Left.isOneOf(Keywords.kw_final, Keywords.kw_abstract,
3549 Keywords.kw_native)) &&
3550 Right.is(TT_TemplateOpener))
3551 return true;
3552 }
// From here on the rules apply regardless of the language branch taken above.
// After an implicit string literal, keep whatever whitespace was there.
3553 if (Left.is(TT_ImplicitStringLiteral))
3554 return HasExistingWhitespace();
3555 if (Line.Type == LT_ObjCMethodDecl) {
3556 if (Left.is(TT_ObjCMethodSpecifier))
3557 return true;
3558 if (Left.is(tok::r_paren) && canBeObjCSelectorComponent(Right))
3559 // Don't space between ')' and <id> or ')' and 'new'. 'new' is not a
3560 // keyword in Objective-C, and '+ (instancetype)new;' is a standard class
3561 // method declaration.
3562 return false;
3563 }
// No spaces around '=' on Objective-C property lines.
3564 if (Line.Type == LT_ObjCProperty &&
3565 (Right.is(tok::equal) || Left.is(tok::equal)))
3566 return false;
3567
// Spaces around trailing-return and lambda arrows.
3568 if (Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow) ||
3569 Left.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow))
3570 return true;
// Space after a comma (unless an overloaded-operator '(' follows), none
// before one.
3571 if (Left.is(tok::comma) && !Right.is(TT_OverloadedOperatorLParen))
3572 return true;
3573 if (Right.is(tok::comma))
3574 return false;
3575 if (Right.is(TT_ObjCBlockLParen))
3576 return true;
// Colons whose spacing is controlled by a dedicated style option.
3577 if (Right.is(TT_CtorInitializerColon))
3578 return Style.SpaceBeforeCtorInitializerColon;
3579 if (Right.is(TT_InheritanceColon) && !Style.SpaceBeforeInheritanceColon)
3580 return false;
3581 if (Right.is(TT_RangeBasedForLoopColon) &&
3582 !Style.SpaceBeforeRangeBasedForLoopColon)
3583 return false;
3584 if (Left.is(TT_BitFieldColon))
3585 return Style.BitFieldColonSpacing == FormatStyle::BFCS_Both ||
3586 Style.BitFieldColonSpacing == FormatStyle::BFCS_After;
// Remaining colons: the answer depends on what kind of colon 'Right' is; any
// colon not handled explicitly gets a space.
3587 if (Right.is(tok::colon)) {
3588 if (Line.First->isOneOf(tok::kw_default, tok::kw_case))
3589 return Style.SpaceBeforeCaseColon;
3590 if (!Right.getNextNonComment() || Right.getNextNonComment()->is(tok::semi))
3591 return false;
3592 if (Right.is(TT_ObjCMethodExpr))
3593 return false;
3594 if (Left.is(tok::question))
3595 return false;
3596 if (Right.is(TT_InlineASMColon) && Left.is(tok::coloncolon))
3597 return false;
3598 if (Right.is(TT_DictLiteral))
3599 return Style.SpacesInContainerLiterals;
3600 if (Right.is(TT_AttributeColon))
3601 return false;
3602 if (Right.is(TT_CSharpNamedArgumentColon))
3603 return false;
3604 if (Right.is(TT_BitFieldColon))
3605 return Style.BitFieldColonSpacing == FormatStyle::BFCS_Both ||
3606 Style.BitFieldColonSpacing == FormatStyle::BFCS_Before;
3607 return true;
3608 }
3609 // Do not merge "- -" into "--".
3610 if ((Left.isOneOf(tok::minus, tok::minusminus) &&
3611 Right.isOneOf(tok::minus, tok::minusminus)) ||
3612 (Left.isOneOf(tok::plus, tok::plusplus) &&
3613 Right.isOneOf(tok::plus, tok::plusplus)))
3614 return true;
3615 if (Left.is(TT_UnaryOperator)) {
3616 if (!Right.is(tok::l_paren)) {
3617 // The alternative operators for ~ and ! are "compl" and "not".
3618 // If they are used instead, we do not want to combine them with
3619 // the token to the right, unless that is a left paren.
3620 if (Left.is(tok::exclaim) && Left.TokenText == "not")
3621 return true;
3622 if (Left.is(tok::tilde) && Left.TokenText == "compl")
3623 return true;
3624 // Lambda captures allow for a lone &, so "&]" needs to be properly
3625 // handled.
3626 if (Left.is(tok::amp) && Right.is(tok::r_square))
3627 return Style.SpacesInSquareBrackets;
3628 }
// Otherwise a unary operator is followed by a space only for '!' with
// SpaceAfterLogicalNot, or when a binary operator follows.
3629 return (Style.SpaceAfterLogicalNot && Left.is(tok::exclaim)) ||
3630 Right.is(TT_BinaryOperator);
3631 }
3632
3633 // If the next token is a binary operator or a selector name, we have
3634 // incorrectly classified the parenthesis as a cast. FIXME: Detect correctly.
3635 if (Left.is(TT_CastRParen))
3636 return Style.SpaceAfterCStyleCast ||
3637 Right.isOneOf(TT_BinaryOperator, TT_SelectorName);
3638
// Resolves Style.SpacesInAngles: SIAS_Always yields true, SIAS_Leave keeps the
// existing whitespace state, anything else yields false.
3639 auto ShouldAddSpacesInAngles = [this, &HasExistingWhitespace]() {
3640 if (this->Style.SpacesInAngles == FormatStyle::SIAS_Always)
3641 return true;
3642 if (this->Style.SpacesInAngles == FormatStyle::SIAS_Leave)
3643 return HasExistingWhitespace();
3644 return false;
3645 };
3646
// '>' directly followed by '>'.
3647 if (Left.is(tok::greater) && Right.is(tok::greater)) {
3648 if (Style.Language == FormatStyle::LK_TextProto ||
3649 (Style.Language == FormatStyle::LK_Proto && Left.is(TT_DictLiteral)))
3650 return !Style.Cpp11BracedListStyle;
3651 return Right.is(TT_TemplateCloser) && Left.is(TT_TemplateCloser) &&
3652 ((Style.Standard < FormatStyle::LS_Cpp11) ||
3653 ShouldAddSpacesInAngles());
3654 }
// No space around '->', '.', '->*' and '.*'; a designated-initializer period
// on the right is not covered by this rule.
3655 if (Right.isOneOf(tok::arrow, tok::arrowstar, tok::periodstar) ||
3656 Left.isOneOf(tok::arrow, tok::period, tok::arrowstar, tok::periodstar) ||
3657 (Right.is(tok::period) && Right.isNot(TT_DesignatedInitializerPeriod)))
3658 return false;
// With SpaceBeforeAssignmentOperators off, no space before an operator of
// assignment precedence unless it follows a template closer.
3659 if (!Style.SpaceBeforeAssignmentOperators && Left.isNot(TT_TemplateCloser) &&
3660 Right.getPrecedence() == prec::Assignment)
3661 return false;
// Java: no space between an identifier or 'this' and '::'.
3662 if (Style.Language == FormatStyle::LK_Java && Right.is(tok::coloncolon) &&
3663 (Left.is(tok::identifier) || Left.is(tok::kw_this)))
3664 return false;
3665 if (Right.is(tok::coloncolon) && Left.is(tok::identifier))
3666 // Generally don't remove existing spaces between an identifier and "::".
3667 // The identifier might actually be a macro name such as ALWAYS_INLINE. If
3668 // this turns out to be too lenient, add analysis of the identifier itself.
3669 return HasExistingWhitespace();
3670 if (Right.is(tok::coloncolon) &&
3671 !Left.isOneOf(tok::l_brace, tok::comment, tok::l_paren))
3672 // Put a space between < and :: in vector< ::std::string >
3673 return (Left.is(TT_TemplateOpener) &&
3674 ((Style.Standard < FormatStyle::LS_Cpp11) ||
3675 ShouldAddSpacesInAngles())) ||
3676 !(Left.isOneOf(tok::l_paren, tok::r_paren, tok::l_square,
3677 tok::kw___super, TT_TemplateOpener,
3678 TT_TemplateCloser)) ||
3679 (Left.is(tok::l_paren) && Style.SpacesInParentheses);
// Exactly one of "Left is a template opener" / "Right is a template closer"
// holds: spacing follows SpacesInAngles.
3680 if ((Left.is(TT_TemplateOpener)) != (Right.is(TT_TemplateCloser)))
3681 return ShouldAddSpacesInAngles();
3682 // Space before TT_StructuredBindingLSquare.
3683 if (Right.is(TT_StructuredBindingLSquare))
3684 return !Left.isOneOf(tok::amp, tok::ampamp) ||
3685 getTokenReferenceAlignment(Left) != FormatStyle::PAS_Right;
3686 // Space before & or && following a TT_StructuredBindingLSquare.
3687 if (Right.Next && Right.Next->is(TT_StructuredBindingLSquare) &&
3688 Right.isOneOf(tok::amp, tok::ampamp))
3689 return getTokenReferenceAlignment(Right) != FormatStyle::PAS_Left;
// Space before a binary operator unless '(' precedes it, and after a binary
// operator or conditional-expression token unless ')' follows it.
3690 if ((Right.is(TT_BinaryOperator) && !Left.is(tok::l_paren)) ||
3691 (Left.isOneOf(TT_BinaryOperator, TT_ConditionalExpr) &&
3692 !Right.is(tok::r_paren)))
3693 return true;
// No space between the ')' closing an overloaded operator's '(' and a
// following template opener.
3694 if (Right.is(TT_TemplateOpener) && Left.is(tok::r_paren) &&
3695 Left.MatchingParen && Left.MatchingParen->is(TT_OverloadedOperatorLParen))
3696 return false;
// Space before '<' on a line starting with '#', unless '(' precedes it.
3697 if (Right.is(tok::less) && Left.isNot(tok::l_paren) &&
3698 Line.startsWith(tok::hash))
3699 return true;
3700 if (Right.is(TT_TrailingUnaryOperator))
3701 return false;
3702 if (Left.is(TT_RegexLiteral))
3703 return false;
// No rule above matched: fall back to spaceRequiredBetween().
3704 return spaceRequiredBetween(Line, Left, Right);
3705}
3706
3707// Returns 'true' if 'Tok' is a brace we'd want to break before in Allman style.
3708static bool isAllmanBrace(const FormatToken &Tok) {
3709 return Tok.is(tok::l_brace) && Tok.is(BK_Block) &&
3710 !Tok.isOneOf(TT_ObjCBlockLBrace, TT_LambdaLBrace, TT_DictLiteral);
3711}
3712
3713// Returns 'true' if 'Tok' is a function argument.
3714static bool IsFunctionArgument(const FormatToken &Tok) {
3715 return Tok.MatchingParen && Tok.MatchingParen->Next &&
3716 Tok.MatchingParen->Next->isOneOf(tok::comma, tok::r_paren);
3717}
3718
3719static bool
3720isItAnEmptyLambdaAllowed(const FormatToken &Tok,
3721 FormatStyle::ShortLambdaStyle ShortLambdaOption) {
3722 return Tok.Children.empty() && ShortLambdaOption != FormatStyle::SLS_None;
3723}
3724
3725static bool isAllmanLambdaBrace(const FormatToken &Tok) {
3726 return (Tok.is(tok::l_brace) && Tok.is(BK_Block) &&
3727 !Tok.isOneOf(TT_ObjCBlockLBrace, TT_DictLiteral));
3728}
3729
3730// Returns the first token on the line that is not a comment.
3731static const FormatToken *getFirstNonComment(const AnnotatedLine &Line) {
3732 const FormatToken *Next = Line.First;
3733 if (!Next)
3734 return Next;
3735 if (Next->is(tok::comment))
3736 Next = Next->getNextNonComment();
3737 return Next;
3738}
3739
3740bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
3741 const FormatToken &Right) {
3742 const FormatToken &Left = *Right.Previous;
3743 if (Right.NewlinesBefore > 1 && Style.MaxEmptyLinesToKeep > 0)
3744 return true;
3745
3746 if (Style.isCSharp()) {
3747 if (Left.is(TT_FatArrow) && Right.is(tok::l_brace) &&
3748 Style.BraceWrapping.AfterFunction)
3749 return true;
3750 if (Right.is(TT_CSharpNamedArgumentColon) ||
3751 Left.is(TT_CSharpNamedArgumentColon))
3752 return false;
3753 if (Right.is(TT_CSharpGenericTypeConstraint))
3754 return true;
3755 if (Right.Next && Right.Next->is(TT_FatArrow) &&
3756 (Right.is(tok::numeric_constant) ||
3757 (Right.is(tok::identifier) && Right.TokenText == "_")))
3758 return true;
3759
3760 // Break after C# [...] and before public/protected/private/internal.
3761 if (Left.is(TT_AttributeSquare) && Left.is(tok::r_square) &&
3762 (Right.isAccessSpecifier(/*ColonRequired=*/false) ||
3763 Right.is(Keywords.kw_internal)))
3764 return true;
3765 // Break between ] and [ but only when there are really 2 attributes.
3766 if (Left.is(TT_AttributeSquare) && Right.is(TT_AttributeSquare) &&
3767 Left.is(tok::r_square) && Right.is(tok::l_square))
3768 return true;
3769
3770 } else if (Style.isJavaScript()) {
3771 // FIXME: This might apply to other languages and token kinds.
3772 if (Right.is(tok::string_literal) && Left.is(tok::plus) && Left.Previous &&
3773 Left.Previous->is(tok::string_literal))
3774 return true;
3775 if (Left.is(TT_DictLiteral) && Left.is(tok::l_brace) && Line.Level == 0 &&
3776 Left.Previous && Left.Previous->is(tok::equal) &&
3777 Line.First->isOneOf(tok::identifier, Keywords.kw_import, tok::kw_export,
3778 tok::kw_const) &&
3779 // kw_var/kw_let are pseudo-tokens that are tok::identifier, so match
3780 // above.
3781 !Line.First->isOneOf(Keywords.kw_var, Keywords.kw_let))
3782 // Object literals on the top level of a file are treated as "enum-style".
3783 // Each key/value pair is put on a separate line, instead of bin-packing.
3784 return true;
3785 if (Left.is(tok::l_brace) && Line.Level == 0 &&
3786 (Line.startsWith(tok::kw_enum) ||
3787 Line.startsWith(tok::kw_const, tok::kw_enum) ||
3788 Line.startsWith(tok::kw_export, tok::kw_enum) ||
3789 Line.startsWith(tok::kw_export, tok::kw_const, tok::kw_enum)))
3790 // JavaScript top-level enum key/value pairs are put on separate lines
3791 // instead of bin-packing.
3792 return true;
3793 if (Right.is(tok::r_brace) && Left.is(tok::l_brace) && Left.Previous &&
3794 Left.Previous->is(TT_FatArrow)) {
3795 // JS arrow function (=> {...}).
3796 switch (Style.AllowShortLambdasOnASingleLine) {
3797 case FormatStyle::SLS_All:
3798 return false;
3799 case FormatStyle::SLS_None:
3800 return true;
3801 case FormatStyle::SLS_Empty:
3802 return !Left.Children.empty();
3803 case FormatStyle::SLS_Inline:
3804 // allow one-lining inline (e.g. in function call args) and empty arrow
3805 // functions.
3806 return (Left.NestingLevel == 0 && Line.Level == 0) &&
3807 !Left.Children.empty();
3808 }
3809 llvm_unreachable("Unknown FormatStyle::ShortLambdaStyle enum")::llvm::llvm_unreachable_internal("Unknown FormatStyle::ShortLambdaStyle enum"
, "clang/lib/Format/TokenAnnotator.cpp", 3809)
;
3810 }
3811
3812 if (Right.is(tok::r_brace) && Left.is(tok::l_brace) &&
3813 !Left.Children.empty())
3814 // Support AllowShortFunctionsOnASingleLine for JavaScript.
3815 return Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_None ||
3816 Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Empty ||
3817 (Left.NestingLevel == 0 && Line.Level == 0 &&
3818 Style.AllowShortFunctionsOnASingleLine &
3819 FormatStyle::SFS_InlineOnly);
3820 } else if (Style.Language == FormatStyle::LK_Java) {
3821 if (Right.is(tok::plus) && Left.is(tok::string_literal) && Right.Next &&
3822 Right.Next->is(tok::string_literal))
3823 return true;
3824 } else if (Style.Language == FormatStyle::LK_Cpp ||
3825 Style.Language == FormatStyle::LK_ObjC ||
3826 Style.Language == FormatStyle::LK_Proto ||
3827 Style.Language == FormatStyle::LK_TableGen ||
3828 Style.Language == FormatStyle::LK_TextProto) {
3829 if (Left.isStringLiteral() && Right.isStringLiteral())
3830 return true;
3831 }
3832
3833 // Basic JSON newline processing.
3834 if (Style.isJson()) {
3835 // Always break after a JSON record opener.
3836 // {
3837 // }
3838 if (Left.is(TT_DictLiteral) && Left.is(tok::l_brace))
3839 return true;
3840 // Always break after a JSON array opener.
3841 // [
3842 // ]
3843 if (Left.is(TT_ArrayInitializerLSquare) && Left.is(tok::l_square) &&
3844 !Right.is(tok::r_square))
3845 return true;
3846 // Always break after successive entries.
3847 // 1,
3848 // 2
3849 if (Left.is(tok::comma))
3850 return true;
3851 }
3852
3853 // If the last token before a '}', ']', or ')' is a comma or a trailing
3854 // comment, the intention is to insert a line break after it in order to make
3855 // shuffling around entries easier. Import statements, especially in
3856 // JavaScript, can be an exception to this rule.
3857 if (Style.JavaScriptWrapImports || Line.Type != LT_ImportStatement) {
3858 const FormatToken *BeforeClosingBrace = nullptr;
3859 if ((Left.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||
3860 (Style.isJavaScript() && Left.is(tok::l_paren))) &&
3861 Left.isNot(BK_Block) && Left.MatchingParen)
3862 BeforeClosingBrace = Left.MatchingParen->Previous;
3863 else if (Right.MatchingParen &&
3864 (Right.MatchingParen->isOneOf(tok::l_brace,
3865 TT_ArrayInitializerLSquare) ||
3866 (Style.isJavaScript() && Right.MatchingParen->is(tok::l_paren))))
3867 BeforeClosingBrace = &Left;
3868 if (BeforeClosingBrace && (BeforeClosingBrace->is(tok::comma) ||
3869 BeforeClosingBrace->isTrailingComment()))
3870 return true;
3871 }
3872
3873 if (Right.is(tok::comment))
3874 return Left.isNot(BK_BracedInit) && Left.isNot(TT_CtorInitializerColon) &&
3875 (Right.NewlinesBefore > 0 && Right.HasUnescapedNewline);
3876 if (Left.isTrailingComment())
3877 return true;
3878 if (Left.IsUnterminatedLiteral)
3879 return true;
3880 if (Right.is(tok::lessless) && Right.Next && Left.is(tok::string_literal) &&
3881 Right.Next->is(tok::string_literal))
3882 return true;
3883 // Can break after template<> declaration
3884 if (Left.ClosesTemplateDeclaration && Left.MatchingParen &&
3885 Left.MatchingParen->NestingLevel == 0) {
3886 // Put concepts on the next line e.g.
3887 // template<typename T>
3888 // concept ...
3889 if (Right.is(tok::kw_concept))
3890 return Style.BreakBeforeConceptDeclarations;
3891 return (Style.AlwaysBreakTemplateDeclarations == FormatStyle::BTDS_Yes);
3892 }
3893 if (Style.PackConstructorInitializers == FormatStyle::PCIS_Never) {
3894 if (Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeColon &&
3895 (Left.is(TT_CtorInitializerComma) || Right.is(TT_CtorInitializerColon)))
3896 return true;
3897
3898 if (Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon &&
3899 Left.isOneOf(TT_CtorInitializerColon, TT_CtorInitializerComma))
3900 return true;
3901 }
3902 if (Style.PackConstructorInitializers < FormatStyle::PCIS_CurrentLine &&
3903 Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma &&
3904 Right.isOneOf(TT_CtorInitializerComma, TT_CtorInitializerColon))
3905 return true;
3906 // Break only if we have multiple inheritance.
3907 if (Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma &&
3908 Right.is(TT_InheritanceComma))
3909 return true;
3910 if (Style.BreakInheritanceList == FormatStyle::BILS_AfterComma &&
3911 Left.is(TT_InheritanceComma))
3912 return true;
3913 if (Right.is(tok::string_literal) && Right.TokenText.startswith("R\""))
3914 // Multiline raw string literals are special wrt. line breaks. The author
3915 // has made a deliberate choice and might have aligned the contents of the
3916 // string literal accordingly. Thus, we try keep existing line breaks.
3917 return Right.IsMultiline && Right.NewlinesBefore > 0;
3918 if ((Left.is(tok::l_brace) || (Left.is(tok::less) && Left.Previous &&
3919 Left.Previous->is(tok::equal))) &&
3920 Right.NestingLevel == 1 && Style.Language == FormatStyle::LK_Proto) {
3921 // Don't put enums or option definitions onto single lines in protocol
3922 // buffers.
3923 return true;
3924 }
3925 if (Right.is(TT_InlineASMBrace))
3926 return Right.HasUnescapedNewline;
3927
3928 if (isAllmanBrace(Left) || isAllmanBrace(Right)) {
3929 auto FirstNonComment = getFirstNonComment(Line);
3930 bool AccessSpecifier =
3931 FirstNonComment &&
3932 FirstNonComment->isOneOf(Keywords.kw_internal, tok::kw_public,
3933 tok::kw_private, tok::kw_protected);
3934
3935 if (Style.BraceWrapping.AfterEnum) {
3936 if (Line.startsWith(tok::kw_enum) ||
3937 Line.startsWith(tok::kw_typedef, tok::kw_enum))
3938 return true;
3939 // Ensure BraceWrapping for `public enum A {`.
3940 if (AccessSpecifier && FirstNonComment->Next &&
3941 FirstNonComment->Next->is(tok::kw_enum))
3942 return true;
3943 }
3944
3945 // Ensure BraceWrapping for `public interface A {`.
3946 if (Style.BraceWrapping.AfterClass &&
3947 ((AccessSpecifier && FirstNonComment->Next &&
3948 FirstNonComment->Next->is(Keywords.kw_interface)) ||
3949 Line.startsWith(Keywords.kw_interface)))
3950 return true;
3951
3952 return (Line.startsWith(tok::kw_class) && Style.BraceWrapping.AfterClass) ||
3953 (Line.startsWith(tok::kw_struct) && Style.BraceWrapping.AfterStruct);
3954 }
3955
3956 if (Left.is(TT_ObjCBlockLBrace) &&
3957 Style.AllowShortBlocksOnASingleLine == FormatStyle::SBS_Never)
3958 return true;
3959
3960 // Ensure wrapping after __attribute__((XX)) and @interface etc.
3961 if (Left.is(TT_AttributeParen) && Right.is(TT_ObjCDecl))
3962 return true;
3963
3964 if (Left.is(TT_LambdaLBrace)) {
3965 if (IsFunctionArgument(Left) &&
3966 Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_Inline)
3967 return false;
3968
3969 if (Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_None ||
3970 Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_Inline ||
3971 (!Left.Children.empty() &&
3972 Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_Empty))
3973 return true;
3974 }
3975
3976 if (Style.BraceWrapping.BeforeLambdaBody && Right.is(TT_LambdaLBrace) &&
3977 Left.isOneOf(tok::star, tok::amp, tok::ampamp, TT_TemplateCloser)) {
3978 return true;
3979 }
3980
3981 // Put multiple Java annotation on a new line.
3982 if ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
3983 Left.is(TT_LeadingJavaAnnotation) &&
3984 Right.isNot(TT_LeadingJavaAnnotation) && Right.isNot(tok::l_paren) &&
3985 (Line.Last->is(tok::l_brace) || Style.BreakAfterJavaFieldAnnotations))
3986 return true;
3987
3988 if (Right.is(TT_ProtoExtensionLSquare))
3989 return true;
3990
3991 // In text proto instances if a submessage contains at least 2 entries and at
3992 // least one of them is a submessage, like A { ... B { ... } ... },
3993 // put all of the entries of A on separate lines by forcing the selector of
3994 // the submessage B to be put on a newline.
3995 //
3996 // Example: these can stay on one line:
3997 // a { scalar_1: 1 scalar_2: 2 }
3998 // a { b { key: value } }
3999 //
4000 // and these entries need to be on a new line even if putting them all in one
4001 // line is under the column limit:
4002 // a {
4003 // scalar: 1
4004 // b { key: value }
4005 // }
4006 //
4007 // We enforce this by breaking before a submessage field that has previous
4008 // siblings, *and* breaking before a field that follows a submessage field.
4009 //
4010 // Be careful to exclude the case [proto.ext] { ... } since the `]` is
4011 // the TT_SelectorName there, but we don't want to break inside the brackets.
4012 //
4013 // Another edge case is @submessage { key: value }, which is a common
4014 // substitution placeholder. In this case we want to keep `@` and `submessage`
4015 // together.
4016 //
4017 // We ensure elsewhere that extensions are always on their own line.
4018 if ((Style.Language == FormatStyle::LK_Proto ||
4019 Style.Language == FormatStyle::LK_TextProto) &&
4020 Right.is(TT_SelectorName) && !Right.is(tok::r_square) && Right.Next) {
4021 // Keep `@submessage` together in:
4022 // @submessage { key: value }
4023 if (Left.is(tok::at))
4024 return false;
4025 // Look for the scope opener after selector in cases like:
4026 // selector { ...
4027 // selector: { ...
4028 // selector: @base { ...
4029 FormatToken *LBrace = Right.Next;
4030 if (LBrace && LBrace->is(tok::colon)) {
4031 LBrace = LBrace->Next;
4032 if (LBrace && LBrace->is(tok::at)) {
4033 LBrace = LBrace->Next;
4034 if (LBrace)
4035 LBrace = LBrace->Next;
4036 }
4037 }
4038 if (LBrace &&
4039 // The scope opener is one of {, [, <:
4040 // selector { ... }
4041 // selector [ ... ]
4042 // selector < ... >
4043 //
4044 // In case of selector { ... }, the l_brace is TT_DictLiteral.
4045 // In case of an empty selector {}, the l_brace is not TT_DictLiteral,
4046 // so we check for immediately following r_brace.
4047 ((LBrace->is(tok::l_brace) &&
4048 (LBrace->is(TT_DictLiteral) ||
4049 (LBrace->Next && LBrace->Next->is(tok::r_brace)))) ||
4050 LBrace->is(TT_ArrayInitializerLSquare) || LBrace->is(tok::less))) {
4051 // If Left.ParameterCount is 0, then this submessage entry is not the
4052 // first in its parent submessage, and we want to break before this entry.
4053 // If Left.ParameterCount is greater than 0, then its parent submessage
4054 // might contain 1 or more entries and we want to break before this entry
4055 // if it contains at least 2 entries. We deal with this case later by
4056 // detecting and breaking before the next entry in the parent submessage.
4057 if (Left.ParameterCount == 0)
4058 return true;
4059 // However, if this submessage is the first entry in its parent
4060 // submessage, Left.ParameterCount might be 1 in some cases.
4061 // We deal with this case later by detecting an entry
4062 // following a closing paren of this submessage.
4063 }
4064
4065 // If this is an entry immediately following a submessage, it will be
4066 // preceded by a closing paren of that submessage, like in:
4067 // left---. .---right
4068 // v v
4069 // sub: { ... } key: value
4070 // If there was a comment between `}` and `key` above, then `key` would be
4071 // put on a new line anyway.
4072 if (Left.isOneOf(tok::r_brace, tok::greater, tok::r_square))
4073 return true;
4074 }
4075
4076 // Deal with lambda arguments in C++ - we want consistent line breaks whether
4077 // they happen to be at arg0, arg1 or argN. The selection is a bit nuanced
4078 // as aggressive line breaks are placed when the lambda is not the last arg.
4079 if ((Style.Language == FormatStyle::LK_Cpp ||
4080 Style.Language == FormatStyle::LK_ObjC) &&
4081 Left.is(tok::l_paren) && Left.BlockParameterCount > 0 &&
4082 !Right.isOneOf(tok::l_paren, TT_LambdaLSquare)) {
4083 // Multiple lambdas in the same function call force line breaks.
4084 if (Left.BlockParameterCount > 1)
4085 return true;
4086
4087 // A lambda followed by another arg forces a line break.
4088 if (!Left.Role)
4089 return false;
4090 auto Comma = Left.Role->lastComma();
4091 if (!Comma)
4092 return false;
4093 auto Next = Comma->getNextNonComment();
4094 if (!Next)
4095 return false;
4096 if (!Next->isOneOf(TT_LambdaLSquare, tok::l_brace, tok::caret))
4097 return true;
4098 }
4099
4100 return false;
4101}
4102
/// Decides whether a line break is permitted between \p Right and the token
/// preceding it (`Right.Previous`, referred to as `Left` below).
///
/// The body is an ordered cascade of early returns: the language-specific
/// rules for C#, Java and JavaScript run first, followed by the general rules.
/// The first rule that matches determines the result, so the order of the
/// checks is significant.
///
/// \param Line  The annotated line that contains \p Right.
/// \param Right The token before which a break is being considered.
/// \returns true if a break before \p Right is allowed, false otherwise.
4103bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
4104 const FormatToken &Right) {
// NOTE(review): Right.Previous is dereferenced without a null check;
// presumably callers never pass the first token of a line - confirm.
4105 const FormatToken &Left = *Right.Previous;
4106 // Language-specific stuff.
4107 if (Style.isCSharp()) {
4108 if (Left.isOneOf(TT_CSharpNamedArgumentColon, TT_AttributeColon) ||
4109 Right.isOneOf(TT_CSharpNamedArgumentColon, TT_AttributeColon))
4110 return false;
4111 // Only break after commas for generic type constraints.
4112 if (Line.First->is(TT_CSharpGenericTypeConstraint))
4113 return Left.is(TT_CSharpGenericTypeConstraintComma);
4114 // Keep nullable operators attached to their identifiers.
4115 if (Right.is(TT_CSharpNullable)) {
4116 return false;
4117 }
4118 } else if (Style.Language == FormatStyle::LK_Java) {
4119 if (Left.isOneOf(Keywords.kw_throws, Keywords.kw_extends,
4120 Keywords.kw_implements))
4121 return false;
4122 if (Right.isOneOf(Keywords.kw_throws, Keywords.kw_extends,
4123 Keywords.kw_implements))
4124 return true;
4125 } else if (Style.isJavaScript()) {
4126 const FormatToken *NonComment = Right.getPreviousNonComment();
4127 if (NonComment &&
4128 NonComment->isOneOf(
4129 tok::kw_return, Keywords.kw_yield, tok::kw_continue, tok::kw_break,
4130 tok::kw_throw, Keywords.kw_interface, Keywords.kw_type,
4131 tok::kw_static, tok::kw_public, tok::kw_private, tok::kw_protected,
4132 Keywords.kw_readonly, Keywords.kw_override, Keywords.kw_abstract,
4133 Keywords.kw_get, Keywords.kw_set, Keywords.kw_async,
4134 Keywords.kw_await))
4135 return false; // Otherwise automatic semicolon insertion would trigger.
4136 if (Right.NestingLevel == 0 &&
4137 (Left.Tok.getIdentifierInfo() ||
4138 Left.isOneOf(tok::r_square, tok::r_paren)) &&
4139 Right.isOneOf(tok::l_square, tok::l_paren))
4140 return false; // Otherwise automatic semicolon insertion would trigger.
4141 if (NonComment && NonComment->is(tok::identifier) &&
4142 NonComment->TokenText == "asserts")
4143 return false;
4144 if (Left.is(TT_FatArrow) && Right.is(tok::l_brace))
4145 return false;
4146 if (Left.is(TT_JsTypeColon))
4147 return true;
4148 // Don't wrap between ":" and "!" of a strict prop init ("field!: type;").
4149 if (Left.is(tok::exclaim) && Right.is(tok::colon))
4150 return false;
4151 // Look for is type annotations like:
4152 // function f(): a is B { ... }
4153 // Do not break before is in these cases.
4154 if (Right.is(Keywords.kw_is)) {
4155 const FormatToken *Next = Right.getNextNonComment();
4156 // If `is` is followed by a colon, it's likely that it's a dict key, so
4157 // ignore it for this check.
4158 // For example this is common in Polymer:
4159 // Polymer({
4160 // is: 'name',
4161 // ...
4162 // });
4163 if (!Next || !Next->is(tok::colon))
4164 return false;
4165 }
4166 if (Left.is(Keywords.kw_in))
4167 return Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None;
4168 if (Right.is(Keywords.kw_in))
4169 return Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None;
4170 if (Right.is(Keywords.kw_as))
4171 return false; // must not break before as in 'x as type' casts
4172 if (Right.isOneOf(Keywords.kw_extends, Keywords.kw_infer)) {
4173 // extends and infer can appear as keywords in conditional types:
4174 // https://www.typescriptlang.org/docs/handbook/release-notes/typescript-2-8.html#conditional-types
4175 // do not break before them, as the expressions are subject to ASI.
4176 return false;
4177 }
4178 if (Left.is(Keywords.kw_as))
4179 return true;
4180 if (Left.is(TT_NonNullAssertion))
4181 return true;
4182 if (Left.is(Keywords.kw_declare) &&
4183 Right.isOneOf(Keywords.kw_module, tok::kw_namespace,
4184 Keywords.kw_function, tok::kw_class, tok::kw_enum,
4185 Keywords.kw_interface, Keywords.kw_type, Keywords.kw_var,
4186 Keywords.kw_let, tok::kw_const))
4187 // See grammar for 'declare' statements at:
4188 // https://github.com/Microsoft/TypeScript/blob/main/doc/spec-ARCHIVED.md#A.10
4189 return false;
4190 if (Left.isOneOf(Keywords.kw_module, tok::kw_namespace) &&
4191 Right.isOneOf(tok::identifier, tok::string_literal))
4192 return false; // must not break in "module foo { ...}"
4193 if (Right.is(TT_TemplateString) && Right.closesScope())
4194 return false;
4195 // Don't split tagged template literal so there is a break between the tag
4196 // identifier and template string.
4197 if (Left.is(tok::identifier) && Right.is(TT_TemplateString)) {
4198 return false;
4199 }
4200 if (Left.is(TT_TemplateString) && Left.opensScope())
4201 return true;
4202 }
4203
// The rules below run whenever none of the language-specific branches above
// has already returned.
4204 if (Left.is(tok::at))
4205 return false;
4206 if (Left.Tok.getObjCKeywordID() == tok::objc_interface)
4207 return false;
4208 if (Left.isOneOf(TT_JavaAnnotation, TT_LeadingJavaAnnotation))
4209 return !Right.is(tok::l_paren);
4210 if (Right.is(TT_PointerOrReference))
4211 return Line.IsMultiVariableDeclStmt ||
4212 (getTokenPointerOrReferenceAlignment(Right) ==
4213 FormatStyle::PAS_Right &&
4214 (!Right.Next || Right.Next->isNot(TT_FunctionDeclarationName)));
4215 if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) ||
4216 Right.is(tok::kw_operator))
4217 return true;
4218 if (Left.is(TT_PointerOrReference))
4219 return false;
4220 if (Right.isTrailingComment())
4221 // We rely on MustBreakBefore being set correctly here as we should not
4222 // change the "binding" behavior of a comment.
4223 // The first comment in a braced lists is always interpreted as belonging to
4224 // the first list element. Otherwise, it should be placed outside of the
4225 // list.
4226 return Left.is(BK_BracedInit) ||
4227 (Left.is(TT_CtorInitializerColon) &&
4228 Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon);
4229 if (Left.is(tok::question) && Right.is(tok::colon))
4230 return false;
4231 if (Right.is(TT_ConditionalExpr) || Right.is(tok::question))
4232 return Style.BreakBeforeTernaryOperators;
4233 if (Left.is(TT_ConditionalExpr) || Left.is(tok::question))
4234 return !Style.BreakBeforeTernaryOperators;
4235 if (Left.is(TT_InheritanceColon))
4236 return Style.BreakInheritanceList == FormatStyle::BILS_AfterColon;
4237 if (Right.is(TT_InheritanceColon))
4238 return Style.BreakInheritanceList != FormatStyle::BILS_AfterColon;
4239 if (Right.is(TT_ObjCMethodExpr) && !Right.is(tok::r_square) &&
4240 Left.isNot(TT_SelectorName))
4241 return true;
4242
4243 if (Right.is(tok::colon) &&
4244 !Right.isOneOf(TT_CtorInitializerColon, TT_InlineASMColon))
4245 return false;
4246 if (Left.is(tok::colon) && Left.isOneOf(TT_DictLiteral, TT_ObjCMethodExpr)) {
4247 if (Style.Language == FormatStyle::LK_Proto ||
4248 Style.Language == FormatStyle::LK_TextProto) {
4249 if (!Style.AlwaysBreakBeforeMultilineStrings && Right.isStringLiteral())
4250 return false;
4251 // Prevent cases like:
4252 //
4253 // submessage:
4254 // { key: valueeeeeeeeeeee }
4255 //
4256 // when the snippet does not fit into one line.
4257 // Prefer:
4258 //
4259 // submessage: {
4260 // key: valueeeeeeeeeeee
4261 // }
4262 //
4263 // instead, even if it is longer by one line.
4264 //
4265 // Note that this allows the "{" to go over the column limit
4266 // when the column limit is just between ":" and "{", but that does
4267 // not happen too often and alternative formattings in this case are
4268 // not much better.
4269 //
4270 // The code covers the cases:
4271 //
4272 // submessage: { ... }
4273 // submessage: < ... >
4274 // repeated: [ ... ]
4275 if (((Right.is(tok::l_brace) || Right.is(tok::less)) &&
4276 Right.is(TT_DictLiteral)) ||
4277 Right.is(TT_ArrayInitializerLSquare))
4278 return false;
4279 }
4280 return true;
4281 }
4282 if (Right.is(tok::r_square) && Right.MatchingParen &&
4283 Right.MatchingParen->is(TT_ProtoExtensionLSquare))
4284 return false;
4285 if (Right.is(TT_SelectorName) || (Right.is(tok::identifier) && Right.Next &&
4286 Right.Next->is(TT_ObjCMethodExpr)))
4287 return Left.isNot(tok::period); // FIXME: Properly parse ObjC calls.
4288 if (Left.is(tok::r_paren) && Line.Type == LT_ObjCProperty)
4289 return true;
4290 if (Left.ClosesTemplateDeclaration || Left.is(TT_FunctionAnnotationRParen))
4291 return true;
4292 if (Right.isOneOf(TT_RangeBasedForLoopColon, TT_OverloadedOperatorLParen,
4293 TT_OverloadedOperator))
4294 return false;
4295 if (Left.is(TT_RangeBasedForLoopColon))
4296 return true;
4297 if (Right.is(TT_RangeBasedForLoopColon))
4298 return false;
4299 if (Left.is(TT_TemplateCloser) && Right.is(TT_TemplateOpener))
4300 return true;
4301 if (Left.isOneOf(TT_TemplateCloser, TT_UnaryOperator) ||
4302 Left.is(tok::kw_operator))
4303 return false;
4304 if (Left.is(tok::equal) && !Right.isOneOf(tok::kw_default, tok::kw_delete) &&
4305 Line.Type == LT_VirtualFunctionDecl && Left.NestingLevel == 0)
4306 return false;
4307 if (Left.is(tok::equal) && Right.is(tok::l_brace) &&
4308 !Style.Cpp11BracedListStyle)
4309 return false;
4310 if (Left.is(tok::l_paren) &&
4311 Left.isOneOf(TT_AttributeParen, TT_TypeDeclarationParen))
4312 return false;
4313 if (Left.is(tok::l_paren) && Left.Previous &&
4314 (Left.Previous->isOneOf(TT_BinaryOperator, TT_CastRParen)))
4315 return false;
4316 if (Right.is(TT_ImplicitStringLiteral))
4317 return false;
4318
4319 if (Right.is(TT_TemplateCloser))
4320 return false;
4321 if (Right.is(tok::r_square) && Right.MatchingParen &&
4322 Right.MatchingParen->is(TT_LambdaLSquare))
4323 return false;
4324
4325 // We only break before r_brace if there was a corresponding break before
4326 // the l_brace, which is tracked by BreakBeforeClosingBrace.
4327 if (Right.is(tok::r_brace))
4328 return Right.MatchingParen && Right.MatchingParen->is(BK_Block);
4329
4330 // We only break before r_paren if we're in a block indented context.
4331 if (Right.is(tok::r_paren)) {
4332 if (Style.AlignAfterOpenBracket == FormatStyle::BAS_BlockIndent) {
4333 return Right.MatchingParen &&
4334 !(Right.MatchingParen->Previous &&
4335 (Right.MatchingParen->Previous->is(tok::kw_for) ||
4336 Right.MatchingParen->Previous->isIf()));
4337 }
4338
4339 return false;
4340 }
4341
4342 // Allow breaking after a trailing annotation, e.g. after a method
4343 // declaration.
4344 if (Left.is(TT_TrailingAnnotation))
4345 return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal, tok::l_paren,
4346 tok::less, tok::coloncolon);
4347
4348 if (Right.is(tok::kw___attribute) ||
4349 (Right.is(tok::l_square) && Right.is(TT_AttributeSquare)))
4350 return !Left.is(TT_AttributeSquare);
4351
4352 if (Left.is(tok::identifier) && Right.is(tok::string_literal))
4353 return true;
4354
4355 if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral))
4356 return true;
4357
4358 if (Left.is(TT_CtorInitializerColon))
4359 return Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon;
4360 if (Right.is(TT_CtorInitializerColon))
4361 return Style.BreakConstructorInitializers != FormatStyle::BCIS_AfterColon;
4362 if (Left.is(TT_CtorInitializerComma) &&
4363 Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma)
4364 return false;
4365 if (Right.is(TT_CtorInitializerComma) &&
4366 Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma)
4367 return true;
4368 if (Left.is(TT_InheritanceComma) &&
4369 Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma)
4370 return false;
4371 if (Right.is(TT_InheritanceComma) &&
4372 Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma)
4373 return true;
4374 if ((Left.is(tok::greater) && Right.is(tok::greater)) ||
4375 (Left.is(tok::less) && Right.is(tok::less)))
4376 return false;
4377 if (Right.is(TT_BinaryOperator) &&
4378 Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None &&
4379 (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_All ||
4380 Right.getPrecedence() != prec::Assignment))
4381 return true;
4382 if (Left.is(TT_ArrayInitializerLSquare))
4383 return true;
4384 if (Right.is(tok::kw_typename) && Left.isNot(tok::kw_const))
4385 return true;
4386 if ((Left.isBinaryOperator() || Left.is(TT_BinaryOperator)) &&
4387 !Left.isOneOf(tok::arrowstar, tok::lessless) &&
4388 Style.BreakBeforeBinaryOperators != FormatStyle::BOS_All &&
4389 (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None ||
4390 Left.getPrecedence() == prec::Assignment))
4391 return true;
4392 if ((Left.is(TT_AttributeSquare) && Right.is(tok::l_square)) ||
4393 (Left.is(tok::r_square) && Right.is(TT_AttributeSquare)))
4394 return false;
4395
4396 auto ShortLambdaOption = Style.AllowShortLambdasOnASingleLine;
4397 if (Style.BraceWrapping.BeforeLambdaBody && Right.is(TT_LambdaLBrace)) {
4398 if (isAllmanLambdaBrace(Left))
4399 return !isItAnEmptyLambdaAllowed(Left, ShortLambdaOption);
4400 if (isAllmanLambdaBrace(Right))
4401 return !isItAnEmptyLambdaAllowed(Right, ShortLambdaOption);
4402 }
4403
// No earlier rule matched: a break is allowed only for the Left/Right token
// combinations listed in this final expression.
4404 return Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace,
4405 tok::kw_class, tok::kw_struct, tok::comment) ||
4406 Right.isMemberAccess() ||
4407 Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow, tok::lessless,
4408 tok::colon, tok::l_square, tok::at) ||
4409 (Left.is(tok::r_paren) &&
4410 Right.isOneOf(tok::identifier, tok::kw_const)) ||
4411 (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) ||
4412 (Left.is(TT_TemplateOpener) && !Right.is(TT_TemplateCloser));
4413}
4414
4415void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) {
4416 llvm::errs() << "AnnotatedTokens(L=" << Line.Level << "):\n";
4417 const FormatToken *Tok = Line.First;
4418 while (Tok) {
4419 llvm::errs() << " M=" << Tok->MustBreakBefore
4420 << " C=" << Tok->CanBreakBefore
4421 << " T=" << getTokenTypeName(Tok->getType())
4422 << " S=" << Tok->SpacesRequiredBefore
4423 << " F=" << Tok->Finalized << " B=" << Tok->BlockParameterCount
4424 << " BK=" << Tok->getBlockKind() << " P=" << Tok->SplitPenalty
4425 << " Name=" << Tok->Tok.getName() << " L=" << Tok->TotalLength
4426 << " PPK=" << Tok->getPackingKind() << " FakeLParens=";
4427 for (unsigned i = 0, e = Tok->FakeLParens.size(); i != e; ++i)
4428 llvm::errs() << Tok->FakeLParens[i] << "/";
4429 llvm::errs() << " FakeRParens=" << Tok->FakeRParens;
4430 llvm::errs() << " II=" << Tok->Tok.getIdentifierInfo();
4431 llvm::errs() << " Text='" << Tok->TokenText << "'\n";
4432 if (!Tok->Next)
4433 assert(Tok == Line.Last)(static_cast <bool> (Tok == Line.Last) ? void (0) : __assert_fail
("Tok == Line.Last", "clang/lib/Format/TokenAnnotator.cpp", 4433
, __extension__ __PRETTY_FUNCTION__))
;
4434 Tok = Tok->Next;
4435 }
4436 llvm::errs() << "----\n";
4437}
4438
4439FormatStyle::PointerAlignmentStyle
4440TokenAnnotator::getTokenReferenceAlignment(const FormatToken &Reference) {
4441 assert(Reference.isOneOf(tok::amp, tok::ampamp))(static_cast <bool> (Reference.isOneOf(tok::amp, tok::ampamp
)) ? void (0) : __assert_fail ("Reference.isOneOf(tok::amp, tok::ampamp)"
, "clang/lib/Format/TokenAnnotator.cpp", 4441, __extension__ __PRETTY_FUNCTION__
))
;
4442 switch (Style.ReferenceAlignment) {
4443 case FormatStyle::RAS_Pointer:
4444 return Style.PointerAlignment;
4445 case FormatStyle::RAS_Left:
4446 return FormatStyle::PAS_Left;
4447 case FormatStyle::RAS_Right:
4448 return FormatStyle::PAS_Right;
4449 case FormatStyle::RAS_Middle:
4450 return FormatStyle::PAS_Middle;
4451 }
4452 assert(0)(static_cast <bool> (0) ? void (0) : __assert_fail ("0"
, "clang/lib/Format/TokenAnnotator.cpp", 4452, __extension__ __PRETTY_FUNCTION__
))
; //"Unhandled value of ReferenceAlignment"
4453 return Style.PointerAlignment;
4454}
4455
4456FormatStyle::PointerAlignmentStyle
4457TokenAnnotator::getTokenPointerOrReferenceAlignment(
4458 const FormatToken &PointerOrReference) {
4459 if (PointerOrReference.isOneOf(tok::amp, tok::ampamp)) {
4460 switch (Style.ReferenceAlignment) {
4461 case FormatStyle::RAS_Pointer:
4462 return Style.PointerAlignment;
4463 case FormatStyle::RAS_Left:
4464 return FormatStyle::PAS_Left;
4465 case FormatStyle::RAS_Right:
4466 return FormatStyle::PAS_Right;
4467 case FormatStyle::RAS_Middle:
4468 return FormatStyle::PAS_Middle;
4469 }
4470 }
4471 assert(PointerOrReference.is(tok::star))(static_cast <bool> (PointerOrReference.is(tok::star)) ?
void (0) : __assert_fail ("PointerOrReference.is(tok::star)"
, "clang/lib/Format/TokenAnnotator.cpp", 4471, __extension__ __PRETTY_FUNCTION__
))
;
4472 return Style.PointerAlignment;
4473}
4474
4475} // namespace format
4476} // namespace clang