Bug Summary

File: clang/lib/Format/TokenAnnotator.cpp
Warning: line 234, column 15
Called C++ object pointer is null

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name TokenAnnotator.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -relaxed-aliasing -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/tools/clang/lib/Format -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/tools/clang/lib/Format -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/clang/lib/Format -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/clang/include -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/tools/clang/include -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/include -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/include -D NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem 
/usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-14/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/tools/clang/lib/Format -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e=. -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-09-04-040900-46481-1 -x c++ /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/clang/lib/Format/TokenAnnotator.cpp

/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/clang/lib/Format/TokenAnnotator.cpp

1//===--- TokenAnnotator.cpp - Format C++ code -----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements a token annotator, i.e. creates
11/// \c AnnotatedTokens out of \c FormatTokens with required extra information.
12///
13//===----------------------------------------------------------------------===//
14
15#include "TokenAnnotator.h"
16#include "FormatToken.h"
17#include "clang/Basic/SourceManager.h"
18#include "clang/Basic/TokenKinds.h"
19#include "llvm/ADT/SmallPtrSet.h"
20#include "llvm/Support/Debug.h"
21
22#define DEBUG_TYPE "format-token-annotator"
23
24namespace clang {
25namespace format {
26
27namespace {
28
29/// Returns \c true if the token can be used as an identifier in
30/// an Objective-C \c \@selector, \c false otherwise.
31///
32/// Because getFormattingLangOpts() always lexes source code as
33/// Objective-C++, C++ keywords like \c new and \c delete are
34/// lexed as tok::kw_*, not tok::identifier, even for Objective-C.
35///
36/// For Objective-C and Objective-C++, both identifiers and keywords
37/// are valid inside @selector(...) (or a macro which
38/// invokes @selector(...)). So, we treat any identifier or
39/// keyword as a potential Objective-C selector component.
40static bool canBeObjCSelectorComponent(const FormatToken &Tok) {
41 return Tok.Tok.getIdentifierInfo() != nullptr;
42}
43
44/// With `Left` being '(', check if we're at either `[...](` or
45/// `[...]<...>(`, where the [ opens a lambda capture list.
46static bool isLambdaParameterList(const FormatToken *Left) {
47 // Skip <...> if present.
48 if (Left->Previous && Left->Previous->is(tok::greater) &&
49 Left->Previous->MatchingParen &&
50 Left->Previous->MatchingParen->is(TT_TemplateOpener))
51 Left = Left->Previous->MatchingParen;
52
53 // Check for `[...]`.
54 return Left->Previous && Left->Previous->is(tok::r_square) &&
55 Left->Previous->MatchingParen &&
56 Left->Previous->MatchingParen->is(TT_LambdaLSquare);
57}
58
59/// Returns \c true if the token is followed by a boolean condition, \c false
60/// otherwise.
61static bool isKeywordWithCondition(const FormatToken &Tok) {
62 return Tok.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, tok::kw_switch,
63 tok::kw_constexpr, tok::kw_catch);
64}
65
66/// A parser that gathers additional information about tokens.
67///
68/// The \c TokenAnnotator tries to match parentheses and square brackets and
69/// store the parenthesis levels. It also tries to resolve matching "<" and ">"
70/// into template parameter lists.
71class AnnotatingParser {
72public:
73 AnnotatingParser(const FormatStyle &Style, AnnotatedLine &Line,
74 const AdditionalKeywords &Keywords)
75 : Style(Style), Line(Line), CurrentToken(Line.First), AutoFound(false),
76 Keywords(Keywords) {
77 Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false));
78 resetTokenMetadata(CurrentToken);
79 }
80
81private:
82 bool parseAngle() {
83 if (!CurrentToken || !CurrentToken->Previous)
84 return false;
85 if (NonTemplateLess.count(CurrentToken->Previous))
86 return false;
87
88 const FormatToken &Previous = *CurrentToken->Previous; // The '<'.
89 if (Previous.Previous) {
90 if (Previous.Previous->Tok.isLiteral())
91 return false;
92 if (Previous.Previous->is(tok::r_paren) && Contexts.size() > 1 &&
93 (!Previous.Previous->MatchingParen ||
94 !Previous.Previous->MatchingParen->is(TT_OverloadedOperatorLParen)))
95 return false;
96 }
97
98 FormatToken *Left = CurrentToken->Previous;
99 Left->ParentBracket = Contexts.back().ContextKind;
100 ScopedContextCreator ContextCreator(*this, tok::less, 12);
101
102 // If this angle is in the context of an expression, we need to be more
103 // hesitant to detect it as opening template parameters.
104 bool InExprContext = Contexts.back().IsExpression;
105
106 Contexts.back().IsExpression = false;
107 // If there's a template keyword before the opening angle bracket, this is a
108 // template parameter, not an argument.
109 Contexts.back().InTemplateArgument =
110 Left->Previous && Left->Previous->Tok.isNot(tok::kw_template);
111
112 if (Style.Language == FormatStyle::LK_Java &&
113 CurrentToken->is(tok::question))
114 next();
115
116 while (CurrentToken) {
117 if (CurrentToken->is(tok::greater)) {
118 // Try to do a better job at looking for ">>" within the condition of
119 // a statement. Conservatively insert spaces between consecutive ">"
120 // tokens to prevent splitting right bitshift operators and potentially
121 // altering program semantics. This check is overly conservative and
122 // will prevent spaces from being inserted in select nested template
123 // parameter cases, but should not alter program semantics.
124 if (CurrentToken->Next && CurrentToken->Next->is(tok::greater) &&
125 Left->ParentBracket != tok::less &&
126 (isKeywordWithCondition(*Line.First) ||
127 CurrentToken->getStartOfNonWhitespace() ==
128 CurrentToken->Next->getStartOfNonWhitespace().getLocWithOffset(
129 -1)))
130 return false;
131 Left->MatchingParen = CurrentToken;
132 CurrentToken->MatchingParen = Left;
133 // In TT_Proto, we must distinguish between:
134 // map<key, value>
135 // msg < item: data >
136 // msg: < item: data >
137 // In TT_TextProto, map<key, value> does not occur.
138 if (Style.Language == FormatStyle::LK_TextProto ||
139 (Style.Language == FormatStyle::LK_Proto && Left->Previous &&
140 Left->Previous->isOneOf(TT_SelectorName, TT_DictLiteral)))
141 CurrentToken->setType(TT_DictLiteral);
142 else
143 CurrentToken->setType(TT_TemplateCloser);
144 next();
145 return true;
146 }
147 if (CurrentToken->is(tok::question) &&
148 Style.Language == FormatStyle::LK_Java) {
149 next();
150 continue;
151 }
152 if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace) ||
153 (CurrentToken->isOneOf(tok::colon, tok::question) && InExprContext &&
154 !Style.isCSharp() && Style.Language != FormatStyle::LK_Proto &&
155 Style.Language != FormatStyle::LK_TextProto))
156 return false;
157 // If a && or || is found and interpreted as a binary operator, this set
158 // of angles is likely part of something like "a < b && c > d". If the
159 // angles are inside an expression, the ||/&& might also be a binary
160 // operator that was misinterpreted because we are parsing template
161 // parameters.
162 // FIXME: This is getting out of hand, write a decent parser.
163 if (CurrentToken->Previous->isOneOf(tok::pipepipe, tok::ampamp) &&
164 CurrentToken->Previous->is(TT_BinaryOperator) &&
165 Contexts[Contexts.size() - 2].IsExpression &&
166 !Line.startsWith(tok::kw_template))
167 return false;
168 updateParameterCount(Left, CurrentToken);
169 if (Style.Language == FormatStyle::LK_Proto) {
170 if (FormatToken *Previous = CurrentToken->getPreviousNonComment()) {
171 if (CurrentToken->is(tok::colon) ||
172 (CurrentToken->isOneOf(tok::l_brace, tok::less) &&
173 Previous->isNot(tok::colon)))
174 Previous->setType(TT_SelectorName);
175 }
176 }
177 if (!consumeToken())
178 return false;
179 }
180 return false;
181 }
182
183 bool parseUntouchableParens() {
184 while (CurrentToken) {
185 CurrentToken->Finalized = true;
186 switch (CurrentToken->Tok.getKind()) {
187 case tok::l_paren:
188 next();
189 if (!parseUntouchableParens())
190 return false;
191 continue;
192 case tok::r_paren:
193 next();
194 return true;
195 default:
196 // no-op
197 break;
198 }
199 next();
200 }
201 return false;
202 }
203
204 bool parseParens(bool LookForDecls = false) {
205 if (!CurrentToken)
1
Assuming field 'CurrentToken' is non-null
2
Taking false branch
206 return false;
207 FormatToken *Left = CurrentToken->Previous;
208 assert(Left && "Unknown previous token");
Macro expansion: (static_cast<void> (0))
209 FormatToken *PrevNonComment = Left->getPreviousNonComment();
210 Left->ParentBracket = Contexts.back().ContextKind;
211 ScopedContextCreator ContextCreator(*this, tok::l_paren, 1);
212
213 // FIXME: This is a bit of a hack. Do better.
214 Contexts.back().ColonIsForRangeExpr =
215 Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr;
3
Assuming the condition is false
216
217 if (Left->Previous && Left->Previous->is(TT_UntouchableMacroFunc)) {
4
Assuming field 'Previous' is null
218 Left->Finalized = true;
219 return parseUntouchableParens();
220 }
221
222 bool StartsObjCMethodExpr = false;
223 if (FormatToken *MaybeSel = Left->Previous) {
4.1
'MaybeSel' is null
5
Taking false branch
224 // @selector( starts a selector.
225 if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Previous &&
226 MaybeSel->Previous->is(tok::at)) {
227 StartsObjCMethodExpr = true;
228 }
229 }
230
231 if (Left->is(TT_OverloadedOperatorLParen)) {
6
Calling 'FormatToken::is'
9
Returning from 'FormatToken::is'
10
Taking true branch
232 // Find the previous kw_operator token.
233 FormatToken *Prev = Left;
234 while (!Prev->is(tok::kw_operator)) {
11
Calling 'FormatToken::is'
17
Returning from 'FormatToken::is'
18
Loop condition is true. Entering loop body
20
Called C++ object pointer is null
235 Prev = Prev->Previous;
19
Null pointer value stored to 'Prev'
236 assert(Prev && "Expect a kw_operator prior to the OperatorLParen!");
Macro expansion: (static_cast<void> (0))
237 }
238
239 // If faced with "a.operator*(argument)" or "a->operator*(argument)",
240 // i.e. the operator is called as a member function,
241 // then the argument must be an expression.
242 bool OperatorCalledAsMemberFunction =
243 Prev->Previous && Prev->Previous->isOneOf(tok::period, tok::arrow);
244 Contexts.back().IsExpression = OperatorCalledAsMemberFunction;
245 } else if (Style.Language == FormatStyle::LK_JavaScript &&
246 (Line.startsWith(Keywords.kw_type, tok::identifier) ||
247 Line.startsWith(tok::kw_export, Keywords.kw_type,
248 tok::identifier))) {
249 // type X = (...);
250 // export type X = (...);
251 Contexts.back().IsExpression = false;
252 } else if (Left->Previous &&
253 (Left->Previous->isOneOf(tok::kw_static_assert, tok::kw_while,
254 tok::l_paren, tok::comma) ||
255 Left->Previous->isIf() ||
256 Left->Previous->is(TT_BinaryOperator))) {
257 // static_assert, if and while usually contain expressions.
258 Contexts.back().IsExpression = true;
259 } else if (Style.Language == FormatStyle::LK_JavaScript && Left->Previous &&
260 (Left->Previous->is(Keywords.kw_function) ||
261 (Left->Previous->endsSequence(tok::identifier,
262 Keywords.kw_function)))) {
263 // function(...) or function f(...)
264 Contexts.back().IsExpression = false;
265 } else if (Style.Language == FormatStyle::LK_JavaScript && Left->Previous &&
266 Left->Previous->is(TT_JsTypeColon)) {
267 // let x: (SomeType);
268 Contexts.back().IsExpression = false;
269 } else if (isLambdaParameterList(Left)) {
270 // This is a parameter list of a lambda expression.
271 Contexts.back().IsExpression = false;
272 } else if (Line.InPPDirective &&
273 (!Left->Previous || !Left->Previous->is(tok::identifier))) {
274 Contexts.back().IsExpression = true;
275 } else if (Contexts[Contexts.size() - 2].CaretFound) {
276 // This is the parameter list of an ObjC block.
277 Contexts.back().IsExpression = false;
278 } else if (Left->Previous && Left->Previous->is(TT_ForEachMacro)) {
279 // The first argument to a foreach macro is a declaration.
280 Contexts.back().IsForEachMacro = true;
281 Contexts.back().IsExpression = false;
282 } else if (Left->Previous && Left->Previous->MatchingParen &&
283 Left->Previous->MatchingParen->is(TT_ObjCBlockLParen)) {
284 Contexts.back().IsExpression = false;
285 } else if (!Line.MustBeDeclaration && !Line.InPPDirective) {
286 bool IsForOrCatch =
287 Left->Previous && Left->Previous->isOneOf(tok::kw_for, tok::kw_catch);
288 Contexts.back().IsExpression = !IsForOrCatch;
289 }
290
291 // Infer the role of the l_paren based on the previous token if we haven't
292 // detected one yet.
293 if (PrevNonComment && Left->is(TT_Unknown)) {
294 if (PrevNonComment->is(tok::kw___attribute)) {
295 Left->setType(TT_AttributeParen);
296 } else if (PrevNonComment->isOneOf(TT_TypenameMacro, tok::kw_decltype,
297 tok::kw_typeof, tok::kw__Atomic,
298 tok::kw___underlying_type)) {
299 Left->setType(TT_TypeDeclarationParen);
300 // decltype() and typeof() usually contain expressions.
301 if (PrevNonComment->isOneOf(tok::kw_decltype, tok::kw_typeof))
302 Contexts.back().IsExpression = true;
303 }
304 }
305
306 if (StartsObjCMethodExpr) {
307 Contexts.back().ColonIsObjCMethodExpr = true;
308 Left->setType(TT_ObjCMethodExpr);
309 }
310
311 // MightBeFunctionType and ProbablyFunctionType are used for
312 // function pointer and reference types as well as Objective-C
313 // block types:
314 //
315 // void (*FunctionPointer)(void);
316 // void (&FunctionReference)(void);
317 // void (^ObjCBlock)(void);
318 bool MightBeFunctionType = !Contexts[Contexts.size() - 2].IsExpression;
319 bool ProbablyFunctionType =
320 CurrentToken->isOneOf(tok::star, tok::amp, tok::caret);
321 bool HasMultipleLines = false;
322 bool HasMultipleParametersOnALine = false;
323 bool MightBeObjCForRangeLoop =
324 Left->Previous && Left->Previous->is(tok::kw_for);
325 FormatToken *PossibleObjCForInToken = nullptr;
326 while (CurrentToken) {
327 // LookForDecls is set when "if (" has been seen. Check for
328 // 'identifier' '*' 'identifier' followed by not '=' -- this
329 // '*' has to be a binary operator but determineStarAmpUsage() will
330 // categorize it as an unary operator, so set the right type here.
331 if (LookForDecls && CurrentToken->Next) {
332 FormatToken *Prev = CurrentToken->getPreviousNonComment();
333 if (Prev) {
334 FormatToken *PrevPrev = Prev->getPreviousNonComment();
335 FormatToken *Next = CurrentToken->Next;
336 if (PrevPrev && PrevPrev->is(tok::identifier) &&
337 Prev->isOneOf(tok::star, tok::amp, tok::ampamp) &&
338 CurrentToken->is(tok::identifier) && Next->isNot(tok::equal)) {
339 Prev->setType(TT_BinaryOperator);
340 LookForDecls = false;
341 }
342 }
343 }
344
345 if (CurrentToken->Previous->is(TT_PointerOrReference) &&
346 CurrentToken->Previous->Previous->isOneOf(tok::l_paren,
347 tok::coloncolon))
348 ProbablyFunctionType = true;
349 if (CurrentToken->is(tok::comma))
350 MightBeFunctionType = false;
351 if (CurrentToken->Previous->is(TT_BinaryOperator))
352 Contexts.back().IsExpression = true;
353 if (CurrentToken->is(tok::r_paren)) {
354 if (MightBeFunctionType && ProbablyFunctionType && CurrentToken->Next &&
355 (CurrentToken->Next->is(tok::l_paren) ||
356 (CurrentToken->Next->is(tok::l_square) && Line.MustBeDeclaration)))
357 Left->setType(Left->Next->is(tok::caret) ? TT_ObjCBlockLParen
358 : TT_FunctionTypeLParen);
359 Left->MatchingParen = CurrentToken;
360 CurrentToken->MatchingParen = Left;
361
362 if (CurrentToken->Next && CurrentToken->Next->is(tok::l_brace) &&
363 Left->Previous && Left->Previous->is(tok::l_paren)) {
364 // Detect the case where macros are used to generate lambdas or
365 // function bodies, e.g.:
366 // auto my_lambda = MACRO((Type *type, int i) { .. body .. });
367 for (FormatToken *Tok = Left; Tok != CurrentToken; Tok = Tok->Next) {
368 if (Tok->is(TT_BinaryOperator) &&
369 Tok->isOneOf(tok::star, tok::amp, tok::ampamp))
370 Tok->setType(TT_PointerOrReference);
371 }
372 }
373
374 if (StartsObjCMethodExpr) {
375 CurrentToken->setType(TT_ObjCMethodExpr);
376 if (Contexts.back().FirstObjCSelectorName) {
377 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
378 Contexts.back().LongestObjCSelectorName;
379 }
380 }
381
382 if (Left->is(TT_AttributeParen))
383 CurrentToken->setType(TT_AttributeParen);
384 if (Left->is(TT_TypeDeclarationParen))
385 CurrentToken->setType(TT_TypeDeclarationParen);
386 if (Left->Previous && Left->Previous->is(TT_JavaAnnotation))
387 CurrentToken->setType(TT_JavaAnnotation);
388 if (Left->Previous && Left->Previous->is(TT_LeadingJavaAnnotation))
389 CurrentToken->setType(TT_LeadingJavaAnnotation);
390 if (Left->Previous && Left->Previous->is(TT_AttributeSquare))
391 CurrentToken->setType(TT_AttributeSquare);
392
393 if (!HasMultipleLines)
394 Left->setPackingKind(PPK_Inconclusive);
395 else if (HasMultipleParametersOnALine)
396 Left->setPackingKind(PPK_BinPacked);
397 else
398 Left->setPackingKind(PPK_OnePerLine);
399
400 next();
401 return true;
402 }
403 if (CurrentToken->isOneOf(tok::r_square, tok::r_brace))
404 return false;
405
406 if (CurrentToken->is(tok::l_brace))
407 Left->setType(TT_Unknown); // Not TT_ObjCBlockLParen
408 if (CurrentToken->is(tok::comma) && CurrentToken->Next &&
409 !CurrentToken->Next->HasUnescapedNewline &&
410 !CurrentToken->Next->isTrailingComment())
411 HasMultipleParametersOnALine = true;
412 bool ProbablyFunctionTypeLParen =
413 (CurrentToken->is(tok::l_paren) && CurrentToken->Next &&
414 CurrentToken->Next->isOneOf(tok::star, tok::amp, tok::caret));
415 if ((CurrentToken->Previous->isOneOf(tok::kw_const, tok::kw_auto) ||
416 CurrentToken->Previous->isSimpleTypeSpecifier()) &&
417 !(CurrentToken->is(tok::l_brace) ||
418 (CurrentToken->is(tok::l_paren) && !ProbablyFunctionTypeLParen)))
419 Contexts.back().IsExpression = false;
420 if (CurrentToken->isOneOf(tok::semi, tok::colon)) {
421 MightBeObjCForRangeLoop = false;
422 if (PossibleObjCForInToken) {
423 PossibleObjCForInToken->setType(TT_Unknown);
424 PossibleObjCForInToken = nullptr;
425 }
426 }
427 if (MightBeObjCForRangeLoop && CurrentToken->is(Keywords.kw_in)) {
428 PossibleObjCForInToken = CurrentToken;
429 PossibleObjCForInToken->setType(TT_ObjCForIn);
430 }
431 // When we discover a 'new', we set CanBeExpression to 'false' in order to
432 // parse the type correctly. Reset that after a comma.
433 if (CurrentToken->is(tok::comma))
434 Contexts.back().CanBeExpression = true;
435
436 FormatToken *Tok = CurrentToken;
437 if (!consumeToken())
438 return false;
439 updateParameterCount(Left, Tok);
440 if (CurrentToken && CurrentToken->HasUnescapedNewline)
441 HasMultipleLines = true;
442 }
443 return false;
444 }
445
446 bool isCSharpAttributeSpecifier(const FormatToken &Tok) {
447 if (!Style.isCSharp())
448 return false;
449
450 // `identifier[i]` is not an attribute.
451 if (Tok.Previous && Tok.Previous->is(tok::identifier))
452 return false;
453
454 // Chains of [] in `identifier[i][j][k]` are not attributes.
455 if (Tok.Previous && Tok.Previous->is(tok::r_square)) {
456 auto *MatchingParen = Tok.Previous->MatchingParen;
457 if (!MatchingParen || MatchingParen->is(TT_ArraySubscriptLSquare))
458 return false;
459 }
460
461 const FormatToken *AttrTok = Tok.Next;
462 if (!AttrTok)
463 return false;
464
465 // Just an empty declaration e.g. string [].
466 if (AttrTok->is(tok::r_square))
467 return false;
468
469 // Move along the tokens in between the '[' and ']' e.g. [STAThread].
470 while (AttrTok && AttrTok->isNot(tok::r_square)) {
471 AttrTok = AttrTok->Next;
472 }
473
474 if (!AttrTok)
475 return false;
476
477 // Allow an attribute to be the only content of a file.
478 AttrTok = AttrTok->Next;
479 if (!AttrTok)
480 return true;
481
482 // Limit this to being an access modifier that follows.
483 if (AttrTok->isOneOf(tok::kw_public, tok::kw_private, tok::kw_protected,
484 tok::comment, tok::kw_class, tok::kw_static,
485 tok::l_square, Keywords.kw_internal)) {
486 return true;
487 }
488
489 // in case it's a [XXX] retval func(....
490 if (AttrTok->Next &&
491 AttrTok->Next->startsSequence(tok::identifier, tok::l_paren))
492 return true;
493
494 return false;
495 }
496
497 bool isCpp11AttributeSpecifier(const FormatToken &Tok) {
498 if (!Style.isCpp() || !Tok.startsSequence(tok::l_square, tok::l_square))
499 return false;
500 // The first square bracket is part of an ObjC array literal
501 if (Tok.Previous && Tok.Previous->is(tok::at)) {
502 return false;
503 }
504 const FormatToken *AttrTok = Tok.Next->Next;
505 if (!AttrTok)
506 return false;
507 // C++17 '[[using ns: foo, bar(baz, blech)]]'
508 // We assume nobody will name an ObjC variable 'using'.
509 if (AttrTok->startsSequence(tok::kw_using, tok::identifier, tok::colon))
510 return true;
511 if (AttrTok->isNot(tok::identifier))
512 return false;
513 while (AttrTok && !AttrTok->startsSequence(tok::r_square, tok::r_square)) {
514 // ObjC message send. We assume nobody will use : in a C++11 attribute
515 // specifier parameter, although this is technically valid:
516 // [[foo(:)]].
517 if (AttrTok->is(tok::colon) ||
518 AttrTok->startsSequence(tok::identifier, tok::identifier) ||
519 AttrTok->startsSequence(tok::r_paren, tok::identifier))
520 return false;
521 if (AttrTok->is(tok::ellipsis))
522 return true;
523 AttrTok = AttrTok->Next;
524 }
525 return AttrTok && AttrTok->startsSequence(tok::r_square, tok::r_square);
526 }
527
528 bool parseSquare() {
529 if (!CurrentToken)
530 return false;
531
532 // A '[' could be an index subscript (after an identifier or after
533 // ')' or ']'), it could be the start of an Objective-C method
534 // expression, it could be the start of an Objective-C array literal,
535 // or it could be a C++ attribute specifier [[foo::bar]].
536 FormatToken *Left = CurrentToken->Previous;
537 Left->ParentBracket = Contexts.back().ContextKind;
538 FormatToken *Parent = Left->getPreviousNonComment();
539
540 // Cases where '>' is followed by '['.
541 // In C++, this can happen either in array of templates (foo<int>[10])
542 // or when array is a nested template type (unique_ptr<type1<type2>[]>).
543 bool CppArrayTemplates =
544 Style.isCpp() && Parent && Parent->is(TT_TemplateCloser) &&
545 (Contexts.back().CanBeExpression || Contexts.back().IsExpression ||
546 Contexts.back().InTemplateArgument);
547
548 bool IsCpp11AttributeSpecifier = isCpp11AttributeSpecifier(*Left) ||
549 Contexts.back().InCpp11AttributeSpecifier;
550
551 // Treat C# Attributes [STAThread] much like C++ attributes [[...]].
552 bool IsCSharpAttributeSpecifier =
553 isCSharpAttributeSpecifier(*Left) ||
554 Contexts.back().InCSharpAttributeSpecifier;
555
556 bool InsideInlineASM = Line.startsWith(tok::kw_asm);
557 bool IsCppStructuredBinding = Left->isCppStructuredBinding(Style);
558 bool StartsObjCMethodExpr =
559 !IsCppStructuredBinding && !InsideInlineASM && !CppArrayTemplates &&
560 Style.isCpp() && !IsCpp11AttributeSpecifier &&
561 !IsCSharpAttributeSpecifier && Contexts.back().CanBeExpression &&
562 Left->isNot(TT_LambdaLSquare) &&
563 !CurrentToken->isOneOf(tok::l_brace, tok::r_square) &&
564 (!Parent ||
565 Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren,
566 tok::kw_return, tok::kw_throw) ||
567 Parent->isUnaryOperator() ||
568 // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen.
569 Parent->isOneOf(TT_ObjCForIn, TT_CastRParen) ||
570 (getBinOpPrecedence(Parent->Tok.getKind(), true, true) >
571 prec::Unknown));
572 bool ColonFound = false;
573
574 unsigned BindingIncrease = 1;
575 if (IsCppStructuredBinding) {
576 Left->setType(TT_StructuredBindingLSquare);
577 } else if (Left->is(TT_Unknown)) {
578 if (StartsObjCMethodExpr) {
579 Left->setType(TT_ObjCMethodExpr);
580 } else if (InsideInlineASM) {
581 Left->setType(TT_InlineASMSymbolicNameLSquare);
582 } else if (IsCpp11AttributeSpecifier) {
583 Left->setType(TT_AttributeSquare);
584 } else if (Style.Language == FormatStyle::LK_JavaScript && Parent &&
585 Contexts.back().ContextKind == tok::l_brace &&
586 Parent->isOneOf(tok::l_brace, tok::comma)) {
587 Left->setType(TT_JsComputedPropertyName);
588 } else if (Style.isCpp() && Contexts.back().ContextKind == tok::l_brace &&
589 Parent && Parent->isOneOf(tok::l_brace, tok::comma)) {
590 Left->setType(TT_DesignatedInitializerLSquare);
591 } else if (IsCSharpAttributeSpecifier) {
592 Left->setType(TT_AttributeSquare);
593 } else if (CurrentToken->is(tok::r_square) && Parent &&
594 Parent->is(TT_TemplateCloser)) {
595 Left->setType(TT_ArraySubscriptLSquare);
596 } else if (Style.Language == FormatStyle::LK_Proto ||
597 Style.Language == FormatStyle::LK_TextProto) {
598 // Square braces in LK_Proto can either be message field attributes:
599 //
600 // optional Aaa aaa = 1 [
601 // (aaa) = aaa
602 // ];
603 //
604 // extensions 123 [
605 // (aaa) = aaa
606 // ];
607 //
608 // or text proto extensions (in options):
609 //
610 // option (Aaa.options) = {
611 // [type.type/type] {
612 // key: value
613 // }
614 // }
615 //
616 // or repeated fields (in options):
617 //
618 // option (Aaa.options) = {
619 // keys: [ 1, 2, 3 ]
620 // }
621 //
622 // In the first and the third case we want to spread the contents inside
623 // the square braces; in the second we want to keep them inline.
624 Left->setType(TT_ArrayInitializerLSquare);
625 if (!Left->endsSequence(tok::l_square, tok::numeric_constant,
626 tok::equal) &&
627 !Left->endsSequence(tok::l_square, tok::numeric_constant,
628 tok::identifier) &&
629 !Left->endsSequence(tok::l_square, tok::colon, TT_SelectorName)) {
630 Left->setType(TT_ProtoExtensionLSquare);
631 BindingIncrease = 10;
632 }
633 } else if (!CppArrayTemplates && Parent &&
634 Parent->isOneOf(TT_BinaryOperator, TT_TemplateCloser, tok::at,
635 tok::comma, tok::l_paren, tok::l_square,
636 tok::question, tok::colon, tok::kw_return,
637 // Should only be relevant to JavaScript:
638 tok::kw_default)) {
639 Left->setType(TT_ArrayInitializerLSquare);
640 } else {
641 BindingIncrease = 10;
642 Left->setType(TT_ArraySubscriptLSquare);
643 }
644 }
645
646 ScopedContextCreator ContextCreator(*this, tok::l_square, BindingIncrease);
647 Contexts.back().IsExpression = true;
648 if (Style.Language == FormatStyle::LK_JavaScript && Parent &&
649 Parent->is(TT_JsTypeColon))
650 Contexts.back().IsExpression = false;
651
652 Contexts.back().ColonIsObjCMethodExpr = StartsObjCMethodExpr;
653 Contexts.back().InCpp11AttributeSpecifier = IsCpp11AttributeSpecifier;
654 Contexts.back().InCSharpAttributeSpecifier = IsCSharpAttributeSpecifier;
655
656 while (CurrentToken) {
657 if (CurrentToken->is(tok::r_square)) {
658 if (IsCpp11AttributeSpecifier)
659 CurrentToken->setType(TT_AttributeSquare);
660 if (IsCSharpAttributeSpecifier)
661 CurrentToken->setType(TT_AttributeSquare);
662 else if (((CurrentToken->Next &&
663 CurrentToken->Next->is(tok::l_paren)) ||
664 (CurrentToken->Previous &&
665 CurrentToken->Previous->Previous == Left)) &&
666 Left->is(TT_ObjCMethodExpr)) {
667 // An ObjC method call is rarely followed by an open parenthesis. It
668 // also can't be composed of just one token, unless it's a macro that
669 // will be expanded to more tokens.
670 // FIXME: Do we incorrectly label ":" with this?
671 StartsObjCMethodExpr = false;
672 Left->setType(TT_Unknown);
673 }
674 if (StartsObjCMethodExpr && CurrentToken->Previous != Left) {
675 CurrentToken->setType(TT_ObjCMethodExpr);
676 // If we haven't seen a colon yet, make sure the last identifier
677 // before the r_square is tagged as a selector name component.
678 if (!ColonFound && CurrentToken->Previous &&
679 CurrentToken->Previous->is(TT_Unknown) &&
680 canBeObjCSelectorComponent(*CurrentToken->Previous))
681 CurrentToken->Previous->setType(TT_SelectorName);
682 // determineStarAmpUsage() thinks that '*' '[' is allocating an
683 // array of pointers, but if '[' starts a selector then '*' is a
684 // binary operator.
685 if (Parent && Parent->is(TT_PointerOrReference))
686 Parent->setType(TT_BinaryOperator);
687 }
688 // An arrow after an ObjC method expression is not a lambda arrow.
689 if (CurrentToken->getType() == TT_ObjCMethodExpr &&
690 CurrentToken->Next && CurrentToken->Next->is(TT_LambdaArrow))
691 CurrentToken->Next->setType(TT_Unknown);
692 Left->MatchingParen = CurrentToken;
693 CurrentToken->MatchingParen = Left;
694 // FirstObjCSelectorName is set when a colon is found. This does
695 // not work, however, when the method has no parameters.
696 // Here, we set FirstObjCSelectorName when the end of the method call is
697 // reached, in case it was not set already.
698 if (!Contexts.back().FirstObjCSelectorName) {
699 FormatToken *Previous = CurrentToken->getPreviousNonComment();
700 if (Previous && Previous->is(TT_SelectorName)) {
701 Previous->ObjCSelectorNameParts = 1;
702 Contexts.back().FirstObjCSelectorName = Previous;
703 }
704 } else {
705 Left->ParameterCount =
706 Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts;
707 }
708 if (Contexts.back().FirstObjCSelectorName) {
709 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
710 Contexts.back().LongestObjCSelectorName;
711 if (Left->BlockParameterCount > 1)
712 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 0;
713 }
714 next();
715 return true;
716 }
717 if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace))
718 return false;
719 if (CurrentToken->is(tok::colon)) {
720 if (IsCpp11AttributeSpecifier &&
721 CurrentToken->endsSequence(tok::colon, tok::identifier,
722 tok::kw_using)) {
723 // Remember that this is a [[using ns: foo]] C++ attribute, so we
724 // don't add a space before the colon (unlike other colons).
725 CurrentToken->setType(TT_AttributeColon);
726 } else if (Left->isOneOf(TT_ArraySubscriptLSquare,
727 TT_DesignatedInitializerLSquare)) {
728 Left->setType(TT_ObjCMethodExpr);
729 StartsObjCMethodExpr = true;
730 Contexts.back().ColonIsObjCMethodExpr = true;
731 if (Parent && Parent->is(tok::r_paren))
732 // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen.
733 Parent->setType(TT_CastRParen);
734 }
735 ColonFound = true;
736 }
737 if (CurrentToken->is(tok::comma) && Left->is(TT_ObjCMethodExpr) &&
738 !ColonFound)
739 Left->setType(TT_ArrayInitializerLSquare);
740 FormatToken *Tok = CurrentToken;
741 if (!consumeToken())
742 return false;
743 updateParameterCount(Left, Tok);
744 }
745 return false;
746 }
747
748 bool couldBeInStructArrayInitializer() const {
749 if (Contexts.size() < 2)
750 return false;
751 // We want to back up no more then 2 context levels i.e.
752 // . { { <-
753 const auto End = std::next(Contexts.rbegin(), 2);
754 auto Last = Contexts.rbegin();
755 unsigned Depth = 0;
756 for (; Last != End; ++Last) {
757 if (Last->ContextKind == tok::l_brace)
758 ++Depth;
759 }
760 return Depth == 2 && Last->ContextKind != tok::l_brace;
761 }
762
763 bool parseBrace() {
764 if (CurrentToken) {
765 FormatToken *Left = CurrentToken->Previous;
766 Left->ParentBracket = Contexts.back().ContextKind;
767
768 if (Contexts.back().CaretFound)
769 Left->setType(TT_ObjCBlockLBrace);
770 Contexts.back().CaretFound = false;
771
772 ScopedContextCreator ContextCreator(*this, tok::l_brace, 1);
773 Contexts.back().ColonIsDictLiteral = true;
774 if (Left->is(BK_BracedInit))
775 Contexts.back().IsExpression = true;
776 if (Style.Language == FormatStyle::LK_JavaScript && Left->Previous &&
777 Left->Previous->is(TT_JsTypeColon))
778 Contexts.back().IsExpression = false;
779
780 unsigned CommaCount = 0;
781 while (CurrentToken) {
782 if (CurrentToken->is(tok::r_brace)) {
783 Left->MatchingParen = CurrentToken;
784 CurrentToken->MatchingParen = Left;
785 if (Style.AlignArrayOfStructures != FormatStyle::AIAS_None) {
786 if (Left->ParentBracket == tok::l_brace &&
787 couldBeInStructArrayInitializer() && CommaCount > 0) {
788 Contexts.back().InStructArrayInitializer = true;
789 }
790 }
791 next();
792 return true;
793 }
794 if (CurrentToken->isOneOf(tok::r_paren, tok::r_square))
795 return false;
796 updateParameterCount(Left, CurrentToken);
797 if (CurrentToken->isOneOf(tok::colon, tok::l_brace, tok::less)) {
798 FormatToken *Previous = CurrentToken->getPreviousNonComment();
799 if (Previous->is(TT_JsTypeOptionalQuestion))
800 Previous = Previous->getPreviousNonComment();
801 if ((CurrentToken->is(tok::colon) &&
802 (!Contexts.back().ColonIsDictLiteral || !Style.isCpp())) ||
803 Style.Language == FormatStyle::LK_Proto ||
804 Style.Language == FormatStyle::LK_TextProto) {
805 Left->setType(TT_DictLiteral);
806 if (Previous->Tok.getIdentifierInfo() ||
807 Previous->is(tok::string_literal))
808 Previous->setType(TT_SelectorName);
809 }
810 if (CurrentToken->is(tok::colon) ||
811 Style.Language == FormatStyle::LK_JavaScript)
812 Left->setType(TT_DictLiteral);
813 }
814 if (CurrentToken->is(tok::comma)) {
815 if (Style.Language == FormatStyle::LK_JavaScript)
816 Left->setType(TT_DictLiteral);
817 ++CommaCount;
818 }
819 if (!consumeToken())
820 return false;
821 }
822 }
823 return true;
824 }
825
826 void updateParameterCount(FormatToken *Left, FormatToken *Current) {
827 // For ObjC methods, the number of parameters is calculated differently as
828 // method declarations have a different structure (the parameters are not
829 // inside a bracket scope).
830 if (Current->is(tok::l_brace) && Current->is(BK_Block))
831 ++Left->BlockParameterCount;
832 if (Current->is(tok::comma)) {
833 ++Left->ParameterCount;
834 if (!Left->Role)
835 Left->Role.reset(new CommaSeparatedList(Style));
836 Left->Role->CommaFound(Current);
837 } else if (Left->ParameterCount == 0 && Current->isNot(tok::comment)) {
838 Left->ParameterCount = 1;
839 }
840 }
841
842 bool parseConditional() {
843 while (CurrentToken) {
844 if (CurrentToken->is(tok::colon)) {
845 CurrentToken->setType(TT_ConditionalExpr);
846 next();
847 return true;
848 }
849 if (!consumeToken())
850 return false;
851 }
852 return false;
853 }
854
855 bool parseTemplateDeclaration() {
856 if (CurrentToken && CurrentToken->is(tok::less)) {
857 CurrentToken->setType(TT_TemplateOpener);
858 next();
859 if (!parseAngle())
860 return false;
861 if (CurrentToken)
862 CurrentToken->Previous->ClosesTemplateDeclaration = true;
863 return true;
864 }
865 return false;
866 }
867
868 bool consumeToken() {
869 FormatToken *Tok = CurrentToken;
870 next();
871 switch (Tok->Tok.getKind()) {
872 case tok::plus:
873 case tok::minus:
874 if (!Tok->Previous && Line.MustBeDeclaration)
875 Tok->setType(TT_ObjCMethodSpecifier);
876 break;
877 case tok::colon:
878 if (!Tok->Previous)
879 return false;
880 // Colons from ?: are handled in parseConditional().
881 if (Style.Language == FormatStyle::LK_JavaScript) {
882 if (Contexts.back().ColonIsForRangeExpr || // colon in for loop
883 (Contexts.size() == 1 && // switch/case labels
884 !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) ||
885 Contexts.back().ContextKind == tok::l_paren || // function params
886 Contexts.back().ContextKind == tok::l_square || // array type
887 (!Contexts.back().IsExpression &&
888 Contexts.back().ContextKind == tok::l_brace) || // object type
889 (Contexts.size() == 1 &&
890 Line.MustBeDeclaration)) { // method/property declaration
891 Contexts.back().IsExpression = false;
892 Tok->setType(TT_JsTypeColon);
893 break;
894 }
895 } else if (Style.isCSharp()) {
896 if (Contexts.back().InCSharpAttributeSpecifier) {
897 Tok->setType(TT_AttributeColon);
898 break;
899 }
900 if (Contexts.back().ContextKind == tok::l_paren) {
901 Tok->setType(TT_CSharpNamedArgumentColon);
902 break;
903 }
904 }
905 if (Contexts.back().ColonIsDictLiteral ||
906 Style.Language == FormatStyle::LK_Proto ||
907 Style.Language == FormatStyle::LK_TextProto) {
908 Tok->setType(TT_DictLiteral);
909 if (Style.Language == FormatStyle::LK_TextProto) {
910 if (FormatToken *Previous = Tok->getPreviousNonComment())
911 Previous->setType(TT_SelectorName);
912 }
913 } else if (Contexts.back().ColonIsObjCMethodExpr ||
914 Line.startsWith(TT_ObjCMethodSpecifier)) {
915 Tok->setType(TT_ObjCMethodExpr);
916 const FormatToken *BeforePrevious = Tok->Previous->Previous;
917 // Ensure we tag all identifiers in method declarations as
918 // TT_SelectorName.
919 bool UnknownIdentifierInMethodDeclaration =
920 Line.startsWith(TT_ObjCMethodSpecifier) &&
921 Tok->Previous->is(tok::identifier) && Tok->Previous->is(TT_Unknown);
922 if (!BeforePrevious ||
923 // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen.
924 !(BeforePrevious->is(TT_CastRParen) ||
925 (BeforePrevious->is(TT_ObjCMethodExpr) &&
926 BeforePrevious->is(tok::colon))) ||
927 BeforePrevious->is(tok::r_square) ||
928 Contexts.back().LongestObjCSelectorName == 0 ||
929 UnknownIdentifierInMethodDeclaration) {
930 Tok->Previous->setType(TT_SelectorName);
931 if (!Contexts.back().FirstObjCSelectorName)
932 Contexts.back().FirstObjCSelectorName = Tok->Previous;
933 else if (Tok->Previous->ColumnWidth >
934 Contexts.back().LongestObjCSelectorName)
935 Contexts.back().LongestObjCSelectorName =
936 Tok->Previous->ColumnWidth;
937 Tok->Previous->ParameterIndex =
938 Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts;
939 ++Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts;
940 }
941 } else if (Contexts.back().ColonIsForRangeExpr) {
942 Tok->setType(TT_RangeBasedForLoopColon);
943 } else if (CurrentToken && CurrentToken->is(tok::numeric_constant)) {
944 Tok->setType(TT_BitFieldColon);
945 } else if (Contexts.size() == 1 &&
946 !Line.First->isOneOf(tok::kw_enum, tok::kw_case,
947 tok::kw_default)) {
948 FormatToken *Prev = Tok->getPreviousNonComment();
949 if (Prev->isOneOf(tok::r_paren, tok::kw_noexcept))
950 Tok->setType(TT_CtorInitializerColon);
951 else if (Prev->is(tok::kw_try)) {
952 // Member initializer list within function try block.
953 FormatToken *PrevPrev = Prev->getPreviousNonComment();
954 if (PrevPrev && PrevPrev->isOneOf(tok::r_paren, tok::kw_noexcept))
955 Tok->setType(TT_CtorInitializerColon);
956 } else
957 Tok->setType(TT_InheritanceColon);
958 } else if (canBeObjCSelectorComponent(*Tok->Previous) && Tok->Next &&
959 (Tok->Next->isOneOf(tok::r_paren, tok::comma) ||
960 (canBeObjCSelectorComponent(*Tok->Next) && Tok->Next->Next &&
961 Tok->Next->Next->is(tok::colon)))) {
962 // This handles a special macro in ObjC code where selectors including
963 // the colon are passed as macro arguments.
964 Tok->setType(TT_ObjCMethodExpr);
965 } else if (Contexts.back().ContextKind == tok::l_paren) {
966 Tok->setType(TT_InlineASMColon);
967 }
968 break;
969 case tok::pipe:
970 case tok::amp:
971 // | and & in declarations/type expressions represent union and
972 // intersection types, respectively.
973 if (Style.Language == FormatStyle::LK_JavaScript &&
974 !Contexts.back().IsExpression)
975 Tok->setType(TT_JsTypeOperator);
976 break;
977 case tok::kw_if:
978 case tok::kw_while:
979 if (Tok->is(tok::kw_if) && CurrentToken &&
980 CurrentToken->isOneOf(tok::kw_constexpr, tok::identifier))
981 next();
982 if (CurrentToken && CurrentToken->is(tok::l_paren)) {
983 next();
984 if (!parseParens(/*LookForDecls=*/true))
985 return false;
986 }
987 break;
988 case tok::kw_for:
989 if (Style.Language == FormatStyle::LK_JavaScript) {
990 // x.for and {for: ...}
991 if ((Tok->Previous && Tok->Previous->is(tok::period)) ||
992 (Tok->Next && Tok->Next->is(tok::colon)))
993 break;
994 // JS' for await ( ...
995 if (CurrentToken && CurrentToken->is(Keywords.kw_await))
996 next();
997 }
998 Contexts.back().ColonIsForRangeExpr = true;
999 next();
1000 if (!parseParens())
1001 return false;
1002 break;
1003 case tok::l_paren:
1004 // When faced with 'operator()()', the kw_operator handler incorrectly
1005 // marks the first l_paren as a OverloadedOperatorLParen. Here, we make
1006 // the first two parens OverloadedOperators and the second l_paren an
1007 // OverloadedOperatorLParen.
1008 if (Tok->Previous && Tok->Previous->is(tok::r_paren) &&
1009 Tok->Previous->MatchingParen &&
1010 Tok->Previous->MatchingParen->is(TT_OverloadedOperatorLParen)) {
1011 Tok->Previous->setType(TT_OverloadedOperator);
1012 Tok->Previous->MatchingParen->setType(TT_OverloadedOperator);
1013 Tok->setType(TT_OverloadedOperatorLParen);
1014 }
1015
1016 if (!parseParens())
1017 return false;
1018 if (Line.MustBeDeclaration && Contexts.size() == 1 &&
1019 !Contexts.back().IsExpression && !Line.startsWith(TT_ObjCProperty) &&
1020 !Tok->is(TT_TypeDeclarationParen) &&
1021 (!Tok->Previous || !Tok->Previous->isOneOf(tok::kw___attribute,
1022 TT_LeadingJavaAnnotation)))
1023 Line.MightBeFunctionDecl = true;
1024 break;
1025 case tok::l_square:
1026 if (!parseSquare())
1027 return false;
1028 break;
1029 case tok::l_brace:
1030 if (Style.Language == FormatStyle::LK_TextProto) {
1031 FormatToken *Previous = Tok->getPreviousNonComment();
1032 if (Previous && Previous->getType() != TT_DictLiteral)
1033 Previous->setType(TT_SelectorName);
1034 }
1035 if (!parseBrace())
1036 return false;
1037 break;
1038 case tok::less:
1039 if (parseAngle()) {
1040 Tok->setType(TT_TemplateOpener);
1041 // In TT_Proto, we must distignuish between:
1042 // map<key, value>
1043 // msg < item: data >
1044 // msg: < item: data >
1045 // In TT_TextProto, map<key, value> does not occur.
1046 if (Style.Language == FormatStyle::LK_TextProto ||
1047 (Style.Language == FormatStyle::LK_Proto && Tok->Previous &&
1048 Tok->Previous->isOneOf(TT_SelectorName, TT_DictLiteral))) {
1049 Tok->setType(TT_DictLiteral);
1050 FormatToken *Previous = Tok->getPreviousNonComment();
1051 if (Previous && Previous->getType() != TT_DictLiteral)
1052 Previous->setType(TT_SelectorName);
1053 }
1054 } else {
1055 Tok->setType(TT_BinaryOperator);
1056 NonTemplateLess.insert(Tok);
1057 CurrentToken = Tok;
1058 next();
1059 }
1060 break;
1061 case tok::r_paren:
1062 case tok::r_square:
1063 return false;
1064 case tok::r_brace:
1065 // Lines can start with '}'.
1066 if (Tok->Previous)
1067 return false;
1068 break;
1069 case tok::greater:
1070 if (Style.Language != FormatStyle::LK_TextProto)
1071 Tok->setType(TT_BinaryOperator);
1072 if (Tok->Previous && Tok->Previous->is(TT_TemplateCloser))
1073 Tok->SpacesRequiredBefore = 1;
1074 break;
1075 case tok::kw_operator:
1076 if (Style.Language == FormatStyle::LK_TextProto ||
1077 Style.Language == FormatStyle::LK_Proto)
1078 break;
1079 while (CurrentToken &&
1080 !CurrentToken->isOneOf(tok::l_paren, tok::semi, tok::r_paren)) {
1081 if (CurrentToken->isOneOf(tok::star, tok::amp))
1082 CurrentToken->setType(TT_PointerOrReference);
1083 consumeToken();
1084 if (CurrentToken && CurrentToken->is(tok::comma) &&
1085 CurrentToken->Previous->isNot(tok::kw_operator))
1086 break;
1087 if (CurrentToken && CurrentToken->Previous->isOneOf(
1088 TT_BinaryOperator, TT_UnaryOperator, tok::comma,
1089 tok::star, tok::arrow, tok::amp, tok::ampamp))
1090 CurrentToken->Previous->setType(TT_OverloadedOperator);
1091 }
1092 if (CurrentToken && CurrentToken->is(tok::l_paren))
1093 CurrentToken->setType(TT_OverloadedOperatorLParen);
1094 if (CurrentToken && CurrentToken->Previous->is(TT_BinaryOperator))
1095 CurrentToken->Previous->setType(TT_OverloadedOperator);
1096 break;
1097 case tok::question:
1098 if (Style.Language == FormatStyle::LK_JavaScript && Tok->Next &&
1099 Tok->Next->isOneOf(tok::semi, tok::comma, tok::colon, tok::r_paren,
1100 tok::r_brace)) {
1101 // Question marks before semicolons, colons, etc. indicate optional
1102 // types (fields, parameters), e.g.
1103 // function(x?: string, y?) {...}
1104 // class X { y?; }
1105 Tok->setType(TT_JsTypeOptionalQuestion);
1106 break;
1107 }
1108 // Declarations cannot be conditional expressions, this can only be part
1109 // of a type declaration.
1110 if (Line.MustBeDeclaration && !Contexts.back().IsExpression &&
1111 Style.Language == FormatStyle::LK_JavaScript)
1112 break;
1113 if (Style.isCSharp()) {
1114 // `Type?)`, `Type?>`, `Type? name;` and `Type? name =` can only be
1115 // nullable types.
1116 // Line.MustBeDeclaration will be true for `Type? name;`.
1117 if ((!Contexts.back().IsExpression && Line.MustBeDeclaration) ||
1118 (Tok->Next && Tok->Next->isOneOf(tok::r_paren, tok::greater)) ||
1119 (Tok->Next && Tok->Next->is(tok::identifier) && Tok->Next->Next &&
1120 Tok->Next->Next->is(tok::equal))) {
1121 Tok->setType(TT_CSharpNullable);
1122 break;
1123 }
1124 }
1125 parseConditional();
1126 break;
1127 case tok::kw_template:
1128 parseTemplateDeclaration();
1129 break;
1130 case tok::comma:
1131 if (Contexts.back().InCtorInitializer)
1132 Tok->setType(TT_CtorInitializerComma);
1133 else if (Contexts.back().InInheritanceList)
1134 Tok->setType(TT_InheritanceComma);
1135 else if (Contexts.back().FirstStartOfName &&
1136 (Contexts.size() == 1 || Line.startsWith(tok::kw_for))) {
1137 Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true;
1138 Line.IsMultiVariableDeclStmt = true;
1139 }
1140 if (Contexts.back().IsForEachMacro)
1141 Contexts.back().IsExpression = true;
1142 break;
1143 case tok::identifier:
1144 if (Tok->isOneOf(Keywords.kw___has_include,
1145 Keywords.kw___has_include_next)) {
1146 parseHasInclude();
1147 }
1148 if (Style.isCSharp() && Tok->is(Keywords.kw_where) && Tok->Next &&
1149 Tok->Next->isNot(tok::l_paren)) {
1150 Tok->setType(TT_CSharpGenericTypeConstraint);
1151 parseCSharpGenericTypeConstraint();
1152 }
1153 break;
1154 default:
1155 break;
1156 }
1157 return true;
1158 }
1159
1160 void parseCSharpGenericTypeConstraint() {
1161 int OpenAngleBracketsCount = 0;
1162 while (CurrentToken) {
1163 if (CurrentToken->is(tok::less)) {
1164 // parseAngle is too greedy and will consume the whole line.
1165 CurrentToken->setType(TT_TemplateOpener);
1166 ++OpenAngleBracketsCount;
1167 next();
1168 } else if (CurrentToken->is(tok::greater)) {
1169 CurrentToken->setType(TT_TemplateCloser);
1170 --OpenAngleBracketsCount;
1171 next();
1172 } else if (CurrentToken->is(tok::comma) && OpenAngleBracketsCount == 0) {
1173 // We allow line breaks after GenericTypeConstraintComma's
1174 // so do not flag commas in Generics as GenericTypeConstraintComma's.
1175 CurrentToken->setType(TT_CSharpGenericTypeConstraintComma);
1176 next();
1177 } else if (CurrentToken->is(Keywords.kw_where)) {
1178 CurrentToken->setType(TT_CSharpGenericTypeConstraint);
1179 next();
1180 } else if (CurrentToken->is(tok::colon)) {
1181 CurrentToken->setType(TT_CSharpGenericTypeConstraintColon);
1182 next();
1183 } else {
1184 next();
1185 }
1186 }
1187 }
1188
1189 void parseIncludeDirective() {
1190 if (CurrentToken && CurrentToken->is(tok::less)) {
1191 next();
1192 while (CurrentToken) {
1193 // Mark tokens up to the trailing line comments as implicit string
1194 // literals.
1195 if (CurrentToken->isNot(tok::comment) &&
1196 !CurrentToken->TokenText.startswith("//"))
1197 CurrentToken->setType(TT_ImplicitStringLiteral);
1198 next();
1199 }
1200 }
1201 }
1202
1203 void parseWarningOrError() {
1204 next();
1205 // We still want to format the whitespace left of the first token of the
1206 // warning or error.
1207 next();
1208 while (CurrentToken) {
1209 CurrentToken->setType(TT_ImplicitStringLiteral);
1210 next();
1211 }
1212 }
1213
1214 void parsePragma() {
1215 next(); // Consume "pragma".
1216 if (CurrentToken &&
1217 CurrentToken->isOneOf(Keywords.kw_mark, Keywords.kw_option)) {
1218 bool IsMark = CurrentToken->is(Keywords.kw_mark);
1219 next(); // Consume "mark".
1220 next(); // Consume first token (so we fix leading whitespace).
1221 while (CurrentToken) {
1222 if (IsMark || CurrentToken->Previous->is(TT_BinaryOperator))
1223 CurrentToken->setType(TT_ImplicitStringLiteral);
1224 next();
1225 }
1226 }
1227 }
1228
1229 void parseHasInclude() {
1230 if (!CurrentToken || !CurrentToken->is(tok::l_paren))
1231 return;
1232 next(); // '('
1233 parseIncludeDirective();
1234 next(); // ')'
1235 }
1236
1237 LineType parsePreprocessorDirective() {
1238 bool IsFirstToken = CurrentToken->IsFirst;
1239 LineType Type = LT_PreprocessorDirective;
1240 next();
1241 if (!CurrentToken)
1242 return Type;
1243
1244 if (Style.Language == FormatStyle::LK_JavaScript && IsFirstToken) {
1245 // JavaScript files can contain shebang lines of the form:
1246 // #!/usr/bin/env node
1247 // Treat these like C++ #include directives.
1248 while (CurrentToken) {
1249 // Tokens cannot be comments here.
1250 CurrentToken->setType(TT_ImplicitStringLiteral);
1251 next();
1252 }
1253 return LT_ImportStatement;
1254 }
1255
1256 if (CurrentToken->Tok.is(tok::numeric_constant)) {
1257 CurrentToken->SpacesRequiredBefore = 1;
1258 return Type;
1259 }
1260 // Hashes in the middle of a line can lead to any strange token
1261 // sequence.
1262 if (!CurrentToken->Tok.getIdentifierInfo())
1263 return Type;
1264 switch (CurrentToken->Tok.getIdentifierInfo()->getPPKeywordID()) {
1265 case tok::pp_include:
1266 case tok::pp_include_next:
1267 case tok::pp_import:
1268 next();
1269 parseIncludeDirective();
1270 Type = LT_ImportStatement;
1271 break;
1272 case tok::pp_error:
1273 case tok::pp_warning:
1274 parseWarningOrError();
1275 break;
1276 case tok::pp_pragma:
1277 parsePragma();
1278 break;
1279 case tok::pp_if:
1280 case tok::pp_elif:
1281 Contexts.back().IsExpression = true;
1282 next();
1283 parseLine();
1284 break;
1285 default:
1286 break;
1287 }
1288 while (CurrentToken) {
1289 FormatToken *Tok = CurrentToken;
1290 next();
1291 if (Tok->is(tok::l_paren))
1292 parseParens();
1293 else if (Tok->isOneOf(Keywords.kw___has_include,
1294 Keywords.kw___has_include_next))
1295 parseHasInclude();
1296 }
1297 return Type;
1298 }
1299
1300public:
1301 LineType parseLine() {
1302 if (!CurrentToken)
1303 return LT_Invalid;
1304 NonTemplateLess.clear();
1305 if (CurrentToken->is(tok::hash))
1306 return parsePreprocessorDirective();
1307
1308 // Directly allow to 'import <string-literal>' to support protocol buffer
1309 // definitions (github.com/google/protobuf) or missing "#" (either way we
1310 // should not break the line).
1311 IdentifierInfo *Info = CurrentToken->Tok.getIdentifierInfo();
1312 if ((Style.Language == FormatStyle::LK_Java &&
1313 CurrentToken->is(Keywords.kw_package)) ||
1314 (Info && Info->getPPKeywordID() == tok::pp_import &&
1315 CurrentToken->Next &&
1316 CurrentToken->Next->isOneOf(tok::string_literal, tok::identifier,
1317 tok::kw_static))) {
1318 next();
1319 parseIncludeDirective();
1320 return LT_ImportStatement;
1321 }
1322
1323 // If this line starts and ends in '<' and '>', respectively, it is likely
1324 // part of "#define <a/b.h>".
1325 if (CurrentToken->is(tok::less) && Line.Last->is(tok::greater)) {
1326 parseIncludeDirective();
1327 return LT_ImportStatement;
1328 }
1329
1330 // In .proto files, top-level options and package statements are very
1331 // similar to import statements and should not be line-wrapped.
1332 if (Style.Language == FormatStyle::LK_Proto && Line.Level == 0 &&
1333 CurrentToken->isOneOf(Keywords.kw_option, Keywords.kw_package)) {
1334 next();
1335 if (CurrentToken && CurrentToken->is(tok::identifier)) {
1336 while (CurrentToken)
1337 next();
1338 return LT_ImportStatement;
1339 }
1340 }
1341
1342 bool KeywordVirtualFound = false;
1343 bool ImportStatement = false;
1344
1345 // import {...} from '...';
1346 if (Style.Language == FormatStyle::LK_JavaScript &&
1347 CurrentToken->is(Keywords.kw_import))
1348 ImportStatement = true;
1349
1350 while (CurrentToken) {
1351 if (CurrentToken->is(tok::kw_virtual))
1352 KeywordVirtualFound = true;
1353 if (Style.Language == FormatStyle::LK_JavaScript) {
1354 // export {...} from '...';
1355 // An export followed by "from 'some string';" is a re-export from
1356 // another module identified by a URI and is treated as a
1357 // LT_ImportStatement (i.e. prevent wraps on it for long URIs).
1358 // Just "export {...};" or "export class ..." should not be treated as
1359 // an import in this sense.
1360 if (Line.First->is(tok::kw_export) &&
1361 CurrentToken->is(Keywords.kw_from) && CurrentToken->Next &&
1362 CurrentToken->Next->isStringLiteral())
1363 ImportStatement = true;
1364 if (isClosureImportStatement(*CurrentToken))
1365 ImportStatement = true;
1366 }
1367 if (!consumeToken())
1368 return LT_Invalid;
1369 }
1370 if (KeywordVirtualFound)
1371 return LT_VirtualFunctionDecl;
1372 if (ImportStatement)
1373 return LT_ImportStatement;
1374
1375 if (Line.startsWith(TT_ObjCMethodSpecifier)) {
1376 if (Contexts.back().FirstObjCSelectorName)
1377 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
1378 Contexts.back().LongestObjCSelectorName;
1379 return LT_ObjCMethodDecl;
1380 }
1381
1382 for (const auto &ctx : Contexts) {
1383 if (ctx.InStructArrayInitializer) {
1384 return LT_ArrayOfStructInitializer;
1385 }
1386 }
1387
1388 return LT_Other;
1389 }
1390
1391private:
1392 bool isClosureImportStatement(const FormatToken &Tok) {
1393 // FIXME: Closure-library specific stuff should not be hard-coded but be
1394 // configurable.
1395 return Tok.TokenText == "goog" && Tok.Next && Tok.Next->is(tok::period) &&
1396 Tok.Next->Next &&
1397 (Tok.Next->Next->TokenText == "module" ||
1398 Tok.Next->Next->TokenText == "provide" ||
1399 Tok.Next->Next->TokenText == "require" ||
1400 Tok.Next->Next->TokenText == "requireType" ||
1401 Tok.Next->Next->TokenText == "forwardDeclare") &&
1402 Tok.Next->Next->Next && Tok.Next->Next->Next->is(tok::l_paren);
1403 }
1404
1405 void resetTokenMetadata(FormatToken *Token) {
1406 if (!Token)
1407 return;
1408
1409 // Reset token type in case we have already looked at it and then
1410 // recovered from an error (e.g. failure to find the matching >).
1411 if (!CurrentToken->isOneOf(
1412 TT_LambdaLSquare, TT_LambdaLBrace, TT_AttributeMacro, TT_IfMacro,
1413 TT_ForEachMacro, TT_TypenameMacro, TT_FunctionLBrace,
1414 TT_ImplicitStringLiteral, TT_InlineASMBrace, TT_FatArrow,
1415 TT_LambdaArrow, TT_NamespaceMacro, TT_OverloadedOperator,
1416 TT_RegexLiteral, TT_TemplateString, TT_ObjCStringLiteral,
1417 TT_UntouchableMacroFunc, TT_ConstraintJunctions,
1418 TT_StatementAttributeLikeMacro))
1419 CurrentToken->setType(TT_Unknown);
1420 CurrentToken->Role.reset();
1421 CurrentToken->MatchingParen = nullptr;
1422 CurrentToken->FakeLParens.clear();
1423 CurrentToken->FakeRParens = 0;
1424 }
1425
1426 void next() {
1427 if (CurrentToken) {
1428 CurrentToken->NestingLevel = Contexts.size() - 1;
1429 CurrentToken->BindingStrength = Contexts.back().BindingStrength;
1430 modifyContext(*CurrentToken);
1431 determineTokenType(*CurrentToken);
1432 CurrentToken = CurrentToken->Next;
1433 }
1434
1435 resetTokenMetadata(CurrentToken);
1436 }
1437
1438 /// A struct to hold information valid in a specific context, e.g.
1439 /// a pair of parenthesis.
1440 struct Context {
1441 Context(tok::TokenKind ContextKind, unsigned BindingStrength,
1442 bool IsExpression)
1443 : ContextKind(ContextKind), BindingStrength(BindingStrength),
1444 IsExpression(IsExpression) {}
1445
1446 tok::TokenKind ContextKind;
1447 unsigned BindingStrength;
1448 bool IsExpression;
1449 unsigned LongestObjCSelectorName = 0;
1450 bool ColonIsForRangeExpr = false;
1451 bool ColonIsDictLiteral = false;
1452 bool ColonIsObjCMethodExpr = false;
1453 FormatToken *FirstObjCSelectorName = nullptr;
1454 FormatToken *FirstStartOfName = nullptr;
1455 bool CanBeExpression = true;
1456 bool InTemplateArgument = false;
1457 bool InCtorInitializer = false;
1458 bool InInheritanceList = false;
1459 bool CaretFound = false;
1460 bool IsForEachMacro = false;
1461 bool InCpp11AttributeSpecifier = false;
1462 bool InCSharpAttributeSpecifier = false;
1463 bool InStructArrayInitializer = false;
1464 };
1465
1466 /// Puts a new \c Context onto the stack \c Contexts for the lifetime
1467 /// of each instance.
1468 struct ScopedContextCreator {
1469 AnnotatingParser &P;
1470
1471 ScopedContextCreator(AnnotatingParser &P, tok::TokenKind ContextKind,
1472 unsigned Increase)
1473 : P(P) {
1474 P.Contexts.push_back(Context(ContextKind,
1475 P.Contexts.back().BindingStrength + Increase,
1476 P.Contexts.back().IsExpression));
1477 }
1478
1479 ~ScopedContextCreator() {
1480 if (P.Style.AlignArrayOfStructures != FormatStyle::AIAS_None) {
1481 if (P.Contexts.back().InStructArrayInitializer) {
1482 P.Contexts.pop_back();
1483 P.Contexts.back().InStructArrayInitializer = true;
1484 return;
1485 }
1486 }
1487 P.Contexts.pop_back();
1488 }
1489 };
1490
1491 void modifyContext(const FormatToken &Current) {
1492 if (Current.getPrecedence() == prec::Assignment &&
1493 !Line.First->isOneOf(tok::kw_template, tok::kw_using, tok::kw_return) &&
1494 // Type aliases use `type X = ...;` in TypeScript and can be exported
1495 // using `export type ...`.
1496 !(Style.Language == FormatStyle::LK_JavaScript &&
1497 (Line.startsWith(Keywords.kw_type, tok::identifier) ||
1498 Line.startsWith(tok::kw_export, Keywords.kw_type,
1499 tok::identifier))) &&
1500 (!Current.Previous || Current.Previous->isNot(tok::kw_operator))) {
1501 Contexts.back().IsExpression = true;
1502 if (!Line.startsWith(TT_UnaryOperator)) {
1503 for (FormatToken *Previous = Current.Previous;
1504 Previous && Previous->Previous &&
1505 !Previous->Previous->isOneOf(tok::comma, tok::semi);
1506 Previous = Previous->Previous) {
1507 if (Previous->isOneOf(tok::r_square, tok::r_paren)) {
1508 Previous = Previous->MatchingParen;
1509 if (!Previous)
1510 break;
1511 }
1512 if (Previous->opensScope())
1513 break;
1514 if (Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator) &&
1515 Previous->isOneOf(tok::star, tok::amp, tok::ampamp) &&
1516 Previous->Previous && Previous->Previous->isNot(tok::equal))
1517 Previous->setType(TT_PointerOrReference);
1518 }
1519 }
1520 } else if (Current.is(tok::lessless) &&
1521 (!Current.Previous || !Current.Previous->is(tok::kw_operator))) {
1522 Contexts.back().IsExpression = true;
1523 } else if (Current.isOneOf(tok::kw_return, tok::kw_throw)) {
1524 Contexts.back().IsExpression = true;
1525 } else if (Current.is(TT_TrailingReturnArrow)) {
1526 Contexts.back().IsExpression = false;
1527 } else if (Current.is(TT_LambdaArrow) || Current.is(Keywords.kw_assert)) {
1528 Contexts.back().IsExpression = Style.Language == FormatStyle::LK_Java;
1529 } else if (Current.Previous &&
1530 Current.Previous->is(TT_CtorInitializerColon)) {
1531 Contexts.back().IsExpression = true;
1532 Contexts.back().InCtorInitializer = true;
1533 } else if (Current.Previous && Current.Previous->is(TT_InheritanceColon)) {
1534 Contexts.back().InInheritanceList = true;
1535 } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) {
1536 for (FormatToken *Previous = Current.Previous;
1537 Previous && Previous->isOneOf(tok::star, tok::amp);
1538 Previous = Previous->Previous)
1539 Previous->setType(TT_PointerOrReference);
1540 if (Line.MustBeDeclaration && !Contexts.front().InCtorInitializer)
1541 Contexts.back().IsExpression = false;
1542 } else if (Current.is(tok::kw_new)) {
1543 Contexts.back().CanBeExpression = false;
1544 } else if (Current.is(tok::semi) ||
1545 (Current.is(tok::exclaim) && Current.Previous &&
1546 !Current.Previous->is(tok::kw_operator))) {
1547 // This should be the condition or increment in a for-loop.
1548 // But not operator !() (can't use TT_OverloadedOperator here as its not
1549 // been annotated yet).
1550 Contexts.back().IsExpression = true;
1551 }
1552 }
1553
1554 static FormatToken *untilMatchingParen(FormatToken *Current) {
1555 // Used when `MatchingParen` is not yet established.
1556 int ParenLevel = 0;
1557 while (Current) {
1558 if (Current->is(tok::l_paren))
1559 ParenLevel++;
1560 if (Current->is(tok::r_paren))
1561 ParenLevel--;
1562 if (ParenLevel < 1)
1563 break;
1564 Current = Current->Next;
1565 }
1566 return Current;
1567 }
1568
1569 static bool isDeductionGuide(FormatToken &Current) {
1570 // Look for a deduction guide template<T> A(...) -> A<...>;
1571 if (Current.Previous && Current.Previous->is(tok::r_paren) &&
1572 Current.startsSequence(tok::arrow, tok::identifier, tok::less)) {
1573 // Find the TemplateCloser.
1574 FormatToken *TemplateCloser = Current.Next->Next;
1575 int NestingLevel = 0;
1576 while (TemplateCloser) {
1577 // Skip over an expressions in parens A<(3 < 2)>;
1578 if (TemplateCloser->is(tok::l_paren)) {
1579 // No Matching Paren yet so skip to matching paren
1580 TemplateCloser = untilMatchingParen(TemplateCloser);
1581 }
1582 if (TemplateCloser->is(tok::less))
1583 NestingLevel++;
1584 if (TemplateCloser->is(tok::greater))
1585 NestingLevel--;
1586 if (NestingLevel < 1)
1587 break;
1588 TemplateCloser = TemplateCloser->Next;
1589 }
1590 // Assuming we have found the end of the template ensure its followed
1591 // with a semi-colon.
1592 if (TemplateCloser && TemplateCloser->Next &&
1593 TemplateCloser->Next->is(tok::semi) &&
1594 Current.Previous->MatchingParen) {
1595 // Determine if the identifier `A` prior to the A<..>; is the same as
1596 // prior to the A(..)
1597 FormatToken *LeadingIdentifier =
1598 Current.Previous->MatchingParen->Previous;
1599
1600 // Differentiate a deduction guide by seeing the
1601 // > of the template prior to the leading identifier.
1602 if (LeadingIdentifier) {
1603 FormatToken *PriorLeadingIdentifier = LeadingIdentifier->Previous;
1604 // Skip back past explicit decoration
1605 if (PriorLeadingIdentifier &&
1606 PriorLeadingIdentifier->is(tok::kw_explicit))
1607 PriorLeadingIdentifier = PriorLeadingIdentifier->Previous;
1608
1609 return (PriorLeadingIdentifier &&
1610 PriorLeadingIdentifier->is(TT_TemplateCloser) &&
1611 LeadingIdentifier->TokenText == Current.Next->TokenText);
1612 }
1613 }
1614 }
1615 return false;
1616 }
1617
1618 void determineTokenType(FormatToken &Current) {
1619 if (!Current.is(TT_Unknown))
1620 // The token type is already known.
1621 return;
1622
1623 if ((Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) &&
1624 Current.is(tok::exclaim)) {
1625 if (Current.Previous) {
1626 bool IsIdentifier =
1627 Style.Language == FormatStyle::LK_JavaScript
1628 ? Keywords.IsJavaScriptIdentifier(
1629 *Current.Previous, /* AcceptIdentifierName= */ true)
1630 : Current.Previous->is(tok::identifier);
1631 if (IsIdentifier ||
1632 Current.Previous->isOneOf(
1633 tok::kw_namespace, tok::r_paren, tok::r_square, tok::r_brace,
1634 tok::kw_false, tok::kw_true, Keywords.kw_type, Keywords.kw_get,
1635 Keywords.kw_set) ||
1636 Current.Previous->Tok.isLiteral()) {
1637 Current.setType(TT_NonNullAssertion);
1638 return;
1639 }
1640 }
1641 if (Current.Next &&
1642 Current.Next->isOneOf(TT_BinaryOperator, Keywords.kw_as)) {
1643 Current.setType(TT_NonNullAssertion);
1644 return;
1645 }
1646 }
1647
1648 // Line.MightBeFunctionDecl can only be true after the parentheses of a
1649 // function declaration have been found. In this case, 'Current' is a
1650 // trailing token of this declaration and thus cannot be a name.
1651 if (Current.is(Keywords.kw_instanceof)) {
1652 Current.setType(TT_BinaryOperator);
1653 } else if (isStartOfName(Current) &&
1654 (!Line.MightBeFunctionDecl || Current.NestingLevel != 0)) {
1655 Contexts.back().FirstStartOfName = &Current;
1656 Current.setType(TT_StartOfName);
1657 } else if (Current.is(tok::semi)) {
1658 // Reset FirstStartOfName after finding a semicolon so that a for loop
1659 // with multiple increment statements is not confused with a for loop
1660 // having multiple variable declarations.
1661 Contexts.back().FirstStartOfName = nullptr;
1662 } else if (Current.isOneOf(tok::kw_auto, tok::kw___auto_type)) {
1663 AutoFound = true;
1664 } else if (Current.is(tok::arrow) &&
1665 Style.Language == FormatStyle::LK_Java) {
1666 Current.setType(TT_LambdaArrow);
1667 } else if (Current.is(tok::arrow) && AutoFound && Line.MustBeDeclaration &&
1668 Current.NestingLevel == 0 &&
1669 !Current.Previous->is(tok::kw_operator)) {
1670 // not auto operator->() -> xxx;
1671 Current.setType(TT_TrailingReturnArrow);
1672 } else if (Current.is(tok::arrow) && Current.Previous &&
1673 Current.Previous->is(tok::r_brace)) {
1674 // Concept implicit conversion contraint needs to be treated like
1675 // a trailing return type ... } -> <type>.
1676 Current.setType(TT_TrailingReturnArrow);
1677 } else if (isDeductionGuide(Current)) {
1678 // Deduction guides trailing arrow " A(...) -> A<T>;".
1679 Current.setType(TT_TrailingReturnArrow);
1680 } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) {
1681 Current.setType(determineStarAmpUsage(
1682 Current,
1683 Contexts.back().CanBeExpression && Contexts.back().IsExpression,
1684 Contexts.back().InTemplateArgument));
1685 } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) {
1686 Current.setType(determinePlusMinusCaretUsage(Current));
1687 if (Current.is(TT_UnaryOperator) && Current.is(tok::caret))
1688 Contexts.back().CaretFound = true;
1689 } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) {
1690 Current.setType(determineIncrementUsage(Current));
1691 } else if (Current.isOneOf(tok::exclaim, tok::tilde)) {
1692 Current.setType(TT_UnaryOperator);
1693 } else if (Current.is(tok::question)) {
1694 if (Style.Language == FormatStyle::LK_JavaScript &&
1695 Line.MustBeDeclaration && !Contexts.back().IsExpression) {
1696 // In JavaScript, `interface X { foo?(): bar; }` is an optional method
1697 // on the interface, not a ternary expression.
1698 Current.setType(TT_JsTypeOptionalQuestion);
1699 } else {
1700 Current.setType(TT_ConditionalExpr);
1701 }
1702 } else if (Current.isBinaryOperator() &&
1703 (!Current.Previous || Current.Previous->isNot(tok::l_square)) &&
1704 (!Current.is(tok::greater) &&
1705 Style.Language != FormatStyle::LK_TextProto)) {
1706 Current.setType(TT_BinaryOperator);
1707 } else if (Current.is(tok::comment)) {
1708 if (Current.TokenText.startswith("/*")) {
1709 if (Current.TokenText.endswith("*/"))
1710 Current.setType(TT_BlockComment);
1711 else
1712 // The lexer has for some reason determined a comment here. But we
1713 // cannot really handle it, if it isn't properly terminated.
1714 Current.Tok.setKind(tok::unknown);
1715 } else {
1716 Current.setType(TT_LineComment);
1717 }
1718 } else if (Current.is(tok::r_paren)) {
1719 if (rParenEndsCast(Current))
1720 Current.setType(TT_CastRParen);
1721 if (Current.MatchingParen && Current.Next &&
1722 !Current.Next->isBinaryOperator() &&
1723 !Current.Next->isOneOf(tok::semi, tok::colon, tok::l_brace,
1724 tok::comma, tok::period, tok::arrow,
1725 tok::coloncolon))
1726 if (FormatToken *AfterParen = Current.MatchingParen->Next) {
1727 // Make sure this isn't the return type of an Obj-C block declaration
1728 if (AfterParen->Tok.isNot(tok::caret)) {
1729 if (FormatToken *BeforeParen = Current.MatchingParen->Previous)
1730 if (BeforeParen->is(tok::identifier) &&
1731 !BeforeParen->is(TT_TypenameMacro) &&
1732 BeforeParen->TokenText == BeforeParen->TokenText.upper() &&
1733 (!BeforeParen->Previous ||
1734 BeforeParen->Previous->ClosesTemplateDeclaration))
1735 Current.setType(TT_FunctionAnnotationRParen);
1736 }
1737 }
1738 } else if (Current.is(tok::at) && Current.Next &&
1739 Style.Language != FormatStyle::LK_JavaScript &&
1740 Style.Language != FormatStyle::LK_Java) {
1741 // In Java & JavaScript, "@..." is a decorator or annotation. In ObjC, it
1742 // marks declarations and properties that need special formatting.
1743 switch (Current.Next->Tok.getObjCKeywordID()) {
1744 case tok::objc_interface:
1745 case tok::objc_implementation:
1746 case tok::objc_protocol:
1747 Current.setType(TT_ObjCDecl);
1748 break;
1749 case tok::objc_property:
1750 Current.setType(TT_ObjCProperty);
1751 break;
1752 default:
1753 break;
1754 }
1755 } else if (Current.is(tok::period)) {
1756 FormatToken *PreviousNoComment = Current.getPreviousNonComment();
1757 if (PreviousNoComment &&
1758 PreviousNoComment->isOneOf(tok::comma, tok::l_brace))
1759 Current.setType(TT_DesignatedInitializerPeriod);
1760 else if (Style.Language == FormatStyle::LK_Java && Current.Previous &&
1761 Current.Previous->isOneOf(TT_JavaAnnotation,
1762 TT_LeadingJavaAnnotation)) {
1763 Current.setType(Current.Previous->getType());
1764 }
1765 } else if (canBeObjCSelectorComponent(Current) &&
1766 // FIXME(bug 36976): ObjC return types shouldn't use
1767 // TT_CastRParen.
1768 Current.Previous && Current.Previous->is(TT_CastRParen) &&
1769 Current.Previous->MatchingParen &&
1770 Current.Previous->MatchingParen->Previous &&
1771 Current.Previous->MatchingParen->Previous->is(
1772 TT_ObjCMethodSpecifier)) {
1773 // This is the first part of an Objective-C selector name. (If there's no
1774 // colon after this, this is the only place which annotates the identifier
1775 // as a selector.)
1776 Current.setType(TT_SelectorName);
1777 } else if (Current.isOneOf(tok::identifier, tok::kw_const, tok::kw_noexcept,
1778 tok::kw_requires) &&
1779 Current.Previous &&
1780 !Current.Previous->isOneOf(tok::equal, tok::at) &&
1781 Line.MightBeFunctionDecl && Contexts.size() == 1) {
1782 // Line.MightBeFunctionDecl can only be true after the parentheses of a
1783 // function declaration have been found.
1784 Current.setType(TT_TrailingAnnotation);
1785 } else if ((Style.Language == FormatStyle::LK_Java ||
1786 Style.Language == FormatStyle::LK_JavaScript) &&
1787 Current.Previous) {
1788 if (Current.Previous->is(tok::at) &&
1789 Current.isNot(Keywords.kw_interface)) {
1790 const FormatToken &AtToken = *Current.Previous;
1791 const FormatToken *Previous = AtToken.getPreviousNonComment();
1792 if (!Previous || Previous->is(TT_LeadingJavaAnnotation))
1793 Current.setType(TT_LeadingJavaAnnotation);
1794 else
1795 Current.setType(TT_JavaAnnotation);
1796 } else if (Current.Previous->is(tok::period) &&
1797 Current.Previous->isOneOf(TT_JavaAnnotation,
1798 TT_LeadingJavaAnnotation)) {
1799 Current.setType(Current.Previous->getType());
1800 }
1801 }
1802 }
1803
1804 /// Take a guess at whether \p Tok starts a name of a function or
1805 /// variable declaration.
1806 ///
1807 /// This is a heuristic based on whether \p Tok is an identifier following
1808 /// something that is likely a type.
1809 bool isStartOfName(const FormatToken &Tok) {
1810 if (Tok.isNot(tok::identifier) || !Tok.Previous)
1811 return false;
1812
1813 if (Tok.Previous->isOneOf(TT_LeadingJavaAnnotation, Keywords.kw_instanceof,
1814 Keywords.kw_as))
1815 return false;
1816 if (Style.Language == FormatStyle::LK_JavaScript &&
1817 Tok.Previous->is(Keywords.kw_in))
1818 return false;
1819
1820 // Skip "const" as it does not have an influence on whether this is a name.
1821 FormatToken *PreviousNotConst = Tok.getPreviousNonComment();
1822 while (PreviousNotConst && PreviousNotConst->is(tok::kw_const))
1823 PreviousNotConst = PreviousNotConst->getPreviousNonComment();
1824
1825 if (!PreviousNotConst)
1826 return false;
1827
1828 bool IsPPKeyword = PreviousNotConst->is(tok::identifier) &&
1829 PreviousNotConst->Previous &&
1830 PreviousNotConst->Previous->is(tok::hash);
1831
1832 if (PreviousNotConst->is(TT_TemplateCloser))
1833 return PreviousNotConst && PreviousNotConst->MatchingParen &&
1834 PreviousNotConst->MatchingParen->Previous &&
1835 PreviousNotConst->MatchingParen->Previous->isNot(tok::period) &&
1836 PreviousNotConst->MatchingParen->Previous->isNot(tok::kw_template);
1837
1838 if (PreviousNotConst->is(tok::r_paren) &&
1839 PreviousNotConst->is(TT_TypeDeclarationParen))
1840 return true;
1841
1842 return (!IsPPKeyword &&
1843 PreviousNotConst->isOneOf(tok::identifier, tok::kw_auto)) ||
1844 PreviousNotConst->is(TT_PointerOrReference) ||
1845 PreviousNotConst->isSimpleTypeSpecifier();
1846 }
1847
1848 /// Determine whether ')' is ending a cast.
1849 bool rParenEndsCast(const FormatToken &Tok) {
1850 // C-style casts are only used in C++, C# and Java.
1851 if (!Style.isCSharp() && !Style.isCpp() &&
1852 Style.Language != FormatStyle::LK_Java)
1853 return false;
1854
1855 // Empty parens aren't casts and there are no casts at the end of the line.
1856 if (Tok.Previous == Tok.MatchingParen || !Tok.Next || !Tok.MatchingParen)
1857 return false;
1858
1859 FormatToken *LeftOfParens = Tok.MatchingParen->getPreviousNonComment();
1860 if (LeftOfParens) {
1861 // If there is a closing parenthesis left of the current parentheses,
1862 // look past it as these might be chained casts.
1863 if (LeftOfParens->is(tok::r_paren)) {
1864 if (!LeftOfParens->MatchingParen ||
1865 !LeftOfParens->MatchingParen->Previous)
1866 return false;
1867 LeftOfParens = LeftOfParens->MatchingParen->Previous;
1868 }
1869
1870 // If there is an identifier (or with a few exceptions a keyword) right
1871 // before the parentheses, this is unlikely to be a cast.
1872 if (LeftOfParens->Tok.getIdentifierInfo() &&
1873 !LeftOfParens->isOneOf(Keywords.kw_in, tok::kw_return, tok::kw_case,
1874 tok::kw_delete))
1875 return false;
1876
1877 // Certain other tokens right before the parentheses are also signals that
1878 // this cannot be a cast.
1879 if (LeftOfParens->isOneOf(tok::at, tok::r_square, TT_OverloadedOperator,
1880 TT_TemplateCloser, tok::ellipsis))
1881 return false;
1882 }
1883
1884 if (Tok.Next->is(tok::question))
1885 return false;
1886
1887 // `foreach((A a, B b) in someList)` should not be seen as a cast.
1888 if (Tok.Next->is(Keywords.kw_in) && Style.isCSharp())
1889 return false;
1890
1891 // Functions which end with decorations like volatile, noexcept are unlikely
1892 // to be casts.
1893 if (Tok.Next->isOneOf(tok::kw_noexcept, tok::kw_volatile, tok::kw_const,
1894 tok::kw_requires, tok::kw_throw, tok::arrow,
1895 Keywords.kw_override, Keywords.kw_final) ||
1896 isCpp11AttributeSpecifier(*Tok.Next))
1897 return false;
1898
1899 // As Java has no function types, a "(" after the ")" likely means that this
1900 // is a cast.
1901 if (Style.Language == FormatStyle::LK_Java && Tok.Next->is(tok::l_paren))
1902 return true;
1903
1904 // If a (non-string) literal follows, this is likely a cast.
1905 if (Tok.Next->isNot(tok::string_literal) &&
1906 (Tok.Next->Tok.isLiteral() ||
1907 Tok.Next->isOneOf(tok::kw_sizeof, tok::kw_alignof)))
1908 return true;
1909
1910 // Heuristically try to determine whether the parentheses contain a type.
1911 auto IsQualifiedPointerOrReference = [](FormatToken *T) {
1912 // This is used to handle cases such as x = (foo *const)&y;
1913 assert(!T->isSimpleTypeSpecifier() && "Should have already been checked")(static_cast<void> (0));
1914 // Strip trailing qualifiers such as const or volatile when checking
1915 // whether the parens could be a cast to a pointer/reference type.
1916 while (T) {
1917 if (T->is(TT_AttributeParen)) {
1918 // Handle `x = (foo *__attribute__((foo)))&v;`:
1919 if (T->MatchingParen && T->MatchingParen->Previous &&
1920 T->MatchingParen->Previous->is(tok::kw___attribute)) {
1921 T = T->MatchingParen->Previous->Previous;
1922 continue;
1923 }
1924 } else if (T->is(TT_AttributeSquare)) {
1925 // Handle `x = (foo *[[clang::foo]])&v;`:
1926 if (T->MatchingParen && T->MatchingParen->Previous) {
1927 T = T->MatchingParen->Previous;
1928 continue;
1929 }
1930 } else if (T->canBePointerOrReferenceQualifier()) {
1931 T = T->Previous;
1932 continue;
1933 }
1934 break;
1935 }
1936 return T && T->is(TT_PointerOrReference);
1937 };
1938 bool ParensAreType =
1939 !Tok.Previous ||
1940 Tok.Previous->isOneOf(TT_TemplateCloser, TT_TypeDeclarationParen) ||
1941 Tok.Previous->isSimpleTypeSpecifier() ||
1942 IsQualifiedPointerOrReference(Tok.Previous);
1943 bool ParensCouldEndDecl =
1944 Tok.Next->isOneOf(tok::equal, tok::semi, tok::l_brace, tok::greater);
1945 if (ParensAreType && !ParensCouldEndDecl)
1946 return true;
1947
1948 // At this point, we heuristically assume that there are no casts at the
1949 // start of the line. We assume that we have found most cases where there
1950 // are by the logic above, e.g. "(void)x;".
1951 if (!LeftOfParens)
1952 return false;
1953
1954 // Certain token types inside the parentheses mean that this can't be a
1955 // cast.
1956 for (const FormatToken *Token = Tok.MatchingParen->Next; Token != &Tok;
1957 Token = Token->Next)
1958 if (Token->is(TT_BinaryOperator))
1959 return false;
1960
1961 // If the following token is an identifier or 'this', this is a cast. All
1962 // cases where this can be something else are handled above.
1963 if (Tok.Next->isOneOf(tok::identifier, tok::kw_this))
1964 return true;
1965
1966 // Look for a cast `( x ) (`.
1967 if (Tok.Next->is(tok::l_paren) && Tok.Previous && Tok.Previous->Previous) {
1968 if (Tok.Previous->is(tok::identifier) &&
1969 Tok.Previous->Previous->is(tok::l_paren))
1970 return true;
1971 }
1972
1973 if (!Tok.Next->Next)
1974 return false;
1975
1976 // If the next token after the parenthesis is a unary operator, assume
1977 // that this is cast, unless there are unexpected tokens inside the
1978 // parenthesis.
1979 bool NextIsUnary =
1980 Tok.Next->isUnaryOperator() || Tok.Next->isOneOf(tok::amp, tok::star);
1981 if (!NextIsUnary || Tok.Next->is(tok::plus) ||
1982 !Tok.Next->Next->isOneOf(tok::identifier, tok::numeric_constant))
1983 return false;
1984 // Search for unexpected tokens.
1985 for (FormatToken *Prev = Tok.Previous; Prev != Tok.MatchingParen;
1986 Prev = Prev->Previous) {
1987 if (!Prev->isOneOf(tok::kw_const, tok::identifier, tok::coloncolon))
1988 return false;
1989 }
1990 return true;
1991 }
1992
1993 /// Return the type of the given token assuming it is * or &.
1994 TokenType determineStarAmpUsage(const FormatToken &Tok, bool IsExpression,
1995 bool InTemplateArgument) {
1996 if (Style.Language == FormatStyle::LK_JavaScript)
1997 return TT_BinaryOperator;
1998
1999 // && in C# must be a binary operator.
2000 if (Style.isCSharp() && Tok.is(tok::ampamp))
2001 return TT_BinaryOperator;
2002
2003 const FormatToken *PrevToken = Tok.getPreviousNonComment();
2004 if (!PrevToken)
2005 return TT_UnaryOperator;
2006
2007 const FormatToken *NextToken = Tok.getNextNonComment();
2008 if (!NextToken ||
2009 NextToken->isOneOf(tok::arrow, tok::equal, tok::kw_noexcept) ||
2010 NextToken->canBePointerOrReferenceQualifier() ||
2011 (NextToken->is(tok::l_brace) && !NextToken->getNextNonComment()))
2012 return TT_PointerOrReference;
2013
2014 if (PrevToken->is(tok::coloncolon))
2015 return TT_PointerOrReference;
2016
2017 if (PrevToken->is(tok::r_paren) && PrevToken->is(TT_TypeDeclarationParen))
2018 return TT_PointerOrReference;
2019
2020 if (PrevToken->isOneOf(tok::l_paren, tok::l_square, tok::l_brace,
2021 tok::comma, tok::semi, tok::kw_return, tok::colon,
2022 tok::kw_co_return, tok::kw_co_await,
2023 tok::kw_co_yield, tok::equal, tok::kw_delete,
2024 tok::kw_sizeof, tok::kw_throw) ||
2025 PrevToken->isOneOf(TT_BinaryOperator, TT_ConditionalExpr,
2026 TT_UnaryOperator, TT_CastRParen))
2027 return TT_UnaryOperator;
2028
2029 if (NextToken->is(tok::l_square) && NextToken->isNot(TT_LambdaLSquare))
2030 return TT_PointerOrReference;
2031 if (NextToken->is(tok::kw_operator) && !IsExpression)
2032 return TT_PointerOrReference;
2033 if (NextToken->isOneOf(tok::comma, tok::semi))
2034 return TT_PointerOrReference;
2035
2036 if (PrevToken->Tok.isLiteral() ||
2037 PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::kw_true,
2038 tok::kw_false, tok::r_brace) ||
2039 NextToken->Tok.isLiteral() ||
2040 NextToken->isOneOf(tok::kw_true, tok::kw_false) ||
2041 NextToken->isUnaryOperator() ||
2042 // If we know we're in a template argument, there are no named
2043 // declarations. Thus, having an identifier on the right-hand side
2044 // indicates a binary operator.
2045 (InTemplateArgument && NextToken->Tok.isAnyIdentifier()))
2046 return TT_BinaryOperator;
2047
2048 // "&&(" is quite unlikely to be two successive unary "&".
2049 if (Tok.is(tok::ampamp) && NextToken->is(tok::l_paren))
2050 return TT_BinaryOperator;
2051
2052 // This catches some cases where evaluation order is used as control flow:
2053 // aaa && aaa->f();
2054 if (NextToken->Tok.isAnyIdentifier()) {
2055 const FormatToken *NextNextToken = NextToken->getNextNonComment();
2056 if (NextNextToken && NextNextToken->is(tok::arrow))
2057 return TT_BinaryOperator;
2058 }
2059
2060 // It is very unlikely that we are going to find a pointer or reference type
2061 // definition on the RHS of an assignment.
2062 if (IsExpression && !Contexts.back().CaretFound)
2063 return TT_BinaryOperator;
2064
2065 return TT_PointerOrReference;
2066 }
2067
2068 TokenType determinePlusMinusCaretUsage(const FormatToken &Tok) {
2069 const FormatToken *PrevToken = Tok.getPreviousNonComment();
2070 if (!PrevToken)
2071 return TT_UnaryOperator;
2072
2073 if (PrevToken->isOneOf(TT_CastRParen, TT_UnaryOperator))
2074 // This must be a sequence of leading unary operators.
2075 return TT_UnaryOperator;
2076
2077 // Use heuristics to recognize unary operators.
2078 if (PrevToken->isOneOf(tok::equal, tok::l_paren, tok::comma, tok::l_square,
2079 tok::question, tok::colon, tok::kw_return,
2080 tok::kw_case, tok::at, tok::l_brace, tok::kw_throw,
2081 tok::kw_co_return, tok::kw_co_yield))
2082 return TT_UnaryOperator;
2083
2084 // There can't be two consecutive binary operators.
2085 if (PrevToken->is(TT_BinaryOperator))
2086 return TT_UnaryOperator;
2087
2088 // Fall back to marking the token as binary operator.
2089 return TT_BinaryOperator;
2090 }
2091
2092 /// Determine whether ++/-- are pre- or post-increments/-decrements.
2093 TokenType determineIncrementUsage(const FormatToken &Tok) {
2094 const FormatToken *PrevToken = Tok.getPreviousNonComment();
2095 if (!PrevToken || PrevToken->is(TT_CastRParen))
2096 return TT_UnaryOperator;
2097 if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier))
2098 return TT_TrailingUnaryOperator;
2099
2100 return TT_UnaryOperator;
2101 }
2102
// Data members used by the annotation methods above.
// NOTE(review): the enclosing class header lies outside this excerpt;
// presumably this is AnnotatingParser — confirm.

// Stack of contexts; the methods above read and update Contexts.back().
2103 SmallVector<Context, 8> Contexts;
2104
// Formatting style; consulted for language-specific decisions.
2105 const FormatStyle &Style;
// Line whose flags (e.g. MightBeFunctionDecl, MustBeDeclaration) the
// heuristics above consult.
2106 AnnotatedLine &Line;
2107 FormatToken *CurrentToken;
// Set to true by determineTokenType() once an `auto` / `__auto_type` token
// has been seen.
2108 bool AutoFound;
// Language-specific keywords (e.g. kw_instanceof, kw_in, kw_as).
2109 const AdditionalKeywords &Keywords;
2110
2111 // Set of "<" tokens that do not open a template parameter list. If parseAngle
2112 // determines that a specific token can't be a template opener, it will make
2113 // same decision irrespective of the decisions for tokens leading up to it.
2114 // Store this information to prevent this from causing exponential runtime.
2115 llvm::SmallPtrSet<FormatToken *, 16> NonTemplateLess;
2116};
2117
2118static const int PrecedenceUnaryOperator = prec::PointerToMember + 1;
2119static const int PrecedenceArrowAndPeriod = prec::PointerToMember + 2;
2120
2121/// Parses binary expressions by inserting fake parenthesis based on
2122/// operator precedence.
2123class ExpressionParser {
2124public:
2125 ExpressionParser(const FormatStyle &Style, const AdditionalKeywords &Keywords,
2126 AnnotatedLine &Line)
2127 : Style(Style), Keywords(Keywords), Current(Line.First) {}
2128
2129 /// Parse expressions with the given operator precedence.
2130 void parse(int Precedence = 0) {
2131 // Skip 'return' and ObjC selector colons as they are not part of a binary
2132 // expression.
2133 while (Current && (Current->is(tok::kw_return) ||
2134 (Current->is(tok::colon) &&
2135 Current->isOneOf(TT_ObjCMethodExpr, TT_DictLiteral))))
2136 next();
2137
2138 if (!Current || Precedence > PrecedenceArrowAndPeriod)
2139 return;
2140
2141 // Conditional expressions need to be parsed separately for proper nesting.
2142 if (Precedence == prec::Conditional) {
2143 parseConditionalExpr();
2144 return;
2145 }
2146
2147 // Parse unary operators, which all have a higher precedence than binary
2148 // operators.
2149 if (Precedence == PrecedenceUnaryOperator) {
2150 parseUnaryOperator();
2151 return;
2152 }
2153
2154 FormatToken *Start = Current;
2155 FormatToken *LatestOperator = nullptr;
2156 unsigned OperatorIndex = 0;
2157
2158 while (Current) {
2159 // Consume operators with higher precedence.
2160 parse(Precedence + 1);
2161
2162 int CurrentPrecedence = getCurrentPrecedence();
2163
2164 if (Current && Current->is(TT_SelectorName) &&
2165 Precedence == CurrentPrecedence) {
2166 if (LatestOperator)
2167 addFakeParenthesis(Start, prec::Level(Precedence));
2168 Start = Current;
2169 }
2170
2171 // At the end of the line or when an operator with higher precedence is
2172 // found, insert fake parenthesis and return.
2173 if (!Current ||
2174 (Current->closesScope() &&
2175 (Current->MatchingParen || Current->is(TT_TemplateString))) ||
2176 (CurrentPrecedence != -1 && CurrentPrecedence < Precedence) ||
2177 (CurrentPrecedence == prec::Conditional &&
2178 Precedence == prec::Assignment && Current->is(tok::colon))) {
2179 break;
2180 }
2181
2182 // Consume scopes: (), [], <> and {}
2183 if (Current->opensScope()) {
2184 // In fragment of a JavaScript template string can look like '}..${' and
2185 // thus close a scope and open a new one at the same time.
2186 while (Current && (!Current->closesScope() || Current->opensScope())) {
2187 next();
2188 parse();
2189 }
2190 next();
2191 } else {
2192 // Operator found.
2193 if (CurrentPrecedence == Precedence) {
2194 if (LatestOperator)
2195 LatestOperator->NextOperator = Current;
2196 LatestOperator = Current;
2197 Current->OperatorIndex = OperatorIndex;
2198 ++OperatorIndex;
2199 }
2200 next(/*SkipPastLeadingComments=*/Precedence > 0);
2201 }
2202 }
2203
2204 if (LatestOperator && (Current || Precedence > 0)) {
2205 // LatestOperator->LastOperator = true;
2206 if (Precedence == PrecedenceArrowAndPeriod) {
2207 // Call expressions don't have a binary operator precedence.
2208 addFakeParenthesis(Start, prec::Unknown);
2209 } else {
2210 addFakeParenthesis(Start, prec::Level(Precedence));
2211 }
2212 }
2213 }
2214
2215private:
2216 /// Gets the precedence (+1) of the given token for binary operators
2217 /// and other tokens that we treat like binary operators.
2218 int getCurrentPrecedence() {
2219 if (Current) {
2220 const FormatToken *NextNonComment = Current->getNextNonComment();
2221 if (Current->is(TT_ConditionalExpr))
2222 return prec::Conditional;
2223 if (NextNonComment && Current->is(TT_SelectorName) &&
2224 (NextNonComment->isOneOf(TT_DictLiteral, TT_JsTypeColon) ||
2225 ((Style.Language == FormatStyle::LK_Proto ||
2226 Style.Language == FormatStyle::LK_TextProto) &&
2227 NextNonComment->is(tok::less))))
2228 return prec::Assignment;
2229 if (Current->is(TT_JsComputedPropertyName))
2230 return prec::Assignment;
2231 if (Current->is(TT_LambdaArrow))
2232 return prec::Comma;
2233 if (Current->is(TT_FatArrow))
2234 return prec::Assignment;
2235 if (Current->isOneOf(tok::semi, TT_InlineASMColon, TT_SelectorName) ||
2236 (Current->is(tok::comment) && NextNonComment &&
2237 NextNonComment->is(TT_SelectorName)))
2238 return 0;
2239 if (Current->is(TT_RangeBasedForLoopColon))
2240 return prec::Comma;
2241 if ((Style.Language == FormatStyle::LK_Java ||
2242 Style.Language == FormatStyle::LK_JavaScript) &&
2243 Current->is(Keywords.kw_instanceof))
2244 return prec::Relational;
2245 if (Style.Language == FormatStyle::LK_JavaScript &&
2246 Current->isOneOf(Keywords.kw_in, Keywords.kw_as))
2247 return prec::Relational;
2248 if (Current->is(TT_BinaryOperator) || Current->is(tok::comma))
2249 return Current->getPrecedence();
2250 if (Current->isOneOf(tok::period, tok::arrow))
2251 return PrecedenceArrowAndPeriod;
2252 if ((Style.Language == FormatStyle::LK_Java ||
2253 Style.Language == FormatStyle::LK_JavaScript) &&
2254 Current->isOneOf(Keywords.kw_extends, Keywords.kw_implements,
2255 Keywords.kw_throws))
2256 return 0;
2257 }
2258 return -1;
2259 }
2260
2261 void addFakeParenthesis(FormatToken *Start, prec::Level Precedence) {
2262 Start->FakeLParens.push_back(Precedence);
2263 if (Precedence > prec::Unknown)
2264 Start->StartsBinaryExpression = true;
2265 if (Current) {
2266 FormatToken *Previous = Current->Previous;
2267 while (Previous->is(tok::comment) && Previous->Previous)
2268 Previous = Previous->Previous;
2269 ++Previous->FakeRParens;
2270 if (Precedence > prec::Unknown)
2271 Previous->EndsBinaryExpression = true;
2272 }
2273 }
2274
2275 /// Parse unary operator expressions and surround them with fake
2276 /// parentheses if appropriate.
2277 void parseUnaryOperator() {
2278 llvm::SmallVector<FormatToken *, 2> Tokens;
2279 while (Current && Current->is(TT_UnaryOperator)) {
2280 Tokens.push_back(Current);
2281 next();
2282 }
2283 parse(PrecedenceArrowAndPeriod);
2284 for (FormatToken *Token : llvm::reverse(Tokens))
2285 // The actual precedence doesn't matter.
2286 addFakeParenthesis(Token, prec::Unknown);
2287 }
2288
2289 void parseConditionalExpr() {
2290 while (Current && Current->isTrailingComment()) {
2291 next();
2292 }
2293 FormatToken *Start = Current;
2294 parse(prec::LogicalOr);
2295 if (!Current || !Current->is(tok::question))
2296 return;
2297 next();
2298 parse(prec::Assignment);
2299 if (!Current || Current->isNot(TT_ConditionalExpr))
2300 return;
2301 next();
2302 parse(prec::Assignment);
2303 addFakeParenthesis(Start, prec::Conditional);
2304 }
2305
2306 void next(bool SkipPastLeadingComments = true) {
2307 if (Current)
2308 Current = Current->Next;
2309 while (Current &&
2310 (Current->NewlinesBefore == 0 || SkipPastLeadingComments) &&
2311 Current->isTrailingComment())
2312 Current = Current->Next;
2313 }
2314
2315 const FormatStyle &Style;
2316 const AdditionalKeywords &Keywords;
2317 FormatToken *Current;
2318};
2319
2320} // end anonymous namespace
2321
2322void TokenAnnotator::setCommentLineLevels(
2323 SmallVectorImpl<AnnotatedLine *> &Lines) {
2324 const AnnotatedLine *NextNonCommentLine = nullptr;
2325 for (SmallVectorImpl<AnnotatedLine *>::reverse_iterator I = Lines.rbegin(),
2326 E = Lines.rend();
2327 I != E; ++I) {
2328 bool CommentLine = true;
2329 for (const FormatToken *Tok = (*I)->First; Tok; Tok = Tok->Next) {
2330 if (!Tok->is(tok::comment)) {
2331 CommentLine = false;
2332 break;
2333 }
2334 }
2335
2336 // If the comment is currently aligned with the line immediately following
2337 // it, that's probably intentional and we should keep it.
2338 if (NextNonCommentLine && CommentLine &&
2339 NextNonCommentLine->First->NewlinesBefore <= 1 &&
2340 NextNonCommentLine->First->OriginalColumn ==
2341 (*I)->First->OriginalColumn) {
2342 // Align comments for preprocessor lines with the # in column 0 if
2343 // preprocessor lines are not indented. Otherwise, align with the next
2344 // line.
2345 (*I)->Level =
2346 (Style.IndentPPDirectives != FormatStyle::PPDIS_BeforeHash &&
2347 (NextNonCommentLine->Type == LT_PreprocessorDirective ||
2348 NextNonCommentLine->Type == LT_ImportStatement))
2349 ? 0
2350 : NextNonCommentLine->Level;
2351 } else {
2352 NextNonCommentLine = (*I)->First->isNot(tok::r_brace) ? (*I) : nullptr;
2353 }
2354
2355 setCommentLineLevels((*I)->Children);
2356 }
2357}
2358
2359static unsigned maxNestingDepth(const AnnotatedLine &Line) {
2360 unsigned Result = 0;
2361 for (const auto *Tok = Line.First; Tok != nullptr; Tok = Tok->Next)
2362 Result = std::max(Result, Tok->NestingLevel);
2363 return Result;
2364}
2365
2366void TokenAnnotator::annotate(AnnotatedLine &Line) {
2367 for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(),
2368 E = Line.Children.end();
2369 I != E; ++I) {
2370 annotate(**I);
2371 }
2372 AnnotatingParser Parser(Style, Line, Keywords);
2373 Line.Type = Parser.parseLine();
2374
2375 // With very deep nesting, ExpressionParser uses lots of stack and the
2376 // formatting algorithm is very slow. We're not going to do a good job here
2377 // anyway - it's probably generated code being formatted by mistake.
2378 // Just skip the whole line.
2379 if (maxNestingDepth(Line) > 50)
2380 Line.Type = LT_Invalid;
2381
2382 if (Line.Type == LT_Invalid)
2383 return;
2384
2385 ExpressionParser ExprParser(Style, Keywords, Line);
2386 ExprParser.parse();
2387
2388 if (Line.startsWith(TT_ObjCMethodSpecifier))
2389 Line.Type = LT_ObjCMethodDecl;
2390 else if (Line.startsWith(TT_ObjCDecl))
2391 Line.Type = LT_ObjCDecl;
2392 else if (Line.startsWith(TT_ObjCProperty))
2393 Line.Type = LT_ObjCProperty;
2394
2395 Line.First->SpacesRequiredBefore = 1;
2396 Line.First->CanBreakBefore = Line.First->MustBreakBefore;
2397}
2398
2399// This function heuristically determines whether 'Current' starts the name of a
2400// function declaration.
2401static bool isFunctionDeclarationName(const FormatToken &Current,
2402 const AnnotatedLine &Line) {
2403 auto skipOperatorName = [](const FormatToken *Next) -> const FormatToken * {
2404 for (; Next; Next = Next->Next) {
2405 if (Next->is(TT_OverloadedOperatorLParen))
2406 return Next;
2407 if (Next->is(TT_OverloadedOperator))
2408 continue;
2409 if (Next->isOneOf(tok::kw_new, tok::kw_delete)) {
2410 // For 'new[]' and 'delete[]'.
2411 if (Next->Next &&
2412 Next->Next->startsSequence(tok::l_square, tok::r_square))
2413 Next = Next->Next->Next;
2414 continue;
2415 }
2416 if (Next->startsSequence(tok::l_square, tok::r_square)) {
2417 // For operator[]().
2418 Next = Next->Next;
2419 continue;
2420 }
2421 if ((Next->isSimpleTypeSpecifier() || Next->is(tok::identifier)) &&
2422 Next->Next && Next->Next->isOneOf(tok::star, tok::amp, tok::ampamp)) {
2423 // For operator void*(), operator char*(), operator Foo*().
2424 Next = Next->Next;
2425 continue;
2426 }
2427 if (Next->is(TT_TemplateOpener) && Next->MatchingParen) {
2428 Next = Next->MatchingParen;
2429 continue;
2430 }
2431
2432 break;
2433 }
2434 return nullptr;
2435 };
2436
2437 // Find parentheses of parameter list.
2438 const FormatToken *Next = Current.Next;
2439 if (Current.is(tok::kw_operator)) {
2440 if (Current.Previous && Current.Previous->is(tok::coloncolon))
2441 return false;
2442 Next = skipOperatorName(Next);
2443 } else {
2444 if (!Current.is(TT_StartOfName) || Current.NestingLevel != 0)
2445 return false;
2446 for (; Next; Next = Next->Next) {
2447 if (Next->is(TT_TemplateOpener)) {
2448 Next = Next->MatchingParen;
2449 } else if (Next->is(tok::coloncolon)) {
2450 Next = Next->Next;
2451 if (!Next)
2452 return false;
2453 if (Next->is(tok::kw_operator)) {
2454 Next = skipOperatorName(Next->Next);
2455 break;
2456 }
2457 if (!Next->is(tok::identifier))
2458 return false;
2459 } else if (Next->is(tok::l_paren)) {
2460 break;
2461 } else {
2462 return false;
2463 }
2464 }
2465 }
2466
2467 // Check whether parameter list can belong to a function declaration.
2468 if (!Next || !Next->is(tok::l_paren) || !Next->MatchingParen)
2469 return false;
2470 // If the lines ends with "{", this is likely an function definition.
2471 if (Line.Last->is(tok::l_brace))
2472 return true;
2473 if (Next->Next == Next->MatchingParen)
2474 return true; // Empty parentheses.
2475 // If there is an &/&& after the r_paren, this is likely a function.
2476 if (Next->MatchingParen->Next &&
2477 Next->MatchingParen->Next->is(TT_PointerOrReference))
2478 return true;
2479 // Check for K&R C function definitions, e.g.:
2480 // int f(i)
2481 // {
2482 // return i + 1;
2483 // }
2484 if (Next->Next && Next->Next->is(tok::identifier) &&
2485 !Line.endsWith(tok::semi))
2486 return true;
2487 for (const FormatToken *Tok = Next->Next; Tok && Tok != Next->MatchingParen;
2488 Tok = Tok->Next) {
2489 if (Tok->is(TT_TypeDeclarationParen))
2490 return true;
2491 if (Tok->isOneOf(tok::l_paren, TT_TemplateOpener) && Tok->MatchingParen) {
2492 Tok = Tok->MatchingParen;
2493 continue;
2494 }
2495 if (Tok->is(tok::kw_const) || Tok->isSimpleTypeSpecifier() ||
2496 Tok->isOneOf(TT_PointerOrReference, TT_StartOfName, tok::ellipsis))
2497 return true;
2498 if (Tok->isOneOf(tok::l_brace, tok::string_literal, TT_ObjCMethodExpr) ||
2499 Tok->Tok.isLiteral())
2500 return false;
2501 }
2502 return false;
2503}
2504
2505bool TokenAnnotator::mustBreakForReturnType(const AnnotatedLine &Line) const {
2506 assert(Line.MightBeFunctionDecl)(static_cast<void> (0));
2507
2508 if ((Style.AlwaysBreakAfterReturnType == FormatStyle::RTBS_TopLevel ||
2509 Style.AlwaysBreakAfterReturnType ==
2510 FormatStyle::RTBS_TopLevelDefinitions) &&
2511 Line.Level > 0)
2512 return false;
2513
2514 switch (Style.AlwaysBreakAfterReturnType) {
2515 case FormatStyle::RTBS_None:
2516 return false;
2517 case FormatStyle::RTBS_All:
2518 case FormatStyle::RTBS_TopLevel:
2519 return true;
2520 case FormatStyle::RTBS_AllDefinitions:
2521 case FormatStyle::RTBS_TopLevelDefinitions:
2522 return Line.mightBeFunctionDefinition();
2523 }
2524
2525 return false;
2526}
2527
2528void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
2529 for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(),
2530 E = Line.Children.end();
2531 I != E; ++I) {
2532 calculateFormattingInformation(**I);
2533 }
2534
2535 Line.First->TotalLength =
2536 Line.First->IsMultiline ? Style.ColumnLimit
2537 : Line.FirstStartColumn + Line.First->ColumnWidth;
2538 FormatToken *Current = Line.First->Next;
2539 bool InFunctionDecl = Line.MightBeFunctionDecl;
2540 bool AlignArrayOfStructures =
2541 (Style.AlignArrayOfStructures != FormatStyle::AIAS_None &&
2542 Line.Type == LT_ArrayOfStructInitializer);
2543 if (AlignArrayOfStructures)
2544 calculateArrayInitializerColumnList(Line);
2545
2546 while (Current) {
2547 if (isFunctionDeclarationName(*Current, Line))
2548 Current->setType(TT_FunctionDeclarationName);
2549 if (Current->is(TT_LineComment)) {
2550 if (Current->Previous->is(BK_BracedInit) &&
2551 Current->Previous->opensScope())
2552 Current->SpacesRequiredBefore =
2553 (Style.Cpp11BracedListStyle && !Style.SpacesInParentheses) ? 0 : 1;
2554 else
2555 Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments;
2556
2557 // If we find a trailing comment, iterate backwards to determine whether
2558 // it seems to relate to a specific parameter. If so, break before that
2559 // parameter to avoid changing the comment's meaning. E.g. don't move 'b'
2560 // to the previous line in:
2561 // SomeFunction(a,
2562 // b, // comment
2563 // c);
2564 if (!Current->HasUnescapedNewline) {
2565 for (FormatToken *Parameter = Current->Previous; Parameter;
2566 Parameter = Parameter->Previous) {
2567 if (Parameter->isOneOf(tok::comment, tok::r_brace))
2568 break;
2569 if (Parameter->Previous && Parameter->Previous->is(tok::comma)) {
2570 if (!Parameter->Previous->is(TT_CtorInitializerComma) &&
2571 Parameter->HasUnescapedNewline)
2572 Parameter->MustBreakBefore = true;
2573 break;
2574 }
2575 }
2576 }
2577 } else if (Current->SpacesRequiredBefore == 0 &&
2578 spaceRequiredBefore(Line, *Current)) {
2579 Current->SpacesRequiredBefore = 1;
2580 }
2581
2582 Current->MustBreakBefore =
2583 Current->MustBreakBefore || mustBreakBefore(Line, *Current);
2584
2585 if (!Current->MustBreakBefore && InFunctionDecl &&
2586 Current->is(TT_FunctionDeclarationName))
2587 Current->MustBreakBefore = mustBreakForReturnType(Line);
2588
2589 Current->CanBreakBefore =
2590 Current->MustBreakBefore || canBreakBefore(Line, *Current);
2591 unsigned ChildSize = 0;
2592 if (Current->Previous->Children.size() == 1) {
2593 FormatToken &LastOfChild = *Current->Previous->Children[0]->Last;
2594 ChildSize = LastOfChild.isTrailingComment() ? Style.ColumnLimit
2595 : LastOfChild.TotalLength + 1;
2596 }
2597 const FormatToken *Prev = Current->Previous;
2598 if (Current->MustBreakBefore || Prev->Children.size() > 1 ||
2599 (Prev->Children.size() == 1 &&
2600 Prev->Children[0]->First->MustBreakBefore) ||
2601 Current->IsMultiline)
2602 Current->TotalLength = Prev->TotalLength + Style.ColumnLimit;
2603 else
2604 Current->TotalLength = Prev->TotalLength + Current->ColumnWidth +
2605 ChildSize + Current->SpacesRequiredBefore;
2606
2607 if (Current->is(TT_CtorInitializerColon))
2608 InFunctionDecl = false;
2609
2610 // FIXME: Only calculate this if CanBreakBefore is true once static
2611 // initializers etc. are sorted out.
2612 // FIXME: Move magic numbers to a better place.
2613
2614 // Reduce penalty for aligning ObjC method arguments using the colon
2615 // alignment as this is the canonical way (still prefer fitting everything
2616 // into one line if possible). Trying to fit a whole expression into one
2617 // line should not force other line breaks (e.g. when ObjC method
2618 // expression is a part of other expression).
2619 Current->SplitPenalty = splitPenalty(Line, *Current, InFunctionDecl);
2620 if (Style.Language == FormatStyle::LK_ObjC &&
2621 Current->is(TT_SelectorName) && Current->ParameterIndex > 0) {
2622 if (Current->ParameterIndex == 1)
2623 Current->SplitPenalty += 5 * Current->BindingStrength;
2624 } else {
2625 Current->SplitPenalty += 20 * Current->BindingStrength;
2626 }
2627
2628 Current = Current->Next;
2629 }
2630
2631 calculateUnbreakableTailLengths(Line);
2632 unsigned IndentLevel = Line.Level;
2633 for (Current = Line.First; Current != nullptr; Current = Current->Next) {
2634 if (Current->Role)
2635 Current->Role->precomputeFormattingInfos(Current);
2636 if (Current->MatchingParen &&
2637 Current->MatchingParen->opensBlockOrBlockTypeList(Style)) {
2638 assert(IndentLevel > 0)(static_cast<void> (0));
2639 --IndentLevel;
2640 }
2641 Current->IndentLevel = IndentLevel;
2642 if (Current->opensBlockOrBlockTypeList(Style))
2643 ++IndentLevel;
2644 }
2645
2646 LLVM_DEBUG({ printDebugInfo(Line); })do { } while (false);
2647}
2648
2649void TokenAnnotator::calculateUnbreakableTailLengths(AnnotatedLine &Line) {
2650 unsigned UnbreakableTailLength = 0;
2651 FormatToken *Current = Line.Last;
2652 while (Current) {
2653 Current->UnbreakableTailLength = UnbreakableTailLength;
2654 if (Current->CanBreakBefore ||
2655 Current->isOneOf(tok::comment, tok::string_literal)) {
2656 UnbreakableTailLength = 0;
2657 } else {
2658 UnbreakableTailLength +=
2659 Current->ColumnWidth + Current->SpacesRequiredBefore;
2660 }
2661 Current = Current->Previous;
2662 }
2663}
2664
2665void TokenAnnotator::calculateArrayInitializerColumnList(AnnotatedLine &Line) {
2666 if (Line.First == Line.Last) {
2667 return;
2668 }
2669 auto *CurrentToken = Line.First;
2670 CurrentToken->ArrayInitializerLineStart = true;
2671 unsigned Depth = 0;
2672 while (CurrentToken != nullptr && CurrentToken != Line.Last) {
2673 if (CurrentToken->is(tok::l_brace)) {
2674 CurrentToken->IsArrayInitializer = true;
2675 if (CurrentToken->Next != nullptr)
2676 CurrentToken->Next->MustBreakBefore = true;
2677 CurrentToken =
2678 calculateInitializerColumnList(Line, CurrentToken->Next, Depth + 1);
2679 } else {
2680 CurrentToken = CurrentToken->Next;
2681 }
2682 }
2683}
2684
2685FormatToken *TokenAnnotator::calculateInitializerColumnList(
2686 AnnotatedLine &Line, FormatToken *CurrentToken, unsigned Depth) {
2687 while (CurrentToken != nullptr && CurrentToken != Line.Last) {
2688 if (CurrentToken->is(tok::l_brace))
2689 ++Depth;
2690 else if (CurrentToken->is(tok::r_brace))
2691 --Depth;
2692 if (Depth == 2 && CurrentToken->isOneOf(tok::l_brace, tok::comma)) {
2693 CurrentToken = CurrentToken->Next;
2694 if (CurrentToken == nullptr)
2695 break;
2696 CurrentToken->StartsColumn = true;
2697 CurrentToken = CurrentToken->Previous;
2698 }
2699 CurrentToken = CurrentToken->Next;
2700 }
2701 return CurrentToken;
2702}
2703
// Returns the penalty for placing a line break between Tok.Previous ('Left')
// and Tok ('Right').
//
// The rules below are evaluated in order and the first one that matches
// determines the result, so their relative order matters. If no rule matches,
// the precedence of the surrounding operator is used, with 3 as the final
// fallback.
//
// 'Line' is the line containing 'Tok'. 'InFunctionDecl' is supplied by the
// caller and selects the function-declaration specific penalties.
2704unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
2705 const FormatToken &Tok,
2706 bool InFunctionDecl) {
 // Tok.Previous is dereferenced unconditionally: Tok must not be the first
 // token of its line.
2707 const FormatToken &Left = *Tok.Previous;
2708 const FormatToken &Right = Tok;
2709
2710 if (Left.is(tok::semi))
2711 return 0;
2712
 // Language-specific rules for Java and JavaScript come first.
2713 if (Style.Language == FormatStyle::LK_Java) {
2714 if (Right.isOneOf(Keywords.kw_extends, Keywords.kw_throws))
2715 return 1;
2716 if (Right.is(Keywords.kw_implements))
2717 return 2;
2718 if (Left.is(tok::comma) && Left.NestingLevel == 0)
2719 return 3;
2720 } else if (Style.Language == FormatStyle::LK_JavaScript) {
2721 if (Right.is(Keywords.kw_function) && Left.isNot(tok::comma))
2722 return 100;
2723 if (Left.is(TT_JsTypeColon))
2724 return 35;
2725 if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) ||
2726 (Right.is(TT_TemplateString) && Right.TokenText.startswith("}")))
2727 return 100;
2728 // Prefer breaking call chains (".foo") over empty "{}", "[]" or "()".
2729 if (Left.opensScope() && Right.closesScope())
2730 return 200;
2731 }
2732
2733 if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral))
2734 return 1;
2735 if (Right.is(tok::l_square)) {
2736 if (Style.Language == FormatStyle::LK_Proto)
2737 return 1;
2738 if (Left.is(tok::r_square))
2739 return 200;
2740 // Slightly prefer formatting local lambda definitions like functions.
2741 if (Right.is(TT_LambdaLSquare) && Left.is(tok::equal))
2742 return 35;
2743 if (!Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare,
2744 TT_ArrayInitializerLSquare,
2745 TT_DesignatedInitializerLSquare, TT_AttributeSquare))
2746 return 500;
2747 }
2748
2749 if (Left.is(tok::coloncolon) ||
2750 (Right.is(tok::period) && Style.Language == FormatStyle::LK_Proto))
2751 return 500;
2752 if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) ||
2753 Right.is(tok::kw_operator)) {
2754 if (Line.startsWith(tok::kw_for) && Right.PartOfMultiVariableDeclStmt)
2755 return 3;
2756 if (Left.is(TT_StartOfName))
2757 return 110;
2758 if (InFunctionDecl && Right.NestingLevel == 0)
2759 return Style.PenaltyReturnTypeOnItsOwnLine;
2760 return 200;
2761 }
2762 if (Right.is(TT_PointerOrReference))
2763 return 190;
2764 if (Right.is(TT_LambdaArrow))
2765 return 110;
2766 if (Left.is(tok::equal) && Right.is(tok::l_brace))
2767 return 160;
2768 if (Left.is(TT_CastRParen))
2769 return 100;
2770 if (Left.isOneOf(tok::kw_class, tok::kw_struct))
2771 return 5000;
2772 if (Left.is(tok::comment))
2773 return 1000;
2774
2775 if (Left.isOneOf(TT_RangeBasedForLoopColon, TT_InheritanceColon,
2776 TT_CtorInitializerColon))
2777 return 2;
2778
2779 if (Right.isMemberAccess()) {
2780 // Breaking before the "./->" of a chained call/member access is reasonably
2781 // cheap, as formatting those with one call per line is generally
2782 // desirable. In particular, it should be cheaper to break before the call
2783 // than it is to break inside a call's parameters, which could lead to weird
2784 // "hanging" indents. The exception is the very last "./->" to support this
2785 // frequent pattern:
2786 //
2787 // aaaaaaaa.aaaaaaaa.bbbbbbb().ccccccccccccccccccccc(
2788 // dddddddd);
2789 //
2790 // which might otherwise be blown up onto many lines. Here, clang-format
2791 // won't produce "hanging" indents anyway as there is no other trailing
2792 // call.
2793 //
2794 // Also apply a higher penalty if it is not a call as that might lead to a wrapping
2795 // like:
2796 //
2797 // aaaaaaa
2798 // .aaaaaaaaa.bbbbbbbb(cccccccc);
2799 return !Right.NextOperator || !Right.NextOperator->Previous->closesScope()
2800 ? 150
2801 : 35;
2802 }
2803
2804 if (Right.is(TT_TrailingAnnotation) &&
2805 (!Right.Next || Right.Next->isNot(tok::l_paren))) {
2806 // Moving trailing annotations to the next line is fine for ObjC method
2807 // declarations.
2808 if (Line.startsWith(TT_ObjCMethodSpecifier))
2809 return 10;
2810 // Generally, breaking before a trailing annotation is bad unless it is
2811 // function-like. It seems to be especially preferable to keep standard
2812 // annotations (i.e. "const", "final" and "override") on the same line.
2813 // Use a slightly higher penalty after ")" so that annotations like
2814 // "const override" are kept together.
2815 bool is_short_annotation = Right.TokenText.size() < 10;
2816 return (Left.is(tok::r_paren) ? 100 : 120) + (is_short_annotation ? 50 : 0);
2817 }
2818
2819 // In for-loops, prefer breaking at ',' and ';'.
2820 if (Line.startsWith(tok::kw_for) && Left.is(tok::equal))
2821 return 4;
2822
2823 // In Objective-C method expressions, prefer breaking before "param:" over
2824 // breaking after it.
2825 if (Right.is(TT_SelectorName))
2826 return 0;
2827 if (Left.is(tok::colon) && Left.is(TT_ObjCMethodExpr))
2828 return Line.MightBeFunctionDecl ? 50 : 500;
2829
2830 // In Objective-C type declarations, avoid breaking after the category's
2831 // open paren (we'll prefer breaking after the protocol list's opening
2832 // angle bracket, if present).
2833 if (Line.Type == LT_ObjCDecl && Left.is(tok::l_paren) && Left.Previous &&
2834 Left.Previous->isOneOf(tok::identifier, tok::greater))
2835 return 500;
2836
2837 if (Left.is(tok::l_paren) && InFunctionDecl &&
2838 Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign)
2839 return 100;
2840 if (Left.is(tok::l_paren) && Left.Previous &&
2841 (Left.Previous->is(tok::kw_for) || Left.Previous->isIf()))
2842 return 1000;
2843 if (Left.is(tok::equal) && InFunctionDecl)
2844 return 110;
2845 if (Right.is(tok::r_brace))
2846 return 1;
2847 if (Left.is(TT_TemplateOpener))
2848 return 100;
2849 if (Left.opensScope()) {
2850 // If we aren't aligning after opening parens/braces we can always break
2851 // here unless the style does not want us to place all arguments on the
2852 // next line.
2853 if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign &&
2854 (Left.ParameterCount <= 1 || Style.AllowAllArgumentsOnNextLine))
2855 return 0;
2856 if (Left.is(tok::l_brace) && !Style.Cpp11BracedListStyle)
2857 return 19;
2858 return Left.ParameterCount > 1 ? Style.PenaltyBreakBeforeFirstCallParameter
2859 : 19;
2860 }
2861 if (Left.is(TT_JavaAnnotation))
2862 return 50;
2863
2864 if (Left.is(TT_UnaryOperator))
2865 return 60;
2866 if (Left.isOneOf(tok::plus, tok::comma) && Left.Previous &&
2867 Left.Previous->isLabelString() &&
2868 (Left.NextOperator || Left.OperatorIndex != 0))
2869 return 50;
2870 if (Right.is(tok::plus) && Left.isLabelString() &&
2871 (Right.NextOperator || Right.OperatorIndex != 0))
2872 return 25;
2873 if (Left.is(tok::comma))
2874 return 1;
2875 if (Right.is(tok::lessless) && Left.isLabelString() &&
2876 (Right.NextOperator || Right.OperatorIndex != 1))
2877 return 25;
2878 if (Right.is(tok::lessless)) {
2879 // Breaking at a << is really cheap.
2880 if (!Left.is(tok::r_paren) || Right.OperatorIndex > 0)
2881 // Slightly prefer to break before the first one in log-like statements.
2882 return 2;
2883 return 1;
2884 }
2885 if (Left.ClosesTemplateDeclaration)
2886 return Style.PenaltyBreakTemplateDeclaration;
2887 if (Left.is(TT_ConditionalExpr))
2888 return prec::Conditional;
 // Fall back to the operator precedence of Left, or of Right if Left has
 // none.
2889 prec::Level Level = Left.getPrecedence();
2890 if (Level == prec::Unknown)
2891 Level = Right.getPrecedence();
2892 if (Level == prec::Assignment)
2893 return Style.PenaltyBreakAssignment;
2894 if (Level != prec::Unknown)
2895 return Level;
2896
2897 return 3;
2898}
2899
2900bool TokenAnnotator::spaceRequiredBeforeParens(const FormatToken &Right) const {
2901 return Style.SpaceBeforeParens == FormatStyle::SBPO_Always ||
2902 (Style.SpaceBeforeParens == FormatStyle::SBPO_NonEmptyParentheses &&
2903 Right.ParameterCount > 0);
2904}
2905
// Returns true if a space is required between the tokens 'Left' and 'Right'
// on 'Line'.
//
// The rules below are evaluated in order and the first one that matches
// determines the result, so their relative order matters. If no rule matches,
// a space is required.
2906bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
2907 const FormatToken &Left,
2908 const FormatToken &Right) {
2909 if (Left.is(tok::kw_return) && Right.isNot(tok::semi))
2910 return true;
2911 if (Style.isJson() && Left.is(tok::string_literal) && Right.is(tok::colon))
2912 return false;
2913 if (Left.is(Keywords.kw_assert) && Style.Language == FormatStyle::LK_Java)
2914 return true;
2915 if (Style.ObjCSpaceAfterProperty && Line.Type == LT_ObjCProperty &&
2916 Left.Tok.getObjCKeywordID() == tok::objc_property)
2917 return true;
2918 if (Right.is(tok::hashhash))
2919 return Left.is(tok::hash);
2920 if (Left.isOneOf(tok::hashhash, tok::hash))
2921 return Right.is(tok::hash);
 // Empty "()" and empty non-block "{}".
2922 if ((Left.is(tok::l_paren) && Right.is(tok::r_paren)) ||
2923 (Left.is(tok::l_brace) && Left.isNot(BK_Block) &&
2924 Right.is(tok::r_brace) && Right.isNot(BK_Block)))
2925 return Style.SpaceInEmptyParentheses;
2926 if (Style.SpacesInConditionalStatement) {
2927 if (Left.is(tok::l_paren) && Left.Previous &&
2928 isKeywordWithCondition(*Left.Previous))
2929 return true;
2930 if (Right.is(tok::r_paren) && Right.MatchingParen &&
2931 Right.MatchingParen->Previous &&
2932 isKeywordWithCondition(*Right.MatchingParen->Previous))
2933 return true;
2934 }
2935
2936 // requires ( or requires(
2937 if (Right.is(tok::l_paren) && Left.is(tok::kw_requires))
2938 return spaceRequiredBeforeParens(Right);
2939 // requires clause Concept1<T> && Concept2<T>
2940 if (Left.is(TT_ConstraintJunctions) && Right.is(tok::identifier))
2941 return true;
2942
2943 if (Left.is(tok::l_paren) || Right.is(tok::r_paren))
2944 return (Right.is(TT_CastRParen) ||
2945 (Left.MatchingParen && Left.MatchingParen->is(TT_CastRParen)))
2946 ? Style.SpacesInCStyleCastParentheses
2947 : Style.SpacesInParentheses;
2948 if (Right.isOneOf(tok::semi, tok::comma))
2949 return false;
2950 if (Right.is(tok::less) && Line.Type == LT_ObjCDecl) {
2951 bool IsLightweightGeneric = Right.MatchingParen &&
2952 Right.MatchingParen->Next &&
2953 Right.MatchingParen->Next->is(tok::colon);
2954 return !IsLightweightGeneric && Style.ObjCSpaceBeforeProtocolList;
2955 }
2956 if (Right.is(tok::less) && Left.is(tok::kw_template))
2957 return Style.SpaceAfterTemplateKeyword;
2958 if (Left.isOneOf(tok::exclaim, tok::tilde))
2959 return false;
2960 if (Left.is(tok::at) &&
2961 Right.isOneOf(tok::identifier, tok::string_literal, tok::char_constant,
2962 tok::numeric_constant, tok::l_paren, tok::l_brace,
2963 tok::kw_true, tok::kw_false))
2964 return false;
2965 if (Left.is(tok::colon))
2966 return !Left.is(TT_ObjCMethodExpr);
2967 if (Left.is(tok::coloncolon))
2968 return false;
2969 if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less)) {
2970 if (Style.Language == FormatStyle::LK_TextProto ||
2971 (Style.Language == FormatStyle::LK_Proto &&
2972 (Left.is(TT_DictLiteral) || Right.is(TT_DictLiteral)))) {
2973 // Format empty list as `<>`.
2974 if (Left.is(tok::less) && Right.is(tok::greater))
2975 return false;
2976 return !Style.Cpp11BracedListStyle;
2977 }
2978 return false;
2979 }
2980 if (Right.is(tok::ellipsis))
2981 return Left.Tok.isLiteral() || (Left.is(tok::identifier) && Left.Previous &&
2982 Left.Previous->is(tok::kw_case));
2983 if (Left.is(tok::l_square) && Right.is(tok::amp))
2984 return Style.SpacesInSquareBrackets;
2985 if (Right.is(TT_PointerOrReference)) {
2986 if (Left.is(tok::r_paren) && Line.MightBeFunctionDecl) {
2987 if (!Left.MatchingParen)
2988 return true;
2989 FormatToken *TokenBeforeMatchingParen =
2990 Left.MatchingParen->getPreviousNonComment();
2991 if (!TokenBeforeMatchingParen || !Left.is(TT_TypeDeclarationParen))
2992 return true;
2993 }
2994 // Add a space if the previous token is a pointer qualifier or the closing
2995 // parenthesis of __attribute__(()) expression and the style requires spaces
2996 // after pointer qualifiers.
2997 if ((Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_After ||
2998 Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_Both) &&
2999 (Left.is(TT_AttributeParen) || Left.canBePointerOrReferenceQualifier()))
3000 return true;
3001 return (
3002 Left.Tok.isLiteral() ||
3003 (!Left.isOneOf(TT_PointerOrReference, tok::l_paren) &&
3004 (getTokenPointerOrReferenceAlignment(Right) != FormatStyle::PAS_Left ||
3005 (Line.IsMultiVariableDeclStmt &&
3006 (Left.NestingLevel == 0 ||
3007 (Left.NestingLevel == 1 && Line.First->is(tok::kw_for)))))));
3008 }
3009 if (Right.is(TT_FunctionTypeLParen) && Left.isNot(tok::l_paren) &&
3010 (!Left.is(TT_PointerOrReference) ||
3011 (getTokenPointerOrReferenceAlignment(Left) != FormatStyle::PAS_Right &&
3012 !Line.IsMultiVariableDeclStmt)))
3013 return true;
3014 if (Left.is(TT_PointerOrReference)) {
3015 // Add a space if the next token is a pointer qualifier and the style
3016 // requires spaces before pointer qualifiers.
3017 if ((Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_Before ||
3018 Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_Both) &&
3019 Right.canBePointerOrReferenceQualifier())
3020 return true;
3021 return Right.Tok.isLiteral() || Right.is(TT_BlockComment) ||
3022 (Right.isOneOf(Keywords.kw_override, Keywords.kw_final) &&
3023 !Right.is(TT_StartOfName)) ||
3024 (Right.is(tok::l_brace) && Right.is(BK_Block)) ||
3025 (!Right.isOneOf(TT_PointerOrReference, TT_ArraySubscriptLSquare,
3026 tok::l_paren) &&
3027 (getTokenPointerOrReferenceAlignment(Left) !=
3028 FormatStyle::PAS_Right &&
3029 !Line.IsMultiVariableDeclStmt) &&
3030 Left.Previous &&
3031 !Left.Previous->isOneOf(tok::l_paren, tok::coloncolon,
3032 tok::l_square));
3033 }
3034 // Ensure right pointer alignment with ellipsis e.g. int *...P
3035 if (Left.is(tok::ellipsis) && Left.Previous &&
3036 Left.Previous->isOneOf(tok::star, tok::amp, tok::ampamp))
3037 return Style.PointerAlignment != FormatStyle::PAS_Right;
3038
3039 if (Right.is(tok::star) && Left.is(tok::l_paren))
3040 return false;
3041 if (Left.is(tok::star) && Right.isOneOf(tok::star, tok::amp, tok::ampamp))
3042 return false;
3043 if (Right.isOneOf(tok::star, tok::amp, tok::ampamp)) {
 // Walk backwards over the tokens of a (possibly qualified / templated)
 // type name, stopping at 'operator' or at the first token that is not
 // part of such a name.
3044 const FormatToken *Previous = &Left;
3045 while (Previous && !Previous->is(tok::kw_operator)) {
3046 if (Previous->is(tok::identifier) || Previous->isSimpleTypeSpecifier()) {
3047 Previous = Previous->getPreviousNonComment();
3048 continue;
3049 }
3050 if (Previous->is(TT_TemplateCloser) && Previous->MatchingParen) {
3051 Previous = Previous->MatchingParen->getPreviousNonComment();
3052 continue;
3053 }
3054 if (Previous->is(tok::coloncolon)) {
3055 Previous = Previous->getPreviousNonComment();
3056 continue;
3057 }
3058 break;
3059 }
3060 // Space between the type and the * in:
3061 // operator void*()
3062 // operator char*()
3063 // operator void const*()
3064 // operator void volatile*()
3065 // operator /*comment*/ const char*()
3066 // operator volatile /*comment*/ char*()
3067 // operator Foo*()
3068 // operator C<T>*()
3069 // operator std::Foo*()
3070 // operator C<T>::D<U>*()
3071 // dependent on PointerAlignment style.
3072 if (Previous) {
3073 if (Previous->endsSequence(tok::kw_operator))
3074 return (Style.PointerAlignment != FormatStyle::PAS_Left);
3075 if (Previous->is(tok::kw_const) || Previous->is(tok::kw_volatile))
3076 return (Style.PointerAlignment != FormatStyle::PAS_Left) ||
3077 (Style.SpaceAroundPointerQualifiers ==
3078 FormatStyle::SAPQ_After) ||
3079 (Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_Both);
3080 }
3081 }
3082 const auto SpaceRequiredForArrayInitializerLSquare =
3083 [](const FormatToken &LSquareTok, const FormatStyle &Style) {
3084 return Style.SpacesInContainerLiterals ||
3085 ((Style.Language == FormatStyle::LK_Proto ||
3086 Style.Language == FormatStyle::LK_TextProto) &&
3087 !Style.Cpp11BracedListStyle &&
3088 LSquareTok.endsSequence(tok::l_square, tok::colon,
3089 TT_SelectorName));
3090 };
3091 if (Left.is(tok::l_square))
3092 return (Left.is(TT_ArrayInitializerLSquare) && Right.isNot(tok::r_square) &&
3093 SpaceRequiredForArrayInitializerLSquare(Left, Style)) ||
3094 (Left.isOneOf(TT_ArraySubscriptLSquare, TT_StructuredBindingLSquare,
3095 TT_LambdaLSquare) &&
3096 Style.SpacesInSquareBrackets && Right.isNot(tok::r_square));
3097 if (Right.is(tok::r_square))
3098 return Right.MatchingParen &&
3099 ((Right.MatchingParen->is(TT_ArrayInitializerLSquare) &&
3100 SpaceRequiredForArrayInitializerLSquare(*Right.MatchingParen,
3101 Style)) ||
3102 (Style.SpacesInSquareBrackets &&
3103 Right.MatchingParen->isOneOf(TT_ArraySubscriptLSquare,
3104 TT_StructuredBindingLSquare,
3105 TT_LambdaLSquare)) ||
3106 Right.MatchingParen->is(TT_AttributeParen));
3107 if (Right.is(tok::l_square) &&
3108 !Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare,
3109 TT_DesignatedInitializerLSquare,
3110 TT_StructuredBindingLSquare, TT_AttributeSquare) &&
3111 !Left.isOneOf(tok::numeric_constant, TT_DictLiteral) &&
3112 !(!Left.is(tok::r_square) && Style.SpaceBeforeSquareBrackets &&
3113 Right.is(TT_ArraySubscriptLSquare)))
3114 return false;
3115 if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
3116 return !Left.Children.empty(); // No spaces in "{}".
3117 if ((Left.is(tok::l_brace) && Left.isNot(BK_Block)) ||
3118 (Right.is(tok::r_brace) && Right.MatchingParen &&
3119 Right.MatchingParen->isNot(BK_Block)))
3120 return Style.Cpp11BracedListStyle ? Style.SpacesInParentheses : true;
3121 if (Left.is(TT_BlockComment))
3122 // No whitespace in x(/*foo=*/1), except for JavaScript.
3123 return Style.Language == FormatStyle::LK_JavaScript ||
3124 !Left.TokenText.endswith("=*/");
3125
3126 // Space between template and attribute.
3127 // e.g. template <typename T> [[nodiscard]] ...
3128 if (Left.is(TT_TemplateCloser) && Right.is(TT_AttributeSquare))
3129 return true;
3130 if (Right.is(tok::l_paren)) {
3131 if ((Left.is(tok::r_paren) && Left.is(TT_AttributeParen)) ||
3132 (Left.is(tok::r_square) && Left.is(TT_AttributeSquare)))
3133 return true;
3134 if (Style.SpaceBeforeParens ==
3135 FormatStyle::SBPO_ControlStatementsExceptControlMacros &&
3136 Left.is(TT_ForEachMacro))
3137 return false;
3138 if (Style.SpaceBeforeParens ==
3139 FormatStyle::SBPO_ControlStatementsExceptControlMacros &&
3140 Left.is(TT_IfMacro))
3141 return false;
3142 return Line.Type == LT_ObjCDecl || Left.is(tok::semi) ||
3143 (Style.SpaceBeforeParens != FormatStyle::SBPO_Never &&
3144 (Left.isOneOf(tok::pp_elif, tok::kw_for, tok::kw_while,
3145 tok::kw_switch, tok::kw_case, TT_ForEachMacro,
3146 TT_ObjCForIn) ||
3147 Left.isIf(Line.Type != LT_PreprocessorDirective) ||
3148 (Left.isOneOf(tok::kw_try, Keywords.kw___except, tok::kw_catch,
3149 tok::kw_new, tok::kw_delete) &&
3150 (!Left.Previous || Left.Previous->isNot(tok::period))))) ||
3151 (spaceRequiredBeforeParens(Right) &&
3152 (Left.is(tok::identifier) || Left.isFunctionLikeKeyword() ||
3153 Left.is(tok::r_paren) || Left.isSimpleTypeSpecifier() ||
3154 (Left.is(tok::r_square) && Left.MatchingParen &&
3155 Left.MatchingParen->is(TT_LambdaLSquare))) &&
3156 Line.Type != LT_PreprocessorDirective);
3157 }
3158 if (Left.is(tok::at) && Right.Tok.getObjCKeywordID() != tok::objc_not_keyword)
3159 return false;
3160 if (Right.is(TT_UnaryOperator))
3161 return !Left.isOneOf(tok::l_paren, tok::l_square, tok::at) &&
3162 (Left.isNot(tok::colon) || Left.isNot(TT_ObjCMethodExpr));
3163 if ((Left.isOneOf(tok::identifier, tok::greater, tok::r_square,
3164 tok::r_paren) ||
3165 Left.isSimpleTypeSpecifier()) &&
3166 Right.is(tok::l_brace) && Right.getNextNonComment() &&
3167 Right.isNot(BK_Block))
3168 return false;
3169 if (Left.is(tok::period) || Right.is(tok::period))
3170 return false;
3171 if (Right.is(tok::hash) && Left.is(tok::identifier) && Left.TokenText == "L")
3172 return false;
3173 if (Left.is(TT_TemplateCloser) && Left.MatchingParen &&
3174 Left.MatchingParen->Previous &&
3175 (Left.MatchingParen->Previous->is(tok::period) ||
3176 Left.MatchingParen->Previous->is(tok::coloncolon)))
3177 // Java call to generic function with explicit type:
3178 // A.<B<C<...>>>DoSomething();
3179 // A::<B<C<...>>>DoSomething(); // With a Java 8 method reference.
3180 return false;
3181 if (Left.is(TT_TemplateCloser) && Right.is(tok::l_square))
3182 return false;
3183 if (Left.is(tok::l_brace) && Left.endsSequence(TT_DictLiteral, tok::at))
3184 // Objective-C dictionary literal -> no space after opening brace.
3185 return false;
3186 if (Right.is(tok::r_brace) && Right.MatchingParen &&
3187 Right.MatchingParen->endsSequence(TT_DictLiteral, tok::at))
3188 // Objective-C dictionary literal -> no space before closing brace.
3189 return false;
3190 if (Right.getType() == TT_TrailingAnnotation &&
3191 Right.isOneOf(tok::amp, tok::ampamp) &&
3192 Left.isOneOf(tok::kw_const, tok::kw_volatile) &&
3193 (!Right.Next || Right.Next->is(tok::semi)))
3194 // Match const and volatile ref-qualifiers without any additional
3195 // qualifiers such as
3196 // void Fn() const &;
3197 return getTokenReferenceAlignment(Right) != FormatStyle::PAS_Left;
 // No rule matched: separate the tokens with a space.
3198 return true;
3199}
3200
// Decides whether whitespace must separate 'Right' from the token in front of
// it ('Right.Previous', referred to as 'Left' below).
//
// 'Right.Previous' is dereferenced unconditionally, so callers must only pass
// tokens that have a predecessor.
//
// The function is one long chain of early returns and the order of the checks
// matters: the first matching rule wins. It proceeds as follows:
//   1. Two adjacent identifiers always get a space.
//   2. Language-specific rules selected from 'Style' (C++, Proto/TextProto,
//      JSON, C#, JavaScript, Java).
//   3. Rules that do not depend on the language branch.
//   4. If nothing matched, the decision is delegated to
//      spaceRequiredBetween(Line, Left, Right).
//
// Returns true if a space is required, false otherwise.
3201bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
3202 const FormatToken &Right) {
3203 const FormatToken &Left = *Right.Previous;
// True when Right.WhitespaceRange is non-empty (begin != end), i.e. (per the
// lambda's name) some whitespace already precedes 'Right'.
3204 auto HasExistingWhitespace = [&Right]() {
3205 return Right.WhitespaceRange.getBegin() != Right.WhitespaceRange.getEnd();
3206 };
3207 if (Right.Tok.getIdentifierInfo() && Left.Tok.getIdentifierInfo())
3208 return true; // Never ever merge two identifiers.
// Language-specific rules. Each branch returns only when one of its rules
// matches; otherwise control falls through to the shared rules further down.
3209 if (Style.isCpp()) {
3210 if (Left.is(tok::kw_operator))
3211 return Right.is(tok::coloncolon);
3212 if (Right.is(tok::l_brace) && Right.is(BK_BracedInit) &&
3213 !Left.opensScope() && Style.SpaceBeforeCpp11BracedList)
3214 return true;
3215 } else if (Style.Language == FormatStyle::LK_Proto ||
3216 Style.Language == FormatStyle::LK_TextProto) {
3217 if (Right.is(tok::period) &&
3218 Left.isOneOf(Keywords.kw_optional, Keywords.kw_required,
3219 Keywords.kw_repeated, Keywords.kw_extend))
3220 return true;
3221 if (Right.is(tok::l_paren) &&
3222 Left.isOneOf(Keywords.kw_returns, Keywords.kw_option))
3223 return true;
3224 if (Right.isOneOf(tok::l_brace, tok::less) && Left.is(TT_SelectorName))
3225 return true;
3226 // Slashes occur in text protocol extension syntax: [type/type] { ... }.
3227 if (Left.is(tok::slash) || Right.is(tok::slash))
3228 return false;
3229 if (Left.MatchingParen &&
3230 Left.MatchingParen->is(TT_ProtoExtensionLSquare) &&
3231 Right.isOneOf(tok::l_brace, tok::less))
3232 return !Style.Cpp11BracedListStyle;
3233 // A percent is probably part of a formatting specification, such as %lld.
3234 if (Left.is(tok::percent))
3235 return false;
3236 // Preserve the existence of a space before a percent for cases like 0x%04x
3237 // and "%d %d"
3238 if (Left.is(tok::numeric_constant) && Right.is(tok::percent))
3239 return HasExistingWhitespace();
3240 } else if (Style.isJson()) {
3241 if (Right.is(tok::colon))
3242 return false;
3243 } else if (Style.isCSharp()) {
3244 // Require spaces around '{' and before '}' unless they appear in
3245 // interpolated strings. Interpolated strings are merged into a single token
3246 // so cannot have spaces inserted by this function.
3247
3248 // No space between 'this' and '['
3249 if (Left.is(tok::kw_this) && Right.is(tok::l_square))
3250 return false;
3251
3252 // No space between 'new' and '('
3253 if (Left.is(tok::kw_new) && Right.is(tok::l_paren))
3254 return false;
3255
3256 // Space before { (including space within '{ {').
3257 if (Right.is(tok::l_brace))
3258 return true;
3259
3260 // Spaces inside braces.
3261 if (Left.is(tok::l_brace) && Right.isNot(tok::r_brace))
3262 return true;
3263
3264 if (Left.isNot(tok::l_brace) && Right.is(tok::r_brace))
3265 return true;
3266
3267 // Spaces around '=>'.
3268 if (Left.is(TT_FatArrow) || Right.is(TT_FatArrow))
3269 return true;
3270
3271 // No spaces around attribute target colons
3272 if (Left.is(TT_AttributeColon) || Right.is(TT_AttributeColon))
3273 return false;
3274
3275 // space between type and variable e.g. Dictionary<string,string> foo;
3276 if (Left.is(TT_TemplateCloser) && Right.is(TT_StartOfName))
3277 return true;
3278
3279 // spaces inside square brackets.
3280 if (Left.is(tok::l_square) || Right.is(tok::r_square))
3281 return Style.SpacesInSquareBrackets;
3282
3283 // No space before ? in nullable types.
3284 if (Right.is(TT_CSharpNullable))
3285 return false;
3286
3287 // No space before null forgiving '!'.
3288 if (Right.is(TT_NonNullAssertion))
3289 return false;
3290
3291 // No space between consecutive commas '[,,]'.
3292 if (Left.is(tok::comma) && Right.is(tok::comma))
3293 return false;
3294
3295 // space after var in `var (key, value)`
3296 if (Left.is(Keywords.kw_var) && Right.is(tok::l_paren))
3297 return true;
3298
3299 // space between keywords and paren e.g. "using ("
3300 if (Right.is(tok::l_paren))
3301 if (Left.isOneOf(tok::kw_using, Keywords.kw_async, Keywords.kw_when,
3302 Keywords.kw_lock))
3303 return Style.SpaceBeforeParens == FormatStyle::SBPO_ControlStatements ||
3304 spaceRequiredBeforeParens(Right);
3305
3306 // space between method modifier and opening parenthesis of a tuple return
3307 // type
3308 if (Left.isOneOf(tok::kw_public, tok::kw_private, tok::kw_protected,
3309 tok::kw_virtual, tok::kw_extern, tok::kw_static,
3310 Keywords.kw_internal, Keywords.kw_abstract,
3311 Keywords.kw_sealed, Keywords.kw_override,
3312 Keywords.kw_async, Keywords.kw_unsafe) &&
3313 Right.is(tok::l_paren))
3314 return true;
3315 } else if (Style.Language == FormatStyle::LK_JavaScript) {
3316 if (Left.is(TT_FatArrow))
3317 return true;
3318 // for await ( ...
3319 if (Right.is(tok::l_paren) && Left.is(Keywords.kw_await) && Left.Previous &&
3320 Left.Previous->is(tok::kw_for))
3321 return true;
3322 if (Left.is(Keywords.kw_async) && Right.is(tok::l_paren) &&
3323 Right.MatchingParen) {
3324 const FormatToken *Next = Right.MatchingParen->getNextNonComment();
3325 // An async arrow function, for example: `x = async () => foo();`,
3326 // as opposed to calling a function called async: `x = async();`
3327 if (Next && Next->is(TT_FatArrow))
3328 return true;
3329 }
3330 if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) ||
3331 (Right.is(TT_TemplateString) && Right.TokenText.startswith("}")))
3332 return false;
3333 // In tagged template literals ("html`bar baz`"), there is no space between
3334 // the tag identifier and the template string.
3335 if (Keywords.IsJavaScriptIdentifier(Left,
3336 /* AcceptIdentifierName= */ false) &&
3337 Right.is(TT_TemplateString))
3338 return false;
3339 if (Right.is(tok::star) &&
3340 Left.isOneOf(Keywords.kw_function, Keywords.kw_yield))
3341 return false;
3342 if (Right.isOneOf(tok::l_brace, tok::l_square) &&
3343 Left.isOneOf(Keywords.kw_function, Keywords.kw_yield,
3344 Keywords.kw_extends, Keywords.kw_implements))
3345 return true;
3346 if (Right.is(tok::l_paren)) {
3347 // JS methods can use some keywords as names (e.g. `delete()`).
3348 if (Line.MustBeDeclaration && Left.Tok.getIdentifierInfo())
3349 return false;
3350 // Valid JS method names can include keywords, e.g. `foo.delete()` or
3351 // `bar.instanceof()`. Recognize call positions by preceding period.
3352 if (Left.Previous && Left.Previous->is(tok::period) &&
3353 Left.Tok.getIdentifierInfo())
3354 return false;
3355 // Additional unary JavaScript operators that need a space after.
3356 if (Left.isOneOf(tok::kw_throw, Keywords.kw_await, Keywords.kw_typeof,
3357 tok::kw_void))
3358 return true;
3359 }
3360 // `foo as const;` casts into a const type.
3361 if (Left.endsSequence(tok::kw_const, Keywords.kw_as)) {
3362 return false;
3363 }
3364 if ((Left.isOneOf(Keywords.kw_let, Keywords.kw_var, Keywords.kw_in,
3365 tok::kw_const) ||
3366 // "of" is only a keyword if it appears after another identifier
3367 // (e.g. as "const x of y" in a for loop), or after a destructuring
3368 // operation (const [x, y] of z, const {a, b} of c).
3369 (Left.is(Keywords.kw_of) && Left.Previous &&
3370 (Left.Previous->Tok.is(tok::identifier) ||
3371 Left.Previous->isOneOf(tok::r_square, tok::r_brace)))) &&
3372 (!Left.Previous || !Left.Previous->is(tok::period)))
3373 return true;
3374 if (Left.isOneOf(tok::kw_for, Keywords.kw_as) && Left.Previous &&
3375 Left.Previous->is(tok::period) && Right.is(tok::l_paren))
3376 return false;
3377 if (Left.is(Keywords.kw_as) &&
3378 Right.isOneOf(tok::l_square, tok::l_brace, tok::l_paren))
3379 return true;
3380 if (Left.is(tok::kw_default) && Left.Previous &&
3381 Left.Previous->is(tok::kw_export))
3382 return true;
3383 if (Left.is(Keywords.kw_is) && Right.is(tok::l_brace))
3384 return true;
3385 if (Right.isOneOf(TT_JsTypeColon, TT_JsTypeOptionalQuestion))
3386 return false;
3387 if (Left.is(TT_JsTypeOperator) || Right.is(TT_JsTypeOperator))
3388 return false;
3389 if ((Left.is(tok::l_brace) || Right.is(tok::r_brace)) &&
3390 Line.First->isOneOf(Keywords.kw_import, tok::kw_export))
3391 return false;
3392 if (Left.is(tok::ellipsis))
3393 return false;
3394 if (Left.is(TT_TemplateCloser) &&
3395 !Right.isOneOf(tok::equal, tok::l_brace, tok::comma, tok::l_square,
3396 Keywords.kw_implements, Keywords.kw_extends))
3397 // Type assertions ('<type>expr') are not followed by whitespace. Other
3398 // locations that should have whitespace following are identified by the
3399 // above set of follower tokens.
3400 return false;
3401 if (Right.is(TT_NonNullAssertion))
3402 return false;
3403 if (Left.is(TT_NonNullAssertion) &&
3404 Right.isOneOf(Keywords.kw_as, Keywords.kw_in))
3405 return true; // "x! as string", "x! in y"
3406 } else if (Style.Language == FormatStyle::LK_Java) {
3407 if (Left.is(tok::r_square) && Right.is(tok::l_brace))
3408 return true;
3409 if (Left.is(Keywords.kw_synchronized) && Right.is(tok::l_paren))
3410 return Style.SpaceBeforeParens != FormatStyle::SBPO_Never;
3411 if ((Left.isOneOf(tok::kw_static, tok::kw_public, tok::kw_private,
3412 tok::kw_protected) ||
3413 Left.isOneOf(Keywords.kw_final, Keywords.kw_abstract,
3414 Keywords.kw_native)) &&
3415 Right.is(TT_TemplateOpener))
3416 return true;
3417 }
// Rules from here on run regardless of which language branch (if any) was
// entered above.
3418 if (Left.is(TT_ImplicitStringLiteral))
3419 return HasExistingWhitespace();
3420 if (Line.Type == LT_ObjCMethodDecl) {
3421 if (Left.is(TT_ObjCMethodSpecifier))
3422 return true;
3423 if (Left.is(tok::r_paren) && canBeObjCSelectorComponent(Right))
3424 // Don't space between ')' and <id> or ')' and 'new'. 'new' is not a
3425 // keyword in Objective-C, and '+ (instancetype)new;' is a standard class
3426 // method declaration.
3427 return false;
3428 }
3429 if (Line.Type == LT_ObjCProperty &&
3430 (Right.is(tok::equal) || Left.is(tok::equal)))
3431 return false;
3432
3433 if (Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow) ||
3434 Left.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow))
3435 return true;
3436 if (Right.is(TT_OverloadedOperatorLParen))
3437 return spaceRequiredBeforeParens(Right);
3438 if (Left.is(tok::comma))
3439 return true;
3440 if (Right.is(tok::comma))
3441 return false;
3442 if (Right.is(TT_ObjCBlockLParen))
3443 return true;
3444 if (Right.is(TT_CtorInitializerColon))
3445 return Style.SpaceBeforeCtorInitializerColon;
3446 if (Right.is(TT_InheritanceColon) && !Style.SpaceBeforeInheritanceColon)
3447 return false;
3448 if (Right.is(TT_RangeBasedForLoopColon) &&
3449 !Style.SpaceBeforeRangeBasedForLoopColon)
3450 return false;
3451 if (Left.is(TT_BitFieldColon))
3452 return Style.BitFieldColonSpacing == FormatStyle::BFCS_Both ||
3453 Style.BitFieldColonSpacing == FormatStyle::BFCS_After;
// Every remaining kind of ':' is decided inside this block; it always returns.
3454 if (Right.is(tok::colon)) {
3455 if (Line.First->isOneOf(tok::kw_default, tok::kw_case))
3456 return Style.SpaceBeforeCaseColon;
3457 if (!Right.getNextNonComment() || Right.getNextNonComment()->is(tok::semi))
3458 return false;
3459 if (Right.is(TT_ObjCMethodExpr))
3460 return false;
3461 if (Left.is(tok::question))
3462 return false;
3463 if (Right.is(TT_InlineASMColon) && Left.is(tok::coloncolon))
3464 return false;
3465 if (Right.is(TT_DictLiteral))
3466 return Style.SpacesInContainerLiterals;
3467 if (Right.is(TT_AttributeColon))
3468 return false;
3469 if (Right.is(TT_CSharpNamedArgumentColon))
3470 return false;
3471 if (Right.is(TT_BitFieldColon))
3472 return Style.BitFieldColonSpacing == FormatStyle::BFCS_Both ||
3473 Style.BitFieldColonSpacing == FormatStyle::BFCS_Before;
3474 return true;
3475 }
3476 // Do not merge "- -" into "--".
3477 if ((Left.isOneOf(tok::minus, tok::minusminus) &&
3478 Right.isOneOf(tok::minus, tok::minusminus)) ||
3479 (Left.isOneOf(tok::plus, tok::plusplus) &&
3480 Right.isOneOf(tok::plus, tok::plusplus)))
3481 return true;
3482 if (Left.is(TT_UnaryOperator)) {
3483 if (!Right.is(tok::l_paren)) {
3484 // The alternative operators for ~ and ! are "compl" and "not".
3485 // If they are used instead, we do not want to combine them with
3486 // the token to the right, unless that is a left paren.
3487 if (Left.is(tok::exclaim) && Left.TokenText == "not")
3488 return true;
3489 if (Left.is(tok::tilde) && Left.TokenText == "compl")
3490 return true;
3491 // Lambda captures allow for a lone &, so "&]" needs to be properly
3492 // handled.
3493 if (Left.is(tok::amp) && Right.is(tok::r_square))
3494 return Style.SpacesInSquareBrackets;
3495 }
3496 return (Style.SpaceAfterLogicalNot && Left.is(tok::exclaim)) ||
3497 Right.is(TT_BinaryOperator);
3498 }
3499
3500 // If the next token is a binary operator or a selector name, we have
3501 // incorrectly classified the parenthesis as a cast. FIXME: Detect correctly.
3502 if (Left.is(TT_CastRParen))
3503 return Style.SpaceAfterCStyleCast ||
3504 Right.isOneOf(TT_BinaryOperator, TT_SelectorName);
3505
// Resolves Style.SpacesInAngles: SIAS_Always forces a space, SIAS_Leave keeps
// a space only if HasExistingWhitespace() reports one, and any other setting
// yields no space.
3506 auto ShouldAddSpacesInAngles = [this, &HasExistingWhitespace]() {
3507 if (this->Style.SpacesInAngles == FormatStyle::SIAS_Always)
3508 return true;
3509 if (this->Style.SpacesInAngles == FormatStyle::SIAS_Leave)
3510 return HasExistingWhitespace();
3511 return false;
3512 };
3513
3514 if (Left.is(tok::greater) && Right.is(tok::greater)) {
3515 if (Style.Language == FormatStyle::LK_TextProto ||
3516 (Style.Language == FormatStyle::LK_Proto && Left.is(TT_DictLiteral)))
3517 return !Style.Cpp11BracedListStyle;
3518 return Right.is(TT_TemplateCloser) && Left.is(TT_TemplateCloser) &&
3519 ((Style.Standard < FormatStyle::LS_Cpp11) ||
3520 ShouldAddSpacesInAngles());
3521 }
3522 if (Right.isOneOf(tok::arrow, tok::arrowstar, tok::periodstar) ||
3523 Left.isOneOf(tok::arrow, tok::period, tok::arrowstar, tok::periodstar) ||
3524 (Right.is(tok::period) && Right.isNot(TT_DesignatedInitializerPeriod)))
3525 return false;
3526 if (!Style.SpaceBeforeAssignmentOperators && Left.isNot(TT_TemplateCloser) &&
3527 Right.getPrecedence() == prec::Assignment)
3528 return false;
3529 if (Style.Language == FormatStyle::LK_Java && Right.is(tok::coloncolon) &&
3530 (Left.is(tok::identifier) || Left.is(tok::kw_this)))
3531 return false;
3532 if (Right.is(tok::coloncolon) && Left.is(tok::identifier))
3533 // Generally don't remove existing spaces between an identifier and "::".
3534 // The identifier might actually be a macro name such as ALWAYS_INLINE. If
3535 // this turns out to be too lenient, add analysis of the identifier itself.
3536 return HasExistingWhitespace();
3537 if (Right.is(tok::coloncolon) &&
3538 !Left.isOneOf(tok::l_brace, tok::comment, tok::l_paren))
3539 // Put a space between < and :: in vector< ::std::string >
3540 return (Left.is(TT_TemplateOpener) &&
3541 ((Style.Standard < FormatStyle::LS_Cpp11) ||
3542 ShouldAddSpacesInAngles())) ||
3543 !(Left.isOneOf(tok::l_paren, tok::r_paren, tok::l_square,
3544 tok::kw___super, TT_TemplateOpener,
3545 TT_TemplateCloser)) ||
3546 (Left.is(tok::l_paren) && Style.SpacesInParentheses);
// Exactly one side is a template angle bracket (opener on the left XOR closer
// on the right).
3547 if ((Left.is(TT_TemplateOpener)) != (Right.is(TT_TemplateCloser)))
3548 return ShouldAddSpacesInAngles();
3549 // Space before TT_StructuredBindingLSquare.
3550 if (Right.is(TT_StructuredBindingLSquare))
3551 return !Left.isOneOf(tok::amp, tok::ampamp) ||
3552 getTokenReferenceAlignment(Left) != FormatStyle::PAS_Right;
3553 // Space before & or && following a TT_StructuredBindingLSquare.
3554 if (Right.Next && Right.Next->is(TT_StructuredBindingLSquare) &&
3555 Right.isOneOf(tok::amp, tok::ampamp))
3556 return getTokenReferenceAlignment(Right) != FormatStyle::PAS_Left;
3557 if ((Right.is(TT_BinaryOperator) && !Left.is(tok::l_paren)) ||
3558 (Left.isOneOf(TT_BinaryOperator, TT_ConditionalExpr) &&
3559 !Right.is(tok::r_paren)))
3560 return true;
3561 if (Left.is(TT_TemplateCloser) && Right.is(tok::l_paren) &&
3562 Right.isNot(TT_FunctionTypeLParen))
3563 return spaceRequiredBeforeParens(Right);
3564 if (Right.is(TT_TemplateOpener) && Left.is(tok::r_paren) &&
3565 Left.MatchingParen && Left.MatchingParen->is(TT_OverloadedOperatorLParen))
3566 return false;
3567 if (Right.is(tok::less) && Left.isNot(tok::l_paren) &&
3568 Line.startsWith(tok::hash))
3569 return true;
3570 if (Right.is(TT_TrailingUnaryOperator))
3571 return false;
3572 if (Left.is(TT_RegexLiteral))
3573 return false;
// No rule above matched: defer to the pairwise check.
3574 return spaceRequiredBetween(Line, Left, Right);
3575}
3576
3577// Returns 'true' if 'Tok' is a brace we'd want to break before in Allman style.
3578static bool isAllmanBrace(const FormatToken &Tok) {
3579 return Tok.is(tok::l_brace) && Tok.is(BK_Block) &&
3580 !Tok.isOneOf(TT_ObjCBlockLBrace, TT_LambdaLBrace, TT_DictLiteral);
3581}
3582
3583// Returns 'true' if 'Tok' is an function argument.
3584static bool IsFunctionArgument(const FormatToken &Tok) {
3585 return Tok.MatchingParen && Tok.MatchingParen->Next &&
3586 Tok.MatchingParen->Next->isOneOf(tok::comma, tok::r_paren);
3587}
3588
3589static bool
3590isItAnEmptyLambdaAllowed(const FormatToken &Tok,
3591 FormatStyle::ShortLambdaStyle ShortLambdaOption) {
3592 return Tok.Children.empty() && ShortLambdaOption != FormatStyle::SLS_None;
3593}
3594
3595static bool isAllmanLambdaBrace(const FormatToken &Tok) {
3596 return (Tok.is(tok::l_brace) && Tok.is(BK_Block) &&
3597 !Tok.isOneOf(TT_ObjCBlockLBrace, TT_DictLiteral));
3598}
3599
3600// Returns the first token on the line that is not a comment.
3601static const FormatToken *getFirstNonComment(const AnnotatedLine &Line) {
3602 const FormatToken *Next = Line.First;
3603 if (!Next)
3604 return Next;
3605 if (Next->is(tok::comment))
3606 Next = Next->getNextNonComment();
3607 return Next;
3608}
3609
// Decides whether a line break is mandatory between 'Right' and the token in
// front of it ('Right.Previous', referred to as 'Left' below).
//
// 'Right.Previous' is dereferenced unconditionally, so callers must only pass
// tokens that have a predecessor.
//
// The checks are evaluated in order and the first one that matches decides the
// result: language-specific rules selected from 'Style' come first, then JSON
// handling, then rules driven by individual style options and token types.
//
// Returns true if a break must be inserted before 'Right'; returns false if a
// rule explicitly forbids forcing a break or if no rule matched.
3610bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
3611 const FormatToken &Right) {
3612 const FormatToken &Left = *Right.Previous;
// More than one newline before 'Right' is kept as a break whenever the style
// permits keeping any empty lines at all.
3613 if (Right.NewlinesBefore > 1 && Style.MaxEmptyLinesToKeep > 0)
3614 return true;
3615
// Language-specific rules.
3616 if (Style.isCSharp()) {
3617 if (Right.is(TT_CSharpNamedArgumentColon) ||
3618 Left.is(TT_CSharpNamedArgumentColon))
3619 return false;
3620 if (Right.is(TT_CSharpGenericTypeConstraint))
3621 return true;
3622
3623 // Break after C# [...] and before public/protected/private/internal.
3624 if (Left.is(TT_AttributeSquare) && Left.is(tok::r_square) &&
3625 (Right.isAccessSpecifier(/*ColonRequired=*/false) ||
3626 Right.is(Keywords.kw_internal)))
3627 return true;
3628 // Break between ] and [ but only when there are really 2 attributes.
3629 if (Left.is(TT_AttributeSquare) && Right.is(TT_AttributeSquare) &&
3630 Left.is(tok::r_square) && Right.is(tok::l_square))
3631 return true;
3632
3633 } else if (Style.Language == FormatStyle::LK_JavaScript) {
3634 // FIXME: This might apply to other languages and token kinds.
3635 if (Right.is(tok::string_literal) && Left.is(tok::plus) && Left.Previous &&
3636 Left.Previous->is(tok::string_literal))
3637 return true;
3638 if (Left.is(TT_DictLiteral) && Left.is(tok::l_brace) && Line.Level == 0 &&
3639 Left.Previous && Left.Previous->is(tok::equal) &&
3640 Line.First->isOneOf(tok::identifier, Keywords.kw_import, tok::kw_export,
3641 tok::kw_const) &&
3642 // kw_var/kw_let are pseudo-tokens that are tok::identifier, so match
3643 // above.
3644 !Line.First->isOneOf(Keywords.kw_var, Keywords.kw_let))
3645 // Object literals on the top level of a file are treated as "enum-style".
3646 // Each key/value pair is put on a separate line, instead of bin-packing.
3647 return true;
3648 if (Left.is(tok::l_brace) && Line.Level == 0 &&
3649 (Line.startsWith(tok::kw_enum) ||
3650 Line.startsWith(tok::kw_const, tok::kw_enum) ||
3651 Line.startsWith(tok::kw_export, tok::kw_enum) ||
3652 Line.startsWith(tok::kw_export, tok::kw_const, tok::kw_enum)))
3653 // JavaScript top-level enum key/value pairs are put on separate lines
3654 // instead of bin-packing.
3655 return true;
3656 if (Right.is(tok::r_brace) && Left.is(tok::l_brace) && Left.Previous &&
3657 Left.Previous->is(TT_FatArrow)) {
3658 // JS arrow function (=> {...}).
3659 switch (Style.AllowShortLambdasOnASingleLine) {
3660 case FormatStyle::SLS_All:
3661 return false;
3662 case FormatStyle::SLS_None:
3663 return true;
3664 case FormatStyle::SLS_Empty:
3665 return !Left.Children.empty();
3666 case FormatStyle::SLS_Inline:
3667 // allow one-lining inline (e.g. in function call args) and empty arrow
3668 // functions.
3669 return (Left.NestingLevel == 0 && Line.Level == 0) &&
3670 !Left.Children.empty();
3671 }
// NOTE(review): the trailing `__builtin_unreachable();` on the next line looks
// like macro-expansion text from the analyzer report rather than source text.
// Confirm against the real file.
3672 llvm_unreachable("Unknown FormatStyle::ShortLambdaStyle enum")__builtin_unreachable();
3673 }
3674
3675 if (Right.is(tok::r_brace) && Left.is(tok::l_brace) &&
3676 !Left.Children.empty())
3677 // Support AllowShortFunctionsOnASingleLine for JavaScript.
3678 return Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_None ||
3679 Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Empty ||
3680 (Left.NestingLevel == 0 && Line.Level == 0 &&
3681 Style.AllowShortFunctionsOnASingleLine &
3682 FormatStyle::SFS_InlineOnly);
3683 } else if (Style.Language == FormatStyle::LK_Java) {
3684 if (Right.is(tok::plus) && Left.is(tok::string_literal) && Right.Next &&
3685 Right.Next->is(tok::string_literal))
3686 return true;
3687 } else if (Style.Language == FormatStyle::LK_Cpp ||
3688 Style.Language == FormatStyle::LK_ObjC ||
3689 Style.Language == FormatStyle::LK_Proto ||
3690 Style.Language == FormatStyle::LK_TableGen ||
3691 Style.Language == FormatStyle::LK_TextProto) {
3692 if (Left.isStringLiteral() && Right.isStringLiteral())
3693 return true;
3694 }
3695
3696 // Basic JSON newline processing.
3697 if (Style.isJson()) {
3698 // Always break after a JSON record opener.
3699 // {
3700 // }
3701 if (Left.is(TT_DictLiteral) && Left.is(tok::l_brace))
3702 return true;
3703 // Always break after a JSON array opener.
3704 // [
3705 // ]
3706 if (Left.is(TT_ArrayInitializerLSquare) && Left.is(tok::l_square) &&
3707 !Right.is(tok::r_square))
3708 return true;
3709 // Always break after successive entries.
3710 // 1,
3711 // 2
3712 if (Left.is(tok::comma))
3713 return true;
3714 }
3715
3716 // If the last token before a '}', ']', or ')' is a comma or a trailing
3717 // comment, the intention is to insert a line break after it in order to make
3718 // shuffling around entries easier. Import statements, especially in
3719 // JavaScript, can be an exception to this rule.
3720 if (Style.JavaScriptWrapImports || Line.Type != LT_ImportStatement) {
3721 const FormatToken *BeforeClosingBrace = nullptr;
3722 if ((Left.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||
3723 (Style.Language == FormatStyle::LK_JavaScript &&
3724 Left.is(tok::l_paren))) &&
3725 Left.isNot(BK_Block) && Left.MatchingParen)
3726 BeforeClosingBrace = Left.MatchingParen->Previous;
3727 else if (Right.MatchingParen &&
3728 (Right.MatchingParen->isOneOf(tok::l_brace,
3729 TT_ArrayInitializerLSquare) ||
3730 (Style.Language == FormatStyle::LK_JavaScript &&
3731 Right.MatchingParen->is(tok::l_paren))))
3732 BeforeClosingBrace = &Left;
3733 if (BeforeClosingBrace && (BeforeClosingBrace->is(tok::comma) ||
3734 BeforeClosingBrace->isTrailingComment()))
3735 return true;
3736 }
3737
3738 if (Right.is(tok::comment))
3739 return Left.isNot(BK_BracedInit) && Left.isNot(TT_CtorInitializerColon) &&
3740 (Right.NewlinesBefore > 0 && Right.HasUnescapedNewline);
3741 if (Left.isTrailingComment())
3742 return true;
// Right.Previous below is the same token as 'Left'.
3743 if (Right.Previous->IsUnterminatedLiteral)
3744 return true;
3745 if (Right.is(tok::lessless) && Right.Next &&
3746 Right.Previous->is(tok::string_literal) &&
3747 Right.Next->is(tok::string_literal))
3748 return true;
3749 // Can break after template<> declaration
3750 if (Right.Previous->ClosesTemplateDeclaration &&
3751 Right.Previous->MatchingParen &&
3752 Right.Previous->MatchingParen->NestingLevel == 0) {
3753 // Put concepts on the next line e.g.
3754 // template<typename T>
3755 // concept ...
3756 if (Right.is(tok::kw_concept))
3757 return Style.BreakBeforeConceptDeclarations;
3758 return (Style.AlwaysBreakTemplateDeclarations == FormatStyle::BTDS_Yes);
3759 }
3760 if (Style.PackConstructorInitializers == FormatStyle::PCIS_Never) {
3761 if (Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeColon &&
3762 (Left.is(TT_CtorInitializerComma) || Right.is(TT_CtorInitializerColon)))
3763 return true;
3764
3765 if (Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon &&
3766 Left.isOneOf(TT_CtorInitializerColon, TT_CtorInitializerComma))
3767 return true;
3768 }
3769 if (Style.PackConstructorInitializers < FormatStyle::PCIS_CurrentLine &&
3770 Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma &&
3771 Right.isOneOf(TT_CtorInitializerComma, TT_CtorInitializerColon))
3772 return true;
3773 // Break only if we have multiple inheritance.
3774 if (Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma &&
3775 Right.is(TT_InheritanceComma))
3776 return true;
3777 if (Style.BreakInheritanceList == FormatStyle::BILS_AfterComma &&
3778 Left.is(TT_InheritanceComma))
3779 return true;
3780 if (Right.is(tok::string_literal) && Right.TokenText.startswith("R\""))
3781 // Multiline raw string literals are special wrt. line breaks. The author
3782 // has made a deliberate choice and might have aligned the contents of the
3783 // string literal accordingly. Thus, we try keep existing line breaks.
3784 return Right.IsMultiline && Right.NewlinesBefore > 0;
3785 if ((Right.Previous->is(tok::l_brace) ||
3786 (Right.Previous->is(tok::less) && Right.Previous->Previous &&
3787 Right.Previous->Previous->is(tok::equal))) &&
3788 Right.NestingLevel == 1 && Style.Language == FormatStyle::LK_Proto) {
3789 // Don't put enums or option definitions onto single lines in protocol
3790 // buffers.
3791 return true;
3792 }
3793 if (Right.is(TT_InlineASMBrace))
3794 return Right.HasUnescapedNewline;
3795
// Brace wrapping for enum/class/struct/interface declarations. This block
// always returns once either neighbouring token is an Allman brace.
3796 if (isAllmanBrace(Left) || isAllmanBrace(Right)) {
3797 auto FirstNonComment = getFirstNonComment(Line);
3798 bool AccessSpecifier =
3799 FirstNonComment &&
3800 FirstNonComment->isOneOf(Keywords.kw_internal, tok::kw_public,
3801 tok::kw_private, tok::kw_protected);
3802
3803 if (Style.BraceWrapping.AfterEnum) {
3804 if (Line.startsWith(tok::kw_enum) ||
3805 Line.startsWith(tok::kw_typedef, tok::kw_enum))
3806 return true;
3807 // Ensure BraceWrapping for `public enum A {`.
3808 if (AccessSpecifier && FirstNonComment->Next &&
3809 FirstNonComment->Next->is(tok::kw_enum))
3810 return true;
3811 }
3812
3813 // Ensure BraceWrapping for `public interface A {`.
3814 if (Style.BraceWrapping.AfterClass &&
3815 ((AccessSpecifier && FirstNonComment->Next &&
3816 FirstNonComment->Next->is(Keywords.kw_interface)) ||
3817 Line.startsWith(Keywords.kw_interface)))
3818 return true;
3819
3820 return (Line.startsWith(tok::kw_class) && Style.BraceWrapping.AfterClass) ||
3821 (Line.startsWith(tok::kw_struct) && Style.BraceWrapping.AfterStruct);
3822 }
3823
3824 if (Left.is(TT_ObjCBlockLBrace) &&
3825 Style.AllowShortBlocksOnASingleLine == FormatStyle::SBS_Never)
3826 return true;
3827
3828 if (Left.is(TT_LambdaLBrace)) {
3829 if (IsFunctionArgument(Left) &&
3830 Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_Inline)
3831 return false;
3832
3833 if (Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_None ||
3834 Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_Inline ||
3835 (!Left.Children.empty() &&
3836 Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_Empty))
3837 return true;
3838 }
3839
3840 if (Style.BraceWrapping.BeforeLambdaBody && Right.is(TT_LambdaLBrace) &&
3841 Left.isOneOf(tok::star, tok::amp, tok::ampamp, TT_TemplateCloser)) {
3842 return true;
3843 }
3844
3845 // Put multiple Java annotation on a new line.
3846 if ((Style.Language == FormatStyle::LK_Java ||
3847 Style.Language == FormatStyle::LK_JavaScript) &&
3848 Left.is(TT_LeadingJavaAnnotation) &&
3849 Right.isNot(TT_LeadingJavaAnnotation) && Right.isNot(tok::l_paren) &&
3850 (Line.Last->is(tok::l_brace) || Style.BreakAfterJavaFieldAnnotations))
3851 return true;
3852
3853 if (Right.is(TT_ProtoExtensionLSquare))
3854 return true;
3855
3856 // In text proto instances if a submessage contains at least 2 entries and at
3857 // least one of them is a submessage, like A { ... B { ... } ... },
3858 // put all of the entries of A on separate lines by forcing the selector of
3859 // the submessage B to be put on a newline.
3860 //
3861 // Example: these can stay on one line:
3862 // a { scalar_1: 1 scalar_2: 2 }
3863 // a { b { key: value } }
3864 //
3865 // and these entries need to be on a new line even if putting them all in one
3866 // line is under the column limit:
3867 // a {
3868 // scalar: 1
3869 // b { key: value }
3870 // }
3871 //
3872 // We enforce this by breaking before a submessage field that has previous
3873 // siblings, *and* breaking before a field that follows a submessage field.
3874 //
3875 // Be careful to exclude the case [proto.ext] { ... } since the `]` is
3876 // the TT_SelectorName there, but we don't want to break inside the brackets.
3877 //
3878 // Another edge case is @submessage { key: value }, which is a common
3879 // substitution placeholder. In this case we want to keep `@` and `submessage`
3880 // together.
3881 //
3882 // We ensure elsewhere that extensions are always on their own line.
3883 if ((Style.Language == FormatStyle::LK_Proto ||
3884 Style.Language == FormatStyle::LK_TextProto) &&
3885 Right.is(TT_SelectorName) && !Right.is(tok::r_square) && Right.Next) {
3886 // Keep `@submessage` together in:
3887 // @submessage { key: value }
3888 if (Right.Previous && Right.Previous->is(tok::at))
3889 return false;
3890 // Look for the scope opener after selector in cases like:
3891 // selector { ...
3892 // selector: { ...
3893 // selector: @base { ...
3894 FormatToken *LBrace = Right.Next;
3895 if (LBrace && LBrace->is(tok::colon)) {
3896 LBrace = LBrace->Next;
3897 if (LBrace && LBrace->is(tok::at)) {
3898 LBrace = LBrace->Next;
3899 if (LBrace)
3900 LBrace = LBrace->Next;
3901 }
3902 }
3903 if (LBrace &&
3904 // The scope opener is one of {, [, <:
3905 // selector { ... }
3906 // selector [ ... ]
3907 // selector < ... >
3908 //
3909 // In case of selector { ... }, the l_brace is TT_DictLiteral.
3910 // In case of an empty selector {}, the l_brace is not TT_DictLiteral,
3911 // so we check for immediately following r_brace.
3912 ((LBrace->is(tok::l_brace) &&
3913 (LBrace->is(TT_DictLiteral) ||
3914 (LBrace->Next && LBrace->Next->is(tok::r_brace)))) ||
3915 LBrace->is(TT_ArrayInitializerLSquare) || LBrace->is(tok::less))) {
3916 // If Left.ParameterCount is 0, then this submessage entry is not the
3917 // first in its parent submessage, and we want to break before this entry.
3918 // If Left.ParameterCount is greater than 0, then its parent submessage
3919 // might contain 1 or more entries and we want to break before this entry
3920 // if it contains at least 2 entries. We deal with this case later by
3921 // detecting and breaking before the next entry in the parent submessage.
3922 if (Left.ParameterCount == 0)
3923 return true;
3924 // However, if this submessage is the first entry in its parent
3925 // submessage, Left.ParameterCount might be 1 in some cases.
3926 // We deal with this case later by detecting an entry
3927 // following a closing paren of this submessage.
3928 }
3929
3930 // If this is an entry immediately following a submessage, it will be
3931 // preceded by a closing paren of that submessage, like in:
3932 // left---. .---right
3933 // v v
3934 // sub: { ... } key: value
3935 // If there was a comment between `}` and `key` above, then `key` would be
3936 // put on a new line anyways.
3937 if (Left.isOneOf(tok::r_brace, tok::greater, tok::r_square))
3938 return true;
3939 }
3940
3941 // Deal with lambda arguments in C++ - we want consistent line breaks whether
3942 // they happen to be at arg0, arg1 or argN. The selection is a bit nuanced
3943 // as aggressive line breaks are placed when the lambda is not the last arg.
3944 if ((Style.Language == FormatStyle::LK_Cpp ||
3945 Style.Language == FormatStyle::LK_ObjC) &&
3946 Left.is(tok::l_paren) && Left.BlockParameterCount > 0 &&
3947 !Right.isOneOf(tok::l_paren, TT_LambdaLSquare)) {
3948 // Multiple lambdas in the same function call force line breaks.
3949 if (Left.BlockParameterCount > 1)
3950 return true;
3951
3952 // A lambda followed by another arg forces a line break.
3953 if (!Left.Role)
3954 return false;
3955 auto Comma = Left.Role->lastComma();
3956 if (!Comma)
3957 return false;
3958 auto Next = Comma->getNextNonComment();
3959 if (!Next)
3960 return false;
3961 if (!Next->isOneOf(TT_LambdaLSquare, tok::l_brace, tok::caret))
3962 return true;
3963 }
3964
// No rule demanded a break.
3965 return false;
3966}
3967
/// Decides whether a line break may be placed directly in front of \p Right,
/// i.e. between \c Right.Previous (called \c Left below) and \p Right.
///
/// The rules are evaluated from top to bottom and the first rule that matches
/// decides the result, so their order is significant. Language-specific rules
/// (C#, Java, JavaScript) run first, followed by rules that apply regardless of
/// the language. If no rule matches, the final return statement allows a break
/// only around a fixed set of tokens.
///
/// \param Line  The annotated line containing \p Right; its first token, its
///              line type and its multi-variable-declaration flag are read.
/// \param Right The token in front of which a break is considered. Its
///              \c Previous pointer is dereferenced unconditionally, so it
///              must not be null.
/// \returns \c true if breaking before \p Right is permitted.
3968bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
3969 const FormatToken &Right) {
3970 const FormatToken &Left = *Right.Previous;
3971 // Language-specific stuff.
3972 if (Style.isCSharp()) {
3973 if (Left.isOneOf(TT_CSharpNamedArgumentColon, TT_AttributeColon) ||
3974 Right.isOneOf(TT_CSharpNamedArgumentColon, TT_AttributeColon))
3975 return false;
3976 // Only break after commas for generic type constraints.
3977 if (Line.First->is(TT_CSharpGenericTypeConstraint))
3978 return Left.is(TT_CSharpGenericTypeConstraintComma);
3979 // Keep nullable operators attached to their identifiers.
3980 if (Right.is(TT_CSharpNullable)) {
3981 return false;
3982 }
3983 } else if (Style.Language == FormatStyle::LK_Java) {
3984 if (Left.isOneOf(Keywords.kw_throws, Keywords.kw_extends,
3985 Keywords.kw_implements))
3986 return false;
3987 if (Right.isOneOf(Keywords.kw_throws, Keywords.kw_extends,
3988 Keywords.kw_implements))
3989 return true;
3990 } else if (Style.Language == FormatStyle::LK_JavaScript) {
3991 const FormatToken *NonComment = Right.getPreviousNonComment();
3992 if (NonComment &&
3993 NonComment->isOneOf(
3994 tok::kw_return, Keywords.kw_yield, tok::kw_continue, tok::kw_break,
3995 tok::kw_throw, Keywords.kw_interface, Keywords.kw_type,
3996 tok::kw_static, tok::kw_public, tok::kw_private, tok::kw_protected,
3997 Keywords.kw_readonly, Keywords.kw_override, Keywords.kw_abstract,
3998 Keywords.kw_get, Keywords.kw_set, Keywords.kw_async,
3999 Keywords.kw_await))
4000 return false; // Otherwise automatic semicolon insertion would trigger.
4001 if (Right.NestingLevel == 0 &&
4002 (Left.Tok.getIdentifierInfo() ||
4003 Left.isOneOf(tok::r_square, tok::r_paren)) &&
4004 Right.isOneOf(tok::l_square, tok::l_paren))
4005 return false; // Otherwise automatic semicolon insertion would trigger.
4006 if (NonComment && NonComment->is(tok::identifier) &&
4007 NonComment->TokenText == "asserts")
4008 return false;
4009 if (Left.is(TT_FatArrow) && Right.is(tok::l_brace))
4010 return false;
4011 if (Left.is(TT_JsTypeColon))
4012 return true;
4013 // Don't wrap between "!" and ":" of a strict prop init ("field!: type;").
4014 if (Left.is(tok::exclaim) && Right.is(tok::colon))
4015 return false;
4016 // Look for is type annotations like:
4017 // function f(): a is B { ... }
4018 // Do not break before is in these cases.
4019 if (Right.is(Keywords.kw_is)) {
4020 const FormatToken *Next = Right.getNextNonComment();
4021 // If `is` is followed by a colon, it's likely that it's a dict key, so
4022 // ignore it for this check.
4023 // For example this is common in Polymer:
4024 // Polymer({
4025 // is: 'name',
4026 // ...
4027 // });
4028 if (!Next || !Next->is(tok::colon))
4029 return false;
4030 }
4031 if (Left.is(Keywords.kw_in))
4032 return Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None;
4033 if (Right.is(Keywords.kw_in))
4034 return Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None;
4035 if (Right.is(Keywords.kw_as))
4036 return false; // must not break before as in 'x as type' casts
4037 if (Right.isOneOf(Keywords.kw_extends, Keywords.kw_infer)) {
4038 // extends and infer can appear as keywords in conditional types:
4039 // https://www.typescriptlang.org/docs/handbook/release-notes/typescript-2-8.html#conditional-types
4040 // do not break before them, as the expressions are subject to ASI.
4041 return false;
4042 }
4043 if (Left.is(Keywords.kw_as))
4044 return true;
4045 if (Left.is(TT_NonNullAssertion))
4046 return true;
4047 if (Left.is(Keywords.kw_declare) &&
4048 Right.isOneOf(Keywords.kw_module, tok::kw_namespace,
4049 Keywords.kw_function, tok::kw_class, tok::kw_enum,
4050 Keywords.kw_interface, Keywords.kw_type, Keywords.kw_var,
4051 Keywords.kw_let, tok::kw_const))
4052 // See grammar for 'declare' statements at:
4053 // https://github.com/Microsoft/TypeScript/blob/master/doc/spec.md#A.10
4054 return false;
4055 if (Left.isOneOf(Keywords.kw_module, tok::kw_namespace) &&
4056 Right.isOneOf(tok::identifier, tok::string_literal))
4057 return false; // must not break in "module foo { ...}"
4058 if (Right.is(TT_TemplateString) && Right.closesScope())
4059 return false;
4060 // Don't split a tagged template literal: never break between the tag
4061 // identifier and the template string.
4062 if (Left.is(tok::identifier) && Right.is(TT_TemplateString)) {
4063 return false;
4064 }
4065 if (Left.is(TT_TemplateString) && Left.opensScope())
4066 return true;
4067 }
4068
// The rules from here on are not guarded by a language check.
4069 if (Left.is(tok::at))
4070 return false;
4071 if (Left.Tok.getObjCKeywordID() == tok::objc_interface)
4072 return false;
4073 if (Left.isOneOf(TT_JavaAnnotation, TT_LeadingJavaAnnotation))
4074 return !Right.is(tok::l_paren);
4075 if (Right.is(TT_PointerOrReference))
4076 return Line.IsMultiVariableDeclStmt ||
4077 (getTokenPointerOrReferenceAlignment(Right) ==
4078 FormatStyle::PAS_Right &&
4079 (!Right.Next || Right.Next->isNot(TT_FunctionDeclarationName)));
4080 if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) ||
4081 Right.is(tok::kw_operator))
4082 return true;
4083 if (Left.is(TT_PointerOrReference))
4084 return false;
4085 if (Right.isTrailingComment())
4086 // We rely on MustBreakBefore being set correctly here as we should not
4087 // change the "binding" behavior of a comment.
4088 // The first comment in a braced lists is always interpreted as belonging to
4089 // the first list element. Otherwise, it should be placed outside of the
4090 // list.
4091 return Left.is(BK_BracedInit) ||
4092 (Left.is(TT_CtorInitializerColon) &&
4093 Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon);
4094 if (Left.is(tok::question) && Right.is(tok::colon))
4095 return false;
4096 if (Right.is(TT_ConditionalExpr) || Right.is(tok::question))
4097 return Style.BreakBeforeTernaryOperators;
4098 if (Left.is(TT_ConditionalExpr) || Left.is(tok::question))
4099 return !Style.BreakBeforeTernaryOperators;
4100 if (Left.is(TT_InheritanceColon))
4101 return Style.BreakInheritanceList == FormatStyle::BILS_AfterColon;
4102 if (Right.is(TT_InheritanceColon))
4103 return Style.BreakInheritanceList != FormatStyle::BILS_AfterColon;
4104 if (Right.is(TT_ObjCMethodExpr) && !Right.is(tok::r_square) &&
4105 Left.isNot(TT_SelectorName))
4106 return true;
4107
4108 if (Right.is(tok::colon) &&
4109 !Right.isOneOf(TT_CtorInitializerColon, TT_InlineASMColon))
4110 return false;
4111 if (Left.is(tok::colon) && Left.isOneOf(TT_DictLiteral, TT_ObjCMethodExpr)) {
4112 if (Style.Language == FormatStyle::LK_Proto ||
4113 Style.Language == FormatStyle::LK_TextProto) {
4114 if (!Style.AlwaysBreakBeforeMultilineStrings && Right.isStringLiteral())
4115 return false;
4116 // Prevent cases like:
4117 //
4118 // submessage:
4119 // { key: valueeeeeeeeeeee }
4120 //
4121 // when the snippet does not fit into one line.
4122 // Prefer:
4123 //
4124 // submessage: {
4125 // key: valueeeeeeeeeeee
4126 // }
4127 //
4128 // instead, even if it is longer by one line.
4129 //
4130 // Note that this allows the "{" to go over the column limit
4131 // when the column limit is just between ":" and "{", but that does
4132 // not happen too often and alternative formattings in this case are
4133 // not much better.
4134 //
4135 // The code covers the cases:
4136 //
4137 // submessage: { ... }
4138 // submessage: < ... >
4139 // repeated: [ ... ]
4140 if (((Right.is(tok::l_brace) || Right.is(tok::less)) &&
4141 Right.is(TT_DictLiteral)) ||
4142 Right.is(TT_ArrayInitializerLSquare))
4143 return false;
4144 }
4145 return true;
4146 }
4147 if (Right.is(tok::r_square) && Right.MatchingParen &&
4148 Right.MatchingParen->is(TT_ProtoExtensionLSquare))
4149 return false;
4150 if (Right.is(TT_SelectorName) || (Right.is(tok::identifier) && Right.Next &&
4151 Right.Next->is(TT_ObjCMethodExpr)))
4152 return Left.isNot(tok::period); // FIXME: Properly parse ObjC calls.
4153 if (Left.is(tok::r_paren) && Line.Type == LT_ObjCProperty)
4154 return true;
4155 if (Left.ClosesTemplateDeclaration || Left.is(TT_FunctionAnnotationRParen))
4156 return true;
4157 if (Right.isOneOf(TT_RangeBasedForLoopColon, TT_OverloadedOperatorLParen,
4158 TT_OverloadedOperator))
4159 return false;
4160 if (Left.is(TT_RangeBasedForLoopColon))
4161 return true;
4162 if (Right.is(TT_RangeBasedForLoopColon))
4163 return false;
4164 if (Left.is(TT_TemplateCloser) && Right.is(TT_TemplateOpener))
4165 return true;
4166 if (Left.isOneOf(TT_TemplateCloser, TT_UnaryOperator) ||
4167 Left.is(tok::kw_operator))
4168 return false;
4169 if (Left.is(tok::equal) && !Right.isOneOf(tok::kw_default, tok::kw_delete) &&
4170 Line.Type == LT_VirtualFunctionDecl && Left.NestingLevel == 0)
4171 return false;
4172 if (Left.is(tok::equal) && Right.is(tok::l_brace) &&
4173 !Style.Cpp11BracedListStyle)
4174 return false;
4175 if (Left.is(tok::l_paren) &&
4176 Left.isOneOf(TT_AttributeParen, TT_TypeDeclarationParen))
4177 return false;
4178 if (Left.is(tok::l_paren) && Left.Previous &&
4179 (Left.Previous->isOneOf(TT_BinaryOperator, TT_CastRParen)))
4180 return false;
4181 if (Right.is(TT_ImplicitStringLiteral))
4182 return false;
4183
4184 if (Right.is(tok::r_paren) || Right.is(TT_TemplateCloser))
4185 return false;
4186 if (Right.is(tok::r_square) && Right.MatchingParen &&
4187 Right.MatchingParen->is(TT_LambdaLSquare))
4188 return false;
4189
4190 // We only break before r_brace if there was a corresponding break before
4191 // the l_brace, which is tracked by BreakBeforeClosingBrace.
4192 if (Right.is(tok::r_brace))
4193 return Right.MatchingParen && Right.MatchingParen->is(BK_Block);
4194
4195 // Allow breaking after a trailing annotation, e.g. after a method
4196 // declaration.
4197 if (Left.is(TT_TrailingAnnotation))
4198 return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal, tok::l_paren,
4199 tok::less, tok::coloncolon);
4200
4201 if (Right.is(tok::kw___attribute) ||
4202 (Right.is(tok::l_square) && Right.is(TT_AttributeSquare)))
4203 return !Left.is(TT_AttributeSquare);
4204
4205 if (Left.is(tok::identifier) && Right.is(tok::string_literal))
4206 return true;
4207
4208 if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral))
4209 return true;
4210
4211 if (Left.is(TT_CtorInitializerColon))
4212 return Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon;
4213 if (Right.is(TT_CtorInitializerColon))
4214 return Style.BreakConstructorInitializers != FormatStyle::BCIS_AfterColon;
4215 if (Left.is(TT_CtorInitializerComma) &&
4216 Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma)
4217 return false;
4218 if (Right.is(TT_CtorInitializerComma) &&
4219 Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma)
4220 return true;
4221 if (Left.is(TT_InheritanceComma) &&
4222 Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma)
4223 return false;
4224 if (Right.is(TT_InheritanceComma) &&
4225 Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma)
4226 return true;
4227 if ((Left.is(tok::greater) && Right.is(tok::greater)) ||
4228 (Left.is(tok::less) && Right.is(tok::less)))
4229 return false;
4230 if (Right.is(TT_BinaryOperator) &&
4231 Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None &&
4232 (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_All ||
4233 Right.getPrecedence() != prec::Assignment))
4234 return true;
4235 if (Left.is(TT_ArrayInitializerLSquare))
4236 return true;
4237 if (Right.is(tok::kw_typename) && Left.isNot(tok::kw_const))
4238 return true;
4239 if ((Left.isBinaryOperator() || Left.is(TT_BinaryOperator)) &&
4240 !Left.isOneOf(tok::arrowstar, tok::lessless) &&
4241 Style.BreakBeforeBinaryOperators != FormatStyle::BOS_All &&
4242 (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None ||
4243 Left.getPrecedence() == prec::Assignment))
4244 return true;
4245 if ((Left.is(TT_AttributeSquare) && Right.is(tok::l_square)) ||
4246 (Left.is(tok::r_square) && Right.is(TT_AttributeSquare)))
4247 return false;
4248
4249 auto ShortLambdaOption = Style.AllowShortLambdasOnASingleLine;
4250 if (Style.BraceWrapping.BeforeLambdaBody && Right.is(TT_LambdaLBrace)) {
4251 if (isAllmanLambdaBrace(Left))
4252 return !isItAnEmptyLambdaAllowed(Left, ShortLambdaOption);
4253 if (isAllmanLambdaBrace(Right))
4254 return !isItAnEmptyLambdaAllowed(Right, ShortLambdaOption);
4255 }
4256
// No rule above matched: a break is allowed only in the situations listed
// in the expression below.
4257 return Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace,
4258 tok::kw_class, tok::kw_struct, tok::comment) ||
4259 Right.isMemberAccess() ||
4260 Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow, tok::lessless,
4261 tok::colon, tok::l_square, tok::at) ||
4262 (Left.is(tok::r_paren) &&
4263 Right.isOneOf(tok::identifier, tok::kw_const)) ||
4264 (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) ||
4265 (Left.is(TT_TemplateOpener) && !Right.is(TT_TemplateCloser));
4266}
4267
4268void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) {
4269 llvm::errs() << "AnnotatedTokens(L=" << Line.Level << "):\n";
4270 const FormatToken *Tok = Line.First;
4271 while (Tok) {
4272 llvm::errs() << " M=" << Tok->MustBreakBefore
4273 << " C=" << Tok->CanBreakBefore
4274 << " T=" << getTokenTypeName(Tok->getType())
4275 << " S=" << Tok->SpacesRequiredBefore
4276 << " F=" << Tok->Finalized << " B=" << Tok->BlockParameterCount
4277 << " BK=" << Tok->getBlockKind() << " P=" << Tok->SplitPenalty
4278 << " Name=" << Tok->Tok.getName() << " L=" << Tok->TotalLength
4279 << " PPK=" << Tok->getPackingKind() << " FakeLParens=";
4280 for (unsigned i = 0, e = Tok->FakeLParens.size(); i != e; ++i)
4281 llvm::errs() << Tok->FakeLParens[i] << "/";
4282 llvm::errs() << " FakeRParens=" << Tok->FakeRParens;
4283 llvm::errs() << " II=" << Tok->Tok.getIdentifierInfo();
4284 llvm::errs() << " Text='" << Tok->TokenText << "'\n";
4285 if (!Tok->Next)
4286 assert(Tok == Line.Last)(static_cast<void> (0));
4287 Tok = Tok->Next;
4288 }
4289 llvm::errs() << "----\n";
4290}
4291
4292FormatStyle::PointerAlignmentStyle
4293TokenAnnotator::getTokenReferenceAlignment(const FormatToken &Reference) {
4294 assert(Reference.isOneOf(tok::amp, tok::ampamp))(static_cast<void> (0));
4295 switch (Style.ReferenceAlignment) {
4296 case FormatStyle::RAS_Pointer:
4297 return Style.PointerAlignment;
4298 case FormatStyle::RAS_Left:
4299 return FormatStyle::PAS_Left;
4300 case FormatStyle::RAS_Right:
4301 return FormatStyle::PAS_Right;
4302 case FormatStyle::RAS_Middle:
4303 return FormatStyle::PAS_Middle;
4304 }
4305 assert(0)(static_cast<void> (0)); //"Unhandled value of ReferenceAlignment"
4306 return Style.PointerAlignment;
4307}
4308
4309FormatStyle::PointerAlignmentStyle
4310TokenAnnotator::getTokenPointerOrReferenceAlignment(
4311 const FormatToken &PointerOrReference) {
4312 if (PointerOrReference.isOneOf(tok::amp, tok::ampamp)) {
4313 switch (Style.ReferenceAlignment) {
4314 case FormatStyle::RAS_Pointer:
4315 return Style.PointerAlignment;
4316 case FormatStyle::RAS_Left:
4317 return FormatStyle::PAS_Left;
4318 case FormatStyle::RAS_Right:
4319 return FormatStyle::PAS_Right;
4320 case FormatStyle::RAS_Middle:
4321 return FormatStyle::PAS_Middle;
4322 }
4323 }
4324 assert(PointerOrReference.is(tok::star))(static_cast<void> (0));
4325 return Style.PointerAlignment;
4326}
4327
4328} // namespace format
4329} // namespace clang

/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/clang/lib/Format/FormatToken.h

1//===--- FormatToken.h - Format C++ code ------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file contains the declaration of the FormatToken, a wrapper
11/// around Token with additional information related to formatting.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKEN_H
16#define LLVM_CLANG_LIB_FORMAT_FORMATTOKEN_H
17
18#include "clang/Basic/IdentifierTable.h"
19#include "clang/Basic/OperatorPrecedence.h"
20#include "clang/Format/Format.h"
21#include "clang/Lex/Lexer.h"
22#include <memory>
23#include <unordered_set>
24
25namespace clang {
26namespace format {
27
/// X-macro listing every token type exactly once, in enumerator order.
/// Define \c TYPE(X) before expanding \c LIST_TOKEN_TYPES and undefine it
/// afterwards.
#define LIST_TOKEN_TYPES \
  TYPE(ArrayInitializerLSquare) \
  TYPE(ArraySubscriptLSquare) \
  TYPE(AttributeColon) \
  TYPE(AttributeMacro) \
  TYPE(AttributeParen) \
  TYPE(AttributeSquare) \
  TYPE(BinaryOperator) \
  TYPE(BitFieldColon) \
  TYPE(BlockComment) \
  TYPE(CastRParen) \
  TYPE(ConditionalExpr) \
  TYPE(ConflictAlternative) \
  TYPE(ConflictEnd) \
  TYPE(ConflictStart) \
  TYPE(ConstraintJunctions) \
  TYPE(CtorInitializerColon) \
  TYPE(CtorInitializerComma) \
  TYPE(DesignatedInitializerLSquare) \
  TYPE(DesignatedInitializerPeriod) \
  TYPE(DictLiteral) \
  TYPE(FatArrow) \
  TYPE(ForEachMacro) \
  TYPE(FunctionAnnotationRParen) \
  TYPE(FunctionDeclarationName) \
  TYPE(FunctionLBrace) \
  TYPE(FunctionTypeLParen) \
  TYPE(IfMacro) \
  TYPE(ImplicitStringLiteral) \
  TYPE(InheritanceColon) \
  TYPE(InheritanceComma) \
  TYPE(InlineASMBrace) \
  TYPE(InlineASMColon) \
  TYPE(InlineASMSymbolicNameLSquare) \
  TYPE(JavaAnnotation) \
  TYPE(JsComputedPropertyName) \
  TYPE(JsExponentiation) \
  TYPE(JsExponentiationEqual) \
  TYPE(JsPipePipeEqual) \
  TYPE(JsPrivateIdentifier) \
  TYPE(JsTypeColon) \
  TYPE(JsTypeOperator) \
  TYPE(JsTypeOptionalQuestion) \
  TYPE(JsAndAndEqual) \
  TYPE(LambdaArrow) \
  TYPE(LambdaLBrace) \
  TYPE(LambdaLSquare) \
  TYPE(LeadingJavaAnnotation) \
  TYPE(LineComment) \
  TYPE(MacroBlockBegin) \
  TYPE(MacroBlockEnd) \
  TYPE(NamespaceMacro) \
  TYPE(NonNullAssertion) \
  TYPE(NullCoalescingEqual) \
  TYPE(NullCoalescingOperator) \
  TYPE(NullPropagatingOperator) \
  TYPE(ObjCBlockLBrace) \
  TYPE(ObjCBlockLParen) \
  TYPE(ObjCDecl) \
  TYPE(ObjCForIn) \
  TYPE(ObjCMethodExpr) \
  TYPE(ObjCMethodSpecifier) \
  TYPE(ObjCProperty) \
  TYPE(ObjCStringLiteral) \
  TYPE(OverloadedOperator) \
  TYPE(OverloadedOperatorLParen) \
  TYPE(PointerOrReference) \
  TYPE(PureVirtualSpecifier) \
  TYPE(RangeBasedForLoopColon) \
  TYPE(RegexLiteral) \
  TYPE(SelectorName) \
  TYPE(StartOfName) \
  TYPE(StatementAttributeLikeMacro) \
  TYPE(StatementMacro) \
  TYPE(StructuredBindingLSquare) \
  TYPE(TemplateCloser) \
  TYPE(TemplateOpener) \
  TYPE(TemplateString) \
  TYPE(ProtoExtensionLSquare) \
  TYPE(TrailingAnnotation) \
  TYPE(TrailingReturnArrow) \
  TYPE(TrailingUnaryOperator) \
  TYPE(TypeDeclarationParen) \
  TYPE(TypenameMacro) \
  TYPE(UnaryOperator) \
  TYPE(UntouchableMacroFunc) \
  TYPE(CSharpStringLiteral) \
  TYPE(CSharpNamedArgumentColon) \
  TYPE(CSharpNullable) \
  TYPE(CSharpNullConditionalLSquare) \
  TYPE(CSharpGenericTypeConstraint) \
  TYPE(CSharpGenericTypeConstraintColon) \
  TYPE(CSharpGenericTypeConstraintComma) \
  TYPE(Unknown)

/// Determines the semantic type of a syntactic token, e.g. whether "<" is a
/// template opener or binary operator.
///
/// One \c TT_ enumerator is generated per entry of \c LIST_TOKEN_TYPES, in list
/// order starting at 0; \c NUM_TOKEN_TYPES follows the last entry and therefore
/// equals the number of token types.
enum TokenType : uint8_t {
#define TYPE(X) TT_##X,
  LIST_TOKEN_TYPES
#undef TYPE
      NUM_TOKEN_TYPES
};
131
132/// Determines the name of a token type.
133const char *getTokenTypeName(TokenType Type);
134
/// Represents what type of block a set of braces opens.
///
/// \c BK_Unknown is the kind a default-constructed \c FormatToken starts with.
/// \c FormatToken stores the kind in a two-bit field, so the enumerator values
/// must stay within 0..3.
enum BraceBlockKind {
  BK_Unknown,
  BK_Block,
  BK_BracedInit
};
137
/// The packing kind of a function's parameters.
///
/// \c PPK_Inconclusive is the kind a default-constructed \c FormatToken starts
/// with. \c FormatToken stores the kind in a two-bit field, so the enumerator
/// values must stay within 0..3.
enum ParameterPackingKind {
  PPK_BinPacked,
  PPK_OnePerLine,
  PPK_Inconclusive
};
140
/// The formatting decision recorded for a token once it has been made.
///
/// \c FD_Unformatted is the value a default-constructed \c FormatToken starts
/// with. \c FormatToken stores the decision in a two-bit field, so the
/// enumerator values must stay within 0..3.
enum FormatDecision {
  FD_Unformatted,
  FD_Continue,
  FD_Break
};
142
/// Roles a token can take in a configured macro expansion.
enum MacroRole {
  /// The token was expanded from a macro argument when formatting the expanded
  /// token sequence.
  MR_ExpandedArg,
  /// The token is part of a macro argument that was previously formatted as
  /// expansion when formatting the unexpanded macro call.
  MR_UnexpandedArg,
  /// The token was expanded from a macro definition, and is not visible as part
  /// of the macro call.
  MR_Hidden,
};
155
156struct FormatToken;
157
158/// Contains information on the token's role in a macro expansion.
159///
160/// Given the following definitions:
161/// A(X) = [ X ]
162/// B(X) = < X >
163/// C(X) = X
164///
165/// Consider the macro call:
166/// A({B(C(C(x)))}) -> [{<x>}]
167///
168/// In this case, the tokens of the unexpanded macro call will have the
169/// following relevant entries in their macro context (note that formatting
170/// the unexpanded macro call happens *after* formatting the expanded macro
171/// call):
172/// A( { B( C( C(x) ) ) } )
173/// Role: NN U NN NN NNUN N N U N (N=None, U=UnexpandedArg)
174///
175/// [ { < x > } ]
176/// Role: H E H E H E H (H=Hidden, E=ExpandedArg)
177/// ExpandedFrom[0]: A A A A A A A
178/// ExpandedFrom[1]: B B B
179/// ExpandedFrom[2]: C
180/// ExpandedFrom[3]: C
181/// StartOfExpansion: 1 0 1 2 0 0 0
182/// EndOfExpansion: 0 0 0 2 1 0 1
183struct MacroExpansion {
184 MacroExpansion(MacroRole Role) : Role(Role) {}
185
186 /// The token's role in the macro expansion.
187 /// When formatting an expanded macro, all tokens that are part of macro
188 /// arguments will be MR_ExpandedArg, while all tokens that are not visible in
189 /// the macro call will be MR_Hidden.
190 /// When formatting an unexpanded macro call, all tokens that are part of
191 /// macro arguments will be MR_UnexpandedArg.
192 MacroRole Role;
193
194 /// The stack of macro call identifier tokens this token was expanded from.
195 llvm::SmallVector<FormatToken *, 1> ExpandedFrom;
196
197 /// The number of expansions of which this macro is the first entry.
198 unsigned StartOfExpansion = 0;
199
200 /// The number of currently open expansions in \c ExpandedFrom this macro is
201 /// the last token in.
202 unsigned EndOfExpansion = 0;
203};
204
205class TokenRole;
206class AnnotatedLine;
207
208/// A wrapper around a \c Token storing information about the
209/// whitespace characters preceding it.
210struct FormatToken {
211 FormatToken()
212 : HasUnescapedNewline(false), IsMultiline(false), IsFirst(false),
213 MustBreakBefore(false), IsUnterminatedLiteral(false),
214 CanBreakBefore(false), ClosesTemplateDeclaration(false),
215 StartsBinaryExpression(false), EndsBinaryExpression(false),
216 PartOfMultiVariableDeclStmt(false), ContinuesLineCommentSection(false),
217 Finalized(false), BlockKind(BK_Unknown), Decision(FD_Unformatted),
218 PackingKind(PPK_Inconclusive), Type(TT_Unknown) {}
219
220 /// The \c Token.
221 Token Tok;
222
223 /// The raw text of the token.
224 ///
225 /// Contains the raw token text without leading whitespace and without leading
226 /// escaped newlines.
227 StringRef TokenText;
228
229 /// A token can have a special role that can carry extra information
230 /// about the token's formatting.
231 /// FIXME: Make FormatToken for parsing and AnnotatedToken two different
232 /// classes and make this a unique_ptr in the AnnotatedToken class.
233 std::shared_ptr<TokenRole> Role;
234
235 /// The range of the whitespace immediately preceding the \c Token.
236 SourceRange WhitespaceRange;
237
238 /// Whether there is at least one unescaped newline before the \c
239 /// Token.
240 unsigned HasUnescapedNewline : 1;
241
242 /// Whether the token text contains newlines (escaped or not).
243 unsigned IsMultiline : 1;
244
245 /// Indicates that this is the first token of the file.
246 unsigned IsFirst : 1;
247
248 /// Whether there must be a line break before this token.
249 ///
250 /// This happens for example when a preprocessor directive ended directly
251 /// before the token.
252 unsigned MustBreakBefore : 1;
253
254 /// Set to \c true if this token is an unterminated literal.
255 unsigned IsUnterminatedLiteral : 1;
256
257 /// \c true if it is allowed to break before this token.
258 unsigned CanBreakBefore : 1;
259
260 /// \c true if this is the ">" of "template<..>".
261 unsigned ClosesTemplateDeclaration : 1;
262
263 /// \c true if this token starts a binary expression, i.e. has at least
264 /// one fake l_paren with a precedence greater than prec::Unknown.
265 unsigned StartsBinaryExpression : 1;
266 /// \c true if this token ends a binary expression.
267 unsigned EndsBinaryExpression : 1;
268
269 /// Is this token part of a \c DeclStmt defining multiple variables?
270 ///
271 /// Only set if \c Type == \c TT_StartOfName.
272 unsigned PartOfMultiVariableDeclStmt : 1;
273
274 /// Does this line comment continue a line comment section?
275 ///
276 /// Only set to true if \c Type == \c TT_LineComment.
277 unsigned ContinuesLineCommentSection : 1;
278
279 /// If \c true, this token has been fully formatted (indented and
280 /// potentially re-formatted inside), and we do not allow further formatting
281 /// changes.
282 unsigned Finalized : 1;
283
284private:
285 /// Contains the kind of block if this token is a brace.
286 unsigned BlockKind : 2;
287
288public:
289 BraceBlockKind getBlockKind() const {
290 return static_cast<BraceBlockKind>(BlockKind);
291 }
292 void setBlockKind(BraceBlockKind BBK) {
293 BlockKind = BBK;
294 assert(getBlockKind() == BBK && "BraceBlockKind overflow!")(static_cast<void> (0));
295 }
296
297private:
298 /// Stores the formatting decision for the token once it was made.
299 unsigned Decision : 2;
300
301public:
302 FormatDecision getDecision() const {
303 return static_cast<FormatDecision>(Decision);
304 }
305 void setDecision(FormatDecision D) {
306 Decision = D;
307 assert(getDecision() == D && "FormatDecision overflow!")(static_cast<void> (0));
308 }
309
310private:
311 /// If this is an opening parenthesis, how are the parameters packed?
312 unsigned PackingKind : 2;
313
314public:
315 ParameterPackingKind getPackingKind() const {
316 return static_cast<ParameterPackingKind>(PackingKind);
317 }
318 void setPackingKind(ParameterPackingKind K) {
319 PackingKind = K;
320 assert(getPackingKind() == K && "ParameterPackingKind overflow!")(static_cast<void> (0));
321 }
322
323private:
324 TokenType Type;
325
326public:
327 /// Returns the token's type, e.g. whether "<" is a template opener or
328 /// binary operator.
329 TokenType getType() const { return Type; }
330 void setType(TokenType T) { Type = T; }
331
332 /// The number of newlines immediately before the \c Token.
333 ///
334 /// This can be used to determine what the user wrote in the original code
335 /// and thereby e.g. leave an empty line between two function definitions.
336 unsigned NewlinesBefore = 0;
337
338 /// The offset just past the last '\n' in this token's leading
339 /// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'.
340 unsigned LastNewlineOffset = 0;
341
342 /// The width of the non-whitespace parts of the token (or its first
343 /// line for multi-line tokens) in columns.
344 /// We need this to correctly measure number of columns a token spans.
345 unsigned ColumnWidth = 0;
346
347 /// Contains the width in columns of the last line of a multi-line
348 /// token.
349 unsigned LastLineColumnWidth = 0;
350
351 /// The number of spaces that should be inserted before this token.
352 unsigned SpacesRequiredBefore = 0;
353
354 /// Number of parameters, if this is "(", "[" or "<".
355 unsigned ParameterCount = 0;
356
357 /// Number of parameters that are nested blocks,
358 /// if this is "(", "[" or "<".
359 unsigned BlockParameterCount = 0;
360
361 /// If this is a bracket ("<", "(", "[" or "{"), contains the kind of
362 /// the surrounding bracket.
363 tok::TokenKind ParentBracket = tok::unknown;
364
365 /// The total length of the unwrapped line up to and including this
366 /// token.
367 unsigned TotalLength = 0;
368
369 /// The original 0-based column of this token, including expanded tabs.
370 /// The configured TabWidth is used as tab width.
371 unsigned OriginalColumn = 0;
372
373 /// The length of following tokens until the next natural split point,
374 /// or the next token that can be broken.
375 unsigned UnbreakableTailLength = 0;
376
377 // FIXME: Come up with a 'cleaner' concept.
378 /// The binding strength of a token. This is a combined value of
379 /// operator precedence, parenthesis nesting, etc.
380 unsigned BindingStrength = 0;
381
382 /// The nesting level of this token, i.e. the number of surrounding (),
383 /// [], {} or <>.
384 unsigned NestingLevel = 0;
385
386 /// The indent level of this token. Copied from the surrounding line.
387 unsigned IndentLevel = 0;
388
389 /// Penalty for inserting a line break before this token.
390 unsigned SplitPenalty = 0;
391
392 /// If this is the first ObjC selector name in an ObjC method
393 /// definition or call, this contains the length of the longest name.
394 ///
395 /// This being set to 0 means that the selectors should not be colon-aligned,
396 /// e.g. because several of them are block-type.
397 unsigned LongestObjCSelectorName = 0;
398
399 /// If this is the first ObjC selector name in an ObjC method
400 /// definition or call, this contains the number of parts that the whole
401 /// selector consists of.
402 unsigned ObjCSelectorNameParts = 0;
403
404 /// The 0-based index of the parameter/argument. For ObjC it is set
405 /// for the selector name token.
406 /// For now calculated only for ObjC.
407 unsigned ParameterIndex = 0;
408
409 /// Stores the number of required fake parentheses and the
410 /// corresponding operator precedence.
411 ///
412 /// If multiple fake parentheses start at a token, this vector stores them in
413 /// reverse order, i.e. inner fake parenthesis first.
414 SmallVector<prec::Level, 4> FakeLParens;
415 /// Insert this many fake ) after this token for correct indentation.
416 unsigned FakeRParens = 0;
417
418 /// If this is an operator (or "."/"->") in a sequence of operators
419 /// with the same precedence, contains the 0-based operator index.
420 unsigned OperatorIndex = 0;
421
422 /// If this is an operator (or "."/"->") in a sequence of operators
423 /// with the same precedence, points to the next operator.
424 FormatToken *NextOperator = nullptr;
425
426 /// If this is a bracket, this points to the matching one.
427 FormatToken *MatchingParen = nullptr;
428
429 /// The previous token in the unwrapped line.
430 FormatToken *Previous = nullptr;
431
432 /// The next token in the unwrapped line.
433 FormatToken *Next = nullptr;
434
435 /// The first token in set of column elements.
436 bool StartsColumn = false;
437
438 /// This notes the start of the line of an array initializer.
439 bool ArrayInitializerLineStart = false;
440
441 /// This starts an array initializer.
442 bool IsArrayInitializer = false;
443
444 /// If this token starts a block, this contains all the unwrapped lines
445 /// in it.
446 SmallVector<AnnotatedLine *, 1> Children;
447
448 // Contains all attributes related to how this token takes part
449 // in a configured macro expansion.
450 llvm::Optional<MacroExpansion> MacroCtx;
451
452 bool is(tok::TokenKind Kind) const { return Tok.is(Kind); }
12
Calling 'Token::is'
15
Returning from 'Token::is'
16
Returning zero, which participates in a condition later
453 bool is(TokenType TT) const { return getType() == TT; }
7
Assuming the condition is true
8
Returning the value 1, which participates in a condition later
454 bool is(const IdentifierInfo *II) const {
455 return II && II == Tok.getIdentifierInfo();
456 }
457 bool is(tok::PPKeywordKind Kind) const {
458 return Tok.getIdentifierInfo() &&
459 Tok.getIdentifierInfo()->getPPKeywordID() == Kind;
460 }
461 bool is(BraceBlockKind BBK) const { return getBlockKind() == BBK; }
462 bool is(ParameterPackingKind PPK) const { return getPackingKind() == PPK; }
463
464 template <typename A, typename B> bool isOneOf(A K1, B K2) const {
465 return is(K1) || is(K2);
466 }
467 template <typename A, typename B, typename... Ts>
468 bool isOneOf(A K1, B K2, Ts... Ks) const {
469 return is(K1) || isOneOf(K2, Ks...);
470 }
471 template <typename T> bool isNot(T Kind) const { return !is(Kind); }
472
473 bool isIf(bool AllowConstexprMacro = true) const {
474 return is(tok::kw_if) || endsSequence(tok::kw_constexpr, tok::kw_if) ||
475 (endsSequence(tok::identifier, tok::kw_if) && AllowConstexprMacro);
476 }
477
478 bool closesScopeAfterBlock() const {
479 if (getBlockKind() == BK_Block)
480 return true;
481 if (closesScope())
482 return Previous->closesScopeAfterBlock();
483 return false;
484 }
485
486 /// \c true if this token starts a sequence with the given tokens in order,
487 /// following the ``Next`` pointers, ignoring comments.
488 template <typename A, typename... Ts>
489 bool startsSequence(A K1, Ts... Tokens) const {
490 return startsSequenceInternal(K1, Tokens...);
491 }
492
493 /// \c true if this token ends a sequence with the given tokens in order,
494 /// following the ``Previous`` pointers, ignoring comments.
495 /// For example, given tokens [T1, T2, T3], the function returns true if
496 /// 3 tokens ending at this (ignoring comments) are [T3, T2, T1]. In other
497 /// words, the tokens passed to this function need to be the reverse of the
498 /// order the tokens appear in code.
499 template <typename A, typename... Ts>
500 bool endsSequence(A K1, Ts... Tokens) const {
501 return endsSequenceInternal(K1, Tokens...);
502 }
503
504 bool isStringLiteral() const { return tok::isStringLiteral(Tok.getKind()); }
505
506 bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const {
507 return Tok.isObjCAtKeyword(Kind);
508 }
509
510 bool isAccessSpecifier(bool ColonRequired = true) const {
511 return isOneOf(tok::kw_public, tok::kw_protected, tok::kw_private) &&
512 (!ColonRequired || (Next && Next->is(tok::colon)));
513 }
514
515 bool canBePointerOrReferenceQualifier() const {
516 return isOneOf(tok::kw_const, tok::kw_restrict, tok::kw_volatile,
517 tok::kw___attribute, tok::kw__Nonnull, tok::kw__Nullable,
518 tok::kw__Null_unspecified, tok::kw___ptr32, tok::kw___ptr64,
519 TT_AttributeMacro);
520 }
521
522 /// Determine whether the token is a simple-type-specifier.
523 bool isSimpleTypeSpecifier() const;
524
525 bool isObjCAccessSpecifier() const {
526 return is(tok::at) && Next &&
527 (Next->isObjCAtKeyword(tok::objc_public) ||
528 Next->isObjCAtKeyword(tok::objc_protected) ||
529 Next->isObjCAtKeyword(tok::objc_package) ||
530 Next->isObjCAtKeyword(tok::objc_private));
531 }
532
533 /// Returns whether \p Tok is ([{ or an opening < of a template or in
534 /// protos.
535 bool opensScope() const {
536 if (is(TT_TemplateString) && TokenText.endswith("${"))
537 return true;
538 if (is(TT_DictLiteral) && is(tok::less))
539 return true;
540 return isOneOf(tok::l_paren, tok::l_brace, tok::l_square,
541 TT_TemplateOpener);
542 }
543 /// Returns whether \p Tok is )]} or a closing > of a template or in
544 /// protos.
545 bool closesScope() const {
546 if (is(TT_TemplateString) && TokenText.startswith("}"))
547 return true;
548 if (is(TT_DictLiteral) && is(tok::greater))
549 return true;
550 return isOneOf(tok::r_paren, tok::r_brace, tok::r_square,
551 TT_TemplateCloser);
552 }
553
554 /// Returns \c true if this is a "." or "->" accessing a member.
555 bool isMemberAccess() const {
556 return isOneOf(tok::arrow, tok::period, tok::arrowstar) &&
557 !isOneOf(TT_DesignatedInitializerPeriod, TT_TrailingReturnArrow,
558 TT_LambdaArrow, TT_LeadingJavaAnnotation);
559 }
560
561 bool isUnaryOperator() const {
562 switch (Tok.getKind()) {
563 case tok::plus:
564 case tok::plusplus:
565 case tok::minus:
566 case tok::minusminus:
567 case tok::exclaim:
568 case tok::tilde:
569 case tok::kw_sizeof:
570 case tok::kw_alignof:
571 return true;
572 default:
573 return false;
574 }
575 }
576
577 bool isBinaryOperator() const {
578 // Comma is a binary operator, but does not behave as such wrt. formatting.
579 return getPrecedence() > prec::Comma;
580 }
581
582 bool isTrailingComment() const {
583 return is(tok::comment) &&
584 (is(TT_LineComment) || !Next || Next->NewlinesBefore > 0);
585 }
586
587 /// Returns \c true if this is a keyword that can be used
588 /// like a function call (e.g. sizeof, typeid, ...).
589 bool isFunctionLikeKeyword() const {
590 switch (Tok.getKind()) {
591 case tok::kw_throw:
592 case tok::kw_typeid:
593 case tok::kw_return:
594 case tok::kw_sizeof:
595 case tok::kw_alignof:
596 case tok::kw_alignas:
597 case tok::kw_decltype:
598 case tok::kw_noexcept:
599 case tok::kw_static_assert:
600 case tok::kw__Atomic:
601 case tok::kw___attribute:
602 case tok::kw___underlying_type:
603 case tok::kw_requires:
604 return true;
605 default:
606 return false;
607 }
608 }
609
610 /// Returns \c true if this is a string literal that's like a label,
611 /// e.g. ends with "=" or ":".
612 bool isLabelString() const {
613 if (!is(tok::string_literal))
614 return false;
615 StringRef Content = TokenText;
616 if (Content.startswith("\"") || Content.startswith("'"))
617 Content = Content.drop_front(1);
618 if (Content.endswith("\"") || Content.endswith("'"))
619 Content = Content.drop_back(1);
620 Content = Content.trim();
621 return Content.size() > 1 &&
622 (Content.back() == ':' || Content.back() == '=');
623 }
624
625 /// Returns actual token start location without leading escaped
626 /// newlines and whitespace.
627 ///
628 /// This can be different to Tok.getLocation(), which includes leading escaped
629 /// newlines.
630 SourceLocation getStartOfNonWhitespace() const {
631 return WhitespaceRange.getEnd();
632 }
633
634 prec::Level getPrecedence() const {
635 return getBinOpPrecedence(Tok.getKind(), /*GreaterThanIsOperator=*/true,
636 /*CPlusPlus11=*/true);
637 }
638
639 /// Returns the previous token ignoring comments.
640 FormatToken *getPreviousNonComment() const {
641 FormatToken *Tok = Previous;
642 while (Tok && Tok->is(tok::comment))
643 Tok = Tok->Previous;
644 return Tok;
645 }
646
647 /// Returns the next token ignoring comments.
648 const FormatToken *getNextNonComment() const {
649 const FormatToken *Tok = Next;
650 while (Tok && Tok->is(tok::comment))
651 Tok = Tok->Next;
652 return Tok;
653 }
654
655 /// Returns \c true if this tokens starts a block-type list, i.e. a
656 /// list that should be indented with a block indent.
657 bool opensBlockOrBlockTypeList(const FormatStyle &Style) const {
658 // C# Does not indent object initialisers as continuations.
659 if (is(tok::l_brace) && getBlockKind() == BK_BracedInit && Style.isCSharp())
660 return true;
661 if (is(TT_TemplateString) && opensScope())
662 return true;
663 return is(TT_ArrayInitializerLSquare) || is(TT_ProtoExtensionLSquare) ||
664 (is(tok::l_brace) &&
665 (getBlockKind() == BK_Block || is(TT_DictLiteral) ||
666 (!Style.Cpp11BracedListStyle && NestingLevel == 0))) ||
667 (is(tok::less) && (Style.Language == FormatStyle::LK_Proto ||
668 Style.Language == FormatStyle::LK_TextProto));
669 }
670
671 /// Returns whether the token is the left square bracket of a C++
672 /// structured binding declaration.
673 bool isCppStructuredBinding(const FormatStyle &Style) const {
674 if (!Style.isCpp() || isNot(tok::l_square))
675 return false;
676 const FormatToken *T = this;
677 do {
678 T = T->getPreviousNonComment();
679 } while (T && T->isOneOf(tok::kw_const, tok::kw_volatile, tok::amp,
680 tok::ampamp));
681 return T && T->is(tok::kw_auto);
682 }
683
684 /// Same as opensBlockOrBlockTypeList, but for the closing token.
685 bool closesBlockOrBlockTypeList(const FormatStyle &Style) const {
686 if (is(TT_TemplateString) && closesScope())
687 return true;
688 return MatchingParen && MatchingParen->opensBlockOrBlockTypeList(Style);
689 }
690
691 /// Return the actual namespace token, if this token starts a namespace
692 /// block.
693 const FormatToken *getNamespaceToken() const {
694 const FormatToken *NamespaceTok = this;
695 if (is(tok::comment))
696 NamespaceTok = NamespaceTok->getNextNonComment();
697 // Detect "(inline|export)? namespace" in the beginning of a line.
698 if (NamespaceTok && NamespaceTok->isOneOf(tok::kw_inline, tok::kw_export))
699 NamespaceTok = NamespaceTok->getNextNonComment();
700 return NamespaceTok &&
701 NamespaceTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro)
702 ? NamespaceTok
703 : nullptr;
704 }
705
706 void copyFrom(const FormatToken &Tok) { *this = Tok; }
707
708private:
709 // Only allow copying via the explicit copyFrom method.
710 FormatToken(const FormatToken &) = delete;
711 FormatToken &operator=(const FormatToken &) = default;
712
713 template <typename A, typename... Ts>
714 bool startsSequenceInternal(A K1, Ts... Tokens) const {
715 if (is(tok::comment) && Next)
716 return Next->startsSequenceInternal(K1, Tokens...);
717 return is(K1) && Next && Next->startsSequenceInternal(Tokens...);
718 }
719
720 template <typename A> bool startsSequenceInternal(A K1) const {
721 if (is(tok::comment) && Next)
722 return Next->startsSequenceInternal(K1);
723 return is(K1);
724 }
725
726 template <typename A, typename... Ts> bool endsSequenceInternal(A K1) const {
727 if (is(tok::comment) && Previous)
728 return Previous->endsSequenceInternal(K1);
729 return is(K1);
730 }
731
732 template <typename A, typename... Ts>
733 bool endsSequenceInternal(A K1, Ts... Tokens) const {
734 if (is(tok::comment) && Previous)
735 return Previous->endsSequenceInternal(K1, Tokens...);
736 return is(K1) && Previous && Previous->endsSequenceInternal(Tokens...);
737 }
738};
739
740class ContinuationIndenter;
741struct LineState;
742
743class TokenRole {
744public:
745 TokenRole(const FormatStyle &Style) : Style(Style) {}
746 virtual ~TokenRole();
747
748 /// After the \c TokenAnnotator has finished annotating all the tokens,
749 /// this function precomputes required information for formatting.
750 virtual void precomputeFormattingInfos(const FormatToken *Token);
751
752 /// Apply the special formatting that the given role demands.
753 ///
754 /// Assumes that the token having this role is already formatted.
755 ///
756 /// Continues formatting from \p State leaving indentation to \p Indenter and
757 /// returns the total penalty that this formatting incurs.
758 virtual unsigned formatFromToken(LineState &State,
759 ContinuationIndenter *Indenter,
760 bool DryRun) {
761 return 0;
762 }
763
764 /// Same as \c formatFromToken, but assumes that the first token has
765 /// already been set thereby deciding on the first line break.
766 virtual unsigned formatAfterToken(LineState &State,
767 ContinuationIndenter *Indenter,
768 bool DryRun) {
769 return 0;
770 }
771
772 /// Notifies the \c Role that a comma was found.
773 virtual void CommaFound(const FormatToken *Token) {}
774
775 virtual const FormatToken *lastComma() { return nullptr; }
776
777protected:
778 const FormatStyle &Style;
779};
780
781class CommaSeparatedList : public TokenRole {
782public:
783 CommaSeparatedList(const FormatStyle &Style)
784 : TokenRole(Style), HasNestedBracedList(false) {}
785
786 void precomputeFormattingInfos(const FormatToken *Token) override;
787
788 unsigned formatAfterToken(LineState &State, ContinuationIndenter *Indenter,
789 bool DryRun) override;
790
791 unsigned formatFromToken(LineState &State, ContinuationIndenter *Indenter,
792 bool DryRun) override;
793
794 /// Adds \p Token as the next comma to the \c CommaSeparated list.
795 void CommaFound(const FormatToken *Token) override {
796 Commas.push_back(Token);
797 }
798
799 const FormatToken *lastComma() override {
800 if (Commas.empty())
801 return nullptr;
802 return Commas.back();
803 }
804
805private:
806 /// A struct that holds information on how to format a given list with
807 /// a specific number of columns.
808 struct ColumnFormat {
809 /// The number of columns to use.
810 unsigned Columns;
811
812 /// The total width in characters.
813 unsigned TotalWidth;
814
815 /// The number of lines required for this format.
816 unsigned LineCount;
817
818 /// The size of each column in characters.
819 SmallVector<unsigned, 8> ColumnSizes;
820 };
821
822 /// Calculate which \c ColumnFormat fits best into
823 /// \p RemainingCharacters.
824 const ColumnFormat *getColumnFormat(unsigned RemainingCharacters) const;
825
826 /// The ordered \c FormatTokens making up the commas of this list.
827 SmallVector<const FormatToken *, 8> Commas;
828
829 /// The length of each of the list's items in characters including the
830 /// trailing comma.
831 SmallVector<unsigned, 8> ItemLengths;
832
833 /// Precomputed formats that can be used for this list.
834 SmallVector<ColumnFormat, 4> Formats;
835
836 bool HasNestedBracedList;
837};
838
839/// Encapsulates keywords that are context sensitive or for languages not
840/// properly supported by Clang's lexer.
841struct AdditionalKeywords {
842 AdditionalKeywords(IdentifierTable &IdentTable) {
843 kw_final = &IdentTable.get("final");
844 kw_override = &IdentTable.get("override");
845 kw_in = &IdentTable.get("in");
846 kw_of = &IdentTable.get("of");
847 kw_CF_CLOSED_ENUM = &IdentTable.get("CF_CLOSED_ENUM");
848 kw_CF_ENUM = &IdentTable.get("CF_ENUM");
849 kw_CF_OPTIONS = &IdentTable.get("CF_OPTIONS");
850 kw_NS_CLOSED_ENUM = &IdentTable.get("NS_CLOSED_ENUM");
851 kw_NS_ENUM = &IdentTable.get("NS_ENUM");
852 kw_NS_OPTIONS = &IdentTable.get("NS_OPTIONS");
853
854 kw_as = &IdentTable.get("as");
855 kw_async = &IdentTable.get("async");
856 kw_await = &IdentTable.get("await");
857 kw_declare = &IdentTable.get("declare");
858 kw_finally = &IdentTable.get("finally");
859 kw_from = &IdentTable.get("from");
860 kw_function = &IdentTable.get("function");
861 kw_get = &IdentTable.get("get");
862 kw_import = &IdentTable.get("import");
863 kw_infer = &IdentTable.get("infer");
864 kw_is = &IdentTable.get("is");
865 kw_let = &IdentTable.get("let");
866 kw_module = &IdentTable.get("module");
867 kw_readonly = &IdentTable.get("readonly");
868 kw_set = &IdentTable.get("set");
869 kw_type = &IdentTable.get("type");
870 kw_typeof = &IdentTable.get("typeof");
871 kw_var = &IdentTable.get("var");
872 kw_yield = &IdentTable.get("yield");
873
874 kw_abstract = &IdentTable.get("abstract");
875 kw_assert = &IdentTable.get("assert");
876 kw_extends = &IdentTable.get("extends");
877 kw_implements = &IdentTable.get("implements");
878 kw_instanceof = &IdentTable.get("instanceof");
879 kw_interface = &IdentTable.get("interface");
880 kw_native = &IdentTable.get("native");
881 kw_package = &IdentTable.get("package");
882 kw_synchronized = &IdentTable.get("synchronized");
883 kw_throws = &IdentTable.get("throws");
884 kw___except = &IdentTable.get("__except");
885 kw___has_include = &IdentTable.get("__has_include");
886 kw___has_include_next = &IdentTable.get("__has_include_next");
887
888 kw_mark = &IdentTable.get("mark");
889
890 kw_extend = &IdentTable.get("extend");
891 kw_option = &IdentTable.get("option");
892 kw_optional = &IdentTable.get("optional");
893 kw_repeated = &IdentTable.get("repeated");
894 kw_required = &IdentTable.get("required");
895 kw_returns = &IdentTable.get("returns");
896
897 kw_signals = &IdentTable.get("signals");
898 kw_qsignals = &IdentTable.get("Q_SIGNALS");
899 kw_slots = &IdentTable.get("slots");
900 kw_qslots = &IdentTable.get("Q_SLOTS");
901
902 // C# keywords
903 kw_dollar = &IdentTable.get("dollar");
904 kw_base = &IdentTable.get("base");
905 kw_byte = &IdentTable.get("byte");
906 kw_checked = &IdentTable.get("checked");
907 kw_decimal = &IdentTable.get("decimal");
908 kw_delegate = &IdentTable.get("delegate");
909 kw_event = &IdentTable.get("event");
910 kw_fixed = &IdentTable.get("fixed");
911 kw_foreach = &IdentTable.get("foreach");
912 kw_implicit = &IdentTable.get("implicit");
913 kw_internal = &IdentTable.get("internal");
914 kw_lock = &IdentTable.get("lock");
915 kw_null = &IdentTable.get("null");
916 kw_object = &IdentTable.get("object");
917 kw_out = &IdentTable.get("out");
918 kw_params = &IdentTable.get("params");
919 kw_ref = &IdentTable.get("ref");
920 kw_string = &IdentTable.get("string");
921 kw_stackalloc = &IdentTable.get("stackalloc");
922 kw_sbyte = &IdentTable.get("sbyte");
923 kw_sealed = &IdentTable.get("sealed");
924 kw_uint = &IdentTable.get("uint");
925 kw_ulong = &IdentTable.get("ulong");
926 kw_unchecked = &IdentTable.get("unchecked");
927 kw_unsafe = &IdentTable.get("unsafe");
928 kw_ushort = &IdentTable.get("ushort");
929 kw_when = &IdentTable.get("when");
930 kw_where = &IdentTable.get("where");
931
932 // Keep this at the end of the constructor to make sure everything here
933 // is
934 // already initialized.
935 JsExtraKeywords = std::unordered_set<IdentifierInfo *>(
936 {kw_as, kw_async, kw_await, kw_declare, kw_finally, kw_from,
937 kw_function, kw_get, kw_import, kw_is, kw_let, kw_module, kw_override,
938 kw_readonly, kw_set, kw_type, kw_typeof, kw_var, kw_yield,
939 // Keywords from the Java section.
940 kw_abstract, kw_extends, kw_implements, kw_instanceof, kw_interface});
941
942 CSharpExtraKeywords = std::unordered_set<IdentifierInfo *>(
943 {kw_base, kw_byte, kw_checked, kw_decimal, kw_delegate, kw_event,
944 kw_fixed, kw_foreach, kw_implicit, kw_in, kw_interface, kw_internal,
945 kw_is, kw_lock, kw_null, kw_object, kw_out, kw_override, kw_params,
946 kw_readonly, kw_ref, kw_string, kw_stackalloc, kw_sbyte, kw_sealed,
947 kw_uint, kw_ulong, kw_unchecked, kw_unsafe, kw_ushort, kw_when,
948 kw_where,
949 // Keywords from the JavaScript section.
950 kw_as, kw_async, kw_await, kw_declare, kw_finally, kw_from,
951 kw_function, kw_get, kw_import, kw_is, kw_let, kw_module, kw_readonly,
952 kw_set, kw_type, kw_typeof, kw_var, kw_yield,
953 // Keywords from the Java section.
954 kw_abstract, kw_extends, kw_implements, kw_instanceof, kw_interface});
955 }
956
957 // Context sensitive keywords.
958 IdentifierInfo *kw_final;
959 IdentifierInfo *kw_override;
960 IdentifierInfo *kw_in;
961 IdentifierInfo *kw_of;
962 IdentifierInfo *kw_CF_CLOSED_ENUM;
963 IdentifierInfo *kw_CF_ENUM;
964 IdentifierInfo *kw_CF_OPTIONS;
965 IdentifierInfo *kw_NS_CLOSED_ENUM;
966 IdentifierInfo *kw_NS_ENUM;
967 IdentifierInfo *kw_NS_OPTIONS;
968 IdentifierInfo *kw___except;
969 IdentifierInfo *kw___has_include;
970 IdentifierInfo *kw___has_include_next;
971
972 // JavaScript keywords.
973 IdentifierInfo *kw_as;
974 IdentifierInfo *kw_async;
975 IdentifierInfo *kw_await;
976 IdentifierInfo *kw_declare;
977 IdentifierInfo *kw_finally;
978 IdentifierInfo *kw_from;
979 IdentifierInfo *kw_function;
980 IdentifierInfo *kw_get;
981 IdentifierInfo *kw_import;
982 IdentifierInfo *kw_infer;
983 IdentifierInfo *kw_is;
984 IdentifierInfo *kw_let;
985 IdentifierInfo *kw_module;
986 IdentifierInfo *kw_readonly;
987 IdentifierInfo *kw_set;
988 IdentifierInfo *kw_type;
989 IdentifierInfo *kw_typeof;
990 IdentifierInfo *kw_var;
991 IdentifierInfo *kw_yield;
992
993 // Java keywords.
994 IdentifierInfo *kw_abstract;
995 IdentifierInfo *kw_assert;
996 IdentifierInfo *kw_extends;
997 IdentifierInfo *kw_implements;
998 IdentifierInfo *kw_instanceof;
999 IdentifierInfo *kw_interface;
1000 IdentifierInfo *kw_native;
1001 IdentifierInfo *kw_package;
1002 IdentifierInfo *kw_synchronized;
1003 IdentifierInfo *kw_throws;
1004
1005 // Pragma keywords.
1006 IdentifierInfo *kw_mark;
1007
1008 // Proto keywords.
1009 IdentifierInfo *kw_extend;
1010 IdentifierInfo *kw_option;
1011 IdentifierInfo *kw_optional;
1012 IdentifierInfo *kw_repeated;
1013 IdentifierInfo *kw_required;
1014 IdentifierInfo *kw_returns;
1015
1016 // QT keywords.
1017 IdentifierInfo *kw_signals;
1018 IdentifierInfo *kw_qsignals;
1019 IdentifierInfo *kw_slots;
1020 IdentifierInfo *kw_qslots;
1021
1022 // C# keywords
1023 IdentifierInfo *kw_dollar;
1024 IdentifierInfo *kw_base;
1025 IdentifierInfo *kw_byte;
1026 IdentifierInfo *kw_checked;
1027 IdentifierInfo *kw_decimal;
1028 IdentifierInfo *kw_delegate;
1029 IdentifierInfo *kw_event;
1030 IdentifierInfo *kw_fixed;
1031 IdentifierInfo *kw_foreach;
1032 IdentifierInfo *kw_implicit;
1033 IdentifierInfo *kw_internal;
1034
1035 IdentifierInfo *kw_lock;
1036 IdentifierInfo *kw_null;
1037 IdentifierInfo *kw_object;
1038 IdentifierInfo *kw_out;
1039
1040 IdentifierInfo *kw_params;
1041
1042 IdentifierInfo *kw_ref;
1043 IdentifierInfo *kw_string;
1044 IdentifierInfo *kw_stackalloc;
1045 IdentifierInfo *kw_sbyte;
1046 IdentifierInfo *kw_sealed;
1047 IdentifierInfo *kw_uint;
1048 IdentifierInfo *kw_ulong;
1049 IdentifierInfo *kw_unchecked;
1050 IdentifierInfo *kw_unsafe;
1051 IdentifierInfo *kw_ushort;
1052 IdentifierInfo *kw_when;
1053 IdentifierInfo *kw_where;
1054
1055 /// Returns \c true if \p Tok is a true JavaScript identifier, returns
1056 /// \c false if it is a keyword or a pseudo keyword.
1057 /// If \c AcceptIdentifierName is true, returns true not only for true
1058 /// identifiers, but also for IdentifierName tokens (aka pseudo-keywords),
1059 /// such as ``yield``.
1060 bool IsJavaScriptIdentifier(const FormatToken &Tok,
1061 bool AcceptIdentifierName = true) const {
1062 // Based on the list of JavaScript & TypeScript keywords here:
1063 // https://github.com/microsoft/TypeScript/blob/master/src/compiler/scanner.ts#L74
1064 switch (Tok.Tok.getKind()) {
1065 case tok::kw_break:
1066 case tok::kw_case:
1067 case tok::kw_catch:
1068 case tok::kw_class:
1069 case tok::kw_continue:
1070 case tok::kw_const:
1071 case tok::kw_default:
1072 case tok::kw_delete:
1073 case tok::kw_do:
1074 case tok::kw_else:
1075 case tok::kw_enum:
1076 case tok::kw_export:
1077 case tok::kw_false:
1078 case tok::kw_for:
1079 case tok::kw_if:
1080 case tok::kw_import:
1081 case tok::kw_module:
1082 case tok::kw_new:
1083 case tok::kw_private:
1084 case tok::kw_protected:
1085 case tok::kw_public:
1086 case tok::kw_return:
1087 case tok::kw_static:
1088 case tok::kw_switch:
1089 case tok::kw_this:
1090 case tok::kw_throw:
1091 case tok::kw_true:
1092 case tok::kw_try:
1093 case tok::kw_typeof:
1094 case tok::kw_void:
1095 case tok::kw_while:
1096 // These are JS keywords that are lexed by LLVM/clang as keywords.
1097 return false;
1098 case tok::identifier: {
1099 // For identifiers, make sure they are true identifiers, excluding the
1100 // JavaScript pseudo-keywords (not lexed by LLVM/clang as keywords).
1101 bool IsPseudoKeyword =
1102 JsExtraKeywords.find(Tok.Tok.getIdentifierInfo()) !=
1103 JsExtraKeywords.end();
1104 return AcceptIdentifierName || !IsPseudoKeyword;
1105 }
1106 default:
1107 // Other keywords are handled in the switch below, to avoid problems due
1108 // to duplicate case labels when using the #include trick.
1109 break;
1110 }
1111
1112 switch (Tok.Tok.getKind()) {
1113 // Handle C++ keywords not included above: these are all JS identifiers.
1114#define KEYWORD(X, Y) case tok::kw_##X:
1115#include "clang/Basic/TokenKinds.def"
1116 // #undef KEYWORD is not needed -- it's #undef-ed at the end of
1117 // TokenKinds.def
1118 return true;
1119 default:
1120 // All other tokens (punctuation etc) are not JS identifiers.
1121 return false;
1122 }
1123 }
1124
1125 /// Returns \c true if \p Tok is a C# keyword, returns
1126 /// \c false if it is a anything else.
1127 bool isCSharpKeyword(const FormatToken &Tok) const {
1128 switch (Tok.Tok.getKind()) {
1129 case tok::kw_bool:
1130 case tok::kw_break:
1131 case tok::kw_case:
1132 case tok::kw_catch:
1133 case tok::kw_char:
1134 case tok::kw_class:
1135 case tok::kw_const:
1136 case tok::kw_continue:
1137 case tok::kw_default:
1138 case tok::kw_do:
1139 case tok::kw_double:
1140 case tok::kw_else:
1141 case tok::kw_enum:
1142 case tok::kw_explicit:
1143 case tok::kw_extern:
1144 case tok::kw_false:
1145 case tok::kw_float:
1146 case tok::kw_for:
1147 case tok::kw_goto:
1148 case tok::kw_if:
1149 case tok::kw_int:
1150 case tok::kw_long:
1151 case tok::kw_namespace:
1152 case tok::kw_new:
1153 case tok::kw_operator:
1154 case tok::kw_private:
1155 case tok::kw_protected:
1156 case tok::kw_public:
1157 case tok::kw_return:
1158 case tok::kw_short:
1159 case tok::kw_sizeof:
1160 case tok::kw_static:
1161 case tok::kw_struct:
1162 case tok::kw_switch:
1163 case tok::kw_this:
1164 case tok::kw_throw:
1165 case tok::kw_true:
1166 case tok::kw_try:
1167 case tok::kw_typeof:
1168 case tok::kw_using:
1169 case tok::kw_virtual:
1170 case tok::kw_void:
1171 case tok::kw_volatile:
1172 case tok::kw_while:
1173 return true;
1174 default:
1175 return Tok.is(tok::identifier) &&
1176 CSharpExtraKeywords.find(Tok.Tok.getIdentifierInfo()) ==
1177 CSharpExtraKeywords.end();
1178 }
1179 }
1180
1181private:
1182 /// The JavaScript keywords beyond the C++ keyword set.
1183 std::unordered_set<IdentifierInfo *> JsExtraKeywords;
1184
1185 /// The C# keywords beyond the C++ keyword set
1186 std::unordered_set<IdentifierInfo *> CSharpExtraKeywords;
1187};
1188
1189} // namespace format
1190} // namespace clang
1191
1192#endif

/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/clang/include/clang/Lex/Token.h

1//===--- Token.h - Token interface ------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the Token interface.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_CLANG_LEX_TOKEN_H
14#define LLVM_CLANG_LEX_TOKEN_H
15
16#include "clang/Basic/SourceLocation.h"
17#include "clang/Basic/TokenKinds.h"
18#include "llvm/ADT/StringRef.h"
19#include <cassert>
20
21namespace clang {
22
23class IdentifierInfo;
24
25/// Token - This structure provides full information about a lexed token.
26/// It is not intended to be space efficient, it is intended to return as much
27/// information as possible about each returned token. This is expected to be
28/// compressed into a smaller form if memory footprint is important.
29///
30/// The parser can create a special "annotation token" representing a stream of
31/// tokens that were parsed and semantically resolved, e.g.: "foo::MyClass<int>"
32/// can be represented by a single typename annotation token that carries
33/// information about the SourceRange of the tokens and the type object.
34class Token {
35 /// The location of the token. This is actually a SourceLocation.
36 SourceLocation::UIntTy Loc;
37
38 // Conceptually these next two fields could be in a union. However, this
39 // causes gcc 4.2 to pessimize LexTokenInternal, a very performance critical
40 // routine. Keeping as separate members with casts until a more beautiful fix
41 // presents itself.
42
43 /// UintData - This holds either the length of the token text, when
44 /// a normal token, or the end of the SourceRange when an annotation
45 /// token.
46 SourceLocation::UIntTy UintData;
47
48 /// PtrData - This is a union of four different pointer types, which depends
49 /// on what type of token this is:
50 /// Identifiers, keywords, etc:
51 /// This is an IdentifierInfo*, which contains the uniqued identifier
52 /// spelling.
53 /// Literals: isLiteral() returns true.
54 /// This is a pointer to the start of the token in a text buffer, which
55 /// may be dirty (have trigraphs / escaped newlines).
56 /// Annotations (resolved type names, C++ scopes, etc): isAnnotation().
57 /// This is a pointer to sema-specific data for the annotation token.
58 /// Eof:
59 /// This is a pointer to a Decl.
60 /// Other:
61 /// This is null.
62 void *PtrData;
63
64 /// Kind - The actual flavor of token this is.
65 tok::TokenKind Kind;
66
67 /// Flags - Bits we track about this token, members of the TokenFlags enum.
68 unsigned short Flags;
69
70public:
71 // Per-token flags. Each enumerator is a distinct bit so several can be combined in Flags:
72 enum TokenFlags {
73 StartOfLine = 0x01, // At start of line or only after whitespace
74 // (considering the line after macro expansion).
75 LeadingSpace = 0x02, // Whitespace exists before this token (considering
76 // whitespace after macro expansion).
77 DisableExpand = 0x04, // This identifier may never be macro expanded.
78 NeedsCleaning = 0x08, // Contained an escaped newline or trigraph.
79 LeadingEmptyMacro = 0x10, // Empty macro exists before this token.
80 HasUDSuffix = 0x20, // This string or character literal has a ud-suffix.
81 HasUCN = 0x40, // This identifier contains a UCN (universal character name).
82 IgnoredComma = 0x80, // This comma is not a macro argument separator (MS).
83 StringifiedInMacro = 0x100, // This string or character literal was formed by
84 // the macro stringizing or charizing operator.
85 CommaAfterElided = 0x200, // The comma following this token was elided (MS).
86 IsEditorPlaceholder = 0x400, // This identifier is an editor placeholder.
87 IsReinjected = 0x800, // A phase 4 token that was produced before and
88 // re-added, e.g. via EnterTokenStream. Annotation
89 // tokens are *not* reinjected.
90 };
91
92 tok::TokenKind getKind() const { return Kind; } // Return the kind (flavor) of this token.
93 void setKind(tok::TokenKind K) { Kind = K; } // Overwrite the kind of this token; no other field is touched.
94
95 /// is/isNot - Predicates to check whether this token is (or is not) of a
96 /// specific kind, as in "if (Tok.is(tok::l_brace)) {...}".
97 bool is(tok::TokenKind K) const { return Kind == K; }
13
Assuming 'K' is not equal to field 'Kind'
14
Returning zero, which participates in a condition later
98 bool isNot(tok::TokenKind K) const { return Kind != K; } // Negation of is(K).
99 bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const { // True if this token's kind is K1 or K2.
100 return is(K1) || is(K2);
101 }
102 template <typename... Ts> // Variadic form for longer lists of kinds.
103 bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, Ts... Ks) const {
104 return is(K1) || isOneOf(K2, Ks...); // Test K1, then recurse on the remaining kinds.
105 }
106
107 /// Return true if this is a raw identifier (when lexing
108 /// in raw mode) or a non-keyword identifier (when lexing in non-raw mode).
109 bool isAnyIdentifier() const {
110 return tok::isAnyIdentifier(getKind()); // Decided from the token kind alone.
111 }
112
113 /// Return true if this is a "literal", like a numeric
114 /// constant, string, etc.
115 bool isLiteral() const {
116 return tok::isLiteral(getKind()); // Decided from the token kind alone.
117 }
118
119 /// Return true if this token's kind is any of the tok::annot_* kinds.
120 bool isAnnotation() const {
121 return tok::isAnnotation(getKind()); // Decided from the token kind alone.
122 }
123
124 /// Return the source location of this token, decoded from the raw
125 /// encoding stored in Loc.
126 SourceLocation getLocation() const {
127 return SourceLocation::getFromRawEncoding(Loc);
128 }
129 unsigned getLength() const { // Length of a non-annotation token, as stored in UintData.
130 assert(!isAnnotation() && "Annotation tokens have no length field")(static_cast<void> (0));
131 return UintData;
132 }
133
134 void setLocation(SourceLocation L) { Loc = L.getRawEncoding(); } // Store L in Loc as its raw encoding.
135 void setLength(unsigned Len) { // Record the length of a non-annotation token in UintData.
136 assert(!isAnnotation() && "Annotation tokens have no length field")(static_cast<void> (0));
137 UintData = Len;
138 }
139
140 SourceLocation getAnnotationEndLoc() const { // End location of an annotation token.
141 assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token")(static_cast<void> (0));
142 return SourceLocation::getFromRawEncoding(UintData ? UintData : Loc); // Falls back to Loc when UintData is 0.
143 }
144 void setAnnotationEndLoc(SourceLocation L) { // Store the end location of an annotation token.
145 assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token")(static_cast<void> (0));
146 UintData = L.getRawEncoding(); // For annotations UintData holds an end location, not a length.
147 }
148
149 SourceLocation getLastLoc() const { // Annotation end location for annotations, otherwise the token's own location.
150 return isAnnotation() ? getAnnotationEndLoc() : getLocation();
151 }
152
153 SourceLocation getEndLoc() const { // Annotation end location for annotations, otherwise location + length.
154 return isAnnotation() ? getAnnotationEndLoc()
155 : getLocation().getLocWithOffset(getLength());
156 }
157
158 /// SourceRange of the group of tokens that this annotation token
159 /// represents, running from getLocation() to getAnnotationEndLoc().
160 SourceRange getAnnotationRange() const {
161 return SourceRange(getLocation(), getAnnotationEndLoc());
162 }
163 void setAnnotationRange(SourceRange R) { // Counterpart of getAnnotationRange().
164 setLocation(R.getBegin()); // Range begin becomes the token location.
165 setAnnotationEndLoc(R.getEnd()); // Range end becomes the annotation end location.
166 }
167
168 const char *getName() const { return tok::getTokenName(Kind); } // Name of this token's kind, per tok::getTokenName.
169
170 /// Reset the whole token to a cleared state, not just its flags.
171 void startToken() {
172 Kind = tok::unknown;
173 Flags = 0;
174 PtrData = nullptr;
175 UintData = 0;
176 Loc = SourceLocation().getRawEncoding(); // Raw encoding of a default-constructed location.
177 }
178
179 IdentifierInfo *getIdentifierInfo() const { // PtrData viewed as an IdentifierInfo*, or null for literal and eof tokens.
180 assert(isNot(tok::raw_identifier) &&(static_cast<void> (0))
181 "getIdentifierInfo() on a tok::raw_identifier token!")(static_cast<void> (0));
182 assert(!isAnnotation() &&(static_cast<void> (0))
183 "getIdentifierInfo() on an annotation token!")(static_cast<void> (0));
184 if (isLiteral()) return nullptr; // For literals PtrData points into the text buffer instead.
185 if (is(tok::eof)) return nullptr; // For eof PtrData carries eof data instead.
186 return (IdentifierInfo*) PtrData; // Remaining kinds: an IdentifierInfo* or null.
187 }
188 void setIdentifierInfo(IdentifierInfo *II) { // Store II in PtrData; the token kind is not checked here.
189 PtrData = (void*) II;
190 }
191
192 const void *getEofData() const { // Opaque data attached to an eof token, kept in PtrData.
193 assert(is(tok::eof))(static_cast<void> (0));
194 return reinterpret_cast<const void *>(PtrData);
195 }
196 void setEofData(const void *D) { // Attach opaque data D to an eof token.
197 assert(is(tok::eof))(static_cast<void> (0));
198 assert(!PtrData)(static_cast<void> (0)); // PtrData is expected to still be null here.
199 PtrData = const_cast<void *>(D); // Constness is dropped for storage only; getEofData() returns it as const again.
200 }
201
202 /// getRawIdentifier - For a raw identifier token (i.e., an identifier
203 /// lexed in raw mode), returns a reference to the text substring in the
204 /// buffer if known.
205 StringRef getRawIdentifier() const {
206 assert(is(tok::raw_identifier))(static_cast<void> (0));
207 return StringRef(reinterpret_cast<const char *>(PtrData), getLength()); // getLength() characters starting at PtrData.
208 }
209 void setRawIdentifierData(const char *Ptr) { // Record where a raw identifier's text starts.
210 assert(is(tok::raw_identifier))(static_cast<void> (0));
211 PtrData = const_cast<char*>(Ptr); // Constness is dropped for storage only; getRawIdentifier() reads it as const char*.
212 }
213
214 /// getLiteralData - For a literal token (numeric constant, string, etc.), this
215 /// returns a pointer to the start of it in the text buffer if known, null
216 /// otherwise.
217 const char *getLiteralData() const {
218 assert(isLiteral() && "Cannot get literal data of non-literal")(static_cast<void> (0));
219 return reinterpret_cast<const char*>(PtrData);
220 }
221 void setLiteralData(const char *Ptr) { // Record where a literal token's text starts.
222 assert(isLiteral() && "Cannot set literal data of non-literal")(static_cast<void> (0));
223 PtrData = const_cast<char*>(Ptr); // Constness is dropped for storage only; getLiteralData() returns const char*.
224 }
225
226 void *getAnnotationValue() const { // Opaque value carried by an annotation token (PtrData).
227 assert(isAnnotation() && "Used AnnotVal on non-annotation token")(static_cast<void> (0));
228 return PtrData;
229 }
230 void setAnnotationValue(void *val) { // Store the opaque value carried by an annotation token.
231 assert(isAnnotation() && "Used AnnotVal on non-annotation token")(static_cast<void> (0));
232 PtrData = val;
233 }
234
235 /// Set the specified flag; all other flags are left unchanged.
236 void setFlag(TokenFlags Flag) {
237 Flags |= Flag;
238 }
239
240 /// Return true if any bit of the specified flag is set.
241 bool getFlag(TokenFlags Flag) const {
242 return (Flags & Flag) != 0;
243 }
244
245 /// Unset the specified flag; all other flags are left unchanged.
246 void clearFlag(TokenFlags Flag) {
247 Flags &= ~Flag;
248 }
249
250 /// Return the internal representation of the flags.
251 ///
252 /// This is only intended for low-level operations such as writing tokens to
253 /// disk.
254 unsigned getFlags() const {
255 return Flags;
256 }
257
258 /// Set Flag when Val is true, clear it when Val is false.
259 void setFlagValue(TokenFlags Flag, bool Val) {
260 if (Val)
261 setFlag(Flag);
262 else
263 clearFlag(Flag);
264 }
265
266 /// isAtStartOfLine - Return true if this token is at the start of a line.
267 /// This reflects the StartOfLine flag.
268 bool isAtStartOfLine() const { return getFlag(StartOfLine); }
269
270 /// Return true if this token has whitespace before it.
271 /// This reflects the LeadingSpace flag.
272 bool hasLeadingSpace() const { return getFlag(LeadingSpace); }
273
274 /// Return true if this identifier token should never be expanded in the
275 /// future, due to C99 6.10.3.4p2. This reflects the DisableExpand flag.
276 bool isExpandDisabled() const { return getFlag(DisableExpand); }
277
278 /// Return true if we have an ObjC keyword identifier. Only declared here; presumably matched against objcKey - confirm in the definition.
279 bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const;
280
281 /// Return the ObjC keyword kind of this token. Only declared here; the definition is elsewhere.
282 tok::ObjCKeywordKind getObjCKeywordID() const;
283
284 /// Return true if this token has trigraphs or escaped newlines in it (NeedsCleaning flag).
285 bool needsCleaning() const { return getFlag(NeedsCleaning); }
286
287 /// Return true if this token has an empty macro before it.
288 /// This reflects the LeadingEmptyMacro flag.
289 bool hasLeadingEmptyMacro() const { return getFlag(LeadingEmptyMacro); }
290
291 /// Return true if this token is a string or character literal that
292 /// has a ud-suffix (HasUDSuffix flag).
293 bool hasUDSuffix() const { return getFlag(HasUDSuffix); }
294
295 /// Return true if this token contains a universal character name (UCN).
296 bool hasUCN() const { return getFlag(HasUCN); }
297
298 /// Return true if this token was formed by the macro stringizing or
299 /// charizing operator.
300 bool stringifiedInMacro() const { return getFlag(StringifiedInMacro); }
301
302 /// Return true if the comma after this token was elided (CommaAfterElided flag).
303 bool commaAfterElided() const { return getFlag(CommaAfterElided); }
304
305 /// Return true if this token is an editor placeholder.
306 ///
307 /// Editor placeholders are produced by the code-completion engine and are
308 /// represented as characters between '<#' and '#>' in the source code. The
309 /// lexer uses identifier tokens to represent placeholders.
310 bool isEditorPlaceholder() const { return getFlag(IsEditorPlaceholder); }
311};
312
313/// Information about the conditional stack (\#if directives)
314/// currently active.
315struct PPConditionalInfo {
316 /// Location where the conditional started.
317 SourceLocation IfLoc;
318
319 /// True if this was contained in a skipping directive, e.g.,
320 /// in a "\#if 0" block.
321 bool WasSkipping;
322
323 /// True if we have emitted tokens already, and now we're in
324 /// an \#else block or similar. Only useful in skipping blocks.
325 bool FoundNonSkip;
326
327 /// True if we've seen an \#else in this block. If so, further
328 /// \#elif/\#else directives are not allowed.
329 bool FoundElse;
330};
331
332} // end namespace clang
333
334#endif // LLVM_CLANG_LEX_TOKEN_H