Bug Summary

File: clang/lib/Format/FormatToken.cpp
Warning: line 186, column 12
Access to field 'HasUnescapedNewline' results in a dereference of a null pointer (loaded from variable 'ItemBegin')

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name FormatToken.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -mframe-pointer=none -relaxed-aliasing -fmath-errno -fno-rounding-math -masm-verbose -mconstructor-aliases -munwind-tables -target-cpu x86-64 -dwarf-column-info -fno-split-dwarf-inlining -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-11/lib/clang/11.0.0 -D CLANG_VENDOR="Debian " -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/build-llvm/tools/clang/lib/Format -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/clang/lib/Format -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/clang/include -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/build-llvm/tools/clang/include -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/build-llvm/include -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 
-internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-11/lib/clang/11.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/build-llvm/tools/clang/lib/Format -fdebug-prefix-map=/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347=. -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fno-common -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2020-03-09-184146-41876-1 -x c++ /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/clang/lib/Format/FormatToken.cpp

/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/clang/lib/Format/FormatToken.cpp

1//===--- FormatToken.cpp - Format C++ code --------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements specific functions of \c FormatTokens and their
11/// roles.
12///
13//===----------------------------------------------------------------------===//
14
15#include "FormatToken.h"
16#include "ContinuationIndenter.h"
17#include "llvm/ADT/SmallVector.h"
18#include "llvm/Support/Debug.h"
19#include <climits>
20
21namespace clang {
22namespace format {
23
24const char *getTokenTypeName(TokenType Type) {
25 static const char *const TokNames[] = {
26#define TYPE(X) #X,
27 LIST_TOKEN_TYPESTYPE(ArrayInitializerLSquare) TYPE(ArraySubscriptLSquare) TYPE
(AttributeColon) TYPE(AttributeParen) TYPE(AttributeSquare) TYPE
(BinaryOperator) TYPE(BitFieldColon) TYPE(BlockComment) TYPE(
CastRParen) TYPE(ConditionalExpr) TYPE(ConflictAlternative) TYPE
(ConflictEnd) TYPE(ConflictStart) TYPE(CtorInitializerColon) TYPE
(CtorInitializerComma) TYPE(DesignatedInitializerLSquare) TYPE
(DesignatedInitializerPeriod) TYPE(DictLiteral) TYPE(ForEachMacro
) TYPE(FunctionAnnotationRParen) TYPE(FunctionDeclarationName
) TYPE(FunctionLBrace) TYPE(FunctionTypeLParen) TYPE(ImplicitStringLiteral
) TYPE(InheritanceColon) TYPE(InheritanceComma) TYPE(InlineASMBrace
) TYPE(InlineASMColon) TYPE(InlineASMSymbolicNameLSquare) TYPE
(JavaAnnotation) TYPE(JsComputedPropertyName) TYPE(JsExponentiation
) TYPE(JsExponentiationEqual) TYPE(JsFatArrow) TYPE(JsNonNullAssertion
) TYPE(JsNullishCoalescingOperator) TYPE(JsNullPropagatingOperator
) TYPE(JsPrivateIdentifier) TYPE(JsTypeColon) TYPE(JsTypeOperator
) TYPE(JsTypeOptionalQuestion) TYPE(LambdaArrow) TYPE(LambdaLBrace
) TYPE(LambdaLSquare) TYPE(LeadingJavaAnnotation) TYPE(LineComment
) TYPE(MacroBlockBegin) TYPE(MacroBlockEnd) TYPE(NamespaceMacro
) TYPE(ObjCBlockLBrace) TYPE(ObjCBlockLParen) TYPE(ObjCDecl) TYPE
(ObjCForIn) TYPE(ObjCMethodExpr) TYPE(ObjCMethodSpecifier) TYPE
(ObjCProperty) TYPE(ObjCStringLiteral) TYPE(OverloadedOperator
) TYPE(OverloadedOperatorLParen) TYPE(PointerOrReference) TYPE
(PureVirtualSpecifier) TYPE(RangeBasedForLoopColon) TYPE(RegexLiteral
) TYPE(SelectorName) TYPE(StartOfName) TYPE(StatementMacro) TYPE
(StructuredBindingLSquare) TYPE(TemplateCloser) TYPE(TemplateOpener
) TYPE(TemplateString) TYPE(ProtoExtensionLSquare) TYPE(TrailingAnnotation
) TYPE(TrailingReturnArrow) TYPE(TrailingUnaryOperator) TYPE(
TypenameMacro) TYPE(UnaryOperator) TYPE(CSharpStringLiteral) TYPE
(CSharpNamedArgumentColon) TYPE(CSharpNullable) TYPE(CSharpNullCoalescing
) TYPE(CSharpNullConditional) TYPE(CSharpNullConditionalLSquare
) TYPE(Unknown)
28#undef TYPE
29 nullptr};
30
31 if (Type < NUM_TOKEN_TYPES)
32 return TokNames[Type];
33 llvm_unreachable("unknown TokenType")::llvm::llvm_unreachable_internal("unknown TokenType", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/clang/lib/Format/FormatToken.cpp"
, 33)
;
34 return nullptr;
35}
36
37// FIXME: This is copy&pasted from Sema. Put it in a common place and remove
38// duplication.
39bool FormatToken::isSimpleTypeSpecifier() const {
40 switch (Tok.getKind()) {
41 case tok::kw_short:
42 case tok::kw_long:
43 case tok::kw___int64:
44 case tok::kw___int128:
45 case tok::kw_signed:
46 case tok::kw_unsigned:
47 case tok::kw_void:
48 case tok::kw_char:
49 case tok::kw_int:
50 case tok::kw_half:
51 case tok::kw_float:
52 case tok::kw_double:
53 case tok::kw__Float16:
54 case tok::kw___float128:
55 case tok::kw_wchar_t:
56 case tok::kw_bool:
57 case tok::kw___underlying_type:
58 case tok::annot_typename:
59 case tok::kw_char8_t:
60 case tok::kw_char16_t:
61 case tok::kw_char32_t:
62 case tok::kw_typeof:
63 case tok::kw_decltype:
64 return true;
65 default:
66 return false;
67 }
68}
69
70TokenRole::~TokenRole() {}
71
72void TokenRole::precomputeFormattingInfos(const FormatToken *Token) {}
73
74unsigned CommaSeparatedList::formatAfterToken(LineState &State,
75 ContinuationIndenter *Indenter,
76 bool DryRun) {
77 if (State.NextToken == nullptr || !State.NextToken->Previous)
78 return 0;
79
80 if (Formats.size() == 1)
81 return 0; // Handled by formatFromToken
82
83 // Ensure that we start on the opening brace.
84 const FormatToken *LBrace =
85 State.NextToken->Previous->getPreviousNonComment();
86 if (!LBrace || !LBrace->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||
87 LBrace->BlockKind == BK_Block || LBrace->Type == TT_DictLiteral ||
88 LBrace->Next->Type == TT_DesignatedInitializerPeriod)
89 return 0;
90
91 // Calculate the number of code points we have to format this list. As the
92 // first token is already placed, we have to subtract it.
93 unsigned RemainingCodePoints =
94 Style.ColumnLimit - State.Column + State.NextToken->Previous->ColumnWidth;
95
96 // Find the best ColumnFormat, i.e. the best number of columns to use.
97 const ColumnFormat *Format = getColumnFormat(RemainingCodePoints);
98
99 // If no ColumnFormat can be used, the braced list would generally be
100 // bin-packed. Add a severe penalty to this so that column layouts are
101 // preferred if possible.
102 if (!Format)
103 return 10000;
104
105 // Format the entire list.
106 unsigned Penalty = 0;
107 unsigned Column = 0;
108 unsigned Item = 0;
109 while (State.NextToken != LBrace->MatchingParen) {
110 bool NewLine = false;
111 unsigned ExtraSpaces = 0;
112
113 // If the previous token was one of our commas, we are now on the next item.
114 if (Item < Commas.size() && State.NextToken->Previous == Commas[Item]) {
115 if (!State.NextToken->isTrailingComment()) {
116 ExtraSpaces += Format->ColumnSizes[Column] - ItemLengths[Item];
117 ++Column;
118 }
119 ++Item;
120 }
121
122 if (Column == Format->Columns || State.NextToken->MustBreakBefore) {
123 Column = 0;
124 NewLine = true;
125 }
126
127 // Place token using the continuation indenter and store the penalty.
128 Penalty += Indenter->addTokenToState(State, NewLine, DryRun, ExtraSpaces);
129 }
130 return Penalty;
131}
132
133unsigned CommaSeparatedList::formatFromToken(LineState &State,
134 ContinuationIndenter *Indenter,
135 bool DryRun) {
136 // Formatting with 1 Column isn't really a column layout, so we don't need the
137 // special logic here. We can just avoid bin packing any of the parameters.
138 if (Formats.size() == 1 || HasNestedBracedList)
139 State.Stack.back().AvoidBinPacking = true;
140 return 0;
141}
142
143// Returns the lengths in code points between Begin and End (both included),
144// assuming that the entire sequence is put on a single line.
145static unsigned CodePointsBetween(const FormatToken *Begin,
146 const FormatToken *End) {
147 assert(End->TotalLength >= Begin->TotalLength)((End->TotalLength >= Begin->TotalLength) ? static_cast
<void> (0) : __assert_fail ("End->TotalLength >= Begin->TotalLength"
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/clang/lib/Format/FormatToken.cpp"
, 147, __PRETTY_FUNCTION__))
;
148 return End->TotalLength - Begin->TotalLength + Begin->ColumnWidth;
149}
150
151void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) {
152 // FIXME: At some point we might want to do this for other lists, too.
153 if (!Token->MatchingParen ||
1
Assuming field 'MatchingParen' is non-null
5
Taking false branch
154 !Token->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare))
2
Calling 'FormatToken::isOneOf'
4
Returning from 'FormatToken::isOneOf'
155 return;
156
157 // In C++11 braced list style, we should not format in columns unless they
158 // have many items (20 or more) or we allow bin-packing of function call
159 // arguments.
160 if (Style.Cpp11BracedListStyle && !Style.BinPackArguments &&
6
Assuming field 'Cpp11BracedListStyle' is false
161 Commas.size() < 19)
162 return;
163
164 // Limit column layout for JavaScript array initializers to 20 or more items
165 // for now to introduce it carefully. We can become more aggressive if this
166 // necessary.
167 if (Token->is(TT_ArrayInitializerLSquare) && Commas.size() < 19)
7
Calling 'FormatToken::is'
9
Returning from 'FormatToken::is'
10
Assuming the condition is false
11
Taking false branch
168 return;
169
170 // Column format doesn't really make sense if we don't align after brackets.
171 if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign)
12
Assuming field 'AlignAfterOpenBracket' is not equal to BAS_DontAlign
13
Taking false branch
172 return;
173
174 FormatToken *ItemBegin = Token->Next;
175 while (ItemBegin->isTrailingComment())
14
Loop condition is false. Execution continues on line 177
176 ItemBegin = ItemBegin->Next;
177 SmallVector<bool, 8> MustBreakBeforeItem;
178
179 // The lengths of an item if it is put at the end of the line. This includes
180 // trailing comments which are otherwise ignored for column alignment.
181 SmallVector<unsigned, 8> EndOfLineItemLength;
182
183 bool HasSeparatingComment = false;
184 for (unsigned i = 0, e = Commas.size() + 1; i
14.1
'i' is not equal to 'e'
21.1
'i' is not equal to 'e'
14.1
'i' is not equal to 'e'
21.1
'i' is not equal to 'e'
!= e; ++i) {
15
Loop condition is true. Entering loop body
22
Loop condition is true. Entering loop body
185 // Skip comments on their own line.
186 while (ItemBegin->HasUnescapedNewline && ItemBegin->isTrailingComment()) {
16
Assuming field 'HasUnescapedNewline' is false
23
Access to field 'HasUnescapedNewline' results in a dereference of a null pointer (loaded from variable 'ItemBegin')
187 ItemBegin = ItemBegin->Next;
188 HasSeparatingComment = i > 0;
189 }
190
191 MustBreakBeforeItem.push_back(ItemBegin->MustBreakBefore);
192 if (ItemBegin->is(tok::l_brace))
17
Taking false branch
193 HasNestedBracedList = true;
194 const FormatToken *ItemEnd = nullptr;
195 if (i == Commas.size()) {
18
Taking false branch
196 ItemEnd = Token->MatchingParen;
197 const FormatToken *NonCommentEnd = ItemEnd->getPreviousNonComment();
198 ItemLengths.push_back(CodePointsBetween(ItemBegin, NonCommentEnd));
199 if (Style.Cpp11BracedListStyle &&
200 !ItemEnd->Previous->isTrailingComment()) {
201 // In Cpp11 braced list style, the } and possibly other subsequent
202 // tokens will need to stay on a line with the last element.
203 while (ItemEnd->Next && !ItemEnd->Next->CanBreakBefore)
204 ItemEnd = ItemEnd->Next;
205 } else {
206 // In other braced lists styles, the "}" can be wrapped to the new line.
207 ItemEnd = Token->MatchingParen->Previous;
208 }
209 } else {
210 ItemEnd = Commas[i];
211 // The comma is counted as part of the item when calculating the length.
212 ItemLengths.push_back(CodePointsBetween(ItemBegin, ItemEnd));
213
214 // Consume trailing comments so the are included in EndOfLineItemLength.
215 if (ItemEnd->Next && !ItemEnd->Next->HasUnescapedNewline &&
19
Assuming field 'Next' is null
216 ItemEnd->Next->isTrailingComment())
217 ItemEnd = ItemEnd->Next;
218 }
219 EndOfLineItemLength.push_back(CodePointsBetween(ItemBegin, ItemEnd));
220 // If there is a trailing comma in the list, the next item will start at the
221 // closing brace. Don't create an extra item for this.
222 if (ItemEnd->getNextNonComment() == Token->MatchingParen)
20
Taking false branch
223 break;
224 ItemBegin = ItemEnd->Next;
21
Null pointer value stored to 'ItemBegin'
225 }
226
227 // Don't use column layout for lists with few elements and in presence of
228 // separating comments.
229 if (Commas.size() < 5 || HasSeparatingComment)
230 return;
231
232 if (Token->NestingLevel != 0 && Token->is(tok::l_brace) && Commas.size() < 19)
233 return;
234
235 // We can never place more than ColumnLimit / 3 items in a row (because of the
236 // spaces and the comma).
237 unsigned MaxItems = Style.ColumnLimit / 3;
238 std::vector<unsigned> MinSizeInColumn;
239 MinSizeInColumn.reserve(MaxItems);
240 for (unsigned Columns = 1; Columns <= MaxItems; ++Columns) {
241 ColumnFormat Format;
242 Format.Columns = Columns;
243 Format.ColumnSizes.resize(Columns);
244 MinSizeInColumn.assign(Columns, UINT_MAX(2147483647 *2U +1U));
245 Format.LineCount = 1;
246 bool HasRowWithSufficientColumns = false;
247 unsigned Column = 0;
248 for (unsigned i = 0, e = ItemLengths.size(); i != e; ++i) {
249 assert(i < MustBreakBeforeItem.size())((i < MustBreakBeforeItem.size()) ? static_cast<void>
(0) : __assert_fail ("i < MustBreakBeforeItem.size()", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/clang/lib/Format/FormatToken.cpp"
, 249, __PRETTY_FUNCTION__))
;
250 if (MustBreakBeforeItem[i] || Column == Columns) {
251 ++Format.LineCount;
252 Column = 0;
253 }
254 if (Column == Columns - 1)
255 HasRowWithSufficientColumns = true;
256 unsigned Length =
257 (Column == Columns - 1) ? EndOfLineItemLength[i] : ItemLengths[i];
258 Format.ColumnSizes[Column] = std::max(Format.ColumnSizes[Column], Length);
259 MinSizeInColumn[Column] = std::min(MinSizeInColumn[Column], Length);
260 ++Column;
261 }
262 // If all rows are terminated early (e.g. by trailing comments), we don't
263 // need to look further.
264 if (!HasRowWithSufficientColumns)
265 break;
266 Format.TotalWidth = Columns - 1; // Width of the N-1 spaces.
267
268 for (unsigned i = 0; i < Columns; ++i)
269 Format.TotalWidth += Format.ColumnSizes[i];
270
271 // Don't use this Format, if the difference between the longest and shortest
272 // element in a column exceeds a threshold to avoid excessive spaces.
273 if ([&] {
274 for (unsigned i = 0; i < Columns - 1; ++i)
275 if (Format.ColumnSizes[i] - MinSizeInColumn[i] > 10)
276 return true;
277 return false;
278 }())
279 continue;
280
281 // Ignore layouts that are bound to violate the column limit.
282 if (Format.TotalWidth > Style.ColumnLimit && Columns > 1)
283 continue;
284
285 Formats.push_back(Format);
286 }
287}
288
289const CommaSeparatedList::ColumnFormat *
290CommaSeparatedList::getColumnFormat(unsigned RemainingCharacters) const {
291 const ColumnFormat *BestFormat = nullptr;
292 for (SmallVector<ColumnFormat, 4>::const_reverse_iterator
293 I = Formats.rbegin(),
294 E = Formats.rend();
295 I != E; ++I) {
296 if (I->TotalWidth <= RemainingCharacters || I->Columns == 1) {
297 if (BestFormat && I->LineCount > BestFormat->LineCount)
298 break;
299 BestFormat = &*I;
300 }
301 }
302 return BestFormat;
303}
304
305} // namespace format
306} // namespace clang

/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/clang/lib/Format/FormatToken.h

1//===--- FormatToken.h - Format C++ code ------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file contains the declaration of the FormatToken, a wrapper
11/// around Token with additional information related to formatting.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKEN_H
16#define LLVM_CLANG_LIB_FORMAT_FORMATTOKEN_H
17
18#include "clang/Basic/IdentifierTable.h"
19#include "clang/Basic/OperatorPrecedence.h"
20#include "clang/Format/Format.h"
21#include "clang/Lex/Lexer.h"
22#include <memory>
23#include <unordered_set>
24
25namespace clang {
26namespace format {
27
// X-macro listing every token type. Define TYPE(X) before expanding this list
// and #undef it afterwards; each entry expands to TYPE(<name>).
#define LIST_TOKEN_TYPES                                                       \
  TYPE(ArrayInitializerLSquare)                                                \
  TYPE(ArraySubscriptLSquare)                                                  \
  TYPE(AttributeColon)                                                         \
  TYPE(AttributeParen)                                                         \
  TYPE(AttributeSquare)                                                        \
  TYPE(BinaryOperator)                                                         \
  TYPE(BitFieldColon)                                                          \
  TYPE(BlockComment)                                                           \
  TYPE(CastRParen)                                                             \
  TYPE(ConditionalExpr)                                                        \
  TYPE(ConflictAlternative)                                                    \
  TYPE(ConflictEnd)                                                            \
  TYPE(ConflictStart)                                                          \
  TYPE(CtorInitializerColon)                                                   \
  TYPE(CtorInitializerComma)                                                   \
  TYPE(DesignatedInitializerLSquare)                                           \
  TYPE(DesignatedInitializerPeriod)                                            \
  TYPE(DictLiteral)                                                            \
  TYPE(ForEachMacro)                                                           \
  TYPE(FunctionAnnotationRParen)                                               \
  TYPE(FunctionDeclarationName)                                                \
  TYPE(FunctionLBrace)                                                         \
  TYPE(FunctionTypeLParen)                                                     \
  TYPE(ImplicitStringLiteral)                                                  \
  TYPE(InheritanceColon)                                                       \
  TYPE(InheritanceComma)                                                       \
  TYPE(InlineASMBrace)                                                         \
  TYPE(InlineASMColon)                                                         \
  TYPE(InlineASMSymbolicNameLSquare)                                           \
  TYPE(JavaAnnotation)                                                         \
  TYPE(JsComputedPropertyName)                                                 \
  TYPE(JsExponentiation)                                                       \
  TYPE(JsExponentiationEqual)                                                  \
  TYPE(JsFatArrow)                                                             \
  TYPE(JsNonNullAssertion)                                                     \
  TYPE(JsNullishCoalescingOperator)                                            \
  TYPE(JsNullPropagatingOperator)                                              \
  TYPE(JsPrivateIdentifier)                                                    \
  TYPE(JsTypeColon)                                                            \
  TYPE(JsTypeOperator)                                                         \
  TYPE(JsTypeOptionalQuestion)                                                 \
  TYPE(LambdaArrow)                                                            \
  TYPE(LambdaLBrace)                                                           \
  TYPE(LambdaLSquare)                                                          \
  TYPE(LeadingJavaAnnotation)                                                  \
  TYPE(LineComment)                                                            \
  TYPE(MacroBlockBegin)                                                        \
  TYPE(MacroBlockEnd)                                                          \
  TYPE(NamespaceMacro)                                                         \
  TYPE(ObjCBlockLBrace)                                                        \
  TYPE(ObjCBlockLParen)                                                        \
  TYPE(ObjCDecl)                                                               \
  TYPE(ObjCForIn)                                                              \
  TYPE(ObjCMethodExpr)                                                         \
  TYPE(ObjCMethodSpecifier)                                                    \
  TYPE(ObjCProperty)                                                           \
  TYPE(ObjCStringLiteral)                                                      \
  TYPE(OverloadedOperator)                                                     \
  TYPE(OverloadedOperatorLParen)                                               \
  TYPE(PointerOrReference)                                                     \
  TYPE(PureVirtualSpecifier)                                                   \
  TYPE(RangeBasedForLoopColon)                                                 \
  TYPE(RegexLiteral)                                                           \
  TYPE(SelectorName)                                                           \
  TYPE(StartOfName)                                                            \
  TYPE(StatementMacro)                                                         \
  TYPE(StructuredBindingLSquare)                                               \
  TYPE(TemplateCloser)                                                         \
  TYPE(TemplateOpener)                                                         \
  TYPE(TemplateString)                                                         \
  TYPE(ProtoExtensionLSquare)                                                  \
  TYPE(TrailingAnnotation)                                                     \
  TYPE(TrailingReturnArrow)                                                    \
  TYPE(TrailingUnaryOperator)                                                  \
  TYPE(TypenameMacro)                                                          \
  TYPE(UnaryOperator)                                                          \
  TYPE(CSharpStringLiteral)                                                    \
  TYPE(CSharpNamedArgumentColon)                                               \
  TYPE(CSharpNullable)                                                         \
  TYPE(CSharpNullCoalescing)                                                   \
  TYPE(CSharpNullConditional)                                                  \
  TYPE(CSharpNullConditionalLSquare)                                           \
  TYPE(Unknown)
112
113enum TokenType {
114#define TYPE(X) TT_##X,
115 LIST_TOKEN_TYPESTYPE(ArrayInitializerLSquare) TYPE(ArraySubscriptLSquare) TYPE
(AttributeColon) TYPE(AttributeParen) TYPE(AttributeSquare) TYPE
(BinaryOperator) TYPE(BitFieldColon) TYPE(BlockComment) TYPE(
CastRParen) TYPE(ConditionalExpr) TYPE(ConflictAlternative) TYPE
(ConflictEnd) TYPE(ConflictStart) TYPE(CtorInitializerColon) TYPE
(CtorInitializerComma) TYPE(DesignatedInitializerLSquare) TYPE
(DesignatedInitializerPeriod) TYPE(DictLiteral) TYPE(ForEachMacro
) TYPE(FunctionAnnotationRParen) TYPE(FunctionDeclarationName
) TYPE(FunctionLBrace) TYPE(FunctionTypeLParen) TYPE(ImplicitStringLiteral
) TYPE(InheritanceColon) TYPE(InheritanceComma) TYPE(InlineASMBrace
) TYPE(InlineASMColon) TYPE(InlineASMSymbolicNameLSquare) TYPE
(JavaAnnotation) TYPE(JsComputedPropertyName) TYPE(JsExponentiation
) TYPE(JsExponentiationEqual) TYPE(JsFatArrow) TYPE(JsNonNullAssertion
) TYPE(JsNullishCoalescingOperator) TYPE(JsNullPropagatingOperator
) TYPE(JsPrivateIdentifier) TYPE(JsTypeColon) TYPE(JsTypeOperator
) TYPE(JsTypeOptionalQuestion) TYPE(LambdaArrow) TYPE(LambdaLBrace
) TYPE(LambdaLSquare) TYPE(LeadingJavaAnnotation) TYPE(LineComment
) TYPE(MacroBlockBegin) TYPE(MacroBlockEnd) TYPE(NamespaceMacro
) TYPE(ObjCBlockLBrace) TYPE(ObjCBlockLParen) TYPE(ObjCDecl) TYPE
(ObjCForIn) TYPE(ObjCMethodExpr) TYPE(ObjCMethodSpecifier) TYPE
(ObjCProperty) TYPE(ObjCStringLiteral) TYPE(OverloadedOperator
) TYPE(OverloadedOperatorLParen) TYPE(PointerOrReference) TYPE
(PureVirtualSpecifier) TYPE(RangeBasedForLoopColon) TYPE(RegexLiteral
) TYPE(SelectorName) TYPE(StartOfName) TYPE(StatementMacro) TYPE
(StructuredBindingLSquare) TYPE(TemplateCloser) TYPE(TemplateOpener
) TYPE(TemplateString) TYPE(ProtoExtensionLSquare) TYPE(TrailingAnnotation
) TYPE(TrailingReturnArrow) TYPE(TrailingUnaryOperator) TYPE(
TypenameMacro) TYPE(UnaryOperator) TYPE(CSharpStringLiteral) TYPE
(CSharpNamedArgumentColon) TYPE(CSharpNullable) TYPE(CSharpNullCoalescing
) TYPE(CSharpNullConditional) TYPE(CSharpNullConditionalLSquare
) TYPE(Unknown)
116#undef TYPE
117 NUM_TOKEN_TYPES
118};
119
120/// Determines the name of a token type.
121const char *getTokenTypeName(TokenType Type);
122
// Represents what type of block a set of braces open.
enum BraceBlockKind {
  BK_Unknown,
  BK_Block,
  BK_BracedInit
};
125
// The packing kind of a function's parameters.
enum ParameterPackingKind {
  PPK_BinPacked,
  PPK_OnePerLine,
  PPK_Inconclusive
};
128
// Formatting decision recorded for a token; the first enumerator is
// FD_Unformatted.
enum FormatDecision {
  FD_Unformatted,
  FD_Continue,
  FD_Break
};
130
131class TokenRole;
132class AnnotatedLine;
133
134/// A wrapper around a \c Token storing information about the
135/// whitespace characters preceding it.
136struct FormatToken {
137 FormatToken() {}
138
139 /// The \c Token.
140 Token Tok;
141
142 /// The number of newlines immediately before the \c Token.
143 ///
144 /// This can be used to determine what the user wrote in the original code
145 /// and thereby e.g. leave an empty line between two function definitions.
146 unsigned NewlinesBefore = 0;
147
148 /// Whether there is at least one unescaped newline before the \c
149 /// Token.
150 bool HasUnescapedNewline = false;
151
152 /// The range of the whitespace immediately preceding the \c Token.
153 SourceRange WhitespaceRange;
154
155 /// The offset just past the last '\n' in this token's leading
156 /// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'.
157 unsigned LastNewlineOffset = 0;
158
159 /// The width of the non-whitespace parts of the token (or its first
160 /// line for multi-line tokens) in columns.
161 /// We need this to correctly measure number of columns a token spans.
162 unsigned ColumnWidth = 0;
163
164 /// Contains the width in columns of the last line of a multi-line
165 /// token.
166 unsigned LastLineColumnWidth = 0;
167
168 /// Whether the token text contains newlines (escaped or not).
169 bool IsMultiline = false;
170
171 /// Indicates that this is the first token of the file.
172 bool IsFirst = false;
173
174 /// Whether there must be a line break before this token.
175 ///
176 /// This happens for example when a preprocessor directive ended directly
177 /// before the token.
178 bool MustBreakBefore = false;
179
180 /// The raw text of the token.
181 ///
182 /// Contains the raw token text without leading whitespace and without leading
183 /// escaped newlines.
184 StringRef TokenText;
185
186 /// Set to \c true if this token is an unterminated literal.
187 bool IsUnterminatedLiteral = 0;
188
189 /// Contains the kind of block if this token is a brace.
190 BraceBlockKind BlockKind = BK_Unknown;
191
192 TokenType Type = TT_Unknown;
193
194 /// The number of spaces that should be inserted before this token.
195 unsigned SpacesRequiredBefore = 0;
196
197 /// \c true if it is allowed to break before this token.
198 bool CanBreakBefore = false;
199
200 /// \c true if this is the ">" of "template<..>".
201 bool ClosesTemplateDeclaration = false;
202
203 /// Number of parameters, if this is "(", "[" or "<".
204 unsigned ParameterCount = 0;
205
206 /// Number of parameters that are nested blocks,
207 /// if this is "(", "[" or "<".
208 unsigned BlockParameterCount = 0;
209
210 /// If this is a bracket ("<", "(", "[" or "{"), contains the kind of
211 /// the surrounding bracket.
212 tok::TokenKind ParentBracket = tok::unknown;
213
214 /// A token can have a special role that can carry extra information
215 /// about the token's formatting.
216 std::unique_ptr<TokenRole> Role;
217
218 /// If this is an opening parenthesis, how are the parameters packed?
219 ParameterPackingKind PackingKind = PPK_Inconclusive;
220
221 /// The total length of the unwrapped line up to and including this
222 /// token.
223 unsigned TotalLength = 0;
224
225 /// The original 0-based column of this token, including expanded tabs.
226 /// The configured TabWidth is used as tab width.
227 unsigned OriginalColumn = 0;
228
229 /// The length of following tokens until the next natural split point,
230 /// or the next token that can be broken.
231 unsigned UnbreakableTailLength = 0;
232
233 // FIXME: Come up with a 'cleaner' concept.
234 /// The binding strength of a token. This is a combined value of
235 /// operator precedence, parenthesis nesting, etc.
236 unsigned BindingStrength = 0;
237
238 /// The nesting level of this token, i.e. the number of surrounding (),
239 /// [], {} or <>.
240 unsigned NestingLevel = 0;
241
242 /// The indent level of this token. Copied from the surrounding line.
243 unsigned IndentLevel = 0;
244
245 /// Penalty for inserting a line break before this token.
246 unsigned SplitPenalty = 0;
247
248 /// If this is the first ObjC selector name in an ObjC method
249 /// definition or call, this contains the length of the longest name.
250 ///
251 /// This being set to 0 means that the selectors should not be colon-aligned,
252 /// e.g. because several of them are block-type.
253 unsigned LongestObjCSelectorName = 0;
254
255 /// If this is the first ObjC selector name in an ObjC method
256 /// definition or call, this contains the number of parts that the whole
257 /// selector consist of.
258 unsigned ObjCSelectorNameParts = 0;
259
260 /// The 0-based index of the parameter/argument. For ObjC it is set
261 /// for the selector name token.
262 /// For now calculated only for ObjC.
263 unsigned ParameterIndex = 0;
264
265 /// Stores the number of required fake parentheses and the
266 /// corresponding operator precedence.
267 ///
268 /// If multiple fake parentheses start at a token, this vector stores them in
269 /// reverse order, i.e. inner fake parenthesis first.
270 SmallVector<prec::Level, 4> FakeLParens;
271 /// Insert this many fake ) after this token for correct indentation.
272 unsigned FakeRParens = 0;
273
274 /// \c true if this token starts a binary expression, i.e. has at least
275 /// one fake l_paren with a precedence greater than prec::Unknown.
276 bool StartsBinaryExpression = false;
277 /// \c true if this token ends a binary expression.
278 bool EndsBinaryExpression = false;
279
280 /// If this is an operator (or "."/"->") in a sequence of operators
281 /// with the same precedence, contains the 0-based operator index.
282 unsigned OperatorIndex = 0;
283
284 /// If this is an operator (or "."/"->") in a sequence of operators
285 /// with the same precedence, points to the next operator.
286 FormatToken *NextOperator = nullptr;
287
288 /// Is this token part of a \c DeclStmt defining multiple variables?
289 ///
290 /// Only set if \c Type == \c TT_StartOfName.
291 bool PartOfMultiVariableDeclStmt = false;
292
293 /// Does this line comment continue a line comment section?
294 ///
295 /// Only set to true if \c Type == \c TT_LineComment.
296 bool ContinuesLineCommentSection = false;
297
298 /// If this is a bracket, this points to the matching one.
299 FormatToken *MatchingParen = nullptr;
300
301 /// The previous token in the unwrapped line.
302 FormatToken *Previous = nullptr;
303
304 /// The next token in the unwrapped line.
305 FormatToken *Next = nullptr;
306
307 /// If this token starts a block, this contains all the unwrapped lines
308 /// in it.
309 SmallVector<AnnotatedLine *, 1> Children;
310
311 /// Stores the formatting decision for the token once it was made.
312 FormatDecision Decision = FD_Unformatted;
313
314 /// If \c true, this token has been fully formatted (indented and
315 /// potentially re-formatted inside), and we do not allow further formatting
316 /// changes.
317 bool Finalized = false;
318
319 bool is(tok::TokenKind Kind) const { return Tok.is(Kind); }
320 bool is(TokenType TT) const { return Type == TT; }
8
Returning the value 1, which participates in a condition later
321 bool is(const IdentifierInfo *II) const {
322 return II && II == Tok.getIdentifierInfo();
323 }
324 bool is(tok::PPKeywordKind Kind) const {
325 return Tok.getIdentifierInfo() &&
326 Tok.getIdentifierInfo()->getPPKeywordID() == Kind;
327 }
328 template <typename A, typename B> bool isOneOf(A K1, B K2) const {
329 return is(K1) || is(K2);
3
Returning the value 1, which participates in a condition later
330 }
331 template <typename A, typename B, typename... Ts>
332 bool isOneOf(A K1, B K2, Ts... Ks) const {
333 return is(K1) || isOneOf(K2, Ks...);
334 }
335 template <typename T> bool isNot(T Kind) const { return !is(Kind); }
336
337 bool isIf(bool AllowConstexprMacro = true) const {
338 return is(tok::kw_if) || endsSequence(tok::kw_constexpr, tok::kw_if) ||
339 (endsSequence(tok::identifier, tok::kw_if) && AllowConstexprMacro);
340 }
341
342 bool closesScopeAfterBlock() const {
343 if (BlockKind == BK_Block)
344 return true;
345 if (closesScope())
346 return Previous->closesScopeAfterBlock();
347 return false;
348 }
349
350 /// \c true if this token starts a sequence with the given tokens in order,
351 /// following the ``Next`` pointers, ignoring comments.
352 template <typename A, typename... Ts>
353 bool startsSequence(A K1, Ts... Tokens) const {
354 return startsSequenceInternal(K1, Tokens...);
355 }
356
357 /// \c true if this token ends a sequence with the given tokens in order,
358 /// following the ``Previous`` pointers, ignoring comments.
359 /// For example, given tokens [T1, T2, T3], the function returns true if
360 /// 3 tokens ending at this (ignoring comments) are [T3, T2, T1]. In other
361 /// words, the tokens passed to this function need to be the reverse of the
362 /// order the tokens appear in code.
363 template <typename A, typename... Ts>
364 bool endsSequence(A K1, Ts... Tokens) const {
365 return endsSequenceInternal(K1, Tokens...);
366 }
367
368 bool isStringLiteral() const { return tok::isStringLiteral(Tok.getKind()); }
369
370 bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const {
371 return Tok.isObjCAtKeyword(Kind);
372 }
373
374 bool isAccessSpecifier(bool ColonRequired = true) const {
375 return isOneOf(tok::kw_public, tok::kw_protected, tok::kw_private) &&
376 (!ColonRequired || (Next && Next->is(tok::colon)));
377 }
378
379 /// Determine whether the token is a simple-type-specifier.
380 bool isSimpleTypeSpecifier() const;
381
382 bool isObjCAccessSpecifier() const {
383 return is(tok::at) && Next &&
384 (Next->isObjCAtKeyword(tok::objc_public) ||
385 Next->isObjCAtKeyword(tok::objc_protected) ||
386 Next->isObjCAtKeyword(tok::objc_package) ||
387 Next->isObjCAtKeyword(tok::objc_private));
388 }
389
390 /// Returns whether \p Tok is ([{ or an opening < of a template or in
391 /// protos.
392 bool opensScope() const {
393 if (is(TT_TemplateString) && TokenText.endswith("${"))
394 return true;
395 if (is(TT_DictLiteral) && is(tok::less))
396 return true;
397 return isOneOf(tok::l_paren, tok::l_brace, tok::l_square,
398 TT_TemplateOpener);
399 }
400 /// Returns whether \p Tok is )]} or a closing > of a template or in
401 /// protos.
402 bool closesScope() const {
403 if (is(TT_TemplateString) && TokenText.startswith("}"))
404 return true;
405 if (is(TT_DictLiteral) && is(tok::greater))
406 return true;
407 return isOneOf(tok::r_paren, tok::r_brace, tok::r_square,
408 TT_TemplateCloser);
409 }
410
411 /// Returns \c true if this is a ".", "->" or "->*" accessing a member.
412 bool isMemberAccess() const {
413 return isOneOf(tok::arrow, tok::period, tok::arrowstar) &&
414 !isOneOf(TT_DesignatedInitializerPeriod, TT_TrailingReturnArrow,
415 TT_LambdaArrow, TT_LeadingJavaAnnotation);
416 }
417
418 bool isUnaryOperator() const {
419 switch (Tok.getKind()) {
420 case tok::plus:
421 case tok::plusplus:
422 case tok::minus:
423 case tok::minusminus:
424 case tok::exclaim:
425 case tok::tilde:
426 case tok::kw_sizeof:
427 case tok::kw_alignof:
428 return true;
429 default:
430 return false;
431 }
432 }
433
434 bool isBinaryOperator() const {
435 // Comma is a binary operator, but does not behave as such wrt. formatting.
436 return getPrecedence() > prec::Comma;
437 }
438
439 bool isTrailingComment() const {
440 return is(tok::comment) &&
441 (is(TT_LineComment) || !Next || Next->NewlinesBefore > 0);
442 }
443
444 /// Returns \c true if this is a keyword that can be used
445 /// like a function call (e.g. sizeof, typeid, ...).
446 bool isFunctionLikeKeyword() const {
447 switch (Tok.getKind()) {
448 case tok::kw_throw:
449 case tok::kw_typeid:
450 case tok::kw_return:
451 case tok::kw_sizeof:
452 case tok::kw_alignof:
453 case tok::kw_alignas:
454 case tok::kw_decltype:
455 case tok::kw_noexcept:
456 case tok::kw_static_assert:
457 case tok::kw___attribute:
458 return true;
459 default:
460 return false;
461 }
462 }
463
464 /// Returns \c true if this is a string literal that's like a label,
465 /// e.g. ends with "=" or ":".
466 bool isLabelString() const {
467 if (!is(tok::string_literal))
468 return false;
469 StringRef Content = TokenText;
470 if (Content.startswith("\"") || Content.startswith("'"))
471 Content = Content.drop_front(1);
472 if (Content.endswith("\"") || Content.endswith("'"))
473 Content = Content.drop_back(1);
474 Content = Content.trim();
475 return Content.size() > 1 &&
476 (Content.back() == ':' || Content.back() == '=');
477 }
478
479 /// Returns actual token start location without leading escaped
480 /// newlines and whitespace.
481 ///
482 /// This can be different to Tok.getLocation(), which includes leading escaped
483 /// newlines.
484 SourceLocation getStartOfNonWhitespace() const {
485 return WhitespaceRange.getEnd();
486 }
487
488 prec::Level getPrecedence() const {
489 return getBinOpPrecedence(Tok.getKind(), /*GreaterThanIsOperator=*/true,
490 /*CPlusPlus11=*/true);
491 }
492
493 /// Returns the previous token ignoring comments.
494 FormatToken *getPreviousNonComment() const {
495 FormatToken *Tok = Previous;
496 while (Tok && Tok->is(tok::comment))
497 Tok = Tok->Previous;
498 return Tok;
499 }
500
501 /// Returns the next token ignoring comments.
502 const FormatToken *getNextNonComment() const {
503 const FormatToken *Tok = Next;
504 while (Tok && Tok->is(tok::comment))
505 Tok = Tok->Next;
506 return Tok;
507 }
508
509 /// Returns \c true if this token starts a block-type list, i.e. a
510 /// list that should be indented with a block indent.
511 bool opensBlockOrBlockTypeList(const FormatStyle &Style) const {
512 if (is(TT_TemplateString) && opensScope())
513 return true;
514 return is(TT_ArrayInitializerLSquare) || is(TT_ProtoExtensionLSquare) ||
515 (is(tok::l_brace) &&
516 (BlockKind == BK_Block || is(TT_DictLiteral) ||
517 (!Style.Cpp11BracedListStyle && NestingLevel == 0))) ||
518 (is(tok::less) && (Style.Language == FormatStyle::LK_Proto ||
519 Style.Language == FormatStyle::LK_TextProto));
520 }
521
522 /// Returns whether the token is the left square bracket of a C++
523 /// structured binding declaration.
524 bool isCppStructuredBinding(const FormatStyle &Style) const {
525 if (!Style.isCpp() || isNot(tok::l_square))
526 return false;
527 const FormatToken *T = this;
528 do {
529 T = T->getPreviousNonComment();
530 } while (T && T->isOneOf(tok::kw_const, tok::kw_volatile, tok::amp,
531 tok::ampamp));
532 return T && T->is(tok::kw_auto);
533 }
534
535 /// Same as opensBlockOrBlockTypeList, but for the closing token.
536 bool closesBlockOrBlockTypeList(const FormatStyle &Style) const {
537 if (is(TT_TemplateString) && closesScope())
538 return true;
539 return MatchingParen && MatchingParen->opensBlockOrBlockTypeList(Style);
540 }
541
542 /// Return the actual namespace token, if this token starts a namespace
543 /// block.
544 const FormatToken *getNamespaceToken() const {
545 const FormatToken *NamespaceTok = this;
546 if (is(tok::comment))
547 NamespaceTok = NamespaceTok->getNextNonComment();
548 // Detect "(inline|export)? namespace" in the beginning of a line.
549 if (NamespaceTok && NamespaceTok->isOneOf(tok::kw_inline, tok::kw_export))
550 NamespaceTok = NamespaceTok->getNextNonComment();
551 return NamespaceTok &&
552 NamespaceTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro)
553 ? NamespaceTok
554 : nullptr;
555 }
556
557private:
558 // Disallow copying.
559 FormatToken(const FormatToken &) = delete;
560 void operator=(const FormatToken &) = delete;
561
562 template <typename A, typename... Ts>
563 bool startsSequenceInternal(A K1, Ts... Tokens) const {
564 if (is(tok::comment) && Next)
565 return Next->startsSequenceInternal(K1, Tokens...);
566 return is(K1) && Next && Next->startsSequenceInternal(Tokens...);
567 }
568
569 template <typename A> bool startsSequenceInternal(A K1) const {
570 if (is(tok::comment) && Next)
571 return Next->startsSequenceInternal(K1);
572 return is(K1);
573 }
574
575 template <typename A, typename... Ts> bool endsSequenceInternal(A K1) const {
576 if (is(tok::comment) && Previous)
577 return Previous->endsSequenceInternal(K1);
578 return is(K1);
579 }
580
581 template <typename A, typename... Ts>
582 bool endsSequenceInternal(A K1, Ts... Tokens) const {
583 if (is(tok::comment) && Previous)
584 return Previous->endsSequenceInternal(K1, Tokens...);
585 return is(K1) && Previous && Previous->endsSequenceInternal(Tokens...);
586 }
587};
588
589class ContinuationIndenter;
590struct LineState;
591
592class TokenRole {
593public:
594 TokenRole(const FormatStyle &Style) : Style(Style) {}
595 virtual ~TokenRole();
596
597 /// After the \c TokenAnnotator has finished annotating all the tokens,
598 /// this function precomputes required information for formatting.
599 virtual void precomputeFormattingInfos(const FormatToken *Token);
600
601 /// Apply the special formatting that the given role demands.
602 ///
603 /// Assumes that the token having this role is already formatted.
604 ///
605 /// Continues formatting from \p State leaving indentation to \p Indenter and
606 /// returns the total penalty that this formatting incurs.
607 virtual unsigned formatFromToken(LineState &State,
608 ContinuationIndenter *Indenter,
609 bool DryRun) {
610 return 0;
611 }
612
613 /// Same as \c formatFromToken, but assumes that the first token has
614 /// already been set thereby deciding on the first line break.
615 virtual unsigned formatAfterToken(LineState &State,
616 ContinuationIndenter *Indenter,
617 bool DryRun) {
618 return 0;
619 }
620
621 /// Notifies the \c Role that a comma was found.
622 virtual void CommaFound(const FormatToken *Token) {}
623
624 virtual const FormatToken *lastComma() { return nullptr; }
625
626protected:
627 const FormatStyle &Style;
628};
629
630class CommaSeparatedList : public TokenRole {
631public:
632 CommaSeparatedList(const FormatStyle &Style)
633 : TokenRole(Style), HasNestedBracedList(false) {}
634
635 void precomputeFormattingInfos(const FormatToken *Token) override;
636
637 unsigned formatAfterToken(LineState &State, ContinuationIndenter *Indenter,
638 bool DryRun) override;
639
640 unsigned formatFromToken(LineState &State, ContinuationIndenter *Indenter,
641 bool DryRun) override;
642
643 /// Adds \p Token as the next comma to the \c CommaSeparated list.
644 void CommaFound(const FormatToken *Token) override {
645 Commas.push_back(Token);
646 }
647
648 const FormatToken *lastComma() override {
649 if (Commas.empty())
650 return nullptr;
651 return Commas.back();
652 }
653
654private:
655 /// A struct that holds information on how to format a given list with
656 /// a specific number of columns.
657 struct ColumnFormat {
658 /// The number of columns to use.
659 unsigned Columns;
660
661 /// The total width in characters.
662 unsigned TotalWidth;
663
664 /// The number of lines required for this format.
665 unsigned LineCount;
666
667 /// The size of each column in characters.
668 SmallVector<unsigned, 8> ColumnSizes;
669 };
670
671 /// Calculate which \c ColumnFormat fits best into
672 /// \p RemainingCharacters.
673 const ColumnFormat *getColumnFormat(unsigned RemainingCharacters) const;
674
675 /// The ordered \c FormatTokens making up the commas of this list.
676 SmallVector<const FormatToken *, 8> Commas;
677
678 /// The length of each of the list's items in characters including the
679 /// trailing comma.
680 SmallVector<unsigned, 8> ItemLengths;
681
682 /// Precomputed formats that can be used for this list.
683 SmallVector<ColumnFormat, 4> Formats;
684
685 bool HasNestedBracedList;
686};
687
688/// Encapsulates keywords that are context sensitive or for languages not
689/// properly supported by Clang's lexer.
690struct AdditionalKeywords {
691 AdditionalKeywords(IdentifierTable &IdentTable) {
692 kw_final = &IdentTable.get("final");
693 kw_override = &IdentTable.get("override");
694 kw_in = &IdentTable.get("in");
695 kw_of = &IdentTable.get("of");
696 kw_CF_CLOSED_ENUM = &IdentTable.get("CF_CLOSED_ENUM");
697 kw_CF_ENUM = &IdentTable.get("CF_ENUM");
698 kw_CF_OPTIONS = &IdentTable.get("CF_OPTIONS");
699 kw_NS_CLOSED_ENUM = &IdentTable.get("NS_CLOSED_ENUM");
700 kw_NS_ENUM = &IdentTable.get("NS_ENUM");
701 kw_NS_OPTIONS = &IdentTable.get("NS_OPTIONS");
702
703 kw_as = &IdentTable.get("as");
704 kw_async = &IdentTable.get("async");
705 kw_await = &IdentTable.get("await");
706 kw_declare = &IdentTable.get("declare");
707 kw_finally = &IdentTable.get("finally");
708 kw_from = &IdentTable.get("from");
709 kw_function = &IdentTable.get("function");
710 kw_get = &IdentTable.get("get");
711 kw_import = &IdentTable.get("import");
712 kw_infer = &IdentTable.get("infer");
713 kw_is = &IdentTable.get("is");
714 kw_let = &IdentTable.get("let");
715 kw_module = &IdentTable.get("module");
716 kw_readonly = &IdentTable.get("readonly");
717 kw_set = &IdentTable.get("set");
718 kw_type = &IdentTable.get("type");
719 kw_typeof = &IdentTable.get("typeof");
720 kw_var = &IdentTable.get("var");
721 kw_yield = &IdentTable.get("yield");
722
723 kw_abstract = &IdentTable.get("abstract");
724 kw_assert = &IdentTable.get("assert");
725 kw_extends = &IdentTable.get("extends");
726 kw_implements = &IdentTable.get("implements");
727 kw_instanceof = &IdentTable.get("instanceof");
728 kw_interface = &IdentTable.get("interface");
729 kw_native = &IdentTable.get("native");
730 kw_package = &IdentTable.get("package");
731 kw_synchronized = &IdentTable.get("synchronized");
732 kw_throws = &IdentTable.get("throws");
733 kw___except = &IdentTable.get("__except");
734 kw___has_include = &IdentTable.get("__has_include");
735 kw___has_include_next = &IdentTable.get("__has_include_next");
736
737 kw_mark = &IdentTable.get("mark");
738
739 kw_extend = &IdentTable.get("extend");
740 kw_option = &IdentTable.get("option");
741 kw_optional = &IdentTable.get("optional");
742 kw_repeated = &IdentTable.get("repeated");
743 kw_required = &IdentTable.get("required");
744 kw_returns = &IdentTable.get("returns");
745
746 kw_signals = &IdentTable.get("signals");
747 kw_qsignals = &IdentTable.get("Q_SIGNALS");
748 kw_slots = &IdentTable.get("slots");
749 kw_qslots = &IdentTable.get("Q_SLOTS");
750
751 // C# keywords
752 kw_dollar = &IdentTable.get("dollar");
753 kw_base = &IdentTable.get("base");
754 kw_byte = &IdentTable.get("byte");
755 kw_checked = &IdentTable.get("checked");
756 kw_decimal = &IdentTable.get("decimal");
757 kw_delegate = &IdentTable.get("delegate");
758 kw_event = &IdentTable.get("event");
759 kw_fixed = &IdentTable.get("fixed");
760 kw_foreach = &IdentTable.get("foreach");
761 kw_implicit = &IdentTable.get("implicit");
762 kw_internal = &IdentTable.get("internal");
763 kw_lock = &IdentTable.get("lock");
764 kw_null = &IdentTable.get("null");
765 kw_object = &IdentTable.get("object");
766 kw_out = &IdentTable.get("out");
767 kw_params = &IdentTable.get("params");
768 kw_ref = &IdentTable.get("ref");
769 kw_string = &IdentTable.get("string");
770 kw_stackalloc = &IdentTable.get("stackalloc");
771 kw_sbyte = &IdentTable.get("sbyte");
772 kw_sealed = &IdentTable.get("sealed");
773 kw_uint = &IdentTable.get("uint");
774 kw_ulong = &IdentTable.get("ulong");
775 kw_unchecked = &IdentTable.get("unchecked");
776 kw_unsafe = &IdentTable.get("unsafe");
777 kw_ushort = &IdentTable.get("ushort");
778 kw_when = &IdentTable.get("when");
779
780 // Keep this at the end of the constructor to make sure everything here
781 // is
782 // already initialized.
783 JsExtraKeywords = std::unordered_set<IdentifierInfo *>(
784 {kw_as, kw_async, kw_await, kw_declare, kw_finally, kw_from,
785 kw_function, kw_get, kw_import, kw_is, kw_let, kw_module, kw_readonly,
786 kw_set, kw_type, kw_typeof, kw_var, kw_yield,
787 // Keywords from the Java section.
788 kw_abstract, kw_extends, kw_implements, kw_instanceof, kw_interface});
789
790 CSharpExtraKeywords = std::unordered_set<IdentifierInfo *>(
791 {kw_base, kw_byte, kw_checked, kw_decimal, kw_delegate, kw_event,
792 kw_fixed, kw_foreach, kw_implicit, kw_in, kw_interface, kw_internal,
793 kw_is, kw_lock, kw_null, kw_object, kw_out, kw_override, kw_params,
794 kw_readonly, kw_ref, kw_string, kw_stackalloc, kw_sbyte, kw_sealed,
795 kw_uint, kw_ulong, kw_unchecked, kw_unsafe, kw_ushort, kw_when,
796 // Keywords from the JavaScript section.
797 kw_as, kw_async, kw_await, kw_declare, kw_finally, kw_from,
798 kw_function, kw_get, kw_import, kw_is, kw_let, kw_module, kw_readonly,
799 kw_set, kw_type, kw_typeof, kw_var, kw_yield,
800 // Keywords from the Java section.
801 kw_abstract, kw_extends, kw_implements, kw_instanceof, kw_interface});
802 }
803
804 // Context sensitive keywords.
805 IdentifierInfo *kw_final;
806 IdentifierInfo *kw_override;
807 IdentifierInfo *kw_in;
808 IdentifierInfo *kw_of;
809 IdentifierInfo *kw_CF_CLOSED_ENUM;
810 IdentifierInfo *kw_CF_ENUM;
811 IdentifierInfo *kw_CF_OPTIONS;
812 IdentifierInfo *kw_NS_CLOSED_ENUM;
813 IdentifierInfo *kw_NS_ENUM;
814 IdentifierInfo *kw_NS_OPTIONS;
815 IdentifierInfo *kw___except;
816 IdentifierInfo *kw___has_include;
817 IdentifierInfo *kw___has_include_next;
818
819 // JavaScript keywords.
820 IdentifierInfo *kw_as;
821 IdentifierInfo *kw_async;
822 IdentifierInfo *kw_await;
823 IdentifierInfo *kw_declare;
824 IdentifierInfo *kw_finally;
825 IdentifierInfo *kw_from;
826 IdentifierInfo *kw_function;
827 IdentifierInfo *kw_get;
828 IdentifierInfo *kw_import;
829 IdentifierInfo *kw_infer;
830 IdentifierInfo *kw_is;
831 IdentifierInfo *kw_let;
832 IdentifierInfo *kw_module;
833 IdentifierInfo *kw_readonly;
834 IdentifierInfo *kw_set;
835 IdentifierInfo *kw_type;
836 IdentifierInfo *kw_typeof;
837 IdentifierInfo *kw_var;
838 IdentifierInfo *kw_yield;
839
840 // Java keywords.
841 IdentifierInfo *kw_abstract;
842 IdentifierInfo *kw_assert;
843 IdentifierInfo *kw_extends;
844 IdentifierInfo *kw_implements;
845 IdentifierInfo *kw_instanceof;
846 IdentifierInfo *kw_interface;
847 IdentifierInfo *kw_native;
848 IdentifierInfo *kw_package;
849 IdentifierInfo *kw_synchronized;
850 IdentifierInfo *kw_throws;
851
852 // Pragma keywords.
853 IdentifierInfo *kw_mark;
854
855 // Proto keywords.
856 IdentifierInfo *kw_extend;
857 IdentifierInfo *kw_option;
858 IdentifierInfo *kw_optional;
859 IdentifierInfo *kw_repeated;
860 IdentifierInfo *kw_required;
861 IdentifierInfo *kw_returns;
862
863 // QT keywords.
864 IdentifierInfo *kw_signals;
865 IdentifierInfo *kw_qsignals;
866 IdentifierInfo *kw_slots;
867 IdentifierInfo *kw_qslots;
868
869 // C# keywords
870 IdentifierInfo *kw_dollar;
871 IdentifierInfo *kw_base;
872 IdentifierInfo *kw_byte;
873 IdentifierInfo *kw_checked;
874 IdentifierInfo *kw_decimal;
875 IdentifierInfo *kw_delegate;
876 IdentifierInfo *kw_event;
877 IdentifierInfo *kw_fixed;
878 IdentifierInfo *kw_foreach;
879 IdentifierInfo *kw_implicit;
880 IdentifierInfo *kw_internal;
881
882 IdentifierInfo *kw_lock;
883 IdentifierInfo *kw_null;
884 IdentifierInfo *kw_object;
885 IdentifierInfo *kw_out;
886
887 IdentifierInfo *kw_params;
888
889 IdentifierInfo *kw_ref;
890 IdentifierInfo *kw_string;
891 IdentifierInfo *kw_stackalloc;
892 IdentifierInfo *kw_sbyte;
893 IdentifierInfo *kw_sealed;
894 IdentifierInfo *kw_uint;
895 IdentifierInfo *kw_ulong;
896 IdentifierInfo *kw_unchecked;
897 IdentifierInfo *kw_unsafe;
898 IdentifierInfo *kw_ushort;
899 IdentifierInfo *kw_when;
900
901 /// Returns \c true if \p Tok is a true JavaScript identifier, returns
902 /// \c false if it is a keyword or a pseudo keyword.
903 bool IsJavaScriptIdentifier(const FormatToken &Tok) const {
904 return Tok.is(tok::identifier) &&
905 JsExtraKeywords.find(Tok.Tok.getIdentifierInfo()) ==
906 JsExtraKeywords.end();
907 }
908
909 /// Returns \c true if \p Tok is a C# keyword, returns
910 /// \c false if it is a anything else.
911 bool isCSharpKeyword(const FormatToken &Tok) const {
912 switch (Tok.Tok.getKind()) {
913 case tok::kw_bool:
914 case tok::kw_break:
915 case tok::kw_case:
916 case tok::kw_catch:
917 case tok::kw_char:
918 case tok::kw_class:
919 case tok::kw_const:
920 case tok::kw_continue:
921 case tok::kw_default:
922 case tok::kw_do:
923 case tok::kw_double:
924 case tok::kw_else:
925 case tok::kw_enum:
926 case tok::kw_explicit:
927 case tok::kw_extern:
928 case tok::kw_false:
929 case tok::kw_float:
930 case tok::kw_for:
931 case tok::kw_goto:
932 case tok::kw_if:
933 case tok::kw_int:
934 case tok::kw_long:
935 case tok::kw_namespace:
936 case tok::kw_new:
937 case tok::kw_operator:
938 case tok::kw_private:
939 case tok::kw_protected:
940 case tok::kw_public:
941 case tok::kw_return:
942 case tok::kw_short:
943 case tok::kw_sizeof:
944 case tok::kw_static:
945 case tok::kw_struct:
946 case tok::kw_switch:
947 case tok::kw_this:
948 case tok::kw_throw:
949 case tok::kw_true:
950 case tok::kw_try:
951 case tok::kw_typeof:
952 case tok::kw_using:
953 case tok::kw_virtual:
954 case tok::kw_void:
955 case tok::kw_volatile:
956 case tok::kw_while:
957 return true;
958 default:
959 return Tok.is(tok::identifier) &&
960 CSharpExtraKeywords.find(Tok.Tok.getIdentifierInfo()) ==
961 CSharpExtraKeywords.end();
962 }
963 }
964
965private:
966 /// The JavaScript keywords beyond the C++ keyword set.
967 std::unordered_set<IdentifierInfo *> JsExtraKeywords;
968
969 /// The C# keywords beyond the C++ keyword set
970 std::unordered_set<IdentifierInfo *> CSharpExtraKeywords;
971};
972
973} // namespace format
974} // namespace clang
975
976#endif