21 #include "llvm/Support/Regex.h"
37 Lex->SetKeepWhitespaceMode(
true);
39 for (
const std::string &ForEachMacro :
Style.ForEachMacros)
40 ForEachMacros.push_back(&
IdentTable.get(ForEachMacro));
41 std::sort(ForEachMacros.begin(), ForEachMacros.end());
45 assert(Tokens.empty());
46 assert(FirstInLineIndex == 0);
48 Tokens.push_back(getNextToken());
50 tryParseJSRegexLiteral();
51 handleTemplateStrings();
53 tryMergePreviousTokens();
54 if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
55 FirstInLineIndex = Tokens.size() - 1;
56 }
while (Tokens.back()->Tok.isNot(
tok::eof));
// Tries the token-merging rules one after another on the most recently lexed
// tokens.
// NOTE(review): the statements guarded by the `if`s below, and any enclosing
// language checks, are not visible in this excerpt.
60 void FormatTokenLexer::tryMergePreviousTokens() {
61 if (tryMerge_TMacro())
63 if (tryMergeConflictMarkers())
65 if (tryMergeLessLess())
67 if (tryMergeNSStringLiteral())
// Token-kind sequences handed to tryMergeTokens() below, e.g. `==` followed by
// `=`, `=` followed by `>`, and `*` followed by `*`.
71 static const tok::TokenKind JSIdentity[] = {tok::equalequal, tok::equal};
74 static const tok::TokenKind JSShiftEqual[] = {tok::greater, tok::greater,
76 static const tok::TokenKind JSRightArrow[] = {tok::equal, tok::greater};
77 static const tok::TokenKind JSExponentiation[] = {tok::star, tok::star};
// Each successful merge assigns the token type passed as second argument.
82 if (tryMergeTokens(JSIdentity, TT_BinaryOperator))
84 if (tryMergeTokens(JSNotIdentity, TT_BinaryOperator))
86 if (tryMergeTokens(JSShiftEqual, TT_BinaryOperator))
88 if (tryMergeTokens(JSRightArrow, TT_JsFatArrow))
90 if (tryMergeTokens(JSExponentiation, TT_JsExponentiation))
92 if (tryMergeTokens(JSExponentiationEqual, TT_JsExponentiationEqual)) {
// The merged exponentiation-assignment token is given the kind tok::starequal.
93 Tokens.back()->Tok.setKind(tok::starequal);
99 static const tok::TokenKind JavaRightLogicalShift[] = {tok::greater,
102 static const tok::TokenKind JavaRightLogicalShiftAssign[] = {tok::greater,
105 if (tryMergeTokens(JavaRightLogicalShift, TT_BinaryOperator))
107 if (tryMergeTokens(JavaRightLogicalShiftAssign, TT_BinaryOperator))
// Merges an `@` token that is directly followed by a string literal into one
// string-literal token typed TT_ObjCStringLiteral.
// NOTE(review): the return statements are not visible in this excerpt.
112 bool FormatTokenLexer::tryMergeNSStringLiteral() {
// Two tokens are needed: the `@` and the string literal.
113 if (Tokens.size() < 2)
115 auto &At = *(Tokens.end() - 2);
116 auto &String = *(Tokens.end() - 1);
117 if (!At->is(tok::at) || !String->is(tok::string_literal))
// The `@` token absorbs the literal: its text is extended to run from its own
// first character to the end of the string's text, and the widths are summed.
119 At->Tok.setKind(tok::string_literal);
120 At->TokenText = StringRef(At->TokenText.begin(),
121 String->TokenText.end() - At->TokenText.begin());
122 At->ColumnWidth += String->ColumnWidth;
123 At->Type = TT_ObjCStringLiteral;
// Drop the now-redundant string token (the last element).
124 Tokens.erase(Tokens.end() - 1);
// Merges two consecutive `<` tokens into a single `<<` token.
// NOTE(review): the statements executed when a check below fails are not
// visible in this excerpt; presumably they return false.
128 bool FormatTokenLexer::tryMergeLessLess() {
// At least three tokens are inspected: `<`, `<`, and the token after them.
130 if (Tokens.size() < 3)
// If a fourth-from-last token exists, remember whether it is also `<`.
133 bool FourthTokenIsLess =
false;
134 if (Tokens.size() > 3)
135 FourthTokenIsLess = (Tokens.end() - 4)[0]->is(tok::less);
137 auto First = Tokens.end() - 3;
// True when the pattern is not exactly two `<`: the last token is `<`, either
// of the two before it is not `<`, or the one before those is `<`.
138 if (First[2]->is(tok::less) || First[1]->isNot(tok::less) ||
139 First[0]->isNot(tok::less) || FourthTokenIsLess)
// Compares the begin and end of the second `<`'s whitespace range, i.e.
// whether any whitespace separates the two `<` tokens.
143 if (First[1]->WhitespaceRange.getBegin() !=
144 First[1]->WhitespaceRange.getEnd())
// Rewrite the first `<` as `<<` and widen it by one column.
147 First[0]->Tok.setKind(tok::lessless);
148 First[0]->TokenText =
"<<";
149 First[0]->ColumnWidth += 1;
// Remove the second `<` (second-to-last token); the last token is kept.
150 Tokens.erase(Tokens.end() - 2);
// Tries to fuse the last Kinds.size() tokens into the first of them when their
// kinds match Kinds in order.
// NOTE(review): the rest of the parameter list (NewType is used below) and the
// return statements are not visible in this excerpt.
154 bool FormatTokenLexer::tryMergeTokens(ArrayRef<tok::TokenKind> Kinds,
156 if (Tokens.size() < Kinds.size())
// First points at the earliest of the candidate tokens.
159 SmallVectorImpl<FormatToken *>::const_iterator First =
160 Tokens.end() - Kinds.size();
161 if (!First[0]->is(Kinds[0]))
// AddLength accumulates the text length of every token after the first.
163 unsigned AddLength = 0;
164 for (
unsigned i = 1; i < Kinds.size(); ++i) {
// Condition is true on a kind mismatch or when the token's whitespace range is
// non-empty (begin != end).
165 if (!First[i]->is(Kinds[i]) ||
166 First[i]->WhitespaceRange.getBegin() !=
167 First[i]->WhitespaceRange.getEnd())
169 AddLength += First[i]->TokenText.size();
// Shrink the token list so that only the first candidate token remains.
171 Tokens.resize(Tokens.size() - Kinds.size() + 1);
// Extend the surviving token's text and width by AddLength and retype it.
172 First[0]->TokenText = StringRef(First[0]->TokenText.data(),
173 First[0]->TokenText.size() + AddLength);
174 First[0]->ColumnWidth += AddLength;
175 First[0]->Type = NewType;
// Returns true when Tok is one of the listed punctuation or keyword kinds, or
// is a binary operator.
// NOTE(review): part of the keyword list is not visible in this excerpt.
180 bool FormatTokenLexer::precedesOperand(FormatToken *Tok) {
184 return Tok->isOneOf(tok::period, tok::l_paren, tok::comma, tok::l_brace,
185 tok::r_brace, tok::l_square, tok::semi, tok::exclaim,
186 tok::colon, tok::question, tok::tilde) ||
187 Tok->isOneOf(tok::kw_return, tok::kw_do, tok::kw_case, tok::kw_throw,
188 tok::kw_else, tok::kw_new, tok::kw_delete, tok::kw_void,
190 Tok->isBinaryOperator();
// Decides from the preceding token Prev whether a regex literal may follow.
// NOTE(review): several statements of this function, including its remaining
// returns, are not visible in this excerpt.
193 bool FormatTokenLexer::canPrecedeRegexLiteral(FormatToken *Prev) {
// For `++`, `--` and `!` the answer depends on the third-from-last token:
// true if there are fewer than three tokens or that token precedes an operand.
203 if (Prev->isOneOf(tok::plusplus, tok::minusminus, tok::exclaim))
204 return (Tokens.size() < 3 || precedesOperand(Tokens[Tokens.size() - 3]));
208 if (!precedesOperand(Prev))
// Reinterprets a trailing `/` or `/=` token as the start of a regex literal
// and, when applicable, extends that token over the raw buffer text.
// NOTE(review): many statements of this function are not visible in this
// excerpt, including the early exits and the body of the scanning loop.
218 void FormatTokenLexer::tryParseJSRegexLiteral() {
219 FormatToken *RegexToken = Tokens.back();
220 if (!RegexToken->isOneOf(tok::slash, tok::slashequal))
// Walk backwards over the earlier tokens, looking at non-comment tokens.
223 FormatToken *Prev =
nullptr;
224 for (
auto I = Tokens.rbegin() + 1,
E = Tokens.rend();
I !=
E; ++
I) {
227 if ((*I)->isNot(tok::comment)) {
233 if (!canPrecedeRegexLiteral(Prev))
// Offset is the lexer's current buffer position; RegexBegin is that position
// moved back by the length of the slash token's text.
237 const char *
Offset = Lex->getBufferLocation();
238 const char *RegexBegin = Offset - RegexToken->TokenText.size();
239 StringRef
Buffer = Lex->getBuffer();
240 bool InCharacterClass =
false;
241 bool HaveClosingSlash =
false;
// Scan forward until a closing slash has been seen or the buffer ends.
242 for (; !HaveClosingSlash && Offset != Buffer.end(); ++
Offset) {
252 InCharacterClass =
true;
255 InCharacterClass =
false;
// HaveClosingSlash is only set while not inside a character class.
258 if (!InCharacterClass)
259 HaveClosingSlash =
true;
// Retype the token as a regex/string literal spanning RegexBegin..Offset; its
// column width is set to the length of that text.
264 RegexToken->Type = TT_RegexLiteral;
266 RegexToken->Tok.setKind(tok::string_literal);
267 RegexToken->TokenText = StringRef(RegexBegin, Offset - RegexBegin);
268 RegexToken->ColumnWidth = RegexToken->TokenText.size();
// Restart the lexer at the file offset corresponding to Offset.
270 resetLexer(SourceMgr.
getFileOffset(Lex->getSourceLocation(Offset)));
// Handles backtick-delimited template strings: a backtick lexed as
// tok::unknown is expanded into one TT_TemplateString string-literal token.
// NOTE(review): many statements of this function are not visible in this
// excerpt, including the StateStack updates and the scanning loop's exits.
273 void FormatTokenLexer::handleTemplateStrings() {
274 FormatToken *BacktickToken = Tokens.back();
// `{` and `}` tokens are handled separately; the `}` case consults
// StateStack.size().
276 if (BacktickToken->is(tok::l_brace)) {
280 if (BacktickToken->is(tok::r_brace)) {
281 if (StateStack.size() == 1)
287 }
else if (BacktickToken->is(tok::unknown) &&
288 BacktickToken->TokenText ==
"`") {
// Offset is the lexer's current buffer position; TmplBegin is that position
// moved back by the length of the backtick token's text.
295 const char *Offset = Lex->getBufferLocation();
296 const char *TmplBegin = Offset - BacktickToken->TokenText.size();
// Scan the raw buffer; backticks, backslashes and `$` are examined.
297 for (; Offset != Lex->getBuffer().end(); ++
Offset) {
298 if (Offset[0] ==
'`') {
302 if (Offset[0] ==
'\\') {
304 }
else if (Offset + 1 < Lex->getBuffer().end() && Offset[0] ==
'$' &&
// The literal text runs from TmplBegin through Offset inclusive.
313 StringRef LiteralText(TmplBegin, Offset - TmplBegin + 1);
314 BacktickToken->Type = TT_TemplateString;
315 BacktickToken->Tok.setKind(tok::string_literal);
316 BacktickToken->TokenText = LiteralText;
// The text before the first '\n' (or, presumably, the whole literal when there
// is none) is measured from the token's OriginalColumn.
319 size_t FirstBreak = LiteralText.find(
'\n');
320 StringRef FirstLineText = FirstBreak == StringRef::npos
322 : LiteralText.substr(0, FirstBreak);
324 FirstLineText, BacktickToken->OriginalColumn, Style.
TabWidth, Encoding);
// A literal containing a newline is flagged multiline; the text after its last
// '\n' is measured starting at column 0.
325 size_t LastBreak = LiteralText.rfind(
'\n');
326 if (LastBreak != StringRef::npos) {
327 BacktickToken->IsMultiline =
true;
328 unsigned StartColumn = 0;
330 LiteralText.substr(LastBreak + 1, LiteralText.size()), StartColumn,
// When Offset is still inside the buffer, the location one past it is used.
334 SourceLocation loc = Offset < Lex->getBuffer().end()
335 ? Lex->getSourceLocation(Offset + 1)
// Merges the four-token sequence `_T`, `(`, single-line string literal, `)`
// into the string-literal token.
// NOTE(review): the return statements and the removal of the surplus tokens
// are not visible in this excerpt.
340 bool FormatTokenLexer::tryMerge_TMacro() {
341 if (Tokens.size() < 4)
// Match the pattern from the back: `)`, string literal, `(`, `_T`.
343 FormatToken *Last = Tokens.back();
344 if (!Last->is(tok::r_paren))
347 FormatToken *String = Tokens[Tokens.size() - 2];
348 if (!String->is(tok::string_literal) || String->IsMultiline)
351 if (!Tokens[Tokens.size() - 3]->is(tok::l_paren))
354 FormatToken *
Macro = Tokens[Tokens.size() - 4];
355 if (Macro->TokenText !=
"_T")
// The string token's text is widened to run from the first character of `_T`
// to the end of the closing parenthesis.
358 const char *Start = Macro->TokenText.data();
359 const char *
End = Last->TokenText.data() + Last->TokenText.size();
360 String->TokenText = StringRef(Start, End - Start);
// The string token takes over the macro token's position and whitespace data.
361 String->IsFirst = Macro->IsFirst;
362 String->LastNewlineOffset = Macro->LastNewlineOffset;
363 String->WhitespaceRange = Macro->WhitespaceRange;
364 String->OriginalColumn = Macro->OriginalColumn;
366 String->TokenText, String->OriginalColumn, Style.
TabWidth, Encoding);
367 String->NewlinesBefore = Macro->NewlinesBefore;
368 String->HasUnescapedNewline = Macro->HasUnescapedNewline;
// The merged string token becomes the last token in the list.
373 Tokens.back() = String;
// Recognises conflict-marker lines by the text at the start of the current
// line and collapses that line's tokens into a single marker token.
// NOTE(review): many statements of this function are not visible in this
// excerpt, including the returns and several declarations.
377 bool FormatTokenLexer::tryMergeConflictMarkers() {
// True when the last token neither starts a new line nor is end-of-file.
378 if (Tokens.back()->NewlinesBefore == 0 && Tokens.back()->isNot(
tok::eof))
392 unsigned FirstInLineOffset;
394 Tokens[FirstInLineIndex]->getStartOfNonWhitespace());
395 StringRef Buffer = SourceMgr.
getBuffer(ID)->getBuffer();
// Search backwards from the first token of the line for the preceding '\n'.
397 auto LineOffset = Buffer.rfind(
'\n', FirstInLineOffset);
398 if (LineOffset == StringRef::npos) {
// LineStart is the text from LineOffset up to the next space or newline, or to
// the end of the buffer when neither is found.
404 auto FirstSpace = Buffer.find_first_of(
" \n", LineOffset);
406 if (FirstSpace == StringRef::npos) {
407 LineStart = Buffer.substr(LineOffset);
409 LineStart = Buffer.substr(LineOffset, FirstSpace - LineOffset);
// Classify the marker. Both 7-character and 4-character spellings are
// accepted; note that the 4-character `>>>>` maps to ConflictStart and `<<<<`
// to ConflictEnd.
413 if (LineStart ==
"<<<<<<<" || LineStart ==
">>>>") {
414 Type = TT_ConflictStart;
415 }
else if (LineStart ==
"|||||||" || LineStart ==
"=======" ||
416 LineStart ==
"====") {
417 Type = TT_ConflictAlternative;
418 }
else if (LineStart ==
">>>>>>>" || LineStart ==
"<<<<") {
419 Type = TT_ConflictEnd;
// For a recognised marker: save the last token, truncate the list to the
// line's first token, retag that token, then re-append the saved token.
422 if (Type != TT_Unknown) {
423 FormatToken *
Next = Tokens.back();
425 Tokens.resize(FirstInLineIndex + 1);
429 Tokens.back()->Type = Type;
430 Tokens.back()->Tok.setKind(tok::kw___unknown_anytype);
432 Tokens.push_back(Next);
// Builds a new FormatToken from data taken from the current FormatTok.
// NOTE(review): most of this function, including the declaration of `Tok` and
// the return, is not visible in this excerpt.
439 FormatToken *FormatTokenLexer::getStashedToken() {
// Copy the text out before FormatTok is re-pointed at a fresh token.
442 StringRef TokenText = FormatTok->
TokenText;
// Placement-new a fresh FormatToken in storage obtained from Allocator.
445 FormatTok =
new (Allocator.Allocate()) FormatToken;
446 FormatTok->
Tok = Tok;
447 SourceLocation TokLocation =
// Produces the next FormatToken: either a stashed token, or a freshly
// allocated token filled in by readRawToken() and then classified.
// NOTE(review): many statements of this function are not visible in this
// excerpt, including the stashed-token condition, most of the whitespace loop
// and the final return.
458 FormatToken *FormatTokenLexer::getNextToken() {
461 return getStashedToken();
// Placement-new a fresh FormatToken in storage obtained from Allocator.
464 FormatTok =
new (Allocator.Allocate()) FormatToken;
465 readRawToken(*FormatTok);
466 SourceLocation WhitespaceStart =
// Only the very first token produced gets IsFirst set.
468 FormatTok->
IsFirst = IsFirstToken;
469 IsFirstToken =
false;
// Whitespace accounting starts from the TrailingWhitespace value left over
// from earlier processing; the loop runs while the token kind is tok::unknown.
472 unsigned WhitespaceLength = TrailingWhitespace;
473 while (FormatTok->
Tok.
is(tok::unknown)) {
// Helper lambda: looks at Text[pos] for '\r' and then walks backwards from pos
// over characters, comparing each against '\\' while incrementing count.
475 auto EscapesNewline = [&](
int pos) {
477 if (pos >= 0 && Text[pos] ==
'\r')
484 for (; pos >= 0; --pos, ++count)
485 if (Text[pos] !=
'\\')
491 for (
int i = 0, e = Text.size(); i != e; ++i) {
514 if (i + 1 == e || (Text[i + 1] !=
'\r' && Text[i + 1] !=
'\n'))
515 FormatTok->
Type = TT_ImplicitStringLiteral;
518 FormatTok->
Type = TT_ImplicitStringLiteral;
521 if (FormatTok->
Type == TT_ImplicitStringLiteral)
525 if (FormatTok->
is(TT_ImplicitStringLiteral))
529 readRawToken(*FormatTok);
539 WhitespaceLength += 2;
546 WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
// Reset; for comment tokens it is recomputed as the difference between the
// untrimmed and the current text length.
550 TrailingWhitespace = 0;
551 if (FormatTok->
Tok.
is(tok::comment)) {
553 StringRef UntrimmedText = FormatTok->
TokenText;
555 TrailingWhitespace = UntrimmedText.size() - FormatTok->
TokenText.size();
556 }
else if (FormatTok->
Tok.
is(tok::raw_identifier)) {
// Look up the raw identifier's text in the identifier table.
557 IdentifierInfo &Info = IdentTable.
get(FormatTok->
TokenText);
561 FormatTok->
isOneOf(tok::kw_struct, tok::kw_union, tok::kw_delete,
566 FormatTok->
isOneOf(tok::kw_struct, tok::kw_union,
571 }
else if (FormatTok->
Tok.
is(tok::greatergreater)) {
576 }
else if (FormatTok->
Tok.
is(tok::lessless)) {
// The text up to the first '\n' and the text after the last '\n' are measured
// separately; the latter starts at column 0.
586 size_t FirstNewlinePos = Text.find(
'\n');
587 if (FirstNewlinePos == StringRef::npos) {
598 Text.substr(0, FirstNewlinePos), Column, Style.
TabWidth, Encoding);
603 Text.substr(Text.find_last_of(
'\n') + 1), 0, Style.
TabWidth, Encoding);
// Tokens found in ForEachMacros are typed TT_ForEachMacro, unless the previous
// token's preprocessor keyword id matches a value not visible in this excerpt.
// Otherwise, identifiers matching the macro-block regexes are typed as
// TT_MacroBlockBegin or TT_MacroBlockEnd.
608 if (!(Tokens.size() > 0 && Tokens.back()->Tok.getIdentifierInfo() &&
609 Tokens.back()->Tok.getIdentifierInfo()->getPPKeywordID() ==
611 std::find(ForEachMacros.begin(), ForEachMacros.end(),
613 FormatTok->
Type = TT_ForEachMacro;
614 }
else if (FormatTok->
is(tok::identifier)) {
615 if (MacroBlockBeginRegex.match(Text)) {
616 FormatTok->
Type = TT_MacroBlockBegin;
617 }
else if (MacroBlockEndRegex.match(Text)) {
618 FormatTok->
Type = TT_MacroBlockEnd;
// Lexes one raw token into Tok, records its source text, normalises some token
// kinds to string literals, and tracks `clang-format on/off` comments.
// NOTE(review): some conditions of this function are not visible in this
// excerpt.
626 void FormatTokenLexer::readRawToken(FormatToken &Tok) {
627 Lex->LexFromRawLexer(Tok.Tok);
// TokenText views the source characters at the token's location, for the
// token's length.
628 Tok.TokenText = StringRef(SourceMgr.
getCharacterData(Tok.Tok.getLocation()),
629 Tok.Tok.getLength());
// An unknown token starting with '"' is treated as an unterminated string
// literal; `''` and char constants are also retagged as string literals under
// conditions only partly visible here.
632 if (Tok.is(tok::unknown)) {
633 if (!Tok.TokenText.empty() && Tok.TokenText[0] ==
'"') {
634 Tok.Tok.setKind(tok::string_literal);
635 Tok.IsUnterminatedLiteral =
true;
637 Tok.TokenText ==
"''") {
638 Tok.Tok.setKind(tok::string_literal);
643 Tok.is(tok::char_constant)) {
644 Tok.Tok.setKind(tok::string_literal);
// Ordering matters: an "on" comment clears FormattingDisabled before Finalized
// is recorded, while an "off" comment sets it only afterwards.
647 if (Tok.is(tok::comment) && (Tok.TokenText ==
"// clang-format on" ||
648 Tok.TokenText ==
"/* clang-format on */")) {
649 FormattingDisabled =
false;
652 Tok.Finalized = FormattingDisabled;
654 if (Tok.is(tok::comment) && (Tok.TokenText ==
"// clang-format off" ||
655 Tok.TokenText ==
"/* clang-format off */")) {
656 FormattingDisabled =
true;
// Restarts lexing at position Offset within Buffer, re-enables keep-whitespace
// mode on the lexer and clears TrailingWhitespace.
// NOTE(review): the statement that builds the new lexer is only partly visible
// in this excerpt.
660 void FormatTokenLexer::resetLexer(
unsigned Offset) {
664 Buffer.begin() +
Offset, Buffer.end()));
665 Lex->SetKeepWhitespaceMode(
true);
666 TrailingWhitespace = 0;
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens...
Defines the SourceManager interface.
const char * getCharacterData(SourceLocation SL, bool *Invalid=nullptr) const
Return a pointer to the start of the specified location in the appropriate spelling MemoryBuffer...
llvm::MemoryBuffer * getBuffer(FileID FID, SourceLocation Loc, bool *Invalid=nullptr) const
Return the buffer for the specified FileID.
std::unique_ptr< llvm::MemoryBuffer > Buffer
StringRef getBufferData(FileID FID, bool *Invalid=nullptr) const
Return a StringRef to the source buffer data for the specified FileID.
void setKind(tok::TokenKind K)
SourceLocation getLocWithOffset(int Offset) const
Return a source location with the specified offset from this SourceLocation.
detail::InMemoryDirectory::const_iterator I
SourceLocation getLocForEndOfFile(FileID FID) const
Return the source location corresponding to the last byte of the specified file.
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file. ...
The l-value was considered opaque, so the alignment was determined from a type.
IdentifierInfo & get(StringRef Name)
Return the identifier token info for the specified named identifier.
void setIdentifierInfo(IdentifierInfo *II)
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {...
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
detail::InMemoryDirectory::const_iterator E
Defines the clang::SourceLocation class and associated facilities.
unsigned getLength() const
SourceLocation getLocForStartOfFile(FileID FID) const
Return the source location corresponding to the first byte of the specified file. ...
unsigned getFileOffset(SourceLocation SpellingLoc) const
Returns the offset from the start of the file that the specified SourceLocation represents.
void setLocation(SourceLocation L)
std::pair< FileID, unsigned > getDecomposedLoc(SourceLocation Loc) const
Decompose the specified location into a raw FileID + Offset pair.
This class handles loading and caching of source files into memory.
IdentifierInfo * getIdentifierInfo() const