30 #include "llvm/ADT/None.h" 31 #include "llvm/ADT/Optional.h" 32 #include "llvm/ADT/StringExtras.h" 33 #include "llvm/ADT/StringSwitch.h" 34 #include "llvm/ADT/StringRef.h" 35 #include "llvm/Support/Compiler.h" 36 #include "llvm/Support/ConvertUTF.h" 37 #include "llvm/Support/MathExtras.h" 38 #include "llvm/Support/MemoryBuffer.h" 39 #include "llvm/Support/NativeFormatting.h" 40 #include "llvm/Support/UnicodeCharRanges.h" 50 using namespace clang;
61 return II->getObjCKeywordID() == objcKey;
68 return tok::objc_not_keyword;
77 void Lexer::anchor() {}
79 void Lexer::InitLexer(
const char *BufStart,
const char *BufPtr,
81 BufferStart = BufStart;
85 assert(BufEnd[0] == 0 &&
86 "We assume that the input buffer has a null character at the end" 87 " to simplify lexing!");
92 if (BufferStart == BufferPtr) {
94 StringRef Buf(BufferStart, BufferEnd - BufferStart);
95 size_t BOMLength = llvm::StringSwitch<size_t>(Buf)
96 .StartsWith(
"\xEF\xBB\xBF", 3)
100 BufferPtr += BOMLength;
103 Is_PragmaLexer =
false;
104 CurrentConflictMarkerState =
CMK_None;
107 IsAtStartOfLine =
true;
108 IsAtPhysicalStartOfLine =
true;
110 HasLeadingSpace =
false;
111 HasLeadingEmptyMacro =
false;
114 ParsingPreprocessorDirective =
false;
117 ParsingFilename =
false;
123 LexingRawMode =
false;
126 ExtendedTokenMode = 0;
135 FileLoc(PP.getSourceManager().getLocForStartOfFile(FID)),
136 LangOpts(PP.getLangOpts()) {
137 InitLexer(InputFile->getBufferStart(), InputFile->getBufferStart(),
138 InputFile->getBufferEnd());
147 const char *BufStart,
const char *BufPtr,
const char *BufEnd)
148 : FileLoc(fileloc), LangOpts(langOpts) {
149 InitLexer(BufStart, BufPtr, BufEnd);
160 :
Lexer(SM.getLocForStartOfFile(FID), langOpts, FromFile->getBufferStart(),
161 FromFile->getBufferStart(), FromFile->getBufferEnd()) {}
164 assert(
PP &&
"Cannot reset token mode without a preprocessor");
165 if (LangOpts.TraditionalCPP)
194 const llvm::MemoryBuffer *InputFile = SM.
getBuffer(SpellingFID);
195 Lexer *L =
new Lexer(SpellingFID, InputFile, PP);
202 L->BufferPtr = StrData;
203 L->BufferEnd = StrData+TokLen;
204 assert(L->BufferEnd[0] == 0 &&
"Buffer is not nul terminated!");
210 ExpansionLocEnd, TokLen);
217 L->Is_PragmaLexer =
true;
222 typename T::size_type
i = 0, e = Str.size();
224 if (Str[i] ==
'\\' || Str[i] == Quote) {
225 Str.insert(Str.begin() +
i,
'\\');
228 }
else if (Str[i] ==
'\n' || Str[i] ==
'\r') {
230 if ((i < e - 1) && (Str[i + 1] ==
'\n' || Str[i + 1] ==
'\r') &&
231 Str[i] != Str[i + 1]) {
237 Str.insert(Str.begin() + i + 1,
'n');
248 char Quote = Charify ?
'\'' :
'"';
263 assert(Tok.
needsCleaning() &&
"getSpellingSlow called on simple token");
266 const char *BufEnd = BufPtr + Tok.
getLength();
270 while (BufPtr < BufEnd) {
275 if (Spelling[Length - 1] ==
'"')
283 Spelling[Length - 2] ==
'R' && Spelling[Length - 1] ==
'"') {
286 const char *RawEnd = BufEnd;
287 do --RawEnd;
while (*RawEnd !=
'"');
288 size_t RawLength = RawEnd - BufPtr + 1;
291 memcpy(Spelling + Length, BufPtr, RawLength);
299 while (BufPtr < BufEnd) {
306 "NeedsCleaning flag set on token that didn't need cleaning!");
324 bool invalidTemp =
false;
325 StringRef file = SM.
getBufferData(locInfo.first, &invalidTemp);
327 if (invalid) *invalid =
true;
331 const char *tokenBegin = file.data() + locInfo.second;
335 file.begin(), tokenBegin, file.end());
337 lexer.LexFromRawLexer(token);
339 unsigned length = token.getLength();
342 if (!token.needsCleaning())
343 return StringRef(tokenBegin, length);
346 buffer.resize(length);
347 buffer.resize(
getSpellingSlow(token, tokenBegin, options, buffer.data()));
348 return StringRef(buffer.data(), buffer.size());
358 assert((
int)Tok.
getLength() >= 0 &&
"Token character range is bogus!");
360 bool CharDataInvalid =
false;
364 *Invalid = CharDataInvalid;
370 return std::string(TokStart, TokStart + Tok.
getLength());
374 Result.resize(
getSpellingSlow(Tok, TokStart, LangOpts, &*Result.begin()));
391 assert((
int)Tok.
getLength() >= 0 &&
"Token character range is bogus!");
393 const char *TokStart =
nullptr;
395 if (Tok.
is(tok::raw_identifier))
400 Buffer = II->getNameStart();
401 return II->getLength();
411 bool CharDataInvalid =
false;
414 *Invalid = CharDataInvalid;
415 if (CharDataInvalid) {
428 return getSpellingSlow(Tok, TokStart, LangOpts, const_cast<char*>(Buffer));
449 bool IgnoreWhiteSpace) {
460 bool Invalid =
false;
461 StringRef Buffer = SM.
getBufferData(LocInfo.first, &Invalid);
465 const char *StrData = Buffer.data()+LocInfo.second;
472 Buffer.begin(), StrData, Buffer.end());
474 TheLexer.LexFromRawLexer(Result);
481 const char *BufStart = Buffer.data();
482 if (Offset >= Buffer.size())
485 const char *LexStart = BufStart +
Offset;
486 for (; LexStart != BufStart; --LexStart) {
502 if (LocInfo.first.isInvalid())
505 bool Invalid =
false;
506 StringRef Buffer = SM.
getBufferData(LocInfo.first, &Invalid);
512 const char *StrData = Buffer.data() + LocInfo.second;
514 if (!LexStart || LexStart == StrData)
519 Lexer TheLexer(LexerStartLoc, LangOpts, Buffer.data(), LexStart,
526 TheLexer.LexFromRawLexer(TheTok);
528 if (TheLexer.getBufferLocation() > StrData) {
532 if (TheLexer.getBufferLocation() - TheTok.
getLength() <= StrData)
557 std::pair<FileID, unsigned> BeginFileLocInfo =
559 assert(FileLocInfo.first == BeginFileLocInfo.first &&
560 FileLocInfo.second >= BeginFileLocInfo.second);
579 const unsigned StartOffset = 1;
581 Lexer TheLexer(FileLoc, LangOpts, Buffer.begin(), Buffer.begin(),
585 bool InPreprocessorDirective =
false;
589 unsigned MaxLineOffset = 0;
591 const char *CurPtr = Buffer.begin();
592 unsigned CurLine = 0;
593 while (CurPtr != Buffer.end()) {
597 if (CurLine == MaxLines)
601 if (CurPtr != Buffer.end())
602 MaxLineOffset = CurPtr - Buffer.begin();
606 TheLexer.LexFromRawLexer(TheTok);
608 if (InPreprocessorDirective) {
621 InPreprocessorDirective =
false;
630 if (MaxLineOffset && TokOffset >= MaxLineOffset)
635 if (TheTok.
getKind() == tok::comment) {
643 Token HashTok = TheTok;
644 InPreprocessorDirective =
true;
650 TheLexer.LexFromRawLexer(TheTok);
654 = llvm::StringSwitch<PreambleDirectiveKind>(Keyword)
655 .Case(
"include", PDK_Skipped)
656 .Case(
"__include_macros", PDK_Skipped)
657 .Case(
"define", PDK_Skipped)
658 .Case(
"undef", PDK_Skipped)
659 .Case(
"line", PDK_Skipped)
660 .Case(
"error", PDK_Skipped)
661 .Case(
"pragma", PDK_Skipped)
662 .Case(
"import", PDK_Skipped)
663 .Case(
"include_next", PDK_Skipped)
664 .Case(
"warning", PDK_Skipped)
665 .Case(
"ident", PDK_Skipped)
666 .Case(
"sccs", PDK_Skipped)
667 .Case(
"assert", PDK_Skipped)
668 .Case(
"unassert", PDK_Skipped)
669 .Case(
"if", PDK_Skipped)
670 .Case(
"ifdef", PDK_Skipped)
671 .Case(
"ifndef", PDK_Skipped)
672 .Case(
"elif", PDK_Skipped)
673 .Case(
"else", PDK_Skipped)
674 .Case(
"endif", PDK_Skipped)
675 .Default(PDK_Unknown);
700 if (ActiveCommentLoc.
isValid())
701 End = ActiveCommentLoc;
715 bool Invalid =
false;
719 if (Invalid || (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr)))
722 unsigned PhysOffset = 0;
727 while (Lexer::isObviouslySimpleCharacter(*TokPtr)) {
737 for (; CharNo; --CharNo) {
748 if (!Lexer::isObviouslySimpleCharacter(*TokPtr))
749 PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr;
801 if (expansionLoc.isFileID()) {
804 *MacroBegin = expansionLoc;
832 *MacroEnd = expansionLoc;
906 bool Invalid =
false;
936 if (Invalid) *Invalid =
true;
942 if (beginInfo.first.isInvalid()) {
943 if (Invalid) *Invalid =
true;
949 beginInfo.second > EndOffs) {
950 if (Invalid) *Invalid =
true;
955 bool invalidTemp =
false;
956 StringRef file = SM.
getBufferData(beginInfo.first, &invalidTemp);
958 if (Invalid) *Invalid =
true;
962 if (Invalid) *Invalid =
false;
963 return file.substr(beginInfo.second, EndOffs - beginInfo.second);
969 assert(Loc.
isMacroID() &&
"Only reasonable to call this on macros");
1009 StringRef ExpansionBuffer = SM.
getBufferData(ExpansionInfo.first);
1010 return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
1015 assert(Loc.
isMacroID() &&
"Only reasonable to call this on macros");
1034 StringRef ExpansionBuffer = SM.
getBufferData(ExpansionInfo.first);
1035 return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
1044 if (Str - 1 < BufferStart)
1047 if ((Str[0] ==
'\n' && Str[-1] ==
'\r') ||
1048 (Str[0] ==
'\r' && Str[-1] ==
'\n')) {
1049 if (Str - 2 < BufferStart)
1059 return *Str ==
'\\';
1067 if (LocInfo.first.isInvalid())
1069 bool Invalid =
false;
1070 StringRef Buffer = SM.
getBufferData(LocInfo.first, &Invalid);
1076 StringRef Rest = Buffer.substr(Line - Buffer.data());
1077 size_t NumWhitespaceChars = Rest.find_first_not_of(
" \t");
1078 return NumWhitespaceChars == StringRef::npos
1080 : Rest.take_front(NumWhitespaceChars);
1095 unsigned CharNo,
unsigned TokLen) {
1096 assert(FileLoc.
isMacroID() &&
"Must be a macro expansion");
1112 return SM.createExpansionLoc(SpellingLoc, II.
getBegin(), II.
getEnd(), TokLen);
1118 unsigned TokLen)
const {
1119 assert(Loc >= BufferStart && Loc <= BufferEnd &&
1120 "Location out of range for this buffer!");
1124 unsigned CharNo = Loc-BufferStart;
1130 assert(PP &&
"This doesn't work on raw lexers");
1149 case '=':
return '#';
1150 case ')':
return ']';
1151 case '(':
return '[';
1152 case '!':
return '|';
1153 case '\'':
return '^';
1154 case '>':
return '}';
1155 case '/':
return '\\';
1156 case '<':
return '{';
1157 case '-':
return '~';
1167 if (!Res || !L)
return Res;
1171 L->
Diag(CP-2, diag::trigraph_ignored);
1176 L->
Diag(CP-2, diag::trigraph_converted) << StringRef(&Res, 1);
1183 unsigned Lexer::getEscapedNewLineSize(
const char *Ptr) {
1188 if (Ptr[Size-1] !=
'\n' && Ptr[Size-1] !=
'\r')
1192 if ((Ptr[Size] ==
'\r' || Ptr[Size] ==
'\n') &&
1193 Ptr[Size-1] != Ptr[Size])
1206 const char *Lexer::SkipEscapedNewLines(
const char *
P) {
1208 const char *AfterEscape;
1211 }
else if (*P ==
'?') {
1213 if (P[1] !=
'?' || P[2] !=
'/')
1222 unsigned NewLineSize = Lexer::getEscapedNewLineSize(AfterEscape);
1223 if (NewLineSize == 0)
return P;
1224 P = AfterEscape+NewLineSize;
1241 bool InvalidTemp =
false;
1242 StringRef File = SM.
getBufferData(LocInfo.first, &InvalidTemp);
1246 const char *TokenBegin = File.data() + LocInfo.second;
1250 TokenBegin, File.end());
1253 lexer.LexFromRawLexer(Tok);
1263 const LangOptions &LangOpts,
bool SkipTrailingWhitespaceAndNewLine) {
1265 if (!Tok || Tok->isNot(TKind))
1270 unsigned NumWhitespaceChars = 0;
1271 if (SkipTrailingWhitespaceAndNewLine) {
1273 unsigned char C = *TokenEnd;
1276 NumWhitespaceChars++;
1280 if (C ==
'\n' || C ==
'\r') {
1283 NumWhitespaceChars++;
1284 if ((C ==
'\n' || C ==
'\r') && C != PrevC)
1285 NumWhitespaceChars++;
1307 char Lexer::getCharAndSizeSlow(
const char *Ptr,
unsigned &Size,
1310 if (Ptr[0] ==
'\\') {
1319 if (
unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) {
1325 Diag(Ptr, diag::backslash_newline_space);
1328 Size += EscapedNewLineSize;
1329 Ptr += EscapedNewLineSize;
1332 return getCharAndSizeSlow(Ptr, Size, Tok);
1340 if (Ptr[0] ==
'?' && Ptr[1] ==
'?') {
1349 if (
C ==
'\\')
goto Slash;
1365 char Lexer::getCharAndSizeSlowNoWarn(
const char *Ptr,
unsigned &Size,
1368 if (Ptr[0] ==
'\\') {
1376 if (
unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) {
1378 Size += EscapedNewLineSize;
1379 Ptr += EscapedNewLineSize;
1382 return getCharAndSizeSlowNoWarn(Ptr, Size, LangOpts);
1390 if (LangOpts.Trigraphs && Ptr[0] ==
'?' && Ptr[1] ==
'?') {
1396 if (
C ==
'\\')
goto Slash;
1411 void Lexer::SetByteOffset(
unsigned Offset,
bool StartOfLine) {
1412 BufferPtr = BufferStart +
Offset;
1413 if (BufferPtr > BufferEnd)
1414 BufferPtr = BufferEnd;
1418 IsAtStartOfLine = StartOfLine;
1419 IsAtPhysicalStartOfLine = StartOfLine;
1423 if (LangOpts.AsmPreprocessor) {
1425 }
else if (LangOpts.CPlusPlus11 || LangOpts.C11) {
1426 static const llvm::sys::UnicodeCharSet C11AllowedIDChars(
1428 return C11AllowedIDChars.contains(C);
1429 }
else if (LangOpts.CPlusPlus) {
1430 static const llvm::sys::UnicodeCharSet CXX03AllowedIDChars(
1432 return CXX03AllowedIDChars.contains(C);
1434 static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
1436 return C99AllowedIDChars.contains(C);
1442 if (LangOpts.AsmPreprocessor) {
1444 }
else if (LangOpts.CPlusPlus11 || LangOpts.C11) {
1445 static const llvm::sys::UnicodeCharSet C11DisallowedInitialIDChars(
1447 return !C11DisallowedInitialIDChars.contains(C);
1448 }
else if (LangOpts.CPlusPlus) {
1451 static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
1453 return !C99DisallowedInitialIDChars.contains(C);
1468 CannotAppearInIdentifier = 0,
1469 CannotStartIdentifier
1472 static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
1474 static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
1476 if (!C99AllowedIDChars.contains(C)) {
1479 << CannotAppearInIdentifier;
1480 }
else if (IsFirst && C99DisallowedInitialIDChars.contains(C)) {
1483 << CannotStartIdentifier;
1489 static const llvm::sys::UnicodeCharSet CXX03AllowedIDChars(
1491 if (!CXX03AllowedIDChars.contains(C)) {
1492 Diags.
Report(Range.
getBegin(), diag::warn_cxx98_compat_unicode_id)
1505 struct HomoglyphPair {
1508 bool operator<(HomoglyphPair R)
const {
return Character < R.Character; }
1510 static constexpr HomoglyphPair SortedHomoglyphs[] = {
1563 std::lower_bound(std::begin(SortedHomoglyphs),
1564 std::end(SortedHomoglyphs) - 1, HomoglyphPair{
C,
'\0'});
1565 if (Homoglyph->Character == C) {
1568 llvm::raw_svector_ostream CharOS(CharBuf);
1569 llvm::write_hex(CharOS, C, llvm::HexPrintStyle::Upper, 4);
1571 if (Homoglyph->LooksLike) {
1572 const char LooksLikeStr[] = {Homoglyph->LooksLike, 0};
1574 << Range << CharBuf << LooksLikeStr;
1576 Diags.
Report(Range.
getBegin(), diag::warn_utf8_symbol_zero_width)
1577 << Range << CharBuf;
1582 bool Lexer::tryConsumeIdentifierUCN(
const char *&CurPtr,
unsigned Size,
1584 const char *UCNPtr = CurPtr + Size;
1585 uint32_t CodePoint = tryReadUCN(UCNPtr, CurPtr,
nullptr);
1595 if ((UCNPtr - CurPtr == 6 && CurPtr[1] ==
'u') ||
1596 (UCNPtr - CurPtr == 10 && CurPtr[1] ==
'U'))
1599 while (CurPtr != UCNPtr)
1600 (void)getAndAdvanceChar(CurPtr, Result);
1604 bool Lexer::tryConsumeIdentifierUTF8Char(
const char *&CurPtr) {
1605 const char *UnicodePtr = CurPtr;
1606 llvm::UTF32 CodePoint;
1607 llvm::ConversionResult Result =
1608 llvm::convertUTF8Sequence((
const llvm::UTF8 **)&UnicodePtr,
1609 (
const llvm::UTF8 *)BufferEnd,
1611 llvm::strictConversion);
1612 if (Result != llvm::conversionOK ||
1624 CurPtr = UnicodePtr;
1628 bool Lexer::LexIdentifier(
Token &Result,
const char *CurPtr) {
1631 unsigned char C = *CurPtr++;
1642 if (
isASCII(C) && C !=
'\\' && C !=
'?' &&
1643 (C !=
'$' || !LangOpts.DollarIdents)) {
1645 const char *IdStart = BufferPtr;
1646 FormTokenWithChars(Result, CurPtr, tok::raw_identifier);
1663 if (isCodeCompletionPoint(CurPtr)) {
1665 Result.
setKind(tok::code_completion);
1671 assert(*CurPtr == 0 &&
"Completion character must be 0");
1676 if (CurPtr < BufferEnd) {
1694 C = getCharAndSize(CurPtr, Size);
1698 if (!LangOpts.DollarIdents)
goto FinishIdentifier;
1702 Diag(CurPtr, diag::ext_dollar_in_identifier);
1703 CurPtr = ConsumeChar(CurPtr, Size, Result);
1704 C = getCharAndSize(CurPtr, Size);
1706 }
else if (C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result)) {
1707 C = getCharAndSize(CurPtr, Size);
1709 }
else if (!
isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr)) {
1710 C = getCharAndSize(CurPtr, Size);
1713 goto FinishIdentifier;
1717 CurPtr = ConsumeChar(CurPtr, Size, Result);
1719 C = getCharAndSize(CurPtr, Size);
1721 CurPtr = ConsumeChar(CurPtr, Size, Result);
1722 C = getCharAndSize(CurPtr, Size);
1729 bool Lexer::isHexaLiteral(
const char *Start,
const LangOptions &LangOpts) {
1735 return (C2 ==
'x' || C2 ==
'X');
1741 bool Lexer::LexNumericConstant(
Token &Result,
const char *CurPtr) {
1743 char C = getCharAndSize(CurPtr, Size);
1746 CurPtr = ConsumeChar(CurPtr, Size, Result);
1748 C = getCharAndSize(CurPtr, Size);
1752 if ((C ==
'-' || C ==
'+') && (PrevCh ==
'E' || PrevCh ==
'e')) {
1755 if (!LangOpts.MicrosoftExt || !isHexaLiteral(BufferPtr, LangOpts))
1756 return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
1760 if ((C ==
'-' || C ==
'+') && (PrevCh ==
'P' || PrevCh ==
'p')) {
1764 bool IsHexFloat =
true;
1765 if (!LangOpts.C99) {
1766 if (!isHexaLiteral(BufferPtr, LangOpts))
1769 std::find(BufferPtr, CurPtr,
'_') != CurPtr)
1773 return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
1782 Diag(CurPtr, diag::warn_cxx11_compat_digit_separator);
1783 CurPtr = ConsumeChar(CurPtr, Size, Result);
1784 CurPtr = ConsumeChar(CurPtr, NextSize, Result);
1785 return LexNumericConstant(Result, CurPtr);
1790 if (C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result))
1791 return LexNumericConstant(Result, CurPtr);
1792 if (!
isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr))
1793 return LexNumericConstant(Result, CurPtr);
1796 const char *TokStart = BufferPtr;
1797 FormTokenWithChars(Result, CurPtr, tok::numeric_constant);
1804 const char *Lexer::LexUDSuffix(
Token &Result,
const char *CurPtr,
1805 bool IsStringLiteral) {
1810 char C = getCharAndSize(CurPtr, Size);
1811 bool Consumed =
false;
1814 if (C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result))
1816 else if (!
isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr))
1825 C ==
'_' ? diag::warn_cxx11_compat_user_defined_literal
1826 : diag::warn_cxx11_compat_reserved_user_defined_literal)
1837 bool IsUDSuffix =
false;
1844 const unsigned MaxStandardSuffixLength = 3;
1845 char Buffer[MaxStandardSuffixLength] = { C };
1846 unsigned Consumed = Size;
1854 const StringRef CompleteSuffix(Buffer, Chars);
1860 if (Chars == MaxStandardSuffixLength)
1864 Buffer[Chars++] = Next;
1865 Consumed += NextSize;
1872 ? diag::ext_ms_reserved_user_defined_literal
1873 : diag::ext_reserved_user_defined_literal)
1878 CurPtr = ConsumeChar(CurPtr, Size, Result);
1883 C = getCharAndSize(CurPtr, Size);
1885 else if (C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result)) {}
1886 else if (!
isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr)) {}
1895 bool Lexer::LexStringLiteral(
Token &Result,
const char *CurPtr,
1897 const char *AfterQuote = CurPtr;
1899 const char *NulCharacter =
nullptr;
1902 (Kind == tok::utf8_string_literal ||
1903 Kind == tok::utf16_string_literal ||
1904 Kind == tok::utf32_string_literal))
1906 ? diag::warn_cxx98_compat_unicode_literal
1907 : diag::warn_c99_compat_unicode_literal);
1909 char C = getAndAdvanceChar(CurPtr, Result);
1914 C = getAndAdvanceChar(CurPtr, Result);
1916 if (C ==
'\n' || C ==
'\r' ||
1917 (C == 0 && CurPtr-1 == BufferEnd)) {
1919 Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 1;
1920 FormTokenWithChars(Result, CurPtr-1, tok::unknown);
1925 if (isCodeCompletionPoint(CurPtr-1)) {
1927 codeCompleteIncludedFile(AfterQuote, CurPtr - 1,
false);
1930 FormTokenWithChars(Result, CurPtr - 1, tok::unknown);
1935 NulCharacter = CurPtr-1;
1937 C = getAndAdvanceChar(CurPtr, Result);
1942 CurPtr = LexUDSuffix(Result, CurPtr,
true);
1946 Diag(NulCharacter, diag::null_in_char_or_string) << 1;
1949 const char *TokStart = BufferPtr;
1950 FormTokenWithChars(Result, CurPtr, Kind);
1957 bool Lexer::LexRawStringLiteral(
Token &Result,
const char *CurPtr,
1965 Diag(BufferPtr, diag::warn_cxx98_compat_raw_string_literal);
1967 unsigned PrefixLen = 0;
1973 if (CurPtr[PrefixLen] !=
'(') {
1975 const char *PrefixEnd = &CurPtr[PrefixLen];
1976 if (PrefixLen == 16) {
1977 Diag(PrefixEnd, diag::err_raw_delim_too_long);
1979 Diag(PrefixEnd, diag::err_invalid_char_raw_delim)
1980 << StringRef(PrefixEnd, 1);
1992 if (C == 0 && CurPtr-1 == BufferEnd) {
1998 FormTokenWithChars(Result, CurPtr, tok::unknown);
2003 const char *Prefix = CurPtr;
2004 CurPtr += PrefixLen + 1;
2011 if (strncmp(CurPtr, Prefix, PrefixLen) == 0 && CurPtr[PrefixLen] ==
'"') {
2012 CurPtr += PrefixLen + 1;
2015 }
else if (C == 0 && CurPtr-1 == BufferEnd) {
2017 Diag(BufferPtr, diag::err_unterminated_raw_string)
2018 << StringRef(Prefix, PrefixLen);
2019 FormTokenWithChars(Result, CurPtr-1, tok::unknown);
2026 CurPtr = LexUDSuffix(Result, CurPtr,
true);
2029 const char *TokStart = BufferPtr;
2030 FormTokenWithChars(Result, CurPtr, Kind);
2037 bool Lexer::LexAngledStringLiteral(
Token &Result,
const char *CurPtr) {
2039 const char *NulCharacter =
nullptr;
2040 const char *AfterLessPos = CurPtr;
2041 char C = getAndAdvanceChar(CurPtr, Result);
2046 C = getAndAdvanceChar(CurPtr, Result);
2048 if (C ==
'\n' || C ==
'\r' ||
2049 (C == 0 && (CurPtr - 1 == BufferEnd))) {
2052 FormTokenWithChars(Result, AfterLessPos, tok::less);
2057 if (isCodeCompletionPoint(CurPtr - 1)) {
2058 codeCompleteIncludedFile(AfterLessPos, CurPtr - 1,
true);
2060 FormTokenWithChars(Result, CurPtr - 1, tok::unknown);
2063 NulCharacter = CurPtr-1;
2065 C = getAndAdvanceChar(CurPtr, Result);
2070 Diag(NulCharacter, diag::null_in_char_or_string) << 1;
2073 const char *TokStart = BufferPtr;
2074 FormTokenWithChars(Result, CurPtr, tok::header_name);
2079 void Lexer::codeCompleteIncludedFile(
const char *PathStart,
2080 const char *CompletionPoint,
2083 StringRef PartialPath(PathStart, CompletionPoint - PathStart);
2084 auto Slash = PartialPath.find_last_of(LangOpts.MSVCCompat ?
"/\\" :
"/");
2086 (Slash == StringRef::npos) ?
"" : PartialPath.take_front(Slash);
2087 const char *StartOfFilename =
2088 (Slash == StringRef::npos) ? PathStart : PathStart + Slash + 1;
2091 StringRef(StartOfFilename, CompletionPoint - StartOfFilename)));
2093 while (CompletionPoint < BufferEnd) {
2094 char Next = *(CompletionPoint + 1);
2095 if (Next == 0 || Next ==
'\r' || Next ==
'\n')
2098 if (Next == (IsAngled ?
'>' :
'"'))
2109 bool Lexer::LexCharConstant(
Token &Result,
const char *CurPtr,
2112 const char *NulCharacter =
nullptr;
2115 if (Kind == tok::utf16_char_constant || Kind == tok::utf32_char_constant)
2117 ? diag::warn_cxx98_compat_unicode_literal
2118 : diag::warn_c99_compat_unicode_literal);
2119 else if (Kind == tok::utf8_char_constant)
2120 Diag(BufferPtr, diag::warn_cxx14_compat_u8_character_literal);
2123 char C = getAndAdvanceChar(CurPtr, Result);
2126 Diag(BufferPtr, diag::ext_empty_character);
2127 FormTokenWithChars(Result, CurPtr, tok::unknown);
2134 C = getAndAdvanceChar(CurPtr, Result);
2136 if (C ==
'\n' || C ==
'\r' ||
2137 (C == 0 && CurPtr-1 == BufferEnd)) {
2139 Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 0;
2140 FormTokenWithChars(Result, CurPtr-1, tok::unknown);
2145 if (isCodeCompletionPoint(CurPtr-1)) {
2147 FormTokenWithChars(Result, CurPtr-1, tok::unknown);
2152 NulCharacter = CurPtr-1;
2154 C = getAndAdvanceChar(CurPtr, Result);
2159 CurPtr = LexUDSuffix(Result, CurPtr,
false);
2163 Diag(NulCharacter, diag::null_in_char_or_string) << 0;
2166 const char *TokStart = BufferPtr;
2167 FormTokenWithChars(Result, CurPtr, Kind);
2176 bool Lexer::SkipWhitespace(
Token &Result,
const char *CurPtr,
2177 bool &TokAtPhysicalStartOfLine) {
2181 unsigned char Char = *CurPtr;
2206 FormTokenWithChars(Result, CurPtr, tok::unknown);
2208 IsAtStartOfLine =
true;
2209 IsAtPhysicalStartOfLine =
true;
2216 char PrevChar = CurPtr[-1];
2222 TokAtPhysicalStartOfLine =
true;
2235 bool Lexer::SkipLineComment(
Token &Result,
const char *CurPtr,
2236 bool &TokAtPhysicalStartOfLine) {
2240 Diag(BufferPtr, diag::ext_line_comment);
2244 LangOpts.LineComment =
true;
2258 C !=
'\n' && C !=
'\r')
2261 const char *NextLine = CurPtr;
2264 const char *EscapePtr = CurPtr-1;
2265 bool HasSpace =
false;
2271 if (*EscapePtr ==
'\\')
2274 else if (EscapePtr[0] ==
'/' && EscapePtr[-1] ==
'?' &&
2275 EscapePtr[-2] ==
'?' && LangOpts.Trigraphs)
2277 CurPtr = EscapePtr-2;
2283 Diag(EscapePtr, diag::backslash_newline_space);
2290 const char *OldPtr = CurPtr;
2293 C = getAndAdvanceChar(CurPtr, Result);
2298 if (C != 0 && CurPtr == OldPtr+1) {
2306 if (CurPtr != OldPtr + 1 && C !=
'/' &&
2307 (CurPtr == BufferEnd + 1 || CurPtr[0] !=
'/')) {
2308 for (; OldPtr != CurPtr; ++OldPtr)
2309 if (OldPtr[0] ==
'\n' || OldPtr[0] ==
'\r') {
2313 const char *ForwardPtr = CurPtr;
2316 if (ForwardPtr[0] ==
'/' && ForwardPtr[1] ==
'/')
2321 Diag(OldPtr-1, diag::ext_multi_line_line_comment);
2326 if (C ==
'\r' || C ==
'\n' || CurPtr == BufferEnd + 1) {
2331 if (C ==
'\0' && isCodeCompletionPoint(CurPtr-1)) {
2349 return SaveLineComment(Result, CurPtr);
2367 TokAtPhysicalStartOfLine =
true;
2376 bool Lexer::SaveLineComment(
Token &Result,
const char *CurPtr) {
2379 FormTokenWithChars(Result, CurPtr, tok::comment);
2386 bool Invalid =
false;
2387 std::string Spelling = PP->
getSpelling(Result, &Invalid);
2391 assert(Spelling[0] ==
'/' && Spelling[1] ==
'/' &&
"Not line comment?");
2406 assert(CurPtr[0] ==
'\n' || CurPtr[0] ==
'\r');
2412 if (CurPtr[0] ==
'\n' || CurPtr[0] ==
'\r') {
2414 if (CurPtr[0] == CurPtr[1])
2422 bool HasSpace =
false;
2429 if (*CurPtr ==
'\\') {
2430 if (CurPtr[-1] !=
'*')
return false;
2433 if (CurPtr[0] !=
'/' || CurPtr[-1] !=
'?' || CurPtr[-2] !=
'?' ||
2444 L->
Diag(CurPtr, diag::trigraph_ignored_block_comment);
2448 L->
Diag(CurPtr, diag::trigraph_ends_block_comment);
2453 L->
Diag(CurPtr, diag::escaped_newline_block_comment_end);
2457 L->
Diag(CurPtr, diag::backslash_newline_space);
2463 #include <emmintrin.h> 2478 bool Lexer::SkipBlockComment(
Token &Result,
const char *CurPtr,
2479 bool &TokAtPhysicalStartOfLine) {
2489 unsigned char C = getCharAndSize(CurPtr, CharSize);
2491 if (C == 0 && CurPtr == BufferEnd+1) {
2493 Diag(BufferPtr, diag::err_unterminated_block_comment);
2499 FormTokenWithChars(Result, CurPtr, tok::unknown);
2515 if (CurPtr + 24 < BufferEnd &&
2520 while (C !=
'/' && ((
intptr_t)CurPtr & 0x0F) != 0)
2523 if (C ==
'/')
goto FoundSlash;
2527 while (CurPtr+16 <= BufferEnd) {
2534 CurPtr += llvm::countTrailingZeros<unsigned>(cmp) + 1;
2540 __vector
unsigned char Slashes = {
2541 '/',
'/',
'/',
'/',
'/',
'/',
'/',
'/',
2542 '/',
'/',
'/',
'/',
'/',
'/',
'/',
'/' 2544 while (CurPtr+16 <= BufferEnd &&
2545 !
vec_any_eq(*(
const vector
unsigned char*)CurPtr, Slashes))
2549 while (CurPtr[0] !=
'/' &&
2553 CurPtr+4 < BufferEnd) {
2563 while (C !=
'/' && C !=
'\0')
2568 if (CurPtr[-2] ==
'*')
2571 if ((CurPtr[-2] ==
'\n' || CurPtr[-2] ==
'\r')) {
2578 if (CurPtr[0] ==
'*' && CurPtr[1] !=
'/') {
2583 Diag(CurPtr-1, diag::warn_nested_block_comment);
2585 }
else if (C == 0 && CurPtr == BufferEnd+1) {
2587 Diag(BufferPtr, diag::err_unterminated_block_comment);
2596 FormTokenWithChars(Result, CurPtr, tok::unknown);
2602 }
else if (C ==
'\0' && isCodeCompletionPoint(CurPtr-1)) {
2621 FormTokenWithChars(Result, CurPtr, tok::comment);
2630 SkipWhitespace(Result, CurPtr+1, TokAtPhysicalStartOfLine);
2648 "Must be in a preprocessing directive!");
2652 const char *CurPtr = BufferPtr;
2654 char Char = getAndAdvanceChar(CurPtr, Tmp);
2658 Result->push_back(Char);
2662 if (CurPtr-1 != BufferEnd) {
2663 if (isCodeCompletionPoint(CurPtr-1)) {
2671 Result->push_back(Char);
2679 assert(CurPtr[-1] == Char &&
"Trigraphs for newline?");
2680 BufferPtr = CurPtr-1;
2684 if (Tmp.
is(tok::code_completion)) {
2689 assert(Tmp.
is(tok::eod) &&
"Unexpected token!");
2701 bool Lexer::LexEndOfFile(
Token &Result,
const char *CurPtr) {
2709 FormTokenWithChars(Result, CurPtr, tok::eod);
2721 BufferPtr = BufferEnd;
2722 FormTokenWithChars(Result, BufferEnd,
tok::eof);
2737 diag::err_pp_unterminated_conditional);
2743 if (CurPtr != BufferStart && (CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')) {
2748 if (LangOpts.CPlusPlus11) {
2752 if (!Diags.
isIgnored(diag::warn_cxx98_compat_no_newline_eof, EndLoc)) {
2753 DiagID = diag::warn_cxx98_compat_no_newline_eof;
2755 DiagID = diag::warn_no_newline_eof;
2758 DiagID = diag::ext_no_newline_eof;
2761 Diag(BufferEnd, DiagID)
2775 unsigned Lexer::isNextPPTokenLParen() {
2776 assert(!
LexingRawMode &&
"How can we expand a macro from a skipping buffer?");
2784 const char *TmpBufferPtr = BufferPtr;
2786 bool atStartOfLine = IsAtStartOfLine;
2787 bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
2788 bool leadingSpace = HasLeadingSpace;
2794 BufferPtr = TmpBufferPtr;
2796 HasLeadingSpace = leadingSpace;
2797 IsAtStartOfLine = atStartOfLine;
2798 IsAtPhysicalStartOfLine = atPhysicalStartOfLine;
2805 return Tok.
is(tok::l_paren);
2811 const char *Terminator = CMK ==
CMK_Perforce ?
"<<<<\n" :
">>>>>>>";
2813 auto RestOfBuffer = StringRef(CurPtr, BufferEnd - CurPtr).substr(TermLen);
2814 size_t Pos = RestOfBuffer.find(Terminator);
2815 while (Pos != StringRef::npos) {
2818 (RestOfBuffer[Pos - 1] !=
'\r' && RestOfBuffer[Pos - 1] !=
'\n')) {
2819 RestOfBuffer = RestOfBuffer.substr(Pos+TermLen);
2820 Pos = RestOfBuffer.find(Terminator);
2823 return RestOfBuffer.data()+Pos;
2832 bool Lexer::IsStartOfConflictMarker(
const char *CurPtr) {
2834 if (CurPtr != BufferStart &&
2835 CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')
2839 if (!StringRef(CurPtr, BufferEnd - CurPtr).startswith(
"<<<<<<<") &&
2840 !StringRef(CurPtr, BufferEnd - CurPtr).startswith(
">>>> "))
2855 Diag(CurPtr, diag::err_conflict_marker);
2856 CurrentConflictMarkerState =
Kind;
2860 while (*CurPtr !=
'\r' && *CurPtr !=
'\n') {
2861 assert(CurPtr != BufferEnd &&
"Didn't find end of line");
2876 bool Lexer::HandleEndOfConflictMarker(
const char *CurPtr) {
2878 if (CurPtr != BufferStart &&
2879 CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')
2888 for (
unsigned i = 1;
i != 4; ++
i)
2889 if (CurPtr[
i] != CurPtr[0])
2896 CurrentConflictMarkerState)) {
2900 while (CurPtr != BufferEnd && *CurPtr !=
'\r' && *CurPtr !=
'\n')
2906 CurrentConflictMarkerState =
CMK_None;
2914 const char *BufferEnd) {
2915 if (CurPtr == BufferEnd)
2918 for (; CurPtr != BufferEnd; ++CurPtr) {
2919 if (CurPtr[0] ==
'#' && CurPtr[1] ==
'>')
2925 bool Lexer::lexEditorPlaceholder(
Token &Result,
const char *CurPtr) {
2926 assert(CurPtr[-1] ==
'<' && CurPtr[0] ==
'#' &&
"Not a placeholder!");
2932 const char *Start = CurPtr - 1;
2933 if (!LangOpts.AllowEditorPlaceholders)
2934 Diag(Start, diag::err_placeholder_in_source);
2936 FormTokenWithChars(Result, End, tok::raw_identifier);
2944 bool Lexer::isCodeCompletionPoint(
const char *CurPtr)
const {
2953 uint32_t Lexer::tryReadUCN(
const char *&StartPtr,
const char *SlashLoc,
2956 char Kind = getCharAndSize(StartPtr, CharSize);
2958 unsigned NumHexDigits;
2961 else if (Kind ==
'U')
2966 if (!LangOpts.CPlusPlus && !LangOpts.C99) {
2968 Diag(SlashLoc, diag::warn_ucn_not_valid_in_c89);
2972 const char *CurPtr = StartPtr + CharSize;
2973 const char *KindLoc = &CurPtr[-1];
2975 uint32_t CodePoint = 0;
2976 for (
unsigned i = 0;
i < NumHexDigits; ++
i) {
2977 char C = getCharAndSize(CurPtr, CharSize);
2979 unsigned Value = llvm::hexDigitValue(C);
2983 Diag(BufferPtr, diag::warn_ucn_escape_no_digits)
2984 << StringRef(KindLoc, 1);
2986 Diag(BufferPtr, diag::warn_ucn_escape_incomplete);
2989 if (
i == 4 && NumHexDigits == 8) {
2991 Diag(KindLoc, diag::note_ucn_four_not_eight)
3008 if (CurPtr - StartPtr == (
ptrdiff_t)NumHexDigits + 2)
3011 while (StartPtr != CurPtr)
3012 (void)getAndAdvanceChar(StartPtr, *Result);
3018 if (LangOpts.AsmPreprocessor)
3032 if (CodePoint < 0xA0) {
3033 if (CodePoint == 0x24 || CodePoint == 0x40 || CodePoint == 0x60)
3039 if (CodePoint < 0x20 || CodePoint >= 0x7F)
3040 Diag(BufferPtr, diag::err_ucn_control_character);
3042 char C =
static_cast<char>(CodePoint);
3043 Diag(BufferPtr, diag::err_ucn_escape_basic_scs) << StringRef(&C, 1);
3048 }
else if (CodePoint >= 0xD800 && CodePoint <= 0xDFFF) {
3053 if (LangOpts.CPlusPlus && !LangOpts.CPlusPlus11)
3054 Diag(BufferPtr, diag::warn_ucn_escape_surrogate);
3056 Diag(BufferPtr, diag::err_ucn_escape_invalid);
3064 bool Lexer::CheckUnicodeWhitespace(
Token &Result, uint32_t
C,
3065 const char *CurPtr) {
3066 static const llvm::sys::UnicodeCharSet UnicodeWhitespaceChars(
3069 UnicodeWhitespaceChars.contains(
C)) {
3070 Diag(BufferPtr, diag::ext_unicode_whitespace)
3079 bool Lexer::LexUnicode(
Token &Result, uint32_t
C,
const char *CurPtr) {
3091 return LexIdentifier(Result, CurPtr);
3106 Diag(BufferPtr, diag::err_non_ascii)
3116 FormTokenWithChars(Result, CurPtr, tok::unknown);
3120 void Lexer::PropagateLineStartLeadingSpaceInfo(
Token &Result) {
3127 bool Lexer::Lex(
Token &Result) {
3132 if (IsAtStartOfLine) {
3134 IsAtStartOfLine =
false;
3137 if (HasLeadingSpace) {
3139 HasLeadingSpace =
false;
3142 if (HasLeadingEmptyMacro) {
3144 HasLeadingEmptyMacro =
false;
3147 bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
3148 IsAtPhysicalStartOfLine =
false;
3151 bool returnedToken = LexTokenInternal(Result, atPhysicalStartOfLine);
3153 assert((returnedToken || !isRawLex) &&
"Raw lex must succeed");
3154 return returnedToken;
3162 bool Lexer::LexTokenInternal(
Token &Result,
bool TokAtPhysicalStartOfLine) {
3169 const char *CurPtr = BufferPtr;
3172 if ((*CurPtr ==
' ') || (*CurPtr ==
'\t')) {
3174 while ((*CurPtr ==
' ') || (*CurPtr ==
'\t'))
3181 FormTokenWithChars(Result, CurPtr, tok::unknown);
3190 unsigned SizeTmp, SizeTmp2;
3193 char Char = getAndAdvanceChar(CurPtr, Result);
3199 if (CurPtr-1 == BufferEnd)
3200 return LexEndOfFile(Result, CurPtr-1);
3203 if (isCodeCompletionPoint(CurPtr-1)) {
3206 FormTokenWithChars(Result, CurPtr, tok::code_completion);
3211 Diag(CurPtr-1, diag::null_in_file);
3213 if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
3222 if (LangOpts.MicrosoftExt) {
3224 Diag(CurPtr-1, diag::ext_ctrl_z_eof_microsoft);
3225 return LexEndOfFile(Result, CurPtr-1);
3229 Kind = tok::unknown;
3233 if (CurPtr[0] ==
'\n')
3234 (void)getAndAdvanceChar(CurPtr, Result);
3248 IsAtStartOfLine =
true;
3249 IsAtPhysicalStartOfLine =
true;
3258 if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
3268 SkipHorizontalWhitespace:
3270 if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
3279 LangOpts.LineComment &&
3280 (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP)) {
3281 if (SkipLineComment(Result, CurPtr+2, TokAtPhysicalStartOfLine))
3283 goto SkipIgnoredUnits;
3285 if (SkipBlockComment(Result, CurPtr+2, TokAtPhysicalStartOfLine))
3287 goto SkipIgnoredUnits;
3289 goto SkipHorizontalWhitespace;
3297 case '0':
case '1':
case '2':
case '3':
case '4':
3298 case '5':
case '6':
case '7':
case '8':
case '9':
3301 return LexNumericConstant(Result, CurPtr);
3307 if (LangOpts.CPlusPlus11 || LangOpts.C11) {
3308 Char = getCharAndSize(CurPtr, SizeTmp);
3312 return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3313 tok::utf16_string_literal);
3317 return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3318 tok::utf16_char_constant);
3321 if (Char ==
'R' && LangOpts.CPlusPlus11 &&
3322 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3323 return LexRawStringLiteral(Result,
3324 ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3326 tok::utf16_string_literal);
3329 char Char2 = getCharAndSize(CurPtr + SizeTmp, SizeTmp2);
3333 return LexStringLiteral(Result,
3334 ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3336 tok::utf8_string_literal);
3337 if (Char2 ==
'\'' && LangOpts.CPlusPlus17)
3338 return LexCharConstant(
3339 Result, ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3341 tok::utf8_char_constant);
3343 if (Char2 ==
'R' && LangOpts.CPlusPlus11) {
3345 char Char3 = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
3348 return LexRawStringLiteral(Result,
3349 ConsumeChar(ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3352 tok::utf8_string_literal);
3359 return LexIdentifier(Result, CurPtr);
3365 if (LangOpts.CPlusPlus11 || LangOpts.C11) {
3366 Char = getCharAndSize(CurPtr, SizeTmp);
3370 return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3371 tok::utf32_string_literal);
3375 return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3376 tok::utf32_char_constant);
3379 if (Char ==
'R' && LangOpts.CPlusPlus11 &&
3380 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3381 return LexRawStringLiteral(Result,
3382 ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3384 tok::utf32_string_literal);
3388 return LexIdentifier(Result, CurPtr);
3394 if (LangOpts.CPlusPlus11) {
3395 Char = getCharAndSize(CurPtr, SizeTmp);
3398 return LexRawStringLiteral(Result,
3399 ConsumeChar(CurPtr, SizeTmp, Result),
3400 tok::string_literal);
3404 return LexIdentifier(Result, CurPtr);
3409 Char = getCharAndSize(CurPtr, SizeTmp);
3413 return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3414 tok::wide_string_literal);
3417 if (LangOpts.CPlusPlus11 && Char ==
'R' &&
3418 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3419 return LexRawStringLiteral(Result,
3420 ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3422 tok::wide_string_literal);
3426 return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3427 tok::wide_char_constant);
3432 case 'A':
case 'B':
case 'C':
case 'D':
case 'E':
case 'F':
case 'G':
3433 case 'H':
case 'I':
case 'J':
case 'K':
case 'M':
case 'N':
3434 case 'O':
case 'P':
case 'Q':
case 'S':
case 'T':
3435 case 'V':
case 'W':
case 'X':
case 'Y':
case 'Z':
3436 case 'a':
case 'b':
case 'c':
case 'd':
case 'e':
case 'f':
case 'g':
3437 case 'h':
case 'i':
case 'j':
case 'k':
case 'l':
case 'm':
case 'n':
3438 case 'o':
case 'p':
case 'q':
case 'r':
case 's':
case 't':
3439 case 'v':
case 'w':
case 'x':
case 'y':
case 'z':
3443 return LexIdentifier(Result, CurPtr);
3446 if (LangOpts.DollarIdents) {
3448 Diag(CurPtr-1, diag::ext_dollar_in_identifier);
3451 return LexIdentifier(Result, CurPtr);
3454 Kind = tok::unknown;
3461 return LexCharConstant(Result, CurPtr, tok::char_constant);
3467 return LexStringLiteral(Result, CurPtr,
3469 : tok::string_literal);
3473 Kind = tok::question;
3476 Kind = tok::l_square;
3479 Kind = tok::r_square;
3482 Kind = tok::l_paren;
3485 Kind = tok::r_paren;
3488 Kind = tok::l_brace;
3491 Kind = tok::r_brace;
3494 Char = getCharAndSize(CurPtr, SizeTmp);
3495 if (Char >=
'0' && Char <=
'9') {
3499 return LexNumericConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result));
3500 }
else if (LangOpts.CPlusPlus && Char ==
'*') {
3501 Kind = tok::periodstar;
3503 }
else if (Char ==
'.' &&
3504 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
'.') {
3505 Kind = tok::ellipsis;
3506 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3513 Char = getCharAndSize(CurPtr, SizeTmp);
3516 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3517 }
else if (Char ==
'=') {
3518 Kind = tok::ampequal;
3519 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3525 if (getCharAndSize(CurPtr, SizeTmp) ==
'=') {
3526 Kind = tok::starequal;
3527 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3533 Char = getCharAndSize(CurPtr, SizeTmp);
3535 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3536 Kind = tok::plusplus;
3537 }
else if (Char ==
'=') {
3538 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3539 Kind = tok::plusequal;
3545 Char = getCharAndSize(CurPtr, SizeTmp);
3547 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3548 Kind = tok::minusminus;
3549 }
else if (Char ==
'>' && LangOpts.CPlusPlus &&
3550 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
'*') {
3551 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3553 Kind = tok::arrowstar;
3554 }
else if (Char ==
'>') {
3555 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3557 }
else if (Char ==
'=') {
3558 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3559 Kind = tok::minusequal;
3568 if (getCharAndSize(CurPtr, SizeTmp) ==
'=') {
3569 Kind = tok::exclaimequal;
3570 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3572 Kind = tok::exclaim;
3577 Char = getCharAndSize(CurPtr, SizeTmp);
3587 bool TreatAsComment = LangOpts.LineComment &&
3588 (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP);
3589 if (!TreatAsComment)
3591 TreatAsComment = getCharAndSize(CurPtr+SizeTmp, SizeTmp2) !=
'*';
3593 if (TreatAsComment) {
3594 if (SkipLineComment(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3595 TokAtPhysicalStartOfLine))
3601 goto SkipIgnoredUnits;
3606 if (SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3607 TokAtPhysicalStartOfLine))
3616 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3617 Kind = tok::slashequal;
3623 Char = getCharAndSize(CurPtr, SizeTmp);
3625 Kind = tok::percentequal;
3626 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3627 }
else if (LangOpts.Digraphs && Char ==
'>') {
3628 Kind = tok::r_brace;
3629 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3630 }
else if (LangOpts.Digraphs && Char ==
':') {
3631 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3632 Char = getCharAndSize(CurPtr, SizeTmp);
3633 if (Char ==
'%' && getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
':') {
3634 Kind = tok::hashhash;
3635 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3637 }
else if (Char ==
'@' && LangOpts.MicrosoftExt) {
3638 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3640 Diag(BufferPtr, diag::ext_charize_microsoft);
3647 if (TokAtPhysicalStartOfLine && !
LexingRawMode && !Is_PragmaLexer)
3648 goto HandleDirective;
3653 Kind = tok::percent;
3657 Char = getCharAndSize(CurPtr, SizeTmp);
3659 return LexAngledStringLiteral(Result, CurPtr);
3660 }
else if (Char ==
'<') {
3661 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
3663 Kind = tok::lesslessequal;
3664 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3666 }
else if (After ==
'<' && IsStartOfConflictMarker(CurPtr-1)) {
3670 }
else if (After ==
'<' && HandleEndOfConflictMarker(CurPtr-1)) {
3674 }
else if (LangOpts.CUDA && After ==
'<') {
3675 Kind = tok::lesslessless;
3676 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3679 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3680 Kind = tok::lessless;
3682 }
else if (Char ==
'=') {
3683 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
3687 Diag(BufferPtr, diag::warn_cxx17_compat_spaceship);
3688 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3690 Kind = tok::spaceship;
3696 Diag(BufferPtr, diag::warn_cxx2a_compat_spaceship)
3701 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3702 Kind = tok::lessequal;
3703 }
else if (LangOpts.Digraphs && Char ==
':') {
3704 if (LangOpts.CPlusPlus11 &&
3705 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
':') {
3712 char After = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
3713 if (After !=
':' && After !=
'>') {
3716 Diag(BufferPtr, diag::warn_cxx98_compat_less_colon_colon);
3721 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3722 Kind = tok::l_square;
3723 }
else if (LangOpts.Digraphs && Char ==
'%') {
3724 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3725 Kind = tok::l_brace;
3726 }
else if (Char ==
'#' && SizeTmp == 1 &&
3727 lexEditorPlaceholder(Result, CurPtr)) {
3734 Char = getCharAndSize(CurPtr, SizeTmp);
3736 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3737 Kind = tok::greaterequal;
3738 }
else if (Char ==
'>') {
3739 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
3741 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3743 Kind = tok::greatergreaterequal;
3744 }
else if (After ==
'>' && IsStartOfConflictMarker(CurPtr-1)) {
3748 }
else if (After ==
'>' && HandleEndOfConflictMarker(CurPtr-1)) {
3751 }
else if (LangOpts.CUDA && After ==
'>') {
3752 Kind = tok::greatergreatergreater;
3753 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3756 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3757 Kind = tok::greatergreater;
3760 Kind = tok::greater;
3764 Char = getCharAndSize(CurPtr, SizeTmp);
3766 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3767 Kind = tok::caretequal;
3768 }
else if (LangOpts.OpenCL && Char ==
'^') {
3769 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3770 Kind = tok::caretcaret;
3776 Char = getCharAndSize(CurPtr, SizeTmp);
3778 Kind = tok::pipeequal;
3779 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3780 }
else if (Char ==
'|') {
3782 if (CurPtr[1] ==
'|' && HandleEndOfConflictMarker(CurPtr-1))
3784 Kind = tok::pipepipe;
3785 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3791 Char = getCharAndSize(CurPtr, SizeTmp);
3792 if (LangOpts.Digraphs && Char ==
'>') {
3793 Kind = tok::r_square;
3794 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3795 }
else if ((LangOpts.CPlusPlus ||
3796 LangOpts.DoubleSquareBracketAttributes) &&
3798 Kind = tok::coloncolon;
3799 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3808 Char = getCharAndSize(CurPtr, SizeTmp);
3811 if (CurPtr[1] ==
'=' && HandleEndOfConflictMarker(CurPtr-1))
3814 Kind = tok::equalequal;
3815 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3824 Char = getCharAndSize(CurPtr, SizeTmp);
3826 Kind = tok::hashhash;
3827 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3828 }
else if (Char ==
'@' && LangOpts.MicrosoftExt) {
3831 Diag(BufferPtr, diag::ext_charize_microsoft);
3832 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3838 if (TokAtPhysicalStartOfLine && !
LexingRawMode && !Is_PragmaLexer)
3839 goto HandleDirective;
3847 if (CurPtr[-1] ==
'@' && LangOpts.ObjC)
3850 Kind = tok::unknown;
3855 if (!LangOpts.AsmPreprocessor) {
3856 if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) {
3857 if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
3858 if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
3866 return LexUnicode(Result, CodePoint, CurPtr);
3870 Kind = tok::unknown;
3875 Kind = tok::unknown;
3879 llvm::UTF32 CodePoint;
3884 llvm::ConversionResult Status =
3885 llvm::convertUTF8Sequence((
const llvm::UTF8 **)&CurPtr,
3886 (
const llvm::UTF8 *)BufferEnd,
3888 llvm::strictConversion);
3889 if (Status == llvm::conversionOK) {
3890 if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
3891 if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
3898 return LexUnicode(Result, CodePoint, CurPtr);
3904 Kind = tok::unknown;
3911 Diag(CurPtr, diag::err_invalid_utf8);
3913 BufferPtr = CurPtr+1;
3925 FormTokenWithChars(Result, CurPtr, Kind);
3931 FormTokenWithChars(Result, CurPtr, tok::hash);
3936 assert(Result.
is(
tok::eof) &&
"Preprocessor did not set tok:eof");
SourceLocation getLocForStartOfFile(FileID FID) const
Return the source location corresponding to the first byte of the specified file. ...
Describes the bounds (start, size) of the preamble and a flag required by PreprocessorOptions::Precom...
static unsigned getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid=nullptr)
getSpelling - This method is used to get the spelling of a token into a preallocated buffer...
void setCodeCompletionTokenRange(const SourceLocation Start, const SourceLocation End)
Set the code completion token range for detecting replacement range later on.
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens...
SourceLocation getLocWithOffset(int Offset) const
Return a source location with the specified offset from this SourceLocation.
This is a discriminated union of FileInfo and ExpansionInfo.
unsigned getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it...
SourceLocation getSpellingLoc() const
void setFlagValue(TokenFlags Flag, bool Val)
Set a flag to either true or false.
static const llvm::sys::UnicodeCharRange C11AllowedIDCharRanges[]
void setBegin(SourceLocation b)
static __inline__ int __ATTRS_o_ai vec_any_eq(vector signed char __a, vector signed char __b)
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {...
Defines the SourceManager interface.
LLVM_READNONE bool isASCII(char c)
Returns true if this is an ASCII character.
static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts)
bool isInPrimaryFile() const
Return true if we're in the top-level file, not in a #include.
const char * getCharacterData(SourceLocation SL, bool *Invalid=nullptr) const
Return a pointer to the start of the specified location in the appropriate spelling MemoryBuffer...
Each ExpansionInfo encodes the expansion location - where the token was ultimately expanded...
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
void setFlag(TokenFlags Flag)
Set the specified flag.
static char getCharAndSizeNoWarn(const char *Ptr, unsigned &Size, const LangOptions &LangOpts)
getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever emit a warning.
void setCodeCompletionIdentifierInfo(IdentifierInfo *Filter)
Set the code completion token for filtering purposes.
StringRef getBufferData(FileID FID, bool *Invalid=nullptr) const
Return a StringRef to the source buffer data for the specified FileID.
bool hadModuleLoaderFatalFailure() const
static bool isAtStartOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroBegin=nullptr)
Returns true if the given MacroID location points at the first token of the macro expansion...
LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)
Returns true if this character is horizontal ASCII whitespace: ' ', '\t', '\f', '\v'.
bool isStringLiteral(TokenKind K)
Return true if this is a C or C++ string-literal (or C++11 user-defined-string-literal) token...
ConflictMarkerKind
ConflictMarkerKind - Kinds of conflict marker which the lexer might be recovering from...
static LLVM_ATTRIBUTE_NOINLINE SourceLocation GetMappedTokenLoc(Preprocessor &PP, SourceLocation FileLoc, unsigned CharNo, unsigned TokLen)
GetMappedTokenLoc - If lexing out of a 'mapped buffer', where we pretend the lexer buffer was all exp...
Like System, but searched after the system directories.
SourceLocation getCodeCompletionFileLoc() const
Returns the start location of the file of code-completion point.
static Lexer * Create_PragmaLexer(SourceLocation SpellingLoc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLen, Preprocessor &PP)
Create_PragmaLexer: Lexer constructor - Create a new lexer object for _Pragma expansion.
Defines the MultipleIncludeOpt interface.
bool isAnnotation() const
Return true if this is any of tok::annot_* kind tokens.
tok::TokenKind getKind() const
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Lexer(FileID FID, const llvm::MemoryBuffer *InputFile, Preprocessor &PP)
Lexer constructor - Create a new lexer object for the specified buffer with the specified preprocesso...
One of these records is kept for each identifier that is lexed.
static StringRef getIndentationForLine(SourceLocation Loc, const SourceManager &SM)
Returns the leading whitespace for the line that corresponds to the given location Loc.
SourceLocation getBegin() const
bool ParsingPreprocessorDirective
True when parsing #XXX; turns '\n' into a tok::eod token.
void setRawIdentifierData(const char *Ptr)
static SourceLocation getFromRawEncoding(unsigned Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
SmallVector< PPConditionalInfo, 4 > ConditionalStack
Information about the set of #if/#ifdef/#ifndef blocks we are currently in.
Token - This structure provides full information about a lexed token.
void setKind(tok::TokenKind K)
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
bool isPragmaLexer() const
isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.
void resetExtendedTokenMode()
Sets the extended token mode back to its initial value, according to the language options and preproc...
A Perforce-style conflict marker, initiated by 4 ">"s, separated by 4 "="s, and terminated by 4 "<"s...
bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const
Return true if we have an ObjC keyword identifier.
SourceLocation getSourceLocation() override
getSourceLocation - Return a source location for the next character in the current file...
bool isAtEndOfImmediateMacroExpansion(SourceLocation Loc, SourceLocation *MacroEnd=nullptr) const
Returns true if the given MacroID location points at the character end of the immediate macro expansi...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi8(char __b)
Initializes all values in a 128-bit vector of [16 x i8] with the specified 8-bit value.
static SourceLocation getBeginningOfFileToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
static bool isNewLineEscaped(const char *BufferStart, const char *Str)
Checks whether new line pointed by Str is preceded by escape sequence.
LLVM_READONLY bool isWhitespace(unsigned char c)
Return true if this character is horizontal or vertical ASCII whitespace: ' ', '\t', '\f', '\v', '\n', '\r'.
StringRef getSpelling(SourceLocation loc, SmallVectorImpl< char > &buffer, bool *invalid=nullptr) const
Return the 'spelling' of the token at the given location; does not go up to the spelling location or ...
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified...
SourceLocation getExpansionLoc(SourceLocation Loc) const
Given a SourceLocation object Loc, return the expansion location referenced by the ID...
static bool getRawToken(SourceLocation Loc, Token &Result, const SourceManager &SM, const LangOptions &LangOpts, bool IgnoreWhiteSpace=false)
Relex the token at the specified location.
void HandleDirective(Token &Result)
Callback invoked when the lexer sees a # token at the start of a line.
Concrete class used by the front-end to report problems and issues.
Defines the Diagnostic-related interfaces.
SourceLocation getSpellingLoc(SourceLocation Loc) const
Given a SourceLocation object, return the spelling location referenced by the ID. ...
const FileID FID
The SourceManager FileID corresponding to the file being lexed.
LLVM_READONLY bool isRawStringDelimBody(unsigned char c)
Return true if this is the body character of a C++ raw string delimiter.
static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, Lexer *L)
isEndOfBlockCommentWithEscapedNewLine - Return true if the specified newline character (either \n or \r) ...
bool isMacroArgExpansion() const
bool HandleComment(Token &result, SourceRange Comment)
static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix)
Determine whether a suffix is a valid ud-suffix.
static CharSourceRange makeCharRange(Lexer &L, const char *Begin, const char *End)
A little helper class used to produce diagnostics.
bool ParsingFilename
True after #include; turns <xx> or "xxx" into a tok::header_name token.
const LangOptions & getLangOpts() const
getLangOpts - Return the language features currently enabled.
static const llvm::sys::UnicodeCharRange C11DisallowedInitialIDCharRanges[]
bool isInFileID(SourceLocation Loc, FileID FID, unsigned *RelativeOffset=nullptr) const
Given a specific FileID, returns true if Loc is inside that FileID chunk and sets relative offset (of...
LLVM_READONLY bool isIdentifierHead(unsigned char c, bool AllowDollar=false)
Returns true if this is a valid first character of a C identifier, which is [a-zA-Z_].
static StringRef getSourceText(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts, bool *Invalid=nullptr)
Returns a string for the source that the range encompasses.
IdentifierInfo * LookUpIdentifierInfo(Token &Identifier) const
Given a tok::raw_identifier token, look up the identifier information for the token and install it in...
static bool isAtEndOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroEnd=nullptr)
Returns true if the given MacroID location points at the last token of the macro expansion.
bool isCodeCompletionEnabled() const
Determine if we are performing code completion.
SourceLocation getImmediateSpellingLoc(SourceLocation Loc) const
Given a SourceLocation object, return the spelling location referenced by the ID. ...
Defines the clang::LangOptions interface.
bool LexingRawMode
True if in raw mode.
static SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset, const SourceManager &SM, const LangOptions &LangOpts)
Computes the source location just past the end of the token at this source location.
Represents a character-granular source range.
bool isKeepWhitespaceMode() const
isKeepWhitespaceMode - Return true if the lexer should return tokens for every character in the file...
static PreambleBounds ComputePreamble(StringRef Buffer, const LangOptions &LangOpts, unsigned MaxLines=0)
Compute the preamble of the given file.
static unsigned MeasureTokenLength(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
MeasureTokenLength - Relex the token at the specified location and return its length in bytes in the ...
const FileEntry * getFileEntryForID(FileID FID) const
Returns the FileEntry record for the provided FileID.
const AnnotatedLine * Line
static SourceLocation findLocationAfterToken(SourceLocation loc, tok::TokenKind TKind, const SourceManager &SM, const LangOptions &LangOpts, bool SkipTrailingWhitespaceAndNewLine)
Checks that the given token is the first token that occurs after the given location (this excludes co...
bool hasLeadingEmptyMacro() const
Return true if this token has an empty macro before it.
SourceLocation getSourceLocation(const char *Loc, unsigned TokLen=1) const
getSourceLocation - Return a source location identifier for the specified offset in the current file...
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file. ...
Defines the clang::Preprocessor interface.
tok::ObjCKeywordKind getObjCKeywordID() const
Return the Objective-C keyword ID for this identifier.
static unsigned getTokenPrefixLength(SourceLocation TokStart, unsigned CharNo, const SourceManager &SM, const LangOptions &LangOpts)
Get the physical length (including trigraphs and escaped newlines) of the first Characters characters...
MultipleIncludeOpt MIOpt
A state machine that detects the #ifndef-wrapping a file idiom for the multiple-include optimization...
void setEnd(SourceLocation e)
bool getCommentRetentionState() const
Defines the clang::IdentifierInfo, clang::IdentifierTable, and clang::Selector interfaces.
CharSourceRange getImmediateExpansionRange(SourceLocation Loc) const
Return the start/end of the expansion information for an expansion location.
static const char * findBeginningOfLine(StringRef Buffer, unsigned Offset)
Returns the pointer that points to the beginning of the line that contains the given offset, or null if the offset is invalid.
bool HandleEndOfFile(Token &Result, bool isEndOfMacro=false)
Callback invoked when the lexer hits the end of the current file.
The result type of a method or function.
float __ovld __cnfn length(float p)
Return the length of vector p, i.e., $\sqrt{p.x^2 + p.y^2 + \ldots}$
ObjCKeywordKind
Provides a namespace for Objective-C keywords which start with an '@'.
const ExpansionInfo & getExpansion() const
bool isRecordingPreamble() const
static CharSourceRange getCharRange(SourceRange R)
SourceManager & getSourceManager() const
bool isAtStartOfImmediateMacroExpansion(SourceLocation Loc, SourceLocation *MacroBegin=nullptr) const
Returns true if the given MacroID location points at the beginning of the immediate macro expansion...
__INTPTR_TYPE__ intptr_t
A signed integer type with the property that any valid pointer to void can be converted to this type...
Encodes a location in the source.
IdentifierInfo & get(StringRef Name)
Return the identifier token info for the specified named identifier.
static void maybeDiagnoseUTF8Homoglyph(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range)
After encountering UTF-8 character C and interpreting it as an identifier character, check whether it's a homoglyph for a common non-identifier source character that is unlikely to be an intentional identifier character and warn if so.
SourceLocation createExpansionLoc(SourceLocation Loc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLength, bool ExpansionIsTokenRange=true, int LoadedID=0, unsigned LoadedOffset=0)
Return a new SourceLocation that encodes the fact that a token from SpellingLoc should actually be re...
IdentifierInfo * getIdentifierInfo() const
IdentifierTable & getIdentifierTable()
static Optional< Token > findNextToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Finds the token that comes right after the given location.
static void maybeDiagnoseIDCharCompat(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range, bool IsFirst)
void setIdentifierInfo(IdentifierInfo *II)
static const llvm::sys::UnicodeCharRange C99DisallowedInitialIDCharRanges[]
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
static SourceLocation GetBeginningOfToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Given a location anywhere in a source buffer, find the location that corresponds to the beginning of...
static CharSourceRange makeRangeFromFileLocs(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
bool operator<(DeclarationName LHS, DeclarationName RHS)
Ordering on two declaration names.
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
SourceLocation getCodeCompletionLoc() const
Returns the location of the code-completion point.
SourceLocation getExpansionLocStart() const
DiagnosticBuilder Diag(const char *Loc, unsigned DiagID) const
Diag - Forwarding function for diagnostics.
__DEVICE__ void * memcpy(void *__a, const void *__b, size_t __c)
static const llvm::sys::UnicodeCharRange C99AllowedIDCharRanges[]
static StringRef getImmediateMacroName(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
static StringRef getImmediateMacroNameForDiagnostics(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
bool inKeepCommentMode() const
inKeepCommentMode - Return true if the lexer should return comments as tokens.
bool isTokenRange() const
Return true if the end of this range specifies the start of the last token.
static const llvm::sys::UnicodeCharRange UnicodeWhitespaceCharRanges[]
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
const llvm::MemoryBuffer * getBuffer(FileID FID, SourceLocation Loc, bool *Invalid=nullptr) const
Return the buffer for the specified FileID.
Dataflow Directional Tag Classes.
bool isValid() const
Return true if this is a valid SourceLocation object.
LLVM_READONLY bool isVerticalWhitespace(unsigned char c)
Returns true if this character is vertical ASCII whitespace: '\n', '\r'.
static CharSourceRange makeFileCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Accepts a range and returns a character range with file locations.
static size_t getSpellingSlow(const Token &Tok, const char *BufPtr, const LangOptions &LangOpts, char *Spelling)
Slow case of getSpelling.
static FixItHint CreateRemoval(CharSourceRange RemoveRange)
Create a code modification hint that removes the given source range.
bool isHandleIdentifierCase() const
Return true if the Preprocessor::HandleIdentifier must be called on a token of this identifier...
bool isLexingRawMode() const
Return true if this lexer is in raw mode or not.
LLVM_READONLY bool isIdentifierBody(unsigned char c, bool AllowDollar=false)
Returns true if this is a body character of a C identifier, which is [a-zA-Z0-9_].
void CodeCompleteNaturalLanguage()
Hook used by the lexer to invoke the "natural language" code completion point.
unsigned getLength() const
static const char * findPlaceholderEnd(const char *CurPtr, const char *BufferEnd)
void setLiteralData(const char *Ptr)
const char * getLiteralData() const
getLiteralData - For a literal token (numeric constant, string, etc), this returns a pointer to the s...
FileID getFileID(SourceLocation SpellingLoc) const
Return the FileID for a SourceLocation.
static const llvm::sys::UnicodeCharRange CXX03AllowedIDCharRanges[]
bool isMacroArgExpansion(SourceLocation Loc, SourceLocation *StartLoc=nullptr) const
Tests whether the given source location represents a macro argument's expansion into the function-lik...
bool HandleIdentifier(Token &Identifier)
Callback invoked when the lexer reads an identifier and has filled in the token's IdentifierInfo membe...
void CreateString(StringRef Str, Token &Tok, SourceLocation ExpansionLocStart=SourceLocation(), SourceLocation ExpansionLocEnd=SourceLocation())
Plop the specified string into a scratch buffer and set the specified token's location and length to ...
static bool isAllowedInitiallyIDChar(uint32_t C, const LangOptions &LangOpts)
SourceLocation getEnd() const
static FixItHint CreateInsertion(SourceLocation InsertionLoc, StringRef Code, bool BeforePreviousInsertions=false)
Create a code modification hint that inserts the given code string at a specific location.
PreprocessorOptions & getPreprocessorOpts() const
Retrieve the preprocessor options used to initialize this preprocessor.
Defines the clang::TokenKind enum and support functions.
const SrcMgr::SLocEntry & getSLocEntry(FileID FID, bool *Invalid=nullptr) const
static char GetTrigraphCharForLetter(char Letter)
GetTrigraphCharForLetter - Given a character that occurs after a ?? pair, return the decoded trigraph...
static bool isIdentifierBodyChar(char c, const LangOptions &LangOpts)
Returns true if the given character could appear in an identifier.
void ReadToEndOfLine(SmallVectorImpl< char > *Result=nullptr)
ReadToEndOfLine - Read the rest of the current preprocessor line as an uninterpreted string...
__PTRDIFF_TYPE__ ptrdiff_t
A signed integer type that is the result of subtracting two pointers.
Defines the clang::SourceLocation class and associated facilities.
DiagnosticsEngine & getDiagnostics() const
StringRef getRawIdentifier() const
getRawIdentifier - For a raw identifier token (i.e., an identifier lexed in raw mode), returns a reference to the text substring in the buffer if known.
Not within a conflict marker.
static char DecodeTrigraphChar(const char *CP, Lexer *L)
DecodeTrigraphChar - If the specified character is a legal trigraph when prefixed with ...
static const char * FindConflictEnd(const char *CurPtr, const char *BufferEnd, ConflictMarkerKind CMK)
Find the end of a version control conflict marker.
static void StringifyImpl(T &Str, char Quote)
static FixItHint CreateReplacement(CharSourceRange RemoveRange, StringRef Code)
Create a code modification hint that replaces the given source range with the given code string...
void SetCommentRetentionState(bool Mode)
SetCommentRetentionState - Change the comment retention mode of the lexer to the specified mode...
bool needsCleaning() const
Return true if this token has trigraphs or escaped newlines in it.
static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a)
Copies the values of the most significant bits from each 8-bit element in a 128-bit integer vector of...
bool isIgnored(unsigned DiagID, SourceLocation Loc) const
Determine whether the diagnostic is known to be ignored.
void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled)
Hook used by the lexer to invoke the "included file" code completion point.
A normal or diff3 conflict marker, initiated by at least 7 "<"s, separated by at least 7 "="s or "|"s...
A trivial tuple used to represent a source range.
void clearFlag(TokenFlags Flag)
Unset the specified flag.
bool hasUCN() const
Returns true if this token contains a universal character name.
bool isPreprocessedOutput() const
Returns true if the preprocessor is responsible for generating output, false if it is producing token...
void SetKeepWhitespaceMode(bool Val)
SetKeepWhitespaceMode - This method lets clients enable or disable whitespace retention mode...
LLVM_READONLY bool isPreprocessingNumberBody(unsigned char c)
Return true if this is the body character of a C preprocessing number, which is [a-zA-Z0-9_.].
bool hasLeadingSpace() const
Return true if this token has whitespace before it.
DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const
Forwarding function for diagnostics.
void setRecordedPreambleConditionalStack(ArrayRef< PPConditionalInfo > s)
This class handles loading and caching of source files into memory.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi8(__m128i __a, __m128i __b)
Compares each of the corresponding 8-bit values of the 128-bit integer vectors for equality...
void startToken()
Reset all flags to cleared.
std::pair< FileID, unsigned > getDecomposedLoc(SourceLocation Loc) const
Decompose the specified location into a raw FileID + Offset pair.
static std::string Stringify(StringRef Str, bool Charify=false)
Stringify - Convert the specified string into a C string by i) escaping '\' and " characters and ii) ...
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
bool LexEditorPlaceholders
When enabled, the preprocessor will construct editor placeholder tokens.