23 #include "llvm/ADT/StringExtras.h"
24 #include "llvm/ADT/StringSwitch.h"
25 #include "llvm/Support/Compiler.h"
26 #include "llvm/Support/ConvertUTF.h"
27 #include "llvm/Support/MathExtras.h"
28 #include "llvm/Support/MemoryBuffer.h"
29 #include "llvm/Support/UnicodeCharRanges.h"
39 using namespace clang;
50 return II->getObjCKeywordID() == objcKey;
57 return tok::objc_not_keyword;
// Out-of-line definition of Lexer::anchor(); its body is empty.
// NOTE(review): presumably exists only to anchor the class's vtable in this
// translation unit -- confirm against the class declaration.
66 void Lexer::anchor() { }
// Lexer::InitLexer: sets up the buffer pointers and resets per-buffer lexing
// state. NOTE(review): this listing omits several of the original lines
// (including the rest of the parameter list and the closing braces), so only
// the statements visible here are described.
68 void Lexer::InitLexer(
const char *BufStart,
const char *BufPtr,
70 BufferStart = BufStart;
// The buffer must end in a NUL character; the assertion message states that
// the lexer relies on this.
74 assert(BufEnd[0] == 0 &&
75 "We assume that the input buffer has a null character at the end"
76 " to simplify lexing!");
// Only when lexing begins at the very start of the buffer: look for a leading
// UTF-8 byte-order mark (bytes EF BB BF, length 3) and advance BufferPtr by
// the detected length.
81 if (BufferStart == BufferPtr) {
83 StringRef Buf(BufferStart, BufferEnd - BufferStart);
84 size_t BOMLength = llvm::StringSwitch<size_t>(Buf)
85 .StartsWith(
"\xEF\xBB\xBF", 3)
89 BufferPtr += BOMLength;
// Reset the lexer state flags to their initial values.
92 Is_PragmaLexer =
false;
93 CurrentConflictMarkerState =
CMK_None;
96 IsAtStartOfLine =
true;
97 IsAtPhysicalStartOfLine =
true;
99 HasLeadingSpace =
false;
100 HasLeadingEmptyMacro =
false;
115 ExtendedTokenMode = 0;
124 FileLoc(PP.getSourceManager().getLocForStartOfFile(FID)),
125 LangOpts(PP.getLangOpts()) {
127 InitLexer(InputFile->getBufferStart(), InputFile->getBufferStart(),
128 InputFile->getBufferEnd());
134 assert(
PP &&
"Cannot reset token mode without a preprocessor");
135 if (LangOpts.TraditionalCPP)
145 const char *BufStart,
const char *BufPtr,
const char *BufEnd)
146 : FileLoc(fileloc), LangOpts(langOpts) {
148 InitLexer(BufStart, BufPtr, BufEnd);
// Lexer constructor taking a FileID and its MemoryBuffer. It delegates to
// another Lexer constructor, passing the buffer start as both the buffer
// start and the current position, so lexing begins at the top of FromFile.
// The constructor body itself is empty.
// NOTE(review): one line of the parameter list is missing from this listing
// (SM and langOpts are used below but not declared in the visible text).
157 Lexer::Lexer(
FileID FID,
const llvm::MemoryBuffer *FromFile,
159 :
Lexer(SM.getLocForStartOfFile(FID), langOpts, FromFile->getBufferStart(),
160 FromFile->getBufferStart(), FromFile->getBufferEnd()) {}
185 const llvm::MemoryBuffer *InputFile = SM.
getBuffer(SpellingFID);
186 Lexer *L =
new Lexer(SpellingFID, InputFile, PP);
193 L->BufferPtr = StrData;
194 L->BufferEnd = StrData+TokLen;
195 assert(L->BufferEnd[0] == 0 &&
"Buffer is not nul terminated!");
201 ExpansionLocEnd, TokLen);
208 L->Is_PragmaLexer =
true;
216 char Quote = Charify ?
'\'' :
'"';
217 for (
unsigned i = 0, e = Result.size(); i != e; ++i) {
218 if (Result[i] ==
'\\' || Result[i] == Quote) {
219 Result.insert(Result.begin()+i,
'\\');
229 for (
unsigned i = 0, e = Str.size(); i != e; ++i) {
230 if (Str[i] ==
'\\' || Str[i] ==
'"') {
231 Str.insert(Str.begin()+i,
'\\');
245 assert(Tok.
needsCleaning() &&
"getSpellingSlow called on simple token");
248 const char *BufEnd = BufPtr + Tok.
getLength();
252 while (BufPtr < BufEnd) {
257 if (Spelling[Length - 1] ==
'"')
265 Spelling[Length - 2] ==
'R' && Spelling[Length - 1] ==
'"') {
268 const char *RawEnd = BufEnd;
269 do --RawEnd;
while (*RawEnd !=
'"');
270 size_t RawLength = RawEnd - BufPtr + 1;
273 memcpy(Spelling + Length, BufPtr, RawLength);
281 while (BufPtr < BufEnd) {
288 "NeedsCleaning flag set on token that didn't need cleaning!");
306 bool invalidTemp =
false;
307 StringRef file = SM.
getBufferData(locInfo.first, &invalidTemp);
309 if (invalid) *invalid =
true;
313 const char *tokenBegin = file.data() + locInfo.second;
317 file.begin(), tokenBegin, file.end());
319 lexer.LexFromRawLexer(token);
321 unsigned length = token.getLength();
324 if (!token.needsCleaning())
325 return StringRef(tokenBegin, length);
328 buffer.resize(length);
329 buffer.resize(
getSpellingSlow(token, tokenBegin, options, buffer.data()));
330 return StringRef(buffer.data(), buffer.size());
340 assert((
int)Tok.
getLength() >= 0 &&
"Token character range is bogus!");
342 bool CharDataInvalid =
false;
346 *Invalid = CharDataInvalid;
348 return std::string();
352 return std::string(TokStart, TokStart + Tok.
getLength());
356 Result.resize(
getSpellingSlow(Tok, TokStart, LangOpts, &*Result.begin()));
373 assert((
int)Tok.
getLength() >= 0 &&
"Token character range is bogus!");
375 const char *TokStart =
nullptr;
377 if (Tok.
is(tok::raw_identifier))
382 Buffer = II->getNameStart();
383 return II->getLength();
393 bool CharDataInvalid =
false;
396 *Invalid = CharDataInvalid;
397 if (CharDataInvalid) {
410 return getSpellingSlow(Tok, TokStart, LangOpts, const_cast<char*>(Buffer));
431 bool IgnoreWhiteSpace) {
442 bool Invalid =
false;
447 const char *StrData = Buffer.data()+LocInfo.second;
454 Buffer.begin(), StrData, Buffer.end());
456 TheLexer.LexFromRawLexer(Result);
463 const char *BufStart = Buffer.data();
464 if (Offset >= Buffer.size())
466 const char *StrData = BufStart +
Offset;
468 if (StrData[0] ==
'\n' || StrData[0] ==
'\r')
471 const char *LexStart = StrData;
472 while (LexStart != BufStart) {
473 if (LexStart[0] ==
'\n' || LexStart[0] ==
'\r') {
488 if (LocInfo.first.isInvalid())
491 bool Invalid =
false;
498 const char *StrData = Buffer.data() + LocInfo.second;
500 if (!LexStart || LexStart == StrData)
505 Lexer TheLexer(LexerStartLoc, LangOpts, Buffer.data(), LexStart,
512 TheLexer.LexFromRawLexer(TheTok);
514 if (TheLexer.getBufferLocation() > StrData) {
518 if (TheLexer.getBufferLocation() - TheTok.
getLength() <= StrData)
543 std::pair<FileID, unsigned> BeginFileLocInfo
545 assert(FileLocInfo.first == BeginFileLocInfo.first &&
546 FileLocInfo.second >= BeginFileLocInfo.second);
565 const unsigned StartOffset = 1;
567 Lexer TheLexer(FileLoc, LangOpts, Buffer.begin(), Buffer.begin(),
574 bool InPreprocessorDirective =
false;
578 unsigned MaxLineOffset = 0;
580 const char *CurPtr = Buffer.begin();
581 unsigned CurLine = 0;
582 while (CurPtr != Buffer.end()) {
586 if (CurLine == MaxLines)
590 if (CurPtr != Buffer.end())
591 MaxLineOffset = CurPtr - Buffer.begin();
595 TheLexer.LexFromRawLexer(TheTok);
597 if (InPreprocessorDirective) {
610 InPreprocessorDirective =
false;
619 if (MaxLineOffset && TokOffset >= MaxLineOffset)
624 if (TheTok.
getKind() == tok::comment) {
632 Token HashTok = TheTok;
633 InPreprocessorDirective =
true;
639 TheLexer.LexFromRawLexer(TheTok);
643 = llvm::StringSwitch<PreambleDirectiveKind>(Keyword)
644 .Case(
"include", PDK_Skipped)
645 .Case(
"__include_macros", PDK_Skipped)
646 .Case(
"define", PDK_Skipped)
647 .Case(
"undef", PDK_Skipped)
648 .Case(
"line", PDK_Skipped)
649 .Case(
"error", PDK_Skipped)
650 .Case(
"pragma", PDK_Skipped)
651 .Case(
"import", PDK_Skipped)
652 .Case(
"include_next", PDK_Skipped)
653 .Case(
"warning", PDK_Skipped)
654 .Case(
"ident", PDK_Skipped)
655 .Case(
"sccs", PDK_Skipped)
656 .Case(
"assert", PDK_Skipped)
657 .Case(
"unassert", PDK_Skipped)
658 .Case(
"if", PDK_Skipped)
659 .Case(
"ifdef", PDK_Skipped)
660 .Case(
"ifndef", PDK_Skipped)
661 .Case(
"elif", PDK_Skipped)
662 .Case(
"else", PDK_Skipped)
663 .Case(
"endif", PDK_Skipped)
664 .Default(PDK_Unknown);
679 InPreprocessorDirective =
false;
690 if (ActiveCommentLoc.
isValid())
691 End = ActiveCommentLoc;
708 bool Invalid =
false;
712 if (Invalid || (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr)))
715 unsigned PhysOffset = 0;
720 while (Lexer::isObviouslySimpleCharacter(*TokPtr)) {
730 for (; CharNo; --CharNo) {
741 if (!Lexer::isObviouslySimpleCharacter(*TokPtr))
742 PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr;
794 if (expansionLoc.isFileID()) {
797 *MacroBegin = expansionLoc;
825 *MacroEnd = expansionLoc;
899 bool Invalid =
false;
929 if (Invalid) *Invalid =
true;
935 if (beginInfo.first.isInvalid()) {
936 if (Invalid) *Invalid =
true;
942 beginInfo.second > EndOffs) {
943 if (Invalid) *Invalid =
true;
948 bool invalidTemp =
false;
949 StringRef file = SM.
getBufferData(beginInfo.first, &invalidTemp);
951 if (Invalid) *Invalid =
true;
955 if (Invalid) *Invalid =
false;
956 return file.substr(beginInfo.second, EndOffs - beginInfo.second);
962 assert(Loc.
isMacroID() &&
"Only reasonble to call this on macros");
1002 StringRef ExpansionBuffer = SM.
getBufferData(ExpansionInfo.first);
1003 return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
1008 assert(Loc.
isMacroID() &&
"Only reasonble to call this on macros");
1027 StringRef ExpansionBuffer = SM.
getBufferData(ExpansionInfo.first);
1028 return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
1040 if (LocInfo.first.isInvalid())
1042 bool Invalid =
false;
1049 StringRef Rest = Buffer.substr(Line - Buffer.data());
1050 size_t NumWhitespaceChars = Rest.find_first_not_of(
" \t");
1051 return NumWhitespaceChars == StringRef::npos
1053 : Rest.take_front(NumWhitespaceChars);
1068 unsigned CharNo,
unsigned TokLen) {
1069 assert(FileLoc.
isMacroID() &&
"Must be a macro expansion");
1083 std::pair<SourceLocation,SourceLocation> II =
1084 SM.getImmediateExpansionRange(FileLoc);
1086 return SM.createExpansionLoc(SpellingLoc, II.first, II.second, TokLen);
1092 unsigned TokLen)
const {
1093 assert(Loc >= BufferStart && Loc <= BufferEnd &&
1094 "Location out of range for this buffer!");
1098 unsigned CharNo = Loc-BufferStart;
1104 assert(
PP &&
"This doesn't work on raw lexers");
1123 case '=':
return '#';
1124 case ')':
return ']';
1125 case '(':
return '[';
1126 case '!':
return '|';
1127 case '\'':
return '^';
1128 case '>':
return '}';
1129 case '/':
return '\\';
1130 case '<':
return '{';
1131 case '-':
return '~';
1141 if (!Res || !L)
return Res;
1145 L->
Diag(CP-2, diag::trigraph_ignored);
1150 L->
Diag(CP-2, diag::trigraph_converted) << StringRef(&Res, 1);
// Lexer::getEscapedNewLineSize (partial listing; the declaration of Size, the
// surrounding loop/returns and the closing braces are not shown).
// The visible checks test whether Ptr[Size-1] is '\n' or '\r', and then
// whether Ptr[Size] is the *other* newline character (a two-character
// "\r\n" or "\n\r" pair, since the two characters must differ).
1157 unsigned Lexer::getEscapedNewLineSize(
const char *Ptr) {
1162 if (Ptr[Size-1] !=
'\n' && Ptr[Size-1] !=
'\r')
1166 if ((Ptr[Size] ==
'\r' || Ptr[Size] ==
'\n') &&
1167 Ptr[Size-1] != Ptr[Size])
// Lexer::SkipEscapedNewLines (partial listing; the first branch of the
// if/else chain and the enclosing control flow are not shown).
// From the visible lines: when *P is '?', the next two characters are checked
// against '?' and '/' (the "??/" trigraph spelling). getEscapedNewLineSize()
// is then applied to AfterEscape; if it reports 0 the current P is returned
// unchanged, otherwise P is moved to just past the escaped newline.
1180 const char *Lexer::SkipEscapedNewLines(
const char *
P) {
1182 const char *AfterEscape;
1185 }
else if (*P ==
'?') {
1187 if (P[1] !=
'?' || P[2] !=
'/')
1196 unsigned NewLineSize = Lexer::getEscapedNewLineSize(AfterEscape);
1197 if (NewLineSize == 0)
return P;
1198 P = AfterEscape+NewLineSize;
1210 bool SkipTrailingWhitespaceAndNewLine) {
1221 bool InvalidTemp =
false;
1222 StringRef File = SM.
getBufferData(LocInfo.first, &InvalidTemp);
1226 const char *TokenBegin = File.data() + LocInfo.second;
1230 TokenBegin, File.end());
1233 lexer.LexFromRawLexer(Tok);
1234 if (Tok.isNot(TKind))
1239 unsigned NumWhitespaceChars = 0;
1240 if (SkipTrailingWhitespaceAndNewLine) {
1243 unsigned char C = *TokenEnd;
1246 NumWhitespaceChars++;
1250 if (C ==
'\n' || C ==
'\r') {
1253 NumWhitespaceChars++;
1254 if ((C ==
'\n' || C ==
'\r') && C != PrevC)
1255 NumWhitespaceChars++;
// Lexer::getCharAndSizeSlow (partial listing; the last parameter, the Slash
// label and several statements are not shown).
// Visible behaviour:
//  - If Ptr starts with a backslash and getEscapedNewLineSize() reports an
//    escaped newline, Size and Ptr are both advanced by that length and the
//    function recurses on the text that follows.
//  - A backslash_newline_space diagnostic is emitted on that path; its
//    guarding condition is not visible here.
//  - Text starting with "??" is examined further; if the resulting character
//    C is a backslash, control jumps to the Slash label.
1278 char Lexer::getCharAndSizeSlow(
const char *Ptr,
unsigned &Size,
1281 if (Ptr[0] ==
'\\') {
1290 if (
unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) {
1296 Diag(Ptr, diag::backslash_newline_space);
1299 Size += EscapedNewLineSize;
1300 Ptr += EscapedNewLineSize;
1303 return getCharAndSizeSlow(Ptr, Size, Tok);
1311 if (Ptr[0] ==
'?' && Ptr[1] ==
'?') {
1320 if (C ==
'\\')
goto Slash;
// Lexer::getCharAndSizeSlowNoWarn (partial listing; the last parameter, the
// Slash label and several statements are not shown).
// Mirrors the visible structure of getCharAndSizeSlow, but no Diag() call
// appears in the visible lines and the "??" handling is additionally gated on
// LangOpts.Trigraphs. An escaped newline after a backslash advances Size and
// Ptr and recurses; a "??" sequence whose resulting character C is a
// backslash jumps to the Slash label.
1336 char Lexer::getCharAndSizeSlowNoWarn(
const char *Ptr,
unsigned &Size,
1339 if (Ptr[0] ==
'\\') {
1347 if (
unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) {
1349 Size += EscapedNewLineSize;
1350 Ptr += EscapedNewLineSize;
1353 return getCharAndSizeSlowNoWarn(Ptr, Size, LangOpts);
1361 if (LangOpts.Trigraphs && Ptr[0] ==
'?' && Ptr[1] ==
'?') {
1367 if (C ==
'\\')
goto Slash;
// Lexer::SkipBytes (partial listing; the statement that uses Bytes is not
// shown -- NOTE(review): presumably it advances BufferPtr, confirm).
// Visible lines: BufferPtr is clamped to BufferEnd if it has moved past it,
// then both start-of-line flags are set from StartOfLine.
1382 void Lexer::SkipBytes(
unsigned Bytes,
bool StartOfLine) {
1384 if (BufferPtr > BufferEnd)
1385 BufferPtr = BufferEnd;
1389 IsAtStartOfLine = StartOfLine;
1390 IsAtPhysicalStartOfLine = StartOfLine;
1394 if (LangOpts.AsmPreprocessor) {
1396 }
else if (LangOpts.CPlusPlus11 || LangOpts.C11) {
1397 static const llvm::sys::UnicodeCharSet C11AllowedIDChars(
1399 return C11AllowedIDChars.contains(C);
1400 }
else if (LangOpts.CPlusPlus) {
1401 static const llvm::sys::UnicodeCharSet CXX03AllowedIDChars(
1403 return CXX03AllowedIDChars.contains(C);
1405 static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
1407 return C99AllowedIDChars.contains(C);
1413 if (LangOpts.AsmPreprocessor) {
1415 }
else if (LangOpts.CPlusPlus11 || LangOpts.C11) {
1416 static const llvm::sys::UnicodeCharSet C11DisallowedInitialIDChars(
1418 return !C11DisallowedInitialIDChars.contains(C);
1419 }
else if (LangOpts.CPlusPlus) {
1422 static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
1424 return !C99DisallowedInitialIDChars.contains(C);
1439 CannotAppearInIdentifier = 0,
1440 CannotStartIdentifier
1443 static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
1445 static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
1447 if (!C99AllowedIDChars.contains(C)) {
1450 << CannotAppearInIdentifier;
1451 }
else if (IsFirst && C99DisallowedInitialIDChars.contains(C)) {
1454 << CannotStartIdentifier;
1460 static const llvm::sys::UnicodeCharSet CXX03AllowedIDChars(
1462 if (!CXX03AllowedIDChars.contains(C)) {
1463 Diags.
Report(Range.
getBegin(), diag::warn_cxx98_compat_unicode_id)
// Lexer::tryConsumeIdentifierUCN (partial listing; the last parameter, the
// validity checks on CodePoint and the return statements are not shown).
// Visible behaviour: tryReadUCN() is called starting Size bytes past CurPtr,
// with a null third argument; UCNPtr ends up marking the end of what was read.
// The spelled length is then compared against 6 bytes with 'u' or 10 bytes
// with 'U' as the second character.
// NOTE(review): presumably this distinguishes a plain \uXXXX / \UXXXXXXXX
// spelling from one containing trigraphs or escaped newlines -- confirm.
// In the loop shown at the end, CurPtr is advanced one logical character at a
// time via getAndAdvanceChar() until it reaches UCNPtr.
1469 bool Lexer::tryConsumeIdentifierUCN(
const char *&CurPtr,
unsigned Size,
1471 const char *UCNPtr = CurPtr + Size;
1472 uint32_t CodePoint = tryReadUCN(UCNPtr, CurPtr,
nullptr);
1482 if ((UCNPtr - CurPtr == 6 && CurPtr[1] ==
'u') ||
1483 (UCNPtr - CurPtr == 10 && CurPtr[1] ==
'U'))
1486 while (CurPtr != UCNPtr)
1487 (void)getAndAdvanceChar(CurPtr, Result);
// Lexer::tryConsumeIdentifierUTF8Char (partial listing; one argument of the
// conversion call, the rest of the failure condition and the return
// statements are not shown).
// Visible behaviour: a single UTF-8 sequence starting at CurPtr is decoded
// with llvm::convertUTF8Sequence in strictConversion mode, bounded by
// BufferEnd, using a scratch pointer (UnicodePtr). The result is compared
// against conversionOK, and CurPtr is later set to UnicodePtr, i.e. moved
// past the decoded sequence.
1491 bool Lexer::tryConsumeIdentifierUTF8Char(
const char *&CurPtr) {
1492 const char *UnicodePtr = CurPtr;
1493 llvm::UTF32 CodePoint;
1494 llvm::ConversionResult Result =
1495 llvm::convertUTF8Sequence((
const llvm::UTF8 **)&UnicodePtr,
1496 (
const llvm::UTF8 *)BufferEnd,
1498 llvm::strictConversion);
1499 if (Result != llvm::conversionOK ||
1508 CurPtr = UnicodePtr;
// Lexer::LexIdentifier (partial listing; many original lines, including the
// loops and the FinishIdentifier label, are not shown).
// Visible behaviour:
//  - If the character read is ASCII and is not a backslash, not '?', and not
//    a '$' while LangOpts.DollarIdents is set, the token is formed as
//    tok::raw_identifier.
//  - A token whose identifier info has kind tok::identifier and that sits at
//    the code-completion point can be re-kinded to tok::code_completion (the
//    rest of that condition is not shown).
//  - Otherwise the character is re-read with getCharAndSize(). A '$' ends the
//    identifier when DollarIdents is off, else it is diagnosed with
//    ext_dollar_in_identifier and consumed. A backslash is tried as a UCN and
//    a non-ASCII byte as a UTF-8 character; both jump to FinishIdentifier on
//    failure (NOTE(review): inferred from the visible else-chain -- confirm).
1512 bool Lexer::LexIdentifier(
Token &Result,
const char *CurPtr) {
1515 unsigned char C = *CurPtr++;
1526 if (
isASCII(C) && C !=
'\\' && C !=
'?' &&
1527 (C !=
'$' || !LangOpts.DollarIdents)) {
1529 const char *IdStart = BufferPtr;
1530 FormTokenWithChars(Result, CurPtr, tok::raw_identifier);
1547 if (II->
getTokenID() == tok::identifier && isCodeCompletionPoint(CurPtr)
1551 Result.
setKind(tok::code_completion);
1560 C = getCharAndSize(CurPtr, Size);
1564 if (!LangOpts.DollarIdents)
goto FinishIdentifier;
1568 Diag(CurPtr, diag::ext_dollar_in_identifier);
1569 CurPtr = ConsumeChar(CurPtr, Size, Result);
1570 C = getCharAndSize(CurPtr, Size);
1573 }
else if (C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result)) {
1574 C = getCharAndSize(CurPtr, Size);
1576 }
else if (!
isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr)) {
1577 C = getCharAndSize(CurPtr, Size);
1580 goto FinishIdentifier;
1584 CurPtr = ConsumeChar(CurPtr, Size, Result);
1586 C = getCharAndSize(CurPtr, Size);
1588 CurPtr = ConsumeChar(CurPtr, Size, Result);
1589 C = getCharAndSize(CurPtr, Size);
// Lexer::isHexaLiteral (partial listing; the code that reads the leading
// characters and defines C2 is not shown).
// The visible return statement yields true when C2 is 'x' or 'X'.
// NOTE(review): C2 is presumably the second character of the literal at
// Start (after a leading '0') -- confirm against the full source.
1596 bool Lexer::isHexaLiteral(
const char *Start,
const LangOptions &LangOpts) {
1602 return (C2 ==
'x' || C2 ==
'X');
// Lexer::LexNumericConstant (partial listing; the main scanning loop, the
// definitions of Size/PrevCh/NextSize and several conditions are not shown).
// Visible behaviour:
//  - A '+' or '-' directly after 'E'/'e' is consumed and lexing continues
//    recursively, unless LangOpts.MicrosoftExt is set and the token so far is
//    a hex literal.
//  - A '+' or '-' directly after 'P'/'p' is handled through an IsHexFloat
//    flag; when LangOpts.C99 is off, isHexaLiteral() is consulted and the
//    text from BufferPtr to CurPtr is searched for '_'. The sign is then
//    consumed recursively (the guarding condition is not shown).
//  - A digit-separator path emits warn_cxx11_compat_digit_separator, consumes
//    two characters and recurses.
//  - A backslash that parses as a UCN, or a non-ASCII byte that parses as a
//    UTF-8 character, is consumed and lexing recurses.
//  - Finally the token is formed as tok::numeric_constant.
1608 bool Lexer::LexNumericConstant(
Token &Result,
const char *CurPtr) {
1610 char C = getCharAndSize(CurPtr, Size);
1613 CurPtr = ConsumeChar(CurPtr, Size, Result);
1615 C = getCharAndSize(CurPtr, Size);
1619 if ((C ==
'-' || C ==
'+') && (PrevCh ==
'E' || PrevCh ==
'e')) {
1622 if (!LangOpts.MicrosoftExt || !isHexaLiteral(BufferPtr, LangOpts))
1623 return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
1627 if ((C ==
'-' || C ==
'+') && (PrevCh ==
'P' || PrevCh ==
'p')) {
1631 bool IsHexFloat =
true;
1632 if (!LangOpts.C99) {
1633 if (!isHexaLiteral(BufferPtr, LangOpts))
1636 std::find(BufferPtr, CurPtr,
'_') != CurPtr)
1640 return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
1649 Diag(CurPtr, diag::warn_cxx11_compat_digit_separator);
1650 CurPtr = ConsumeChar(CurPtr, Size, Result);
1651 CurPtr = ConsumeChar(CurPtr, NextSize, Result);
1652 return LexNumericConstant(Result, CurPtr);
1657 if (C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result))
1658 return LexNumericConstant(Result, CurPtr);
1659 if (!
isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr))
1660 return LexNumericConstant(Result, CurPtr);
1663 const char *TokStart = BufferPtr;
1664 FormTokenWithChars(Result, CurPtr, tok::numeric_constant);
// Lexer::LexUDSuffix (partial listing; most conditions, the loops and the
// return statements are not shown).
// Visible behaviour:
//  - The character after the literal is read with getCharAndSize(); a
//    backslash is tried as a UCN and a non-ASCII byte as a UTF-8 character.
//  - One diagnostic path selects warn_cxx11_compat_user_defined_literal when
//    C is '_' and warn_cxx11_compat_reserved_user_defined_literal otherwise.
//  - A small fixed buffer (MaxStandardSuffixLength == 3) collects the
//    following characters, and a StringRef over the collected text is built.
//    NOTE(review): presumably to recognise standard suffixes -- confirm.
//  - Another path selects ext_ms_reserved_user_defined_literal or
//    ext_reserved_user_defined_literal.
//  - The suffix characters are then consumed, again accepting UCNs and UTF-8
//    characters.
1671 const char *Lexer::LexUDSuffix(
Token &Result,
const char *CurPtr,
1672 bool IsStringLiteral) {
1677 char C = getCharAndSize(CurPtr, Size);
1678 bool Consumed =
false;
1681 if (C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result))
1683 else if (!
isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr))
1692 C ==
'_' ? diag::warn_cxx11_compat_user_defined_literal
1693 : diag::warn_cxx11_compat_reserved_user_defined_literal)
1704 bool IsUDSuffix =
false;
1711 const unsigned MaxStandardSuffixLength = 3;
1712 char Buffer[MaxStandardSuffixLength] = { C };
1713 unsigned Consumed = Size;
1721 const StringRef CompleteSuffix(Buffer, Chars);
1727 if (Chars == MaxStandardSuffixLength)
1731 Buffer[Chars++] =
Next;
1732 Consumed += NextSize;
1739 ? diag::ext_ms_reserved_user_defined_literal
1740 : diag::ext_reserved_user_defined_literal)
1745 CurPtr = ConsumeChar(CurPtr, Size, Result);
1750 C = getCharAndSize(CurPtr, Size);
1752 else if (C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result)) {}
1753 else if (!
isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr)) {}
// Lexer::LexStringLiteral (partial listing; the last parameter, the scanning
// loop header and the return statements are not shown).
// Visible behaviour:
//  - For utf8/utf16/utf32 string literal kinds a compatibility warning is
//    selected (warn_cxx98_compat_unicode_literal or
//    warn_c99_compat_unicode_literal); the full condition is not shown.
//  - Characters are read with getAndAdvanceChar(). On '\n', '\r', or a NUL
//    that sits exactly at BufferEnd, ext_unterminated_char_or_string is
//    diagnosed and the token is formed as tok::unknown, ending just before
//    that character.
//  - A code-completion point inside the literal also yields tok::unknown.
//  - The position of an embedded NUL is remembered in NulCharacter and later
//    reported with null_in_char_or_string.
//  - LexUDSuffix() is called with IsStringLiteral = true, and the token is
//    finally formed with the requested Kind.
1762 bool Lexer::LexStringLiteral(
Token &Result,
const char *CurPtr,
1765 const char *NulCharacter =
nullptr;
1768 (Kind == tok::utf8_string_literal ||
1769 Kind == tok::utf16_string_literal ||
1770 Kind == tok::utf32_string_literal))
1772 ? diag::warn_cxx98_compat_unicode_literal
1773 : diag::warn_c99_compat_unicode_literal);
1775 char C = getAndAdvanceChar(CurPtr, Result);
1780 C = getAndAdvanceChar(CurPtr, Result);
1782 if (C ==
'\n' || C ==
'\r' ||
1783 (C == 0 && CurPtr-1 == BufferEnd)) {
1785 Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 1;
1786 FormTokenWithChars(Result, CurPtr-1, tok::unknown);
1791 if (isCodeCompletionPoint(CurPtr-1)) {
1793 FormTokenWithChars(Result, CurPtr-1, tok::unknown);
1798 NulCharacter = CurPtr-1;
1800 C = getAndAdvanceChar(CurPtr, Result);
1805 CurPtr = LexUDSuffix(Result, CurPtr,
true);
1809 Diag(NulCharacter, diag::null_in_char_or_string) << 1;
1812 const char *TokStart = BufferPtr;
1813 FormTokenWithChars(Result, CurPtr, Kind);
// Lexer::LexRawStringLiteral (partial listing; the last parameter, the
// delimiter-scanning loop and several conditions are not shown).
// Visible behaviour:
//  - warn_cxx98_compat_raw_string_literal is emitted at BufferPtr (the guard
//    is not shown).
//  - PrefixLen counts the delimiter characters before '('. If the character
//    at that position is not '(', the error is err_raw_delim_too_long when
//    PrefixLen is 16, otherwise err_invalid_char_raw_delim naming the
//    offending character.
//  - On that error path, reaching NUL exactly at BufferEnd leads to forming
//    the token as tok::unknown.
//  - In the body scan, the literal ends where the text matches the saved
//    prefix (strncmp over PrefixLen bytes) followed by '"'. A NUL at
//    BufferEnd instead produces err_unterminated_raw_string and tok::unknown.
//  - LexUDSuffix() is called with IsStringLiteral = true and the token is
//    finally formed with the requested Kind.
1820 bool Lexer::LexRawStringLiteral(
Token &Result,
const char *CurPtr,
1828 Diag(BufferPtr, diag::warn_cxx98_compat_raw_string_literal);
1830 unsigned PrefixLen = 0;
1836 if (CurPtr[PrefixLen] !=
'(') {
1838 const char *PrefixEnd = &CurPtr[PrefixLen];
1839 if (PrefixLen == 16) {
1840 Diag(PrefixEnd, diag::err_raw_delim_too_long);
1842 Diag(PrefixEnd, diag::err_invalid_char_raw_delim)
1843 << StringRef(PrefixEnd, 1);
1855 if (C == 0 && CurPtr-1 == BufferEnd) {
1861 FormTokenWithChars(Result, CurPtr, tok::unknown);
1866 const char *Prefix = CurPtr;
1867 CurPtr += PrefixLen + 1;
1874 if (strncmp(CurPtr, Prefix, PrefixLen) == 0 && CurPtr[PrefixLen] ==
'"') {
1875 CurPtr += PrefixLen + 1;
1878 }
else if (C == 0 && CurPtr-1 == BufferEnd) {
1880 Diag(BufferPtr, diag::err_unterminated_raw_string)
1881 << StringRef(Prefix, PrefixLen);
1882 FormTokenWithChars(Result, CurPtr-1, tok::unknown);
1889 CurPtr = LexUDSuffix(Result, CurPtr,
true);
1892 const char *TokStart = BufferPtr;
1893 FormTokenWithChars(Result, CurPtr, Kind);
// Lexer::LexAngledStringLiteral (partial listing; the loop header and return
// statements are not shown).
// Visible behaviour:
//  - AfterLessPos remembers the position right after the opening '<'.
//  - A backslash that is not at the end of the buffer causes the following
//    character to be skipped.
//  - On '\n', '\r', or a NUL that is at BufferEnd or at the code-completion
//    point, the token is instead formed as tok::less ending at AfterLessPos,
//    so only the '<' is consumed.
//  - Any other NUL is remembered in NulCharacter and later reported with
//    null_in_char_or_string.
//  - On success the token is formed as tok::angle_string_literal.
1900 bool Lexer::LexAngledStringLiteral(
Token &Result,
const char *CurPtr) {
1902 const char *NulCharacter =
nullptr;
1903 const char *AfterLessPos = CurPtr;
1904 char C = getAndAdvanceChar(CurPtr, Result);
1907 if (C ==
'\\' && CurPtr < BufferEnd) {
1909 getAndAdvanceChar(CurPtr, Result);
1910 }
else if (C ==
'\n' || C ==
'\r' ||
1911 (C == 0 && (CurPtr-1 == BufferEnd ||
1912 isCodeCompletionPoint(CurPtr-1)))) {
1915 FormTokenWithChars(Result, AfterLessPos, tok::less);
1917 }
else if (C == 0) {
1918 NulCharacter = CurPtr-1;
1920 C = getAndAdvanceChar(CurPtr, Result);
1925 Diag(NulCharacter, diag::null_in_char_or_string) << 1;
1928 const char *TokStart = BufferPtr;
1929 FormTokenWithChars(Result, CurPtr, tok::angle_string_literal);
// Lexer::LexCharConstant (partial listing; the last parameter, the loop
// header and return statements are not shown).
// Visible behaviour:
//  - utf16/utf32 character constants select a compatibility warning
//    (warn_cxx98_compat_unicode_literal or warn_c99_compat_unicode_literal);
//    utf8 character constants emit warn_cxx14_compat_u8_character_literal.
//  - An ext_empty_character diagnostic path forms the token as tok::unknown
//    (its guarding condition is not shown).
//  - On '\n', '\r', or a NUL exactly at BufferEnd,
//    ext_unterminated_char_or_string is diagnosed (with argument 0, where the
//    string-literal lexer passes 1) and the token becomes tok::unknown.
//  - A code-completion point inside the constant also yields tok::unknown.
//  - An embedded NUL is remembered and later reported with
//    null_in_char_or_string.
//  - LexUDSuffix() is called with IsStringLiteral = false and the token is
//    finally formed with the requested Kind.
1936 bool Lexer::LexCharConstant(
Token &Result,
const char *CurPtr,
1939 const char *NulCharacter =
nullptr;
1942 if (Kind == tok::utf16_char_constant || Kind == tok::utf32_char_constant)
1944 ? diag::warn_cxx98_compat_unicode_literal
1945 : diag::warn_c99_compat_unicode_literal);
1946 else if (Kind == tok::utf8_char_constant)
1947 Diag(BufferPtr, diag::warn_cxx14_compat_u8_character_literal);
1950 char C = getAndAdvanceChar(CurPtr, Result);
1953 Diag(BufferPtr, diag::ext_empty_character);
1954 FormTokenWithChars(Result, CurPtr, tok::unknown);
1961 C = getAndAdvanceChar(CurPtr, Result);
1963 if (C ==
'\n' || C ==
'\r' ||
1964 (C == 0 && CurPtr-1 == BufferEnd)) {
1966 Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 0;
1967 FormTokenWithChars(Result, CurPtr-1, tok::unknown);
1972 if (isCodeCompletionPoint(CurPtr-1)) {
1974 FormTokenWithChars(Result, CurPtr-1, tok::unknown);
1979 NulCharacter = CurPtr-1;
1981 C = getAndAdvanceChar(CurPtr, Result);
1986 CurPtr = LexUDSuffix(Result, CurPtr,
false);
1990 Diag(NulCharacter, diag::null_in_char_or_string) << 0;
1993 const char *TokStart = BufferPtr;
1994 FormTokenWithChars(Result, CurPtr, Kind);
// Lexer::SkipWhitespace (partial listing; the scanning loops and all
// conditions are not shown).
// Visible behaviour: the current character is read as an unsigned char; one
// path forms a tok::unknown token over the skipped text and marks the lexer
// as being at the (logical and physical) start of a line; another path looks
// at the character just before CurPtr and sets the caller's
// TokAtPhysicalStartOfLine flag to true.
// NOTE(review): the conditions selecting these paths are missing from this
// listing -- consult the full source before relying on this summary.
2004 bool Lexer::SkipWhitespace(
Token &Result,
const char *CurPtr,
2005 bool &TokAtPhysicalStartOfLine) {
2009 unsigned char Char = *CurPtr;
2034 FormTokenWithChars(Result, CurPtr, tok::unknown);
2036 IsAtStartOfLine =
true;
2037 IsAtPhysicalStartOfLine =
true;
2044 char PrevChar = CurPtr[-1];
2050 TokAtPhysicalStartOfLine =
true;
// Lexer::SkipLineComment (partial listing; loop headers, several conditions
// and the return statements are not shown).
// Visible behaviour:
//  - ext_line_comment is diagnosed at BufferPtr and LangOpts.LineComment is
//    set to true (the guard is not shown; NOTE(review): presumably so the
//    extension is diagnosed only once -- confirm).
//  - After scanning to a newline, the code looks backwards from it
//    (EscapePtr) for a backslash, or for the "??/" trigraph when
//    LangOpts.Trigraphs is set, tracking intervening whitespace in HasSpace.
//    backslash_newline_space may be diagnosed.
//  - When getAndAdvanceChar() consumed more than one byte, the skipped range
//    is searched for a newline, the following text is checked for "//", and
//    ext_multi_line_line_comment may be diagnosed.
//  - Scanning stops at '\r', '\n', or when CurPtr == BufferEnd + 1; a NUL at
//    the code-completion point is handled separately.
//  - One path hands off to SaveLineComment(); another sets the caller's
//    TokAtPhysicalStartOfLine to true.
2063 bool Lexer::SkipLineComment(
Token &Result,
const char *CurPtr,
2064 bool &TokAtPhysicalStartOfLine) {
2068 Diag(BufferPtr, diag::ext_line_comment);
2072 LangOpts.LineComment =
true;
2086 C !=
'\n' && C !=
'\r')
2089 const char *NextLine = CurPtr;
2092 const char *EscapePtr = CurPtr-1;
2093 bool HasSpace =
false;
2099 if (*EscapePtr ==
'\\')
2102 else if (EscapePtr[0] ==
'/' && EscapePtr[-1] ==
'?' &&
2103 EscapePtr[-2] ==
'?' && LangOpts.Trigraphs)
2105 CurPtr = EscapePtr-2;
2111 Diag(EscapePtr, diag::backslash_newline_space);
2118 const char *OldPtr = CurPtr;
2121 C = getAndAdvanceChar(CurPtr, Result);
2126 if (C != 0 && CurPtr == OldPtr+1) {
2134 if (CurPtr != OldPtr+1 && C !=
'/' && CurPtr[0] !=
'/') {
2135 for (; OldPtr != CurPtr; ++OldPtr)
2136 if (OldPtr[0] ==
'\n' || OldPtr[0] ==
'\r') {
2140 const char *ForwardPtr = CurPtr;
2143 if (ForwardPtr[0] ==
'/' && ForwardPtr[1] ==
'/')
2148 Diag(OldPtr-1, diag::ext_multi_line_line_comment);
2153 if (C ==
'\r' || C ==
'\n' || CurPtr == BufferEnd + 1) {
2158 if (C ==
'\0' && isCodeCompletionPoint(CurPtr-1)) {
2176 return SaveLineComment(Result, CurPtr);
2194 TokAtPhysicalStartOfLine =
true;
// Lexer::SaveLineComment (partial listing; everything between forming the
// token and the assertion, and everything after it, is not shown).
// Visible behaviour: the comment text up to CurPtr is formed into a
// tok::comment token, and a later assertion checks that the comment's
// spelling begins with "//".
2203 bool Lexer::SaveLineComment(
Token &Result,
const char *CurPtr) {
2206 FormTokenWithChars(Result, CurPtr, tok::comment);
2218 assert(Spelling[0] ==
'/' && Spelling[1] ==
'/' &&
"Not line comment?");
2233 assert(CurPtr[0] ==
'\n' || CurPtr[0] ==
'\r');
2239 if (CurPtr[0] ==
'\n' || CurPtr[0] ==
'\r') {
2241 if (CurPtr[0] == CurPtr[1])
2249 bool HasSpace =
false;
2256 if (*CurPtr ==
'\\') {
2257 if (CurPtr[-1] !=
'*')
return false;
2260 if (CurPtr[0] !=
'/' || CurPtr[-1] !=
'?' || CurPtr[-2] !=
'?' ||
2271 L->
Diag(CurPtr, diag::trigraph_ignored_block_comment);
2275 L->
Diag(CurPtr, diag::trigraph_ends_block_comment);
2280 L->
Diag(CurPtr, diag::escaped_newline_block_comment_end);
2284 L->
Diag(CurPtr, diag::backslash_newline_space);
// Lexer::SkipBlockComment (partial listing; preprocessor conditionals, loop
// bodies and return statements are not shown).
// Visible behaviour:
//  - The first character after the comment opener is read. A NUL with
//    CurPtr == BufferEnd+1 means the comment is unterminated:
//    err_unterminated_block_comment is diagnosed and one visible path forms
//    a tok::unknown token.
//  - When more than 24 bytes remain, CurPtr is first advanced until it is
//    16-byte aligned (the low four address bits are zero) or a '/' is seen,
//    then the buffer is scanned 16 bytes at a time for '/'. Two variants are
//    visible: one advances by countTrailingZeros of a comparison mask, one
//    compares against a 16-lane vector of '/' using vec_any_eq. A further
//    loop handles the case with more than 4 bytes left.
//    NOTE(review): which variant is compiled depends on preprocessor guards
//    not shown here.
//  - The generic loop scans until '/' or NUL.
//  - When a '/' is found, the preceding character is checked for '*' (end of
//    comment) or a newline (possible escaped-newline ending).
//  - A following '*' that is not itself followed by '/' (i.e. "/*" inside
//    the comment) triggers warn_nested_block_comment.
//  - A NUL at the code-completion point is handled separately.
//  - One path forms a tok::comment token; another continues into
//    SkipWhitespace() starting one character past CurPtr.
2305 bool Lexer::SkipBlockComment(
Token &Result,
const char *CurPtr,
2306 bool &TokAtPhysicalStartOfLine) {
2316 unsigned char C = getCharAndSize(CurPtr, CharSize);
2318 if (C == 0 && CurPtr == BufferEnd+1) {
2320 Diag(BufferPtr, diag::err_unterminated_block_comment);
2326 FormTokenWithChars(Result, CurPtr, tok::unknown);
2342 if (CurPtr + 24 < BufferEnd &&
2347 while (C !=
'/' && ((
intptr_t)CurPtr & 0x0F) != 0)
2350 if (C ==
'/')
goto FoundSlash;
2354 while (CurPtr+16 <= BufferEnd) {
2361 CurPtr += llvm::countTrailingZeros<unsigned>(cmp) + 1;
2367 __vector
unsigned char Slashes = {
2368 '/',
'/',
'/',
'/',
'/',
'/',
'/',
'/',
2369 '/',
'/',
'/',
'/',
'/',
'/',
'/',
'/'
2371 while (CurPtr+16 <= BufferEnd &&
2372 !
vec_any_eq(*(
const vector
unsigned char*)CurPtr, Slashes))
2376 while (CurPtr[0] !=
'/' &&
2380 CurPtr+4 < BufferEnd) {
2390 while (C !=
'/' && C !=
'\0')
2395 if (CurPtr[-2] ==
'*')
2398 if ((CurPtr[-2] ==
'\n' || CurPtr[-2] ==
'\r')) {
2405 if (CurPtr[0] ==
'*' && CurPtr[1] !=
'/') {
2410 Diag(CurPtr-1, diag::warn_nested_block_comment);
2412 }
else if (C == 0 && CurPtr == BufferEnd+1) {
2414 Diag(BufferPtr, diag::err_unterminated_block_comment);
2423 FormTokenWithChars(Result, CurPtr, tok::unknown);
2429 }
else if (C ==
'\0' && isCodeCompletionPoint(CurPtr-1)) {
2448 FormTokenWithChars(Result, CurPtr, tok::comment);
2457 SkipWhitespace(Result, CurPtr+1, TokAtPhysicalStartOfLine);
2475 "Must be in a preprocessing directive!");
2479 const char *CurPtr = BufferPtr;
2481 char Char = getAndAdvanceChar(CurPtr, Tmp);
2485 Result->push_back(Char);
2489 if (CurPtr-1 != BufferEnd) {
2490 if (isCodeCompletionPoint(CurPtr-1)) {
2498 Result->push_back(Char);
2506 assert(CurPtr[-1] == Char &&
"Trigraphs for newline?");
2507 BufferPtr = CurPtr-1;
2511 if (Tmp.
is(tok::code_completion)) {
2516 assert(Tmp.
is(tok::eod) &&
"Unexpected token!");
// Lexer::LexEndOfFile (partial listing; most conditions and the return
// statements are not shown).
// Visible behaviour:
//  - One path forms a tok::eod token at CurPtr (its guard is not shown).
//  - Another path moves BufferPtr to BufferEnd and forms a tok::eof token.
//  - err_pp_unterminated_conditional is referenced for a diagnostic.
//  - If the buffer is non-empty up to CurPtr and does not end in '\n' or
//    '\r', a "no newline at end of file" diagnostic is issued at BufferEnd.
//    With LangOpts.CPlusPlus11 the ID is warn_cxx98_compat_no_newline_eof
//    when that warning is not ignored at EndLoc; warn_no_newline_eof and
//    ext_no_newline_eof are the other candidates (the else-structure
//    selecting between them is not shown).
2528 bool Lexer::LexEndOfFile(
Token &Result,
const char *CurPtr) {
2536 FormTokenWithChars(Result, CurPtr, tok::eod);
2548 BufferPtr = BufferEnd;
2549 FormTokenWithChars(Result, BufferEnd,
tok::eof);
2564 diag::err_pp_unterminated_conditional);
2570 if (CurPtr != BufferStart && (CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')) {
2575 if (LangOpts.CPlusPlus11) {
2579 if (!Diags.
isIgnored(diag::warn_cxx98_compat_no_newline_eof, EndLoc)) {
2580 DiagID = diag::warn_cxx98_compat_no_newline_eof;
2582 DiagID = diag::warn_no_newline_eof;
2585 DiagID = diag::ext_no_newline_eof;
2588 Diag(BufferEnd, DiagID)
// Lexer::isNextPPTokenLParen (partial listing; the lookahead lex itself and
// the special-case returns are not shown).
// Visible behaviour: asserts the lexer is not in raw mode, saves BufferPtr
// and the start-of-line / leading-space flags, later restores all of them,
// and returns whether the looked-ahead token Tok is tok::l_paren. The
// save/restore means the lookahead leaves the visible lexer state unchanged.
2602 unsigned Lexer::isNextPPTokenLParen() {
2603 assert(!
LexingRawMode &&
"How can we expand a macro from a skipping buffer?");
2611 const char *TmpBufferPtr = BufferPtr;
2613 bool atStartOfLine = IsAtStartOfLine;
2614 bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
2615 bool leadingSpace = HasLeadingSpace;
2621 BufferPtr = TmpBufferPtr;
2623 HasLeadingSpace = leadingSpace;
2624 IsAtStartOfLine = atStartOfLine;
2625 IsAtPhysicalStartOfLine = atPhysicalStartOfLine;
2632 return Tok.
is(tok::l_paren);
2638 const char *Terminator = CMK ==
CMK_Perforce ?
"<<<<\n" :
">>>>>>>";
2640 auto RestOfBuffer = StringRef(CurPtr, BufferEnd - CurPtr).substr(TermLen);
2641 size_t Pos = RestOfBuffer.find(Terminator);
2642 while (Pos != StringRef::npos) {
2645 (RestOfBuffer[Pos - 1] !=
'\r' && RestOfBuffer[Pos - 1] !=
'\n')) {
2646 RestOfBuffer = RestOfBuffer.substr(Pos+TermLen);
2647 Pos = RestOfBuffer.find(Terminator);
2650 return RestOfBuffer.data()+Pos;
// Lexer::IsStartOfConflictMarker (partial listing; the early returns, the
// Kind computation and the final return are not shown).
// Visible behaviour:
//  - A marker is only considered at the start of the buffer or directly
//    after '\n' / '\r'.
//  - The remaining buffer must start with "<<<<<<<" or ">>>> ".
//  - err_conflict_marker is diagnosed at CurPtr and
//    CurrentConflictMarkerState is set to Kind.
//  - CurPtr is then walked to the end of the line; the assertion documents
//    the expectation that a newline is found before BufferEnd.
2659 bool Lexer::IsStartOfConflictMarker(
const char *CurPtr) {
2661 if (CurPtr != BufferStart &&
2662 CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')
2666 if (!StringRef(CurPtr, BufferEnd - CurPtr).startswith(
"<<<<<<<") &&
2667 !StringRef(CurPtr, BufferEnd - CurPtr).startswith(
">>>> "))
2682 Diag(CurPtr, diag::err_conflict_marker);
2683 CurrentConflictMarkerState =
Kind;
2687 while (*CurPtr !=
'\r' && *CurPtr !=
'\n') {
2688 assert(CurPtr != BufferEnd &&
"Didn't find end of line");
// Lexer::HandleEndOfConflictMarker (partial listing; the early returns and
// the call whose last argument is CurrentConflictMarkerState are not fully
// shown).
// Visible behaviour:
//  - Only positions at the start of the buffer or directly after a newline
//    are considered.
//  - The three characters after CurPtr[0] must equal CurPtr[0], i.e. the
//    line starts with four identical characters.
//  - On the visible success path CurPtr is advanced to the end of the line
//    (or BufferEnd) and CurrentConflictMarkerState is reset to CMK_None.
2703 bool Lexer::HandleEndOfConflictMarker(
const char *CurPtr) {
2705 if (CurPtr != BufferStart &&
2706 CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')
2715 for (
unsigned i = 1; i != 4; ++i)
2716 if (CurPtr[i] != CurPtr[0])
2723 CurrentConflictMarkerState)) {
2727 while (CurPtr != BufferEnd && *CurPtr !=
'\r' && *CurPtr !=
'\n')
2733 CurrentConflictMarkerState =
CMK_None;
2741 const char *BufferEnd) {
2742 if (CurPtr == BufferEnd)
2745 for (; CurPtr != BufferEnd; ++CurPtr) {
2746 if (CurPtr[0] ==
'#' && CurPtr[1] ==
'>')
// Lexer::lexEditorPlaceholder (partial listing; the search for the
// placeholder's end, which defines End, and the return statements are not
// shown).
// Visible behaviour: asserts that the text at CurPtr-1 is "<#", takes Start
// as the position of the '<', diagnoses err_placeholder_in_source when
// LangOpts.AllowEditorPlaceholders is off, and forms the text up to End as a
// tok::raw_identifier token.
2752 bool Lexer::lexEditorPlaceholder(
Token &Result,
const char *CurPtr) {
2753 assert(CurPtr[-1] ==
'<' && CurPtr[0] ==
'#' &&
"Not a placeholder!");
2759 const char *Start = CurPtr - 1;
2760 if (!LangOpts.AllowEditorPlaceholders)
2761 Diag(Start, diag::err_placeholder_in_source);
2763 FormTokenWithChars(Result, End, tok::raw_identifier);
2771 bool Lexer::isCodeCompletionPoint(
const char *CurPtr)
const {
// Lexer::tryReadUCN (partial listing; the last parameter, several conditions
// and all return statements are not shown).
// Visible behaviour:
//  - The character after the backslash (Kind) selects NumHexDigits; a 'U'
//    branch is visible (NOTE(review): presumably 'u' -> 4 digits and
//    'U' -> 8 digits, consistent with the "four not eight" note below).
//  - Outside C++ and C99, warn_ucn_not_valid_in_c89 is diagnosed.
//  - Up to NumHexDigits hex digits are read with getCharAndSize() and
//    llvm::hexDigitValue(). Missing digits produce
//    warn_ucn_escape_no_digits or warn_ucn_escape_incomplete, and stopping
//    after exactly four digits of an eight-digit escape adds
//    note_ucn_four_not_eight.
//  - If the escape's spelled length equals NumHexDigits + 2, one branch is
//    taken; otherwise StartPtr is advanced one logical character at a time
//    through getAndAdvanceChar() (the surrounding conditions are not shown).
//  - LangOpts.AsmPreprocessor has a dedicated check.
//  - Code points below 0xA0: 0x24, 0x40 and 0x60 are special-cased;
//    values < 0x20 or >= 0x7F get err_ucn_control_character, the rest
//    err_ucn_escape_basic_scs quoting the character.
//  - Surrogates (0xD800..0xDFFF) get warn_ucn_escape_surrogate in C++ before
//    C++11 and err_ucn_escape_invalid otherwise.
2780 uint32_t Lexer::tryReadUCN(
const char *&StartPtr,
const char *SlashLoc,
2783 char Kind = getCharAndSize(StartPtr, CharSize);
2785 unsigned NumHexDigits;
2788 else if (Kind ==
'U')
2793 if (!LangOpts.CPlusPlus && !LangOpts.C99) {
2795 Diag(SlashLoc, diag::warn_ucn_not_valid_in_c89);
2799 const char *CurPtr = StartPtr + CharSize;
2800 const char *KindLoc = &CurPtr[-1];
2802 uint32_t CodePoint = 0;
2803 for (
unsigned i = 0; i < NumHexDigits; ++i) {
2804 char C = getCharAndSize(CurPtr, CharSize);
2806 unsigned Value = llvm::hexDigitValue(C);
2810 Diag(BufferPtr, diag::warn_ucn_escape_no_digits)
2811 << StringRef(KindLoc, 1);
2813 Diag(BufferPtr, diag::warn_ucn_escape_incomplete);
2816 if (i == 4 && NumHexDigits == 8) {
2818 Diag(KindLoc, diag::note_ucn_four_not_eight)
2835 if (CurPtr - StartPtr == (
ptrdiff_t)NumHexDigits + 2)
2838 while (StartPtr != CurPtr)
2839 (void)getAndAdvanceChar(StartPtr, *Result);
2845 if (LangOpts.AsmPreprocessor)
2859 if (CodePoint < 0xA0) {
2860 if (CodePoint == 0x24 || CodePoint == 0x40 || CodePoint == 0x60)
2866 if (CodePoint < 0x20 || CodePoint >= 0x7F)
2867 Diag(BufferPtr, diag::err_ucn_control_character);
2869 char C =
static_cast<char>(CodePoint);
2870 Diag(BufferPtr, diag::err_ucn_escape_basic_scs) << StringRef(&C, 1);
2876 }
else if (CodePoint >= 0xD800 && CodePoint <= 0xDFFF) {
2881 if (LangOpts.CPlusPlus && !LangOpts.CPlusPlus11)
2882 Diag(BufferPtr, diag::warn_ucn_escape_surrogate);
2884 Diag(BufferPtr, diag::err_ucn_escape_invalid);
// Lexer::CheckUnicodeWhitespace (partial listing; the table argument, the
// first half of the condition and the return statements are not shown).
// Visible behaviour: a function-local static UnicodeCharSet of whitespace
// characters is consulted for C, and on a match ext_unicode_whitespace is
// diagnosed at BufferPtr.
2892 bool Lexer::CheckUnicodeWhitespace(
Token &Result, uint32_t C,
2893 const char *CurPtr) {
2894 static const llvm::sys::UnicodeCharSet UnicodeWhitespaceChars(
2897 UnicodeWhitespaceChars.contains(C)) {
2898 Diag(BufferPtr, diag::ext_unicode_whitespace)
// Lexer::LexUnicode (partial listing; all conditions are missing).
// Visible outcomes for a non-ASCII code point C: hand off to LexIdentifier(),
// or diagnose err_non_ascii at BufferPtr, or form a tok::unknown token ending
// at CurPtr.
// NOTE(review): the conditions choosing between these paths are not shown.
2907 bool Lexer::LexUnicode(
Token &Result, uint32_t C,
const char *CurPtr) {
2917 return LexIdentifier(Result, CurPtr);
2932 Diag(BufferPtr, diag::err_non_ascii)
2942 FormTokenWithChars(Result, CurPtr, tok::unknown);
2946 void Lexer::PropagateLineStartLeadingSpaceInfo(
Token &Result) {
// Lexer::Lex (partial listing; the statements that transfer each flag onto
// Result are not shown).
// Visible behaviour: each of the pending lexer flags (IsAtStartOfLine,
// HasLeadingSpace, HasLeadingEmptyMacro) is tested and then cleared.
// IsAtPhysicalStartOfLine is captured into a local and cleared, and the local
// is passed to LexTokenInternal(). The assertion states that a raw lex must
// always produce a token; the function returns LexTokenInternal()'s result.
2953 bool Lexer::Lex(
Token &Result) {
2958 if (IsAtStartOfLine) {
2960 IsAtStartOfLine =
false;
2963 if (HasLeadingSpace) {
2965 HasLeadingSpace =
false;
2968 if (HasLeadingEmptyMacro) {
2970 HasLeadingEmptyMacro =
false;
2973 bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
2974 IsAtPhysicalStartOfLine =
false;
2977 bool returnedToken = LexTokenInternal(Result, atPhysicalStartOfLine);
2979 assert((returnedToken || !isRawLex) &&
"Raw lex must succeed");
2980 return returnedToken;
2988 bool Lexer::LexTokenInternal(
Token &Result,
bool TokAtPhysicalStartOfLine) {
2995 const char *CurPtr = BufferPtr;
2998 if ((*CurPtr ==
' ') || (*CurPtr ==
'\t')) {
3000 while ((*CurPtr ==
' ') || (*CurPtr ==
'\t'))
3007 FormTokenWithChars(Result, CurPtr, tok::unknown);
3016 unsigned SizeTmp, SizeTmp2;
3019 char Char = getAndAdvanceChar(CurPtr, Result);
3025 if (CurPtr-1 == BufferEnd)
3026 return LexEndOfFile(Result, CurPtr-1);
3029 if (isCodeCompletionPoint(CurPtr-1)) {
3032 FormTokenWithChars(Result, CurPtr, tok::code_completion);
3037 Diag(CurPtr-1, diag::null_in_file);
3039 if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
3048 if (LangOpts.MicrosoftExt) {
3050 Diag(CurPtr-1, diag::ext_ctrl_z_eof_microsoft);
3051 return LexEndOfFile(Result, CurPtr-1);
3055 Kind = tok::unknown;
3071 IsAtStartOfLine =
true;
3072 IsAtPhysicalStartOfLine =
true;
3081 if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
3091 SkipHorizontalWhitespace:
3093 if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
3102 LangOpts.LineComment &&
3103 (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP)) {
3104 if (SkipLineComment(Result, CurPtr+2, TokAtPhysicalStartOfLine))
3106 goto SkipIgnoredUnits;
3108 if (SkipBlockComment(Result, CurPtr+2, TokAtPhysicalStartOfLine))
3110 goto SkipIgnoredUnits;
3112 goto SkipHorizontalWhitespace;
3120 case '0':
case '1':
case '2':
case '3':
case '4':
3121 case '5':
case '6':
case '7':
case '8':
case '9':
3124 return LexNumericConstant(Result, CurPtr);
3130 if (LangOpts.CPlusPlus11 || LangOpts.C11) {
3131 Char = getCharAndSize(CurPtr, SizeTmp);
3135 return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3136 tok::utf16_string_literal);
3140 return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3141 tok::utf16_char_constant);
3144 if (Char ==
'R' && LangOpts.CPlusPlus11 &&
3145 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3146 return LexRawStringLiteral(Result,
3147 ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3149 tok::utf16_string_literal);
3152 char Char2 = getCharAndSize(CurPtr + SizeTmp, SizeTmp2);
3156 return LexStringLiteral(Result,
3157 ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3159 tok::utf8_string_literal);
3160 if (Char2 ==
'\'' && LangOpts.CPlusPlus1z)
3161 return LexCharConstant(
3162 Result, ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3164 tok::utf8_char_constant);
3166 if (Char2 ==
'R' && LangOpts.CPlusPlus11) {
3168 char Char3 = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
3171 return LexRawStringLiteral(Result,
3172 ConsumeChar(ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3175 tok::utf8_string_literal);
3182 return LexIdentifier(Result, CurPtr);
3188 if (LangOpts.CPlusPlus11 || LangOpts.C11) {
3189 Char = getCharAndSize(CurPtr, SizeTmp);
3193 return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3194 tok::utf32_string_literal);
3198 return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3199 tok::utf32_char_constant);
3202 if (Char ==
'R' && LangOpts.CPlusPlus11 &&
3203 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3204 return LexRawStringLiteral(Result,
3205 ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3207 tok::utf32_string_literal);
3211 return LexIdentifier(Result, CurPtr);
3217 if (LangOpts.CPlusPlus11) {
3218 Char = getCharAndSize(CurPtr, SizeTmp);
3221 return LexRawStringLiteral(Result,
3222 ConsumeChar(CurPtr, SizeTmp, Result),
3223 tok::string_literal);
3227 return LexIdentifier(Result, CurPtr);
3232 Char = getCharAndSize(CurPtr, SizeTmp);
3236 return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3237 tok::wide_string_literal);
3240 if (LangOpts.CPlusPlus11 && Char ==
'R' &&
3241 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3242 return LexRawStringLiteral(Result,
3243 ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3245 tok::wide_string_literal);
3249 return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3250 tok::wide_char_constant);
3255 case 'A':
case 'B':
case 'C':
case 'D':
case 'E':
case 'F':
case 'G':
3256 case 'H':
case 'I':
case 'J':
case 'K':
case 'M':
case 'N':
3257 case 'O':
case 'P':
case 'Q':
case 'S':
case 'T':
3258 case 'V':
case 'W':
case 'X':
case 'Y':
case 'Z':
3259 case 'a':
case 'b':
case 'c':
case 'd':
case 'e':
case 'f':
case 'g':
3260 case 'h':
case 'i':
case 'j':
case 'k':
case 'l':
case 'm':
case 'n':
3261 case 'o':
case 'p':
case 'q':
case 'r':
case 's':
case 't':
3262 case 'v':
case 'w':
case 'x':
case 'y':
case 'z':
3266 return LexIdentifier(Result, CurPtr);
3269 if (LangOpts.DollarIdents) {
3271 Diag(CurPtr-1, diag::ext_dollar_in_identifier);
3274 return LexIdentifier(Result, CurPtr);
3277 Kind = tok::unknown;
3284 return LexCharConstant(Result, CurPtr, tok::char_constant);
3290 return LexStringLiteral(Result, CurPtr, tok::string_literal);
3294 Kind = tok::question;
3297 Kind = tok::l_square;
3300 Kind = tok::r_square;
3303 Kind = tok::l_paren;
3306 Kind = tok::r_paren;
3309 Kind = tok::l_brace;
3312 Kind = tok::r_brace;
3315 Char = getCharAndSize(CurPtr, SizeTmp);
3316 if (Char >=
'0' && Char <=
'9') {
3320 return LexNumericConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result));
3321 }
else if (LangOpts.CPlusPlus && Char ==
'*') {
3322 Kind = tok::periodstar;
3324 }
else if (Char ==
'.' &&
3325 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
'.') {
3326 Kind = tok::ellipsis;
3327 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3334 Char = getCharAndSize(CurPtr, SizeTmp);
3337 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3338 }
else if (Char ==
'=') {
3339 Kind = tok::ampequal;
3340 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3346 if (getCharAndSize(CurPtr, SizeTmp) ==
'=') {
3347 Kind = tok::starequal;
3348 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3354 Char = getCharAndSize(CurPtr, SizeTmp);
3356 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3357 Kind = tok::plusplus;
3358 }
else if (Char ==
'=') {
3359 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3360 Kind = tok::plusequal;
3366 Char = getCharAndSize(CurPtr, SizeTmp);
3368 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3369 Kind = tok::minusminus;
3370 }
else if (Char ==
'>' && LangOpts.CPlusPlus &&
3371 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
'*') {
3372 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3374 Kind = tok::arrowstar;
3375 }
else if (Char ==
'>') {
3376 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3378 }
else if (Char ==
'=') {
3379 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3380 Kind = tok::minusequal;
3389 if (getCharAndSize(CurPtr, SizeTmp) ==
'=') {
3390 Kind = tok::exclaimequal;
3391 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3393 Kind = tok::exclaim;
3398 Char = getCharAndSize(CurPtr, SizeTmp);
3408 bool TreatAsComment = LangOpts.LineComment &&
3409 (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP);
3410 if (!TreatAsComment)
3412 TreatAsComment = getCharAndSize(CurPtr+SizeTmp, SizeTmp2) !=
'*';
3414 if (TreatAsComment) {
3415 if (SkipLineComment(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3416 TokAtPhysicalStartOfLine))
3422 goto SkipIgnoredUnits;
3427 if (SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3428 TokAtPhysicalStartOfLine))
3437 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3438 Kind = tok::slashequal;
3444 Char = getCharAndSize(CurPtr, SizeTmp);
3446 Kind = tok::percentequal;
3447 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3448 }
else if (LangOpts.Digraphs && Char ==
'>') {
3449 Kind = tok::r_brace;
3450 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3451 }
else if (LangOpts.Digraphs && Char ==
':') {
3452 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3453 Char = getCharAndSize(CurPtr, SizeTmp);
3454 if (Char ==
'%' && getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
':') {
3455 Kind = tok::hashhash;
3456 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3458 }
else if (Char ==
'@' && LangOpts.MicrosoftExt) {
3459 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3461 Diag(BufferPtr, diag::ext_charize_microsoft);
3468 if (TokAtPhysicalStartOfLine && !
LexingRawMode && !Is_PragmaLexer)
3469 goto HandleDirective;
3474 Kind = tok::percent;
3478 Char = getCharAndSize(CurPtr, SizeTmp);
3480 return LexAngledStringLiteral(Result, CurPtr);
3481 }
else if (Char ==
'<') {
3482 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
3484 Kind = tok::lesslessequal;
3485 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3487 }
else if (After ==
'<' && IsStartOfConflictMarker(CurPtr-1)) {
3491 }
else if (After ==
'<' && HandleEndOfConflictMarker(CurPtr-1)) {
3495 }
else if (LangOpts.CUDA && After ==
'<') {
3496 Kind = tok::lesslessless;
3497 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3500 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3501 Kind = tok::lessless;
3503 }
else if (Char ==
'=') {
3504 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3505 Kind = tok::lessequal;
3506 }
else if (LangOpts.Digraphs && Char ==
':') {
3507 if (LangOpts.CPlusPlus11 &&
3508 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
':') {
3515 char After = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
3516 if (After !=
':' && After !=
'>') {
3519 Diag(BufferPtr, diag::warn_cxx98_compat_less_colon_colon);
3524 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3525 Kind = tok::l_square;
3526 }
else if (LangOpts.Digraphs && Char ==
'%') {
3527 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3528 Kind = tok::l_brace;
3529 }
else if (Char ==
'#' && lexEditorPlaceholder(Result, CurPtr)) {
3536 Char = getCharAndSize(CurPtr, SizeTmp);
3538 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3539 Kind = tok::greaterequal;
3540 }
else if (Char ==
'>') {
3541 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
3543 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3545 Kind = tok::greatergreaterequal;
3546 }
else if (After ==
'>' && IsStartOfConflictMarker(CurPtr-1)) {
3550 }
else if (After ==
'>' && HandleEndOfConflictMarker(CurPtr-1)) {
3553 }
else if (LangOpts.CUDA && After ==
'>') {
3554 Kind = tok::greatergreatergreater;
3555 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3558 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3559 Kind = tok::greatergreater;
3562 Kind = tok::greater;
3566 Char = getCharAndSize(CurPtr, SizeTmp);
3568 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3569 Kind = tok::caretequal;
3570 }
else if (LangOpts.OpenCL && Char ==
'^') {
3571 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3572 Kind = tok::caretcaret;
3578 Char = getCharAndSize(CurPtr, SizeTmp);
3580 Kind = tok::pipeequal;
3581 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3582 }
else if (Char ==
'|') {
3584 if (CurPtr[1] ==
'|' && HandleEndOfConflictMarker(CurPtr-1))
3586 Kind = tok::pipepipe;
3587 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3593 Char = getCharAndSize(CurPtr, SizeTmp);
3594 if (LangOpts.Digraphs && Char ==
'>') {
3595 Kind = tok::r_square;
3596 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3597 }
else if (LangOpts.CPlusPlus && Char ==
':') {
3598 Kind = tok::coloncolon;
3599 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3608 Char = getCharAndSize(CurPtr, SizeTmp);
3611 if (CurPtr[1] ==
'=' && HandleEndOfConflictMarker(CurPtr-1))
3614 Kind = tok::equalequal;
3615 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3624 Char = getCharAndSize(CurPtr, SizeTmp);
3626 Kind = tok::hashhash;
3627 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3628 }
else if (Char ==
'@' && LangOpts.MicrosoftExt) {
3631 Diag(BufferPtr, diag::ext_charize_microsoft);
3632 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3638 if (TokAtPhysicalStartOfLine && !
LexingRawMode && !Is_PragmaLexer)
3639 goto HandleDirective;
3647 if (CurPtr[-1] ==
'@' && LangOpts.ObjC1)
3650 Kind = tok::unknown;
3655 if (!LangOpts.AsmPreprocessor) {
3656 if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) {
3657 if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
3658 if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
3666 return LexUnicode(Result, CodePoint, CurPtr);
3670 Kind = tok::unknown;
3675 Kind = tok::unknown;
3679 llvm::UTF32 CodePoint;
3684 llvm::ConversionResult
Status =
3685 llvm::convertUTF8Sequence((
const llvm::UTF8 **)&CurPtr,
3686 (
const llvm::UTF8 *)BufferEnd,
3688 llvm::strictConversion);
3689 if (Status == llvm::conversionOK) {
3690 if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
3691 if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
3698 return LexUnicode(Result, CodePoint, CurPtr);
3704 Kind = tok::unknown;
3711 Diag(CurPtr, diag::err_invalid_utf8);
3713 BufferPtr = CurPtr+1;
3725 FormTokenWithChars(Result, CurPtr, Kind);
3731 FormTokenWithChars(Result, CurPtr, tok::hash);
3736 assert(Result.
is(
tok::eof) &&
"Preprocessor did not set tok:eof");
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
SourceManager & getSourceManager() const
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
bool isMacroArgExpansion(SourceLocation Loc, SourceLocation *StartLoc=nullptr) const
Tests whether the given source location represents a macro argument's expansion into the function-lik...
static unsigned getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid=nullptr)
getSpelling - This method is used to get the spelling of a token into a preallocated buffer...
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens...
This is a discriminated union of FileInfo and ExpansionInfo.
SourceLocation getBegin() const
static std::pair< unsigned, bool > ComputePreamble(StringRef Buffer, const LangOptions &LangOpts, unsigned MaxLines=0)
Compute the preamble of the given file.
void setFlagValue(TokenFlags Flag, bool Val)
Set a flag to either true or false.
static const llvm::sys::UnicodeCharRange C11AllowedIDCharRanges[]
SourceLocation getImmediateSpellingLoc(SourceLocation Loc) const
Given a SourceLocation object, return the spelling location referenced by the ID. ...
static LLVM_READONLY bool isWhitespace(unsigned char c)
Return true if this character is horizontal or vertical ASCII whitespace: ' ', '\t', '\f', '\v', '\n', '\r'.
void setBegin(SourceLocation b)
SourceLocation getSpellingLoc(SourceLocation Loc) const
Given a SourceLocation object, return the spelling location referenced by the ID. ...
static __inline__ int __ATTRS_o_ai vec_any_eq(vector signed char __a, vector signed char __b)
Defines the SourceManager interface.
const SrcMgr::SLocEntry & getSLocEntry(FileID FID, bool *Invalid=nullptr) const
const char * getCharacterData(SourceLocation SL, bool *Invalid=nullptr) const
Return a pointer to the start of the specified location in the appropriate spelling MemoryBuffer...
static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts)
llvm::MemoryBuffer * getBuffer(FileID FID, SourceLocation Loc, bool *Invalid=nullptr) const
Return the buffer for the specified FileID.
bool hasLeadingSpace() const
Return true if this token has whitespace before it.
Each ExpansionInfo encodes the expansion location - where the token was ultimately expanded...
const ExpansionInfo & getExpansion() const
std::unique_ptr< llvm::MemoryBuffer > Buffer
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
bool hasUCN() const
Returns true if this token contains a universal character name.
void setFlag(TokenFlags Flag)
Set the specified flag.
unsigned getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it...
bool needsCleaning() const
Return true if this token has trigraphs or escaped newlines in it.
static char getCharAndSizeNoWarn(const char *Ptr, unsigned &Size, const LangOptions &LangOpts)
getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever emit a warning.
static bool isAtStartOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroBegin=nullptr)
Returns true if the given MacroID location points at the first token of the macro expansion...
static LLVM_READNONE bool isASCII(char c)
Returns true if this is an ASCII character.
bool isStringLiteral(TokenKind K)
Return true if this is a C or C++ string-literal (or C++11 user-defined-string-literal) token...
ConflictMarkerKind
ConflictMarkerKind - Kinds of conflict marker which the lexer might be recovering from...
static LLVM_ATTRIBUTE_NOINLINE SourceLocation GetMappedTokenLoc(Preprocessor &PP, SourceLocation FileLoc, unsigned CharNo, unsigned TokLen)
GetMappedTokenLoc - If lexing out of a 'mapped buffer', where we pretend the lexer buffer was all exp...
Like System, but searched after the system directories.
bool isInPrimaryFile() const
Return true if we're in the top-level file, not in a #include.
static Lexer * Create_PragmaLexer(SourceLocation SpellingLoc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLen, Preprocessor &PP)
Create_PragmaLexer: Lexer constructor - Create a new lexer object for _Pragma expansion.
StringRef getSpelling(SourceLocation loc, SmallVectorImpl< char > &buffer, bool *invalid=nullptr) const
Return the 'spelling' of the token at the given location; does not go up to the spelling location or ...
static LLVM_READONLY bool isPreprocessingNumberBody(unsigned char c)
Return true if this is the body character of a C preprocessing number, which is [a-zA-Z0-9_.].
One of these records is kept for each identifier that is lexed.
static StringRef getIndentationForLine(SourceLocation Loc, const SourceManager &SM)
Returns the leading whitespace for the line that corresponds to the given location Loc.
bool ParsingPreprocessorDirective
True when parsing #XXX; turns '\n' into a tok::eod token.
StringRef getBufferData(FileID FID, bool *Invalid=nullptr) const
Return a StringRef to the source buffer data for the specified FileID.
void setRawIdentifierData(const char *Ptr)
bool isPragmaLexer() const
isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.
static SourceLocation getFromRawEncoding(unsigned Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
static LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)
Returns true if this character is horizontal ASCII whitespace: ' ', '\t', '\f', '\v'.
SmallVector< PPConditionalInfo, 4 > ConditionalStack
Information about the set of #if/#ifdef/#ifndef blocks we are currently in.
Token - This structure provides full information about a lexed token.
void setKind(tok::TokenKind K)
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
void resetExtendedTokenMode()
Sets the extended token mode back to its initial value, according to the language options and preproc...
A Perforce-style conflict marker, initiated by 4 ">"s, separated by 4 "="s, and terminated by 4 "<"s...
SourceLocation getSourceLocation() override
getSourceLocation - Return a source location for the next character in the current file...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi8(char __b)
Initializes all values in a 128-bit vector of [16 x i8] with the specified 8-bit value.
static SourceLocation getBeginningOfFileToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
SourceLocation getLocWithOffset(int Offset) const
Return a source location with the specified offset from this SourceLocation.
bool getCommentRetentionState() const
tok::TokenKind getTokenID() const
If this is a source-language token (e.g.
static bool getRawToken(SourceLocation Loc, Token &Result, const SourceManager &SM, const LangOptions &LangOpts, bool IgnoreWhiteSpace=false)
Relex the token at the specified location.
void HandleDirective(Token &Result)
Callback invoked when the lexer sees a # token at the start of a line.
Concrete class used by the front-end to report problems and issues.
bool hadModuleLoaderFatalFailure() const
static LLVM_READONLY bool isRawStringDelimBody(unsigned char c)
Return true if this is the body character of a C++ raw string delimiter.
SourceLocation getCodeCompletionFileLoc() const
Returns the start location of the file of code-completion point.
tok::TokenKind getKind() const
__INTPTR_TYPE__ intptr_t
A signed integer type with the property that any valid pointer to void can be converted to this type...
const FileID FID
The SourceManager FileID corresponding to the file being lexed.
static SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart, unsigned Character, const SourceManager &SM, const LangOptions &LangOpts)
AdvanceToTokenCharacter - If the current SourceLocation specifies a location at the start of a token...
DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const
Forwarding function for diagnostics.
static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, Lexer *L)
isEndOfBlockCommentWithEscapedNewLine - Return true if the specified newline character (either \n or \r) ...
static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix)
Determine whether a suffix is a valid ud-suffix.
StringRef getRawIdentifier() const
getRawIdentifier - For a raw identifier token (i.e., an identifier lexed in raw mode), returns a reference to the text substring in the buffer if known.
static CharSourceRange makeCharRange(Lexer &L, const char *Begin, const char *End)
A little helper class used to produce diagnostics.
const FileEntry * getFileEntryForID(FileID FID) const
Returns the FileEntry record for the provided FileID.
bool ParsingFilename
True after #include; turns <xx> into a tok::angle_string_literal token.
FileID getFileID(SourceLocation SpellingLoc) const
Return the FileID for a SourceLocation.
static const llvm::sys::UnicodeCharRange C11DisallowedInitialIDCharRanges[]
bool isRecordingPreamble() const
static StringRef getSourceText(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts, bool *Invalid=nullptr)
Returns a string for the source that the range encompasses.
bool isInFileID(SourceLocation Loc, FileID FID, unsigned *RelativeOffset=nullptr) const
Given a specific FileID, returns true if Loc is inside that FileID chunk and sets relative offset (of...
static bool isAtEndOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroEnd=nullptr)
Returns true if the given MacroID location points at the last token of the macro expansion.
bool LexingRawMode
True if in raw mode.
static SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset, const SourceManager &SM, const LangOptions &LangOpts)
Computes the source location just past the end of the token at this source location.
Represents a character-granular source range.
SourceLocation getEnd() const
static unsigned MeasureTokenLength(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
MeasureTokenLength - Relex the token at the specified location and return its length in bytes in the ...
static SourceLocation findLocationAfterToken(SourceLocation loc, tok::TokenKind TKind, const SourceManager &SM, const LangOptions &LangOpts, bool SkipTrailingWhitespaceAndNewLine)
Checks that the given token is the first token that occurs after the given location (this excludes co...
Defines the clang::Preprocessor interface.
MultipleIncludeOpt MIOpt
A state machine that detects the #ifndef-wrapping a file idiom for the multiple-include optimization...
void setEnd(SourceLocation e)
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file. ...
Defines the clang::IdentifierInfo, clang::IdentifierTable, and clang::Selector interfaces.
static const char * findBeginningOfLine(StringRef Buffer, unsigned Offset)
Returns the pointer that points to the beginning of the line that contains the given offset, or null if the offset is invalid.
bool HandleEndOfFile(Token &Result, bool isEndOfMacro=false)
Callback invoked when the lexer hits the end of the current file.
SourceLocation createExpansionLoc(SourceLocation Loc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLength, int LoadedID=0, unsigned LoadedOffset=0)
Return a new SourceLocation that encodes the fact that a token from SpellingLoc should actually be re...
SourceLocation getCodeCompletionLoc() const
Returns the location of the code-completion point.
The result type of a method or function.
float __ovld __cnfn length(float p)
Return the length of vector p, i.e., sqrt(p.x^2 + p.y^2 + ...)
ObjCKeywordKind
Provides a namespace for Objective-C keywords which start with an '@'.
static CharSourceRange getCharRange(SourceRange R)
const char * getLiteralData() const
getLiteralData - For a literal token (numeric constant, string, etc), this returns a pointer to the s...
bool isHandleIdentifierCase() const
Return true if the Preprocessor::HandleIdentifier must be called on a token of this identifier...
bool isTokenRange() const
Return true if the end of this range specifies the start of the last token.
Encodes a location in the source.
bool isValid() const
Return true if this is a valid SourceLocation object.
bool isAtEndOfImmediateMacroExpansion(SourceLocation Loc, SourceLocation *MacroEnd=nullptr) const
Returns true if the given MacroID location points at the character end of the immediate macro expansi...
static void maybeDiagnoseIDCharCompat(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range, bool IsFirst)
bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const
Return true if we have an ObjC keyword identifier.
void setIdentifierInfo(IdentifierInfo *II)
DiagnosticBuilder Diag(const char *Loc, unsigned DiagID) const
Diag - Forwarding function for diagnostics.
static const llvm::sys::UnicodeCharRange C99DisallowedInitialIDCharRanges[]
static SourceLocation GetBeginningOfToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Given a location any where in a source buffer, find the location that corresponds to the beginning of...
static CharSourceRange makeRangeFromFileLocs(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {...
static const llvm::sys::UnicodeCharRange C99AllowedIDCharRanges[]
bool isIgnored(unsigned DiagID, SourceLocation Loc) const
Determine whether the diagnostic is known to be ignored.
DiagnosticsEngine & getDiagnostics() const
__PTRDIFF_TYPE__ ptrdiff_t
A signed integer type that is the result of subtracting two pointers.
static StringRef getImmediateMacroName(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
static StringRef getImmediateMacroNameForDiagnostics(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
static const llvm::sys::UnicodeCharRange UnicodeWhitespaceCharRanges[]
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
bool inKeepCommentMode() const
inKeepCommentMode - Return true if the lexer should return comments as tokens.
static CharSourceRange makeFileCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Accepts a range and returns a character range with file locations.
static size_t getSpellingSlow(const Token &Tok, const char *BufPtr, const LangOptions &LangOpts, char *Spelling)
Slow case of getSpelling.
bool isAtStartOfImmediateMacroExpansion(SourceLocation Loc, SourceLocation *MacroBegin=nullptr) const
Returns true if the given MacroID location points at the beginning of the immediate macro expansion...
static FixItHint CreateRemoval(CharSourceRange RemoveRange)
Create a code modification hint that removes the given source range.
std::pair< SourceLocation, SourceLocation > getImmediateExpansionRange(SourceLocation Loc) const
Return the start/end of the expansion information for an expansion location.
SourceLocation getSourceLocation(const char *Loc, unsigned TokLen=1) const
getSourceLocation - Return a source location identifier for the specified offset in the current file...
void CodeCompleteNaturalLanguage()
Hook used by the lexer to invoke the "natural language" code completion point.
detail::InMemoryDirectory::const_iterator E
SourceLocation getExpansionLocStart() const
static const char * findPlaceholderEnd(const char *CurPtr, const char *BufferEnd)
void setLiteralData(const char *Ptr)
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
bool isMacroArgExpansion() const
static const llvm::sys::UnicodeCharRange CXX03AllowedIDCharRanges[]
bool HandleIdentifier(Token &Identifier)
Callback invoked when the lexer reads an identifier and has filled in the token's IdentifierInfo member...
void CreateString(StringRef Str, Token &Tok, SourceLocation ExpansionLocStart=SourceLocation(), SourceLocation ExpansionLocEnd=SourceLocation())
Plop the specified string into a scratch buffer and set the specified token's location and length to ...
tok::ObjCKeywordKind getObjCKeywordID() const
Return the Objective-C keyword ID for this identifier.
static bool isAllowedInitiallyIDChar(uint32_t C, const LangOptions &LangOpts)
bool hasLeadingEmptyMacro() const
Return true if this token has an empty macro before it.
bool isCodeCompletionEnabled() const
Determine if we are performing code completion.
static FixItHint CreateInsertion(SourceLocation InsertionLoc, StringRef Code, bool BeforePreviousInsertions=false)
Create a code modification hint that inserts the given code string at a specific location.
static char GetTrigraphCharForLetter(char Letter)
GetTrigraphCharForLetter - Given a character that occurs after a ?? pair, return the decoded trigraph...
static bool isIdentifierBodyChar(char c, const LangOptions &LangOpts)
Returns true if the given character could appear in an identifier.
static LLVM_READONLY bool isIdentifierBody(unsigned char c, bool AllowDollar=false)
Returns true if this is a body character of a C identifier, which is [a-zA-Z0-9_].
bool HandleComment(Token &Token, SourceRange Comment)
const LangOptions & getLangOpts() const
getLangOpts - Return the language features currently enabled.
PreprocessorOptions & getPreprocessorOpts() const
Retrieve the preprocessor options used to initialize this preprocessor.
void ReadToEndOfLine(SmallVectorImpl< char > *Result=nullptr)
ReadToEndOfLine - Read the rest of the current preprocessor line as an uninterpreted string...
Not within a conflict marker.
static LLVM_READONLY bool isVerticalWhitespace(unsigned char c)
Returns true if this character is vertical ASCII whitespace: '\n', '\r'.
bool isLexingRawMode() const
Return true if this lexer is in raw mode or not.
static char DecodeTrigraphChar(const char *CP, Lexer *L)
DecodeTrigraphChar - If the specified character is a legal trigraph when prefixed with ...
static const char * FindConflictEnd(const char *CurPtr, const char *BufferEnd, ConflictMarkerKind CMK)
Find the end of a version control conflict marker.
static FixItHint CreateReplacement(CharSourceRange RemoveRange, StringRef Code)
Create a code modification hint that replaces the given source range with the given code string...
void SetCommentRetentionState(bool Mode)
SetCommentRetentionState - Change the comment retention mode of the lexer to the specified mode...
static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a)
Copies the values of the most significant bits from each 8-bit element in a 128-bit integer vector of...
IdentifierInfo * LookUpIdentifierInfo(Token &Identifier) const
Given a tok::raw_identifier token, look up the identifier information for the token and install it in...
unsigned getLength() const
SourceLocation getLocForStartOfFile(FileID FID) const
Return the source location corresponding to the first byte of the specified file. ...
bool isKeepWhitespaceMode() const
isKeepWhitespaceMode - Return true if the lexer should return tokens for every character in the file...
bool isPreprocessedOutput() const
Returns true if the preprocessor is responsible for generating output, false if it is producing token...
static LLVM_READONLY bool isIdentifierHead(unsigned char c, bool AllowDollar=false)
Returns true if this is a valid first character of a C identifier, which is [a-zA-Z_].
A normal or diff3 conflict marker, initiated by at least 7 "<"s, separated by at least 7 "="s or "|"s...
A trivial tuple used to represent a source range.
void clearFlag(TokenFlags Flag)
Unset the specified flag.
SourceLocation getExpansionLoc(SourceLocation Loc) const
Given a SourceLocation object Loc, return the expansion location referenced by the ID...
std::pair< FileID, unsigned > getDecomposedLoc(SourceLocation Loc) const
Decompose the specified location into a raw FileID + Offset pair.
void SetKeepWhitespaceMode(bool Val)
SetKeepWhitespaceMode - This method lets clients enable or disable whitespace retention mode...
bool isAnnotation() const
Return true if this is any of tok::annot_* kind tokens.
void setRecordedPreambleConditionalStack(ArrayRef< PPConditionalInfo > s)
This class handles loading and caching of source files into memory.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi8(__m128i __a, __m128i __b)
Compares each of the corresponding 8-bit values of the 128-bit integer vectors for equality...
void startToken()
Reset all flags to cleared.
static std::string Stringify(StringRef Str, bool Charify=false)
Stringify - Convert the specified string into a C string by escaping '\' and '"' characters. This does not add surrounding double quotes to the string.
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
SourceLocation getSpellingLoc() const
IdentifierInfo * getIdentifierInfo() const
tok::PPKeywordKind getPPKeywordID() const
Return the preprocessor keyword ID for this identifier.
bool LexEditorPlaceholders
When enabled, the preprocessor will construct editor placeholder tokens.