31 #include "llvm/ADT/None.h" 32 #include "llvm/ADT/Optional.h" 33 #include "llvm/ADT/StringExtras.h" 34 #include "llvm/ADT/StringSwitch.h" 35 #include "llvm/ADT/StringRef.h" 36 #include "llvm/Support/Compiler.h" 37 #include "llvm/Support/ConvertUTF.h" 38 #include "llvm/Support/MathExtras.h" 39 #include "llvm/Support/MemoryBuffer.h" 40 #include "llvm/Support/NativeFormatting.h" 41 #include "llvm/Support/UnicodeCharRanges.h" 51 using namespace clang;
62 return II->getObjCKeywordID() == objcKey;
69 return tok::objc_not_keyword;
// Out-of-line definition of Lexer::anchor(); the body is empty, so calling it
// has no effect.
// NOTE(review): presumably this is the LLVM "anchor" idiom that gives the
// class a single home translation unit for its vtable -- confirm against the
// declaration of Lexer, which is not visible here.
78 void Lexer::anchor() {}
80 void Lexer::InitLexer(
const char *BufStart,
const char *BufPtr,
82 BufferStart = BufStart;
86 assert(BufEnd[0] == 0 &&
87 "We assume that the input buffer has a null character at the end" 88 " to simplify lexing!");
93 if (BufferStart == BufferPtr) {
95 StringRef Buf(BufferStart, BufferEnd - BufferStart);
96 size_t BOMLength = llvm::StringSwitch<size_t>(Buf)
97 .StartsWith(
"\xEF\xBB\xBF", 3)
101 BufferPtr += BOMLength;
104 Is_PragmaLexer =
false;
105 CurrentConflictMarkerState =
CMK_None;
108 IsAtStartOfLine =
true;
109 IsAtPhysicalStartOfLine =
true;
111 HasLeadingSpace =
false;
112 HasLeadingEmptyMacro =
false;
115 ParsingPreprocessorDirective =
false;
118 ParsingFilename =
false;
124 LexingRawMode =
false;
127 ExtendedTokenMode = 0;
136 FileLoc(PP.getSourceManager().getLocForStartOfFile(FID)),
137 LangOpts(PP.getLangOpts()) {
138 InitLexer(InputFile->getBufferStart(), InputFile->getBufferStart(),
139 InputFile->getBufferEnd());
148 const char *BufStart,
const char *BufPtr,
const char *BufEnd)
149 : FileLoc(fileloc), LangOpts(langOpts) {
150 InitLexer(BufStart, BufPtr, BufEnd);
161 :
Lexer(SM.getLocForStartOfFile(FID), langOpts, FromFile->getBufferStart(),
162 FromFile->getBufferStart(), FromFile->getBufferEnd()) {}
165 assert(
PP &&
"Cannot reset token mode without a preprocessor");
166 if (LangOpts.TraditionalCPP)
195 const llvm::MemoryBuffer *InputFile = SM.
getBuffer(SpellingFID);
196 Lexer *L =
new Lexer(SpellingFID, InputFile, PP);
203 L->BufferPtr = StrData;
204 L->BufferEnd = StrData+TokLen;
205 assert(L->BufferEnd[0] == 0 &&
"Buffer is not nul terminated!");
211 ExpansionLocEnd, TokLen);
218 L->Is_PragmaLexer =
true;
223 typename T::size_type i = 0, e = Str.size();
225 if (Str[i] ==
'\\' || Str[i] == Quote) {
226 Str.insert(Str.begin() + i,
'\\');
229 }
else if (Str[i] ==
'\n' || Str[i] ==
'\r') {
231 if ((i < e - 1) && (Str[i + 1] ==
'\n' || Str[i + 1] ==
'\r') &&
232 Str[i] != Str[i + 1]) {
238 Str.insert(Str.begin() + i + 1,
'n');
249 char Quote = Charify ?
'\'' :
'"';
264 assert(Tok.
needsCleaning() &&
"getSpellingSlow called on simple token");
267 const char *BufEnd = BufPtr + Tok.
getLength();
271 while (BufPtr < BufEnd) {
276 if (Spelling[Length - 1] ==
'"')
284 Spelling[Length - 2] ==
'R' && Spelling[Length - 1] ==
'"') {
287 const char *RawEnd = BufEnd;
288 do --RawEnd;
while (*RawEnd !=
'"');
289 size_t RawLength = RawEnd - BufPtr + 1;
292 memcpy(Spelling + Length, BufPtr, RawLength);
300 while (BufPtr < BufEnd) {
307 "NeedsCleaning flag set on token that didn't need cleaning!");
325 bool invalidTemp =
false;
326 StringRef file = SM.
getBufferData(locInfo.first, &invalidTemp);
328 if (invalid) *invalid =
true;
332 const char *tokenBegin = file.data() + locInfo.second;
336 file.begin(), tokenBegin, file.end());
338 lexer.LexFromRawLexer(token);
340 unsigned length = token.getLength();
343 if (!token.needsCleaning())
344 return StringRef(tokenBegin, length);
347 buffer.resize(length);
348 buffer.resize(
getSpellingSlow(token, tokenBegin, options, buffer.data()));
349 return StringRef(buffer.data(), buffer.size());
359 assert((
int)Tok.
getLength() >= 0 &&
"Token character range is bogus!");
361 bool CharDataInvalid =
false;
365 *Invalid = CharDataInvalid;
371 return std::string(TokStart, TokStart + Tok.
getLength());
375 Result.resize(
getSpellingSlow(Tok, TokStart, LangOpts, &*Result.begin()));
392 assert((
int)Tok.
getLength() >= 0 &&
"Token character range is bogus!");
394 const char *TokStart =
nullptr;
396 if (Tok.
is(tok::raw_identifier))
401 Buffer = II->getNameStart();
402 return II->getLength();
412 bool CharDataInvalid =
false;
415 *Invalid = CharDataInvalid;
416 if (CharDataInvalid) {
429 return getSpellingSlow(Tok, TokStart, LangOpts, const_cast<char*>(Buffer));
450 bool IgnoreWhiteSpace) {
461 bool Invalid =
false;
462 StringRef Buffer = SM.
getBufferData(LocInfo.first, &Invalid);
466 const char *StrData = Buffer.data()+LocInfo.second;
473 Buffer.begin(), StrData, Buffer.end());
475 TheLexer.LexFromRawLexer(Result);
482 const char *BufStart = Buffer.data();
483 if (Offset >= Buffer.size())
486 const char *LexStart = BufStart +
Offset;
487 for (; LexStart != BufStart; --LexStart) {
503 if (LocInfo.first.isInvalid())
506 bool Invalid =
false;
507 StringRef Buffer = SM.
getBufferData(LocInfo.first, &Invalid);
513 const char *StrData = Buffer.data() + LocInfo.second;
515 if (!LexStart || LexStart == StrData)
520 Lexer TheLexer(LexerStartLoc, LangOpts, Buffer.data(), LexStart,
527 TheLexer.LexFromRawLexer(TheTok);
529 if (TheLexer.getBufferLocation() > StrData) {
533 if (TheLexer.getBufferLocation() - TheTok.
getLength() <= StrData)
558 std::pair<FileID, unsigned> BeginFileLocInfo =
560 assert(FileLocInfo.first == BeginFileLocInfo.first &&
561 FileLocInfo.second >= BeginFileLocInfo.second);
580 const unsigned StartOffset = 1;
582 Lexer TheLexer(FileLoc, LangOpts, Buffer.begin(), Buffer.begin(),
586 bool InPreprocessorDirective =
false;
590 unsigned MaxLineOffset = 0;
592 const char *CurPtr = Buffer.begin();
593 unsigned CurLine = 0;
594 while (CurPtr != Buffer.end()) {
598 if (CurLine == MaxLines)
602 if (CurPtr != Buffer.end())
603 MaxLineOffset = CurPtr - Buffer.begin();
607 TheLexer.LexFromRawLexer(TheTok);
609 if (InPreprocessorDirective) {
622 InPreprocessorDirective =
false;
631 if (MaxLineOffset && TokOffset >= MaxLineOffset)
636 if (TheTok.
getKind() == tok::comment) {
644 Token HashTok = TheTok;
645 InPreprocessorDirective =
true;
651 TheLexer.LexFromRawLexer(TheTok);
655 = llvm::StringSwitch<PreambleDirectiveKind>(Keyword)
656 .Case(
"include", PDK_Skipped)
657 .Case(
"__include_macros", PDK_Skipped)
658 .Case(
"define", PDK_Skipped)
659 .Case(
"undef", PDK_Skipped)
660 .Case(
"line", PDK_Skipped)
661 .Case(
"error", PDK_Skipped)
662 .Case(
"pragma", PDK_Skipped)
663 .Case(
"import", PDK_Skipped)
664 .Case(
"include_next", PDK_Skipped)
665 .Case(
"warning", PDK_Skipped)
666 .Case(
"ident", PDK_Skipped)
667 .Case(
"sccs", PDK_Skipped)
668 .Case(
"assert", PDK_Skipped)
669 .Case(
"unassert", PDK_Skipped)
670 .Case(
"if", PDK_Skipped)
671 .Case(
"ifdef", PDK_Skipped)
672 .Case(
"ifndef", PDK_Skipped)
673 .Case(
"elif", PDK_Skipped)
674 .Case(
"else", PDK_Skipped)
675 .Case(
"endif", PDK_Skipped)
676 .Default(PDK_Unknown);
691 InPreprocessorDirective =
false;
702 if (ActiveCommentLoc.
isValid())
703 End = ActiveCommentLoc;
717 bool Invalid =
false;
721 if (Invalid || (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr)))
724 unsigned PhysOffset = 0;
729 while (Lexer::isObviouslySimpleCharacter(*TokPtr)) {
739 for (; CharNo; --CharNo) {
750 if (!Lexer::isObviouslySimpleCharacter(*TokPtr))
751 PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr;
803 if (expansionLoc.isFileID()) {
806 *MacroBegin = expansionLoc;
834 *MacroEnd = expansionLoc;
908 bool Invalid =
false;
938 if (Invalid) *Invalid =
true;
944 if (beginInfo.first.isInvalid()) {
945 if (Invalid) *Invalid =
true;
951 beginInfo.second > EndOffs) {
952 if (Invalid) *Invalid =
true;
957 bool invalidTemp =
false;
958 StringRef file = SM.
getBufferData(beginInfo.first, &invalidTemp);
960 if (Invalid) *Invalid =
true;
964 if (Invalid) *Invalid =
false;
965 return file.substr(beginInfo.second, EndOffs - beginInfo.second);
971 assert(Loc.
isMacroID() &&
"Only reasonable to call this on macros");
1011 StringRef ExpansionBuffer = SM.
getBufferData(ExpansionInfo.first);
1012 return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
1017 assert(Loc.
isMacroID() &&
"Only reasonable to call this on macros");
1036 StringRef ExpansionBuffer = SM.
getBufferData(ExpansionInfo.first);
1037 return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
1046 if (Str - 1 < BufferStart)
1049 if ((Str[0] ==
'\n' && Str[-1] ==
'\r') ||
1050 (Str[0] ==
'\r' && Str[-1] ==
'\n')) {
1051 if (Str - 2 < BufferStart)
1061 return *Str ==
'\\';
1069 if (LocInfo.first.isInvalid())
1071 bool Invalid =
false;
1072 StringRef Buffer = SM.
getBufferData(LocInfo.first, &Invalid);
1078 StringRef Rest = Buffer.substr(Line - Buffer.data());
1079 size_t NumWhitespaceChars = Rest.find_first_not_of(
" \t");
1080 return NumWhitespaceChars == StringRef::npos
1082 : Rest.take_front(NumWhitespaceChars);
1097 unsigned CharNo,
unsigned TokLen) {
1098 assert(FileLoc.
isMacroID() &&
"Must be a macro expansion");
1114 return SM.createExpansionLoc(SpellingLoc, II.
getBegin(), II.
getEnd(), TokLen);
1120 unsigned TokLen)
const {
1121 assert(Loc >= BufferStart && Loc <= BufferEnd &&
1122 "Location out of range for this buffer!");
1126 unsigned CharNo = Loc-BufferStart;
1132 assert(PP &&
"This doesn't work on raw lexers");
1151 case '=':
return '#';
1152 case ')':
return ']';
1153 case '(':
return '[';
1154 case '!':
return '|';
1155 case '\'':
return '^';
1156 case '>':
return '}';
1157 case '/':
return '\\';
1158 case '<':
return '{';
1159 case '-':
return '~';
1169 if (!Res || !L)
return Res;
1173 L->
Diag(CP-2, diag::trigraph_ignored);
1178 L->
Diag(CP-2, diag::trigraph_converted) << StringRef(&Res, 1);
1185 unsigned Lexer::getEscapedNewLineSize(
const char *Ptr) {
1190 if (Ptr[Size-1] !=
'\n' && Ptr[Size-1] !=
'\r')
1194 if ((Ptr[Size] ==
'\r' || Ptr[Size] ==
'\n') &&
1195 Ptr[Size-1] != Ptr[Size])
1208 const char *Lexer::SkipEscapedNewLines(
const char *
P) {
1210 const char *AfterEscape;
1213 }
else if (*P ==
'?') {
1215 if (P[1] !=
'?' || P[2] !=
'/')
1224 unsigned NewLineSize = Lexer::getEscapedNewLineSize(AfterEscape);
1225 if (NewLineSize == 0)
return P;
1226 P = AfterEscape+NewLineSize;
1243 bool InvalidTemp =
false;
1244 StringRef File = SM.
getBufferData(LocInfo.first, &InvalidTemp);
1248 const char *TokenBegin = File.data() + LocInfo.second;
1252 TokenBegin, File.end());
1255 lexer.LexFromRawLexer(Tok);
1265 const LangOptions &LangOpts,
bool SkipTrailingWhitespaceAndNewLine) {
1267 if (!Tok || Tok->isNot(TKind))
1272 unsigned NumWhitespaceChars = 0;
1273 if (SkipTrailingWhitespaceAndNewLine) {
1275 unsigned char C = *TokenEnd;
1278 NumWhitespaceChars++;
1282 if (C ==
'\n' || C ==
'\r') {
1285 NumWhitespaceChars++;
1286 if ((C ==
'\n' || C ==
'\r') && C != PrevC)
1287 NumWhitespaceChars++;
1309 char Lexer::getCharAndSizeSlow(
const char *Ptr,
unsigned &Size,
1312 if (Ptr[0] ==
'\\') {
1321 if (
unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) {
1327 Diag(Ptr, diag::backslash_newline_space);
1330 Size += EscapedNewLineSize;
1331 Ptr += EscapedNewLineSize;
1334 return getCharAndSizeSlow(Ptr, Size, Tok);
1342 if (Ptr[0] ==
'?' && Ptr[1] ==
'?') {
1351 if (
C ==
'\\')
goto Slash;
1367 char Lexer::getCharAndSizeSlowNoWarn(
const char *Ptr,
unsigned &Size,
1370 if (Ptr[0] ==
'\\') {
1378 if (
unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) {
1380 Size += EscapedNewLineSize;
1381 Ptr += EscapedNewLineSize;
1384 return getCharAndSizeSlowNoWarn(Ptr, Size, LangOpts);
1392 if (LangOpts.Trigraphs && Ptr[0] ==
'?' && Ptr[1] ==
'?') {
1398 if (
C ==
'\\')
goto Slash;
1413 void Lexer::SetByteOffset(
unsigned Offset,
bool StartOfLine) {
1414 BufferPtr = BufferStart +
Offset;
1415 if (BufferPtr > BufferEnd)
1416 BufferPtr = BufferEnd;
1420 IsAtStartOfLine = StartOfLine;
1421 IsAtPhysicalStartOfLine = StartOfLine;
1425 if (LangOpts.AsmPreprocessor) {
1427 }
else if (LangOpts.CPlusPlus11 || LangOpts.C11) {
1428 static const llvm::sys::UnicodeCharSet C11AllowedIDChars(
1430 return C11AllowedIDChars.contains(C);
1431 }
else if (LangOpts.CPlusPlus) {
1432 static const llvm::sys::UnicodeCharSet CXX03AllowedIDChars(
1434 return CXX03AllowedIDChars.contains(C);
1436 static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
1438 return C99AllowedIDChars.contains(C);
1444 if (LangOpts.AsmPreprocessor) {
1446 }
else if (LangOpts.CPlusPlus11 || LangOpts.C11) {
1447 static const llvm::sys::UnicodeCharSet C11DisallowedInitialIDChars(
1449 return !C11DisallowedInitialIDChars.contains(C);
1450 }
else if (LangOpts.CPlusPlus) {
1453 static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
1455 return !C99DisallowedInitialIDChars.contains(C);
1470 CannotAppearInIdentifier = 0,
1471 CannotStartIdentifier
1474 static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
1476 static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
1478 if (!C99AllowedIDChars.contains(C)) {
1481 << CannotAppearInIdentifier;
1482 }
else if (IsFirst && C99DisallowedInitialIDChars.contains(C)) {
1485 << CannotStartIdentifier;
1491 static const llvm::sys::UnicodeCharSet CXX03AllowedIDChars(
1493 if (!CXX03AllowedIDChars.contains(C)) {
1494 Diags.
Report(Range.
getBegin(), diag::warn_cxx98_compat_unicode_id)
1507 struct HomoglyphPair {
1510 bool operator<(HomoglyphPair R)
const {
return Character < R.Character; }
1512 static constexpr HomoglyphPair SortedHomoglyphs[] = {
1555 std::lower_bound(std::begin(SortedHomoglyphs),
1556 std::end(SortedHomoglyphs) - 1, HomoglyphPair{
C,
'\0'});
1557 if (Homoglyph->Character == C) {
1560 llvm::raw_svector_ostream CharOS(CharBuf);
1561 llvm::write_hex(CharOS, C, llvm::HexPrintStyle::Upper, 4);
1563 const char LooksLikeStr[] = {Homoglyph->LooksLike, 0};
1565 << Range << CharBuf << LooksLikeStr;
1569 bool Lexer::tryConsumeIdentifierUCN(
const char *&CurPtr,
unsigned Size,
1571 const char *UCNPtr = CurPtr + Size;
1572 uint32_t CodePoint = tryReadUCN(UCNPtr, CurPtr,
nullptr);
1582 if ((UCNPtr - CurPtr == 6 && CurPtr[1] ==
'u') ||
1583 (UCNPtr - CurPtr == 10 && CurPtr[1] ==
'U'))
1586 while (CurPtr != UCNPtr)
1587 (void)getAndAdvanceChar(CurPtr, Result);
1591 bool Lexer::tryConsumeIdentifierUTF8Char(
const char *&CurPtr) {
1592 const char *UnicodePtr = CurPtr;
1593 llvm::UTF32 CodePoint;
1594 llvm::ConversionResult Result =
1595 llvm::convertUTF8Sequence((
const llvm::UTF8 **)&UnicodePtr,
1596 (
const llvm::UTF8 *)BufferEnd,
1598 llvm::strictConversion);
1599 if (Result != llvm::conversionOK ||
1611 CurPtr = UnicodePtr;
1615 bool Lexer::LexIdentifier(
Token &Result,
const char *CurPtr) {
1618 unsigned char C = *CurPtr++;
1629 if (
isASCII(C) && C !=
'\\' && C !=
'?' &&
1630 (C !=
'$' || !LangOpts.DollarIdents)) {
1632 const char *IdStart = BufferPtr;
1633 FormTokenWithChars(Result, CurPtr, tok::raw_identifier);
1650 if (isCodeCompletionPoint(CurPtr)) {
1652 Result.
setKind(tok::code_completion);
1658 assert(*CurPtr == 0 &&
"Completion character must be 0");
1663 if (CurPtr < BufferEnd) {
1681 C = getCharAndSize(CurPtr, Size);
1685 if (!LangOpts.DollarIdents)
goto FinishIdentifier;
1689 Diag(CurPtr, diag::ext_dollar_in_identifier);
1690 CurPtr = ConsumeChar(CurPtr, Size, Result);
1691 C = getCharAndSize(CurPtr, Size);
1693 }
else if (C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result)) {
1694 C = getCharAndSize(CurPtr, Size);
1696 }
else if (!
isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr)) {
1697 C = getCharAndSize(CurPtr, Size);
1700 goto FinishIdentifier;
1704 CurPtr = ConsumeChar(CurPtr, Size, Result);
1706 C = getCharAndSize(CurPtr, Size);
1708 CurPtr = ConsumeChar(CurPtr, Size, Result);
1709 C = getCharAndSize(CurPtr, Size);
1716 bool Lexer::isHexaLiteral(
const char *Start,
const LangOptions &LangOpts) {
1722 return (C2 ==
'x' || C2 ==
'X');
1728 bool Lexer::LexNumericConstant(
Token &Result,
const char *CurPtr) {
1730 char C = getCharAndSize(CurPtr, Size);
1733 CurPtr = ConsumeChar(CurPtr, Size, Result);
1735 C = getCharAndSize(CurPtr, Size);
1739 if ((C ==
'-' || C ==
'+') && (PrevCh ==
'E' || PrevCh ==
'e')) {
1742 if (!LangOpts.MicrosoftExt || !isHexaLiteral(BufferPtr, LangOpts))
1743 return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
1747 if ((C ==
'-' || C ==
'+') && (PrevCh ==
'P' || PrevCh ==
'p')) {
1751 bool IsHexFloat =
true;
1752 if (!LangOpts.C99) {
1753 if (!isHexaLiteral(BufferPtr, LangOpts))
1756 std::find(BufferPtr, CurPtr,
'_') != CurPtr)
1760 return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
1769 Diag(CurPtr, diag::warn_cxx11_compat_digit_separator);
1770 CurPtr = ConsumeChar(CurPtr, Size, Result);
1771 CurPtr = ConsumeChar(CurPtr, NextSize, Result);
1772 return LexNumericConstant(Result, CurPtr);
1777 if (C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result))
1778 return LexNumericConstant(Result, CurPtr);
1779 if (!
isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr))
1780 return LexNumericConstant(Result, CurPtr);
1783 const char *TokStart = BufferPtr;
1784 FormTokenWithChars(Result, CurPtr, tok::numeric_constant);
1791 const char *Lexer::LexUDSuffix(
Token &Result,
const char *CurPtr,
1792 bool IsStringLiteral) {
1797 char C = getCharAndSize(CurPtr, Size);
1798 bool Consumed =
false;
1801 if (C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result))
1803 else if (!
isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr))
1812 C ==
'_' ? diag::warn_cxx11_compat_user_defined_literal
1813 : diag::warn_cxx11_compat_reserved_user_defined_literal)
1824 bool IsUDSuffix =
false;
1831 const unsigned MaxStandardSuffixLength = 3;
1832 char Buffer[MaxStandardSuffixLength] = { C };
1833 unsigned Consumed = Size;
1841 const StringRef CompleteSuffix(Buffer, Chars);
1847 if (Chars == MaxStandardSuffixLength)
1851 Buffer[Chars++] = Next;
1852 Consumed += NextSize;
1859 ? diag::ext_ms_reserved_user_defined_literal
1860 : diag::ext_reserved_user_defined_literal)
1865 CurPtr = ConsumeChar(CurPtr, Size, Result);
1870 C = getCharAndSize(CurPtr, Size);
1872 else if (C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result)) {}
1873 else if (!
isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr)) {}
1882 bool Lexer::LexStringLiteral(
Token &Result,
const char *CurPtr,
1885 const char *NulCharacter =
nullptr;
1888 (Kind == tok::utf8_string_literal ||
1889 Kind == tok::utf16_string_literal ||
1890 Kind == tok::utf32_string_literal))
1892 ? diag::warn_cxx98_compat_unicode_literal
1893 : diag::warn_c99_compat_unicode_literal);
1895 char C = getAndAdvanceChar(CurPtr, Result);
1900 C = getAndAdvanceChar(CurPtr, Result);
1902 if (C ==
'\n' || C ==
'\r' ||
1903 (C == 0 && CurPtr-1 == BufferEnd)) {
1905 Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 1;
1906 FormTokenWithChars(Result, CurPtr-1, tok::unknown);
1911 if (isCodeCompletionPoint(CurPtr-1)) {
1913 FormTokenWithChars(Result, CurPtr-1, tok::unknown);
1918 NulCharacter = CurPtr-1;
1920 C = getAndAdvanceChar(CurPtr, Result);
1925 CurPtr = LexUDSuffix(Result, CurPtr,
true);
1929 Diag(NulCharacter, diag::null_in_char_or_string) << 1;
1932 const char *TokStart = BufferPtr;
1933 FormTokenWithChars(Result, CurPtr, Kind);
1940 bool Lexer::LexRawStringLiteral(
Token &Result,
const char *CurPtr,
1948 Diag(BufferPtr, diag::warn_cxx98_compat_raw_string_literal);
1950 unsigned PrefixLen = 0;
1956 if (CurPtr[PrefixLen] !=
'(') {
1958 const char *PrefixEnd = &CurPtr[PrefixLen];
1959 if (PrefixLen == 16) {
1960 Diag(PrefixEnd, diag::err_raw_delim_too_long);
1962 Diag(PrefixEnd, diag::err_invalid_char_raw_delim)
1963 << StringRef(PrefixEnd, 1);
1975 if (C == 0 && CurPtr-1 == BufferEnd) {
1981 FormTokenWithChars(Result, CurPtr, tok::unknown);
1986 const char *Prefix = CurPtr;
1987 CurPtr += PrefixLen + 1;
1994 if (strncmp(CurPtr, Prefix, PrefixLen) == 0 && CurPtr[PrefixLen] ==
'"') {
1995 CurPtr += PrefixLen + 1;
1998 }
else if (C == 0 && CurPtr-1 == BufferEnd) {
2000 Diag(BufferPtr, diag::err_unterminated_raw_string)
2001 << StringRef(Prefix, PrefixLen);
2002 FormTokenWithChars(Result, CurPtr-1, tok::unknown);
2009 CurPtr = LexUDSuffix(Result, CurPtr,
true);
2012 const char *TokStart = BufferPtr;
2013 FormTokenWithChars(Result, CurPtr, Kind);
2020 bool Lexer::LexAngledStringLiteral(
Token &Result,
const char *CurPtr) {
2022 const char *NulCharacter =
nullptr;
2023 const char *AfterLessPos = CurPtr;
2024 char C = getAndAdvanceChar(CurPtr, Result);
2029 C = getAndAdvanceChar(CurPtr, Result);
2031 if (C ==
'\n' || C ==
'\r' ||
2032 (C == 0 && (CurPtr-1 == BufferEnd ||
2033 isCodeCompletionPoint(CurPtr-1)))) {
2036 FormTokenWithChars(Result, AfterLessPos, tok::less);
2041 NulCharacter = CurPtr-1;
2043 C = getAndAdvanceChar(CurPtr, Result);
2048 Diag(NulCharacter, diag::null_in_char_or_string) << 1;
2051 const char *TokStart = BufferPtr;
2052 FormTokenWithChars(Result, CurPtr, tok::angle_string_literal);
2059 bool Lexer::LexCharConstant(
Token &Result,
const char *CurPtr,
2062 const char *NulCharacter =
nullptr;
2065 if (Kind == tok::utf16_char_constant || Kind == tok::utf32_char_constant)
2067 ? diag::warn_cxx98_compat_unicode_literal
2068 : diag::warn_c99_compat_unicode_literal);
2069 else if (Kind == tok::utf8_char_constant)
2070 Diag(BufferPtr, diag::warn_cxx14_compat_u8_character_literal);
2073 char C = getAndAdvanceChar(CurPtr, Result);
2076 Diag(BufferPtr, diag::ext_empty_character);
2077 FormTokenWithChars(Result, CurPtr, tok::unknown);
2084 C = getAndAdvanceChar(CurPtr, Result);
2086 if (C ==
'\n' || C ==
'\r' ||
2087 (C == 0 && CurPtr-1 == BufferEnd)) {
2089 Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 0;
2090 FormTokenWithChars(Result, CurPtr-1, tok::unknown);
2095 if (isCodeCompletionPoint(CurPtr-1)) {
2097 FormTokenWithChars(Result, CurPtr-1, tok::unknown);
2102 NulCharacter = CurPtr-1;
2104 C = getAndAdvanceChar(CurPtr, Result);
2109 CurPtr = LexUDSuffix(Result, CurPtr,
false);
2113 Diag(NulCharacter, diag::null_in_char_or_string) << 0;
2116 const char *TokStart = BufferPtr;
2117 FormTokenWithChars(Result, CurPtr, Kind);
2126 bool Lexer::SkipWhitespace(
Token &Result,
const char *CurPtr,
2127 bool &TokAtPhysicalStartOfLine) {
2131 unsigned char Char = *CurPtr;
2156 FormTokenWithChars(Result, CurPtr, tok::unknown);
2158 IsAtStartOfLine =
true;
2159 IsAtPhysicalStartOfLine =
true;
2166 char PrevChar = CurPtr[-1];
2172 TokAtPhysicalStartOfLine =
true;
2185 bool Lexer::SkipLineComment(
Token &Result,
const char *CurPtr,
2186 bool &TokAtPhysicalStartOfLine) {
2190 Diag(BufferPtr, diag::ext_line_comment);
2194 LangOpts.LineComment =
true;
2208 C !=
'\n' && C !=
'\r')
2211 const char *NextLine = CurPtr;
2214 const char *EscapePtr = CurPtr-1;
2215 bool HasSpace =
false;
2221 if (*EscapePtr ==
'\\')
2224 else if (EscapePtr[0] ==
'/' && EscapePtr[-1] ==
'?' &&
2225 EscapePtr[-2] ==
'?' && LangOpts.Trigraphs)
2227 CurPtr = EscapePtr-2;
2233 Diag(EscapePtr, diag::backslash_newline_space);
2240 const char *OldPtr = CurPtr;
2243 C = getAndAdvanceChar(CurPtr, Result);
2248 if (C != 0 && CurPtr == OldPtr+1) {
2256 if (CurPtr != OldPtr + 1 && C !=
'/' &&
2257 (CurPtr == BufferEnd + 1 || CurPtr[0] !=
'/')) {
2258 for (; OldPtr != CurPtr; ++OldPtr)
2259 if (OldPtr[0] ==
'\n' || OldPtr[0] ==
'\r') {
2263 const char *ForwardPtr = CurPtr;
2266 if (ForwardPtr[0] ==
'/' && ForwardPtr[1] ==
'/')
2271 Diag(OldPtr-1, diag::ext_multi_line_line_comment);
2276 if (C ==
'\r' || C ==
'\n' || CurPtr == BufferEnd + 1) {
2281 if (C ==
'\0' && isCodeCompletionPoint(CurPtr-1)) {
2299 return SaveLineComment(Result, CurPtr);
2317 TokAtPhysicalStartOfLine =
true;
2326 bool Lexer::SaveLineComment(
Token &Result,
const char *CurPtr) {
2329 FormTokenWithChars(Result, CurPtr, tok::comment);
2336 bool Invalid =
false;
2337 std::string Spelling = PP->
getSpelling(Result, &Invalid);
2341 assert(Spelling[0] ==
'/' && Spelling[1] ==
'/' &&
"Not line comment?");
2356 assert(CurPtr[0] ==
'\n' || CurPtr[0] ==
'\r');
2362 if (CurPtr[0] ==
'\n' || CurPtr[0] ==
'\r') {
2364 if (CurPtr[0] == CurPtr[1])
2372 bool HasSpace =
false;
2379 if (*CurPtr ==
'\\') {
2380 if (CurPtr[-1] !=
'*')
return false;
2383 if (CurPtr[0] !=
'/' || CurPtr[-1] !=
'?' || CurPtr[-2] !=
'?' ||
2394 L->
Diag(CurPtr, diag::trigraph_ignored_block_comment);
2398 L->
Diag(CurPtr, diag::trigraph_ends_block_comment);
2403 L->
Diag(CurPtr, diag::escaped_newline_block_comment_end);
2407 L->
Diag(CurPtr, diag::backslash_newline_space);
2428 bool Lexer::SkipBlockComment(
Token &Result,
const char *CurPtr,
2429 bool &TokAtPhysicalStartOfLine) {
2439 unsigned char C = getCharAndSize(CurPtr, CharSize);
2441 if (C == 0 && CurPtr == BufferEnd+1) {
2443 Diag(BufferPtr, diag::err_unterminated_block_comment);
2449 FormTokenWithChars(Result, CurPtr, tok::unknown);
2465 if (CurPtr + 24 < BufferEnd &&
2470 while (C !=
'/' && ((
intptr_t)CurPtr & 0x0F) != 0)
2473 if (C ==
'/')
goto FoundSlash;
2477 while (CurPtr+16 <= BufferEnd) {
2484 CurPtr += llvm::countTrailingZeros<unsigned>(cmp) + 1;
2490 __vector
unsigned char Slashes = {
2491 '/',
'/',
'/',
'/',
'/',
'/',
'/',
'/',
2492 '/',
'/',
'/',
'/',
'/',
'/',
'/',
'/' 2494 while (CurPtr+16 <= BufferEnd &&
2495 !
vec_any_eq(*(
const vector
unsigned char*)CurPtr, Slashes))
2499 while (CurPtr[0] !=
'/' &&
2503 CurPtr+4 < BufferEnd) {
2513 while (C !=
'/' && C !=
'\0')
2518 if (CurPtr[-2] ==
'*')
2521 if ((CurPtr[-2] ==
'\n' || CurPtr[-2] ==
'\r')) {
2528 if (CurPtr[0] ==
'*' && CurPtr[1] !=
'/') {
2533 Diag(CurPtr-1, diag::warn_nested_block_comment);
2535 }
else if (C == 0 && CurPtr == BufferEnd+1) {
2537 Diag(BufferPtr, diag::err_unterminated_block_comment);
2546 FormTokenWithChars(Result, CurPtr, tok::unknown);
2552 }
else if (C ==
'\0' && isCodeCompletionPoint(CurPtr-1)) {
2571 FormTokenWithChars(Result, CurPtr, tok::comment);
2580 SkipWhitespace(Result, CurPtr+1, TokAtPhysicalStartOfLine);
2598 "Must be in a preprocessing directive!");
2602 const char *CurPtr = BufferPtr;
2604 char Char = getAndAdvanceChar(CurPtr, Tmp);
2608 Result->push_back(Char);
2612 if (CurPtr-1 != BufferEnd) {
2613 if (isCodeCompletionPoint(CurPtr-1)) {
2621 Result->push_back(Char);
2629 assert(CurPtr[-1] == Char &&
"Trigraphs for newline?");
2630 BufferPtr = CurPtr-1;
2634 if (Tmp.
is(tok::code_completion)) {
2639 assert(Tmp.
is(tok::eod) &&
"Unexpected token!");
2651 bool Lexer::LexEndOfFile(
Token &Result,
const char *CurPtr) {
2659 FormTokenWithChars(Result, CurPtr, tok::eod);
2671 BufferPtr = BufferEnd;
2672 FormTokenWithChars(Result, BufferEnd,
tok::eof);
2687 diag::err_pp_unterminated_conditional);
2693 if (CurPtr != BufferStart && (CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')) {
2698 if (LangOpts.CPlusPlus11) {
2702 if (!Diags.
isIgnored(diag::warn_cxx98_compat_no_newline_eof, EndLoc)) {
2703 DiagID = diag::warn_cxx98_compat_no_newline_eof;
2705 DiagID = diag::warn_no_newline_eof;
2708 DiagID = diag::ext_no_newline_eof;
2711 Diag(BufferEnd, DiagID)
2725 unsigned Lexer::isNextPPTokenLParen() {
2726 assert(!
LexingRawMode &&
"How can we expand a macro from a skipping buffer?");
2734 const char *TmpBufferPtr = BufferPtr;
2736 bool atStartOfLine = IsAtStartOfLine;
2737 bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
2738 bool leadingSpace = HasLeadingSpace;
2744 BufferPtr = TmpBufferPtr;
2746 HasLeadingSpace = leadingSpace;
2747 IsAtStartOfLine = atStartOfLine;
2748 IsAtPhysicalStartOfLine = atPhysicalStartOfLine;
2755 return Tok.
is(tok::l_paren);
2761 const char *Terminator = CMK ==
CMK_Perforce ?
"<<<<\n" :
">>>>>>>";
2763 auto RestOfBuffer = StringRef(CurPtr, BufferEnd - CurPtr).substr(TermLen);
2764 size_t Pos = RestOfBuffer.find(Terminator);
2765 while (Pos != StringRef::npos) {
2768 (RestOfBuffer[Pos - 1] !=
'\r' && RestOfBuffer[Pos - 1] !=
'\n')) {
2769 RestOfBuffer = RestOfBuffer.substr(Pos+TermLen);
2770 Pos = RestOfBuffer.find(Terminator);
2773 return RestOfBuffer.data()+Pos;
2782 bool Lexer::IsStartOfConflictMarker(
const char *CurPtr) {
2784 if (CurPtr != BufferStart &&
2785 CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')
2789 if (!StringRef(CurPtr, BufferEnd - CurPtr).startswith(
"<<<<<<<") &&
2790 !StringRef(CurPtr, BufferEnd - CurPtr).startswith(
">>>> "))
2805 Diag(CurPtr, diag::err_conflict_marker);
2806 CurrentConflictMarkerState =
Kind;
2810 while (*CurPtr !=
'\r' && *CurPtr !=
'\n') {
2811 assert(CurPtr != BufferEnd &&
"Didn't find end of line");
2826 bool Lexer::HandleEndOfConflictMarker(
const char *CurPtr) {
2828 if (CurPtr != BufferStart &&
2829 CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')
2838 for (
unsigned i = 1; i != 4; ++i)
2839 if (CurPtr[i] != CurPtr[0])
2846 CurrentConflictMarkerState)) {
2850 while (CurPtr != BufferEnd && *CurPtr !=
'\r' && *CurPtr !=
'\n')
2856 CurrentConflictMarkerState =
CMK_None;
2864 const char *BufferEnd) {
2865 if (CurPtr == BufferEnd)
2868 for (; CurPtr != BufferEnd; ++CurPtr) {
2869 if (CurPtr[0] ==
'#' && CurPtr[1] ==
'>')
2875 bool Lexer::lexEditorPlaceholder(
Token &Result,
const char *CurPtr) {
2876 assert(CurPtr[-1] ==
'<' && CurPtr[0] ==
'#' &&
"Not a placeholder!");
2882 const char *Start = CurPtr - 1;
2883 if (!LangOpts.AllowEditorPlaceholders)
2884 Diag(Start, diag::err_placeholder_in_source);
2886 FormTokenWithChars(Result, End, tok::raw_identifier);
2894 bool Lexer::isCodeCompletionPoint(
const char *CurPtr)
const {
2903 uint32_t Lexer::tryReadUCN(
const char *&StartPtr,
const char *SlashLoc,
2906 char Kind = getCharAndSize(StartPtr, CharSize);
2908 unsigned NumHexDigits;
2911 else if (Kind ==
'U')
2916 if (!LangOpts.CPlusPlus && !LangOpts.C99) {
2918 Diag(SlashLoc, diag::warn_ucn_not_valid_in_c89);
2922 const char *CurPtr = StartPtr + CharSize;
2923 const char *KindLoc = &CurPtr[-1];
2925 uint32_t CodePoint = 0;
2926 for (
unsigned i = 0; i < NumHexDigits; ++i) {
2927 char C = getCharAndSize(CurPtr, CharSize);
2929 unsigned Value = llvm::hexDigitValue(C);
2933 Diag(BufferPtr, diag::warn_ucn_escape_no_digits)
2934 << StringRef(KindLoc, 1);
2936 Diag(BufferPtr, diag::warn_ucn_escape_incomplete);
2939 if (i == 4 && NumHexDigits == 8) {
2941 Diag(KindLoc, diag::note_ucn_four_not_eight)
2958 if (CurPtr - StartPtr == (
ptrdiff_t)NumHexDigits + 2)
2961 while (StartPtr != CurPtr)
2962 (void)getAndAdvanceChar(StartPtr, *Result);
2968 if (LangOpts.AsmPreprocessor)
2982 if (CodePoint < 0xA0) {
2983 if (CodePoint == 0x24 || CodePoint == 0x40 || CodePoint == 0x60)
2989 if (CodePoint < 0x20 || CodePoint >= 0x7F)
2990 Diag(BufferPtr, diag::err_ucn_control_character);
2992 char C =
static_cast<char>(CodePoint);
2993 Diag(BufferPtr, diag::err_ucn_escape_basic_scs) << StringRef(&C, 1);
2998 }
else if (CodePoint >= 0xD800 && CodePoint <= 0xDFFF) {
3003 if (LangOpts.CPlusPlus && !LangOpts.CPlusPlus11)
3004 Diag(BufferPtr, diag::warn_ucn_escape_surrogate);
3006 Diag(BufferPtr, diag::err_ucn_escape_invalid);
3014 bool Lexer::CheckUnicodeWhitespace(
Token &Result, uint32_t
C,
3015 const char *CurPtr) {
3016 static const llvm::sys::UnicodeCharSet UnicodeWhitespaceChars(
3019 UnicodeWhitespaceChars.contains(
C)) {
3020 Diag(BufferPtr, diag::ext_unicode_whitespace)
3029 bool Lexer::LexUnicode(
Token &Result, uint32_t
C,
const char *CurPtr) {
3039 return LexIdentifier(Result, CurPtr);
3054 Diag(BufferPtr, diag::err_non_ascii)
3064 FormTokenWithChars(Result, CurPtr, tok::unknown);
3068 void Lexer::PropagateLineStartLeadingSpaceInfo(
Token &Result) {
3075 bool Lexer::Lex(
Token &Result) {
3080 if (IsAtStartOfLine) {
3082 IsAtStartOfLine =
false;
3085 if (HasLeadingSpace) {
3087 HasLeadingSpace =
false;
3090 if (HasLeadingEmptyMacro) {
3092 HasLeadingEmptyMacro =
false;
3095 bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
3096 IsAtPhysicalStartOfLine =
false;
3099 bool returnedToken = LexTokenInternal(Result, atPhysicalStartOfLine);
3101 assert((returnedToken || !isRawLex) &&
"Raw lex must succeed");
3102 return returnedToken;
3110 bool Lexer::LexTokenInternal(
Token &Result,
bool TokAtPhysicalStartOfLine) {
3117 const char *CurPtr = BufferPtr;
3120 if ((*CurPtr ==
' ') || (*CurPtr ==
'\t')) {
3122 while ((*CurPtr ==
' ') || (*CurPtr ==
'\t'))
3129 FormTokenWithChars(Result, CurPtr, tok::unknown);
3138 unsigned SizeTmp, SizeTmp2;
3141 char Char = getAndAdvanceChar(CurPtr, Result);
3147 if (CurPtr-1 == BufferEnd)
3148 return LexEndOfFile(Result, CurPtr-1);
3151 if (isCodeCompletionPoint(CurPtr-1)) {
3154 FormTokenWithChars(Result, CurPtr, tok::code_completion);
3159 Diag(CurPtr-1, diag::null_in_file);
3161 if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
3170 if (LangOpts.MicrosoftExt) {
3172 Diag(CurPtr-1, diag::ext_ctrl_z_eof_microsoft);
3173 return LexEndOfFile(Result, CurPtr-1);
3177 Kind = tok::unknown;
3181 if (CurPtr[0] ==
'\n')
3182 Char = getAndAdvanceChar(CurPtr, Result);
3196 IsAtStartOfLine =
true;
3197 IsAtPhysicalStartOfLine =
true;
3206 if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
3216 SkipHorizontalWhitespace:
3218 if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
3227 LangOpts.LineComment &&
3228 (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP)) {
3229 if (SkipLineComment(Result, CurPtr+2, TokAtPhysicalStartOfLine))
3231 goto SkipIgnoredUnits;
3233 if (SkipBlockComment(Result, CurPtr+2, TokAtPhysicalStartOfLine))
3235 goto SkipIgnoredUnits;
3237 goto SkipHorizontalWhitespace;
3245 case '0':
case '1':
case '2':
case '3':
case '4':
3246 case '5':
case '6':
case '7':
case '8':
case '9':
3249 return LexNumericConstant(Result, CurPtr);
3255 if (LangOpts.CPlusPlus11 || LangOpts.C11) {
3256 Char = getCharAndSize(CurPtr, SizeTmp);
3260 return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3261 tok::utf16_string_literal);
3265 return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3266 tok::utf16_char_constant);
3269 if (Char ==
'R' && LangOpts.CPlusPlus11 &&
3270 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3271 return LexRawStringLiteral(Result,
3272 ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3274 tok::utf16_string_literal);
3277 char Char2 = getCharAndSize(CurPtr + SizeTmp, SizeTmp2);
3281 return LexStringLiteral(Result,
3282 ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3284 tok::utf8_string_literal);
3285 if (Char2 ==
'\'' && LangOpts.CPlusPlus17)
3286 return LexCharConstant(
3287 Result, ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3289 tok::utf8_char_constant);
3291 if (Char2 ==
'R' && LangOpts.CPlusPlus11) {
3293 char Char3 = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
3296 return LexRawStringLiteral(Result,
3297 ConsumeChar(ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3300 tok::utf8_string_literal);
3307 return LexIdentifier(Result, CurPtr);
3313 if (LangOpts.CPlusPlus11 || LangOpts.C11) {
3314 Char = getCharAndSize(CurPtr, SizeTmp);
3318 return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3319 tok::utf32_string_literal);
3323 return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3324 tok::utf32_char_constant);
3327 if (Char ==
'R' && LangOpts.CPlusPlus11 &&
3328 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3329 return LexRawStringLiteral(Result,
3330 ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3332 tok::utf32_string_literal);
3336 return LexIdentifier(Result, CurPtr);
3342 if (LangOpts.CPlusPlus11) {
3343 Char = getCharAndSize(CurPtr, SizeTmp);
3346 return LexRawStringLiteral(Result,
3347 ConsumeChar(CurPtr, SizeTmp, Result),
3348 tok::string_literal);
3352 return LexIdentifier(Result, CurPtr);
3357 Char = getCharAndSize(CurPtr, SizeTmp);
3361 return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3362 tok::wide_string_literal);
3365 if (LangOpts.CPlusPlus11 && Char ==
'R' &&
3366 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3367 return LexRawStringLiteral(Result,
3368 ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3370 tok::wide_string_literal);
3374 return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3375 tok::wide_char_constant);
3380 case 'A':
case 'B':
case 'C':
case 'D':
case 'E':
case 'F':
case 'G':
3381 case 'H':
case 'I':
case 'J':
case 'K':
case 'M':
case 'N':
3382 case 'O':
case 'P':
case 'Q':
case 'S':
case 'T':
3383 case 'V':
case 'W':
case 'X':
case 'Y':
case 'Z':
3384 case 'a':
case 'b':
case 'c':
case 'd':
case 'e':
case 'f':
case 'g':
3385 case 'h':
case 'i':
case 'j':
case 'k':
case 'l':
case 'm':
case 'n':
3386 case 'o':
case 'p':
case 'q':
case 'r':
case 's':
case 't':
3387 case 'v':
case 'w':
case 'x':
case 'y':
case 'z':
3391 return LexIdentifier(Result, CurPtr);
3394 if (LangOpts.DollarIdents) {
3396 Diag(CurPtr-1, diag::ext_dollar_in_identifier);
3399 return LexIdentifier(Result, CurPtr);
3402 Kind = tok::unknown;
3409 return LexCharConstant(Result, CurPtr, tok::char_constant);
3415 return LexStringLiteral(Result, CurPtr, tok::string_literal);
3419 Kind = tok::question;
3422 Kind = tok::l_square;
3425 Kind = tok::r_square;
3428 Kind = tok::l_paren;
3431 Kind = tok::r_paren;
3434 Kind = tok::l_brace;
3437 Kind = tok::r_brace;
3440 Char = getCharAndSize(CurPtr, SizeTmp);
3441 if (Char >=
'0' && Char <=
'9') {
3445 return LexNumericConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result));
3446 }
else if (LangOpts.CPlusPlus && Char ==
'*') {
3447 Kind = tok::periodstar;
3449 }
else if (Char ==
'.' &&
3450 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
'.') {
3451 Kind = tok::ellipsis;
3452 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3459 Char = getCharAndSize(CurPtr, SizeTmp);
3462 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3463 }
else if (Char ==
'=') {
3464 Kind = tok::ampequal;
3465 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3471 if (getCharAndSize(CurPtr, SizeTmp) ==
'=') {
3472 Kind = tok::starequal;
3473 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3479 Char = getCharAndSize(CurPtr, SizeTmp);
3481 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3482 Kind = tok::plusplus;
3483 }
else if (Char ==
'=') {
3484 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3485 Kind = tok::plusequal;
3491 Char = getCharAndSize(CurPtr, SizeTmp);
3493 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3494 Kind = tok::minusminus;
3495 }
else if (Char ==
'>' && LangOpts.CPlusPlus &&
3496 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
'*') {
3497 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3499 Kind = tok::arrowstar;
3500 }
else if (Char ==
'>') {
3501 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3503 }
else if (Char ==
'=') {
3504 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3505 Kind = tok::minusequal;
3514 if (getCharAndSize(CurPtr, SizeTmp) ==
'=') {
3515 Kind = tok::exclaimequal;
3516 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3518 Kind = tok::exclaim;
3523 Char = getCharAndSize(CurPtr, SizeTmp);
3533 bool TreatAsComment = LangOpts.LineComment &&
3534 (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP);
3535 if (!TreatAsComment)
3537 TreatAsComment = getCharAndSize(CurPtr+SizeTmp, SizeTmp2) !=
'*';
3539 if (TreatAsComment) {
3540 if (SkipLineComment(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3541 TokAtPhysicalStartOfLine))
3547 goto SkipIgnoredUnits;
3552 if (SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3553 TokAtPhysicalStartOfLine))
3562 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3563 Kind = tok::slashequal;
3569 Char = getCharAndSize(CurPtr, SizeTmp);
3571 Kind = tok::percentequal;
3572 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3573 }
else if (LangOpts.Digraphs && Char ==
'>') {
3574 Kind = tok::r_brace;
3575 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3576 }
else if (LangOpts.Digraphs && Char ==
':') {
3577 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3578 Char = getCharAndSize(CurPtr, SizeTmp);
3579 if (Char ==
'%' && getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
':') {
3580 Kind = tok::hashhash;
3581 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3583 }
else if (Char ==
'@' && LangOpts.MicrosoftExt) {
3584 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3586 Diag(BufferPtr, diag::ext_charize_microsoft);
3593 if (TokAtPhysicalStartOfLine && !
LexingRawMode && !Is_PragmaLexer)
3594 goto HandleDirective;
3599 Kind = tok::percent;
3603 Char = getCharAndSize(CurPtr, SizeTmp);
3605 return LexAngledStringLiteral(Result, CurPtr);
3606 }
else if (Char ==
'<') {
3607 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
3609 Kind = tok::lesslessequal;
3610 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3612 }
else if (After ==
'<' && IsStartOfConflictMarker(CurPtr-1)) {
3616 }
else if (After ==
'<' && HandleEndOfConflictMarker(CurPtr-1)) {
3620 }
else if (LangOpts.CUDA && After ==
'<') {
3621 Kind = tok::lesslessless;
3622 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3625 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3626 Kind = tok::lessless;
3628 }
else if (Char ==
'=') {
3629 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
3633 Diag(BufferPtr, diag::warn_cxx17_compat_spaceship);
3634 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3636 Kind = tok::spaceship;
3642 Diag(BufferPtr, diag::warn_cxx2a_compat_spaceship)
3647 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3648 Kind = tok::lessequal;
3649 }
else if (LangOpts.Digraphs && Char ==
':') {
3650 if (LangOpts.CPlusPlus11 &&
3651 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
':') {
3658 char After = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
3659 if (After !=
':' && After !=
'>') {
3662 Diag(BufferPtr, diag::warn_cxx98_compat_less_colon_colon);
3667 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3668 Kind = tok::l_square;
3669 }
else if (LangOpts.Digraphs && Char ==
'%') {
3670 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3671 Kind = tok::l_brace;
3672 }
else if (Char ==
'#' && SizeTmp == 1 &&
3673 lexEditorPlaceholder(Result, CurPtr)) {
3680 Char = getCharAndSize(CurPtr, SizeTmp);
3682 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3683 Kind = tok::greaterequal;
3684 }
else if (Char ==
'>') {
3685 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
3687 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3689 Kind = tok::greatergreaterequal;
3690 }
else if (After ==
'>' && IsStartOfConflictMarker(CurPtr-1)) {
3694 }
else if (After ==
'>' && HandleEndOfConflictMarker(CurPtr-1)) {
3697 }
else if (LangOpts.CUDA && After ==
'>') {
3698 Kind = tok::greatergreatergreater;
3699 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3702 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3703 Kind = tok::greatergreater;
3706 Kind = tok::greater;
3710 Char = getCharAndSize(CurPtr, SizeTmp);
3712 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3713 Kind = tok::caretequal;
3714 }
else if (LangOpts.OpenCL && Char ==
'^') {
3715 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3716 Kind = tok::caretcaret;
3722 Char = getCharAndSize(CurPtr, SizeTmp);
3724 Kind = tok::pipeequal;
3725 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3726 }
else if (Char ==
'|') {
3728 if (CurPtr[1] ==
'|' && HandleEndOfConflictMarker(CurPtr-1))
3730 Kind = tok::pipepipe;
3731 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3737 Char = getCharAndSize(CurPtr, SizeTmp);
3738 if (LangOpts.Digraphs && Char ==
'>') {
3739 Kind = tok::r_square;
3740 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3741 }
else if ((LangOpts.CPlusPlus ||
3742 LangOpts.DoubleSquareBracketAttributes) &&
3744 Kind = tok::coloncolon;
3745 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3754 Char = getCharAndSize(CurPtr, SizeTmp);
3757 if (CurPtr[1] ==
'=' && HandleEndOfConflictMarker(CurPtr-1))
3760 Kind = tok::equalequal;
3761 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3770 Char = getCharAndSize(CurPtr, SizeTmp);
3772 Kind = tok::hashhash;
3773 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3774 }
else if (Char ==
'@' && LangOpts.MicrosoftExt) {
3777 Diag(BufferPtr, diag::ext_charize_microsoft);
3778 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3784 if (TokAtPhysicalStartOfLine && !
LexingRawMode && !Is_PragmaLexer)
3785 goto HandleDirective;
3793 if (CurPtr[-1] ==
'@' && LangOpts.ObjC1)
3796 Kind = tok::unknown;
3801 if (!LangOpts.AsmPreprocessor) {
3802 if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) {
3803 if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
3804 if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
3812 return LexUnicode(Result, CodePoint, CurPtr);
3816 Kind = tok::unknown;
3821 Kind = tok::unknown;
3825 llvm::UTF32 CodePoint;
3830 const char *UTF8StartPtr = CurPtr;
3831 llvm::ConversionResult Status =
3832 llvm::convertUTF8Sequence((
const llvm::UTF8 **)&CurPtr,
3833 (
const llvm::UTF8 *)BufferEnd,
3835 llvm::strictConversion);
3836 if (Status == llvm::conversionOK) {
3837 if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
3838 if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
3848 return LexUnicode(Result, CodePoint, CurPtr);
3854 Kind = tok::unknown;
3861 Diag(CurPtr, diag::err_invalid_utf8);
3863 BufferPtr = CurPtr+1;
3875 FormTokenWithChars(Result, CurPtr, Kind);
3881 FormTokenWithChars(Result, CurPtr, tok::hash);
3886 assert(Result.
is(
tok::eof) &&
"Preprocessor did not set tok:eof");
SourceLocation getLocForStartOfFile(FileID FID) const
Return the source location corresponding to the first byte of the specified file. ...
Describes the bounds (start, size) of the preamble and a flag required by PreprocessorOptions::Precom...
static unsigned getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid=nullptr)
getSpelling - This method is used to get the spelling of a token into a preallocated buffer...
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens...
SourceLocation getLocWithOffset(int Offset) const
Return a source location with the specified offset from this SourceLocation.
This is a discriminated union of FileInfo and ExpansionInfo.
unsigned getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it...
SourceLocation getSpellingLoc() const
void setFlagValue(TokenFlags Flag, bool Val)
Set a flag to either true or false.
static const llvm::sys::UnicodeCharRange C11AllowedIDCharRanges[]
void setBegin(SourceLocation b)
static __inline__ int __ATTRS_o_ai vec_any_eq(vector signed char __a, vector signed char __b)
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {...
Defines the SourceManager interface.
LLVM_READNONE bool isASCII(char c)
Returns true if this is an ASCII character.
static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts)
bool isInPrimaryFile() const
Return true if we're in the top-level file, not in a #include.
const char * getCharacterData(SourceLocation SL, bool *Invalid=nullptr) const
Return a pointer to the start of the specified location in the appropriate spelling MemoryBuffer...
Each ExpansionInfo encodes the expansion location - where the token was ultimately expanded...
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
void setFlag(TokenFlags Flag)
Set the specified flag.
static char getCharAndSizeNoWarn(const char *Ptr, unsigned &Size, const LangOptions &LangOpts)
getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever emit a warning.
StringRef getBufferData(FileID FID, bool *Invalid=nullptr) const
Return a StringRef to the source buffer data for the specified FileID.
Lexer(FileID FID, const llvm::MemoryBuffer *InputBuffer, Preprocessor &PP)
Lexer constructor - Create a new lexer object for the specified buffer with the specified preprocesso...
bool hadModuleLoaderFatalFailure() const
static bool isAtStartOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroBegin=nullptr)
Returns true if the given MacroID location points at the first token of the macro expansion...
LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)
Returns true if this character is horizontal ASCII whitespace: ' ', '\t', '\f', '\v'.
bool isStringLiteral(TokenKind K)
Return true if this is a C or C++ string-literal (or C++11 user-defined-string-literal) token...
ConflictMarkerKind
ConflictMarkerKind - Kinds of conflict marker which the lexer might be recovering from...
static LLVM_ATTRIBUTE_NOINLINE SourceLocation GetMappedTokenLoc(Preprocessor &PP, SourceLocation FileLoc, unsigned CharNo, unsigned TokLen)
GetMappedTokenLoc - If lexing out of a 'mapped buffer', where we pretend the lexer buffer was all exp...
Like System, but searched after the system directories.
SourceLocation getCodeCompletionFileLoc() const
Returns the start location of the file of code-completion point.
static Lexer * Create_PragmaLexer(SourceLocation SpellingLoc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLen, Preprocessor &PP)
Create_PragmaLexer: Lexer constructor - Create a new lexer object for _Pragma expansion.
Defines the MultipleIncludeOpt interface.
bool isAnnotation() const
Return true if this is any of tok::annot_* kind tokens.
tok::TokenKind getKind() const
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
One of these records is kept for each identifier that is lexed.
static StringRef getIndentationForLine(SourceLocation Loc, const SourceManager &SM)
Returns the leading whitespace for the line that corresponds to the given location Loc.
SourceLocation getBegin() const
bool ParsingPreprocessorDirective
True when parsing #XXX; turns '\n' into a tok::eod token.
void setRawIdentifierData(const char *Ptr)
static SourceLocation getFromRawEncoding(unsigned Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
SmallVector< PPConditionalInfo, 4 > ConditionalStack
Information about the set of #if/#ifdef/#ifndef blocks we are currently in.
Token - This structure provides full information about a lexed token.
void setKind(tok::TokenKind K)
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
bool isPragmaLexer() const
isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.
void resetExtendedTokenMode()
Sets the extended token mode back to its initial value, according to the language options and preproc...
A Perforce-style conflict marker, initiated by 4 ">"s, separated by 4 "="s, and terminated by 4 "<"s...
bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const
Return true if we have an ObjC keyword identifier.
SourceLocation getSourceLocation() override
getSourceLocation - Return a source location for the next character in the current file...
bool isAtEndOfImmediateMacroExpansion(SourceLocation Loc, SourceLocation *MacroEnd=nullptr) const
Returns true if the given MacroID location points at the character end of the immediate macro expansi...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi8(char __b)
Initializes all values in a 128-bit vector of [16 x i8] with the specified 8-bit value.
static SourceLocation getBeginningOfFileToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
static bool isNewLineEscaped(const char *BufferStart, const char *Str)
Checks whether the new line pointed to by Str is preceded by an escape sequence.
LLVM_READONLY bool isWhitespace(unsigned char c)
Return true if this character is horizontal or vertical ASCII whitespace: ' ', '\t', '\f', '\v', '\n', '\r'.
StringRef getSpelling(SourceLocation loc, SmallVectorImpl< char > &buffer, bool *invalid=nullptr) const
Return the 'spelling' of the token at the given location; does not go up to the spelling location or ...
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified...
SourceLocation getExpansionLoc(SourceLocation Loc) const
Given a SourceLocation object Loc, return the expansion location referenced by the ID...
static bool getRawToken(SourceLocation Loc, Token &Result, const SourceManager &SM, const LangOptions &LangOpts, bool IgnoreWhiteSpace=false)
Relex the token at the specified location.
void HandleDirective(Token &Result)
Callback invoked when the lexer sees a # token at the start of a line.
Concrete class used by the front-end to report problems and issues.
Defines the Diagnostic-related interfaces.
SourceLocation getSpellingLoc(SourceLocation Loc) const
Given a SourceLocation object, return the spelling location referenced by the ID. ...
__INTPTR_TYPE__ intptr_t
A signed integer type with the property that any valid pointer to void can be converted to this type...
const FileID FID
The SourceManager FileID corresponding to the file being lexed.
LLVM_READONLY bool isRawStringDelimBody(unsigned char c)
Return true if this is the body character of a C++ raw string delimiter.
static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, Lexer *L)
isEndOfBlockCommentWithEscapedNewLine - Return true if the specified newline character (either \n or \r) ...
bool isMacroArgExpansion() const
static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix)
Determine whether a suffix is a valid ud-suffix.
static CharSourceRange makeCharRange(Lexer &L, const char *Begin, const char *End)
A little helper class used to produce diagnostics.
bool ParsingFilename
True after #include; turns <xx> into a tok::angle_string_literal token.
const LangOptions & getLangOpts() const
getLangOpts - Return the language features currently enabled.
static const llvm::sys::UnicodeCharRange C11DisallowedInitialIDCharRanges[]
bool isInFileID(SourceLocation Loc, FileID FID, unsigned *RelativeOffset=nullptr) const
Given a specific FileID, returns true if Loc is inside that FileID chunk and sets relative offset (of...
LLVM_READONLY bool isIdentifierHead(unsigned char c, bool AllowDollar=false)
Returns true if this is a valid first character of a C identifier, which is [a-zA-Z_].
static StringRef getSourceText(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts, bool *Invalid=nullptr)
Returns a string for the source that the range encompasses.
IdentifierInfo * LookUpIdentifierInfo(Token &Identifier) const
Given a tok::raw_identifier token, look up the identifier information for the token and install it in...
static bool isAtEndOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroEnd=nullptr)
Returns true if the given MacroID location points at the last token of the macro expansion.
bool isCodeCompletionEnabled() const
Determine if we are performing code completion.
SourceLocation getImmediateSpellingLoc(SourceLocation Loc) const
Given a SourceLocation object, return the spelling location referenced by the ID. ...
Defines the clang::LangOptions interface.
bool LexingRawMode
True if in raw mode.
static SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset, const SourceManager &SM, const LangOptions &LangOpts)
Computes the source location just past the end of the token at this source location.
Represents a character-granular source range.
bool isKeepWhitespaceMode() const
isKeepWhitespaceMode - Return true if the lexer should return tokens for every character in the file...
static PreambleBounds ComputePreamble(StringRef Buffer, const LangOptions &LangOpts, unsigned MaxLines=0)
Compute the preamble of the given file.
static unsigned MeasureTokenLength(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
MeasureTokenLength - Relex the token at the specified location and return its length in bytes in the ...
const FileEntry * getFileEntryForID(FileID FID) const
Returns the FileEntry record for the provided FileID.
const AnnotatedLine * Line
static SourceLocation findLocationAfterToken(SourceLocation loc, tok::TokenKind TKind, const SourceManager &SM, const LangOptions &LangOpts, bool SkipTrailingWhitespaceAndNewLine)
Checks that the given token is the first token that occurs after the given location (this excludes co...
bool hasLeadingEmptyMacro() const
Return true if this token has an empty macro before it.
SourceLocation getSourceLocation(const char *Loc, unsigned TokLen=1) const
getSourceLocation - Return a source location identifier for the specified offset in the current file...
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file. ...
Defines the clang::Preprocessor interface.
tok::ObjCKeywordKind getObjCKeywordID() const
Return the Objective-C keyword ID for this identifier.
MultipleIncludeOpt MIOpt
A state machine that detects the #ifndef-wrapping a file idiom for the multiple-include optimization...
void setEnd(SourceLocation e)
bool getCommentRetentionState() const
Defines the clang::IdentifierInfo, clang::IdentifierTable, and clang::Selector interfaces.
CharSourceRange getImmediateExpansionRange(SourceLocation Loc) const
Return the start/end of the expansion information for an expansion location.
static const char * findBeginningOfLine(StringRef Buffer, unsigned Offset)
Returns the pointer that points to the beginning of the line that contains the given offset, or null if the offset is invalid.
bool HandleEndOfFile(Token &Result, bool isEndOfMacro=false)
Callback invoked when the lexer hits the end of the current file.
The result type of a method or function.
float __ovld __cnfn length(float p)
Return the length of vector p, i.e., $\sqrt{p.x^2 + p.y^2 + \cdots}$
ObjCKeywordKind
Provides a namespace for Objective-C keywords which start with an '@'.
const ExpansionInfo & getExpansion() const
bool isRecordingPreamble() const
static CharSourceRange getCharRange(SourceRange R)
SourceManager & getSourceManager() const
bool isAtStartOfImmediateMacroExpansion(SourceLocation Loc, SourceLocation *MacroBegin=nullptr) const
Returns true if the given MacroID location points at the beginning of the immediate macro expansion...
llvm::MemoryBuffer * getBuffer(FileID FID, SourceLocation Loc, bool *Invalid=nullptr) const
Return the buffer for the specified FileID.
Encodes a location in the source.
static void maybeDiagnoseUTF8Homoglyph(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range)
After encountering UTF-8 character C and interpreting it as an identifier character, check whether it's a homoglyph for a common non-identifier source character that is unlikely to be an intentional identifier character and warn if so.
SourceLocation createExpansionLoc(SourceLocation Loc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLength, bool ExpansionIsTokenRange=true, int LoadedID=0, unsigned LoadedOffset=0)
Return a new SourceLocation that encodes the fact that a token from SpellingLoc should actually be re...
IdentifierInfo * getIdentifierInfo() const
static Optional< Token > findNextToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Finds the token that comes right after the given location.
static void maybeDiagnoseIDCharCompat(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range, bool IsFirst)
void setIdentifierInfo(IdentifierInfo *II)
static const llvm::sys::UnicodeCharRange C99DisallowedInitialIDCharRanges[]
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
static SourceLocation GetBeginningOfToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Given a location anywhere in a source buffer, find the location that corresponds to the beginning of...
static CharSourceRange makeRangeFromFileLocs(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
bool operator<(DeclarationName LHS, DeclarationName RHS)
Ordering on two declaration names.
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
SourceLocation getCodeCompletionLoc() const
Returns the location of the code-completion point.
static unsigned getTokenPrefixLength(SourceLocation TokStart, unsigned Characters, const SourceManager &SM, const LangOptions &LangOpts)
Get the physical length (including trigraphs and escaped newlines) of the first Characters characters...
SourceLocation getExpansionLocStart() const
DiagnosticBuilder Diag(const char *Loc, unsigned DiagID) const
Diag - Forwarding function for diagnostics.
__DEVICE__ void * memcpy(void *__a, const void *__b, size_t __c)
static const llvm::sys::UnicodeCharRange C99AllowedIDCharRanges[]
__PTRDIFF_TYPE__ ptrdiff_t
A signed integer type that is the result of subtracting two pointers.
static StringRef getImmediateMacroName(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
static StringRef getImmediateMacroNameForDiagnostics(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
bool inKeepCommentMode() const
inKeepCommentMode - Return true if the lexer should return comments as tokens.
bool isTokenRange() const
Return true if the end of this range specifies the start of the last token.
static const llvm::sys::UnicodeCharRange UnicodeWhitespaceCharRanges[]
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
Dataflow Directional Tag Classes.
bool isValid() const
Return true if this is a valid SourceLocation object.
LLVM_READONLY bool isVerticalWhitespace(unsigned char c)
Returns true if this character is vertical ASCII whitespace: '\n', '\r'.
static CharSourceRange makeFileCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Accepts a range and returns a character range with file locations.
static size_t getSpellingSlow(const Token &Tok, const char *BufPtr, const LangOptions &LangOpts, char *Spelling)
Slow case of getSpelling.
static FixItHint CreateRemoval(CharSourceRange RemoveRange)
Create a code modification hint that removes the given source range.
bool isHandleIdentifierCase() const
Return true if the Preprocessor::HandleIdentifier must be called on a token of this identifier...
bool isLexingRawMode() const
Return true if this lexer is in raw mode or not.
LLVM_READONLY bool isIdentifierBody(unsigned char c, bool AllowDollar=false)
Returns true if this is a body character of a C identifier, which is [a-zA-Z0-9_].
void CodeCompleteNaturalLanguage()
Hook used by the lexer to invoke the "natural language" code completion point.
unsigned getLength() const
static const char * findPlaceholderEnd(const char *CurPtr, const char *BufferEnd)
void setLiteralData(const char *Ptr)
const char * getLiteralData() const
getLiteralData - For a literal token (numeric constant, string, etc), this returns a pointer to the s...
FileID getFileID(SourceLocation SpellingLoc) const
Return the FileID for a SourceLocation.
static const llvm::sys::UnicodeCharRange CXX03AllowedIDCharRanges[]
bool isMacroArgExpansion(SourceLocation Loc, SourceLocation *StartLoc=nullptr) const
Tests whether the given source location represents a macro argument's expansion into the function-lik...
bool HandleIdentifier(Token &Identifier)
Callback invoked when the lexer reads an identifier and has filled in the token's IdentifierInfo membe...
void CreateString(StringRef Str, Token &Tok, SourceLocation ExpansionLocStart=SourceLocation(), SourceLocation ExpansionLocEnd=SourceLocation())
Plop the specified string into a scratch buffer and set the specified token's location and length to ...
static bool isAllowedInitiallyIDChar(uint32_t C, const LangOptions &LangOpts)
SourceLocation getEnd() const
static FixItHint CreateInsertion(SourceLocation InsertionLoc, StringRef Code, bool BeforePreviousInsertions=false)
Create a code modification hint that inserts the given code string at a specific location.
PreprocessorOptions & getPreprocessorOpts() const
Retrieve the preprocessor options used to initialize this preprocessor.
Defines the clang::TokenKind enum and support functions.
const SrcMgr::SLocEntry & getSLocEntry(FileID FID, bool *Invalid=nullptr) const
static char GetTrigraphCharForLetter(char Letter)
GetTrigraphCharForLetter - Given a character that occurs after a ?? pair, return the decoded trigraph...
static bool isIdentifierBodyChar(char c, const LangOptions &LangOpts)
Returns true if the given character could appear in an identifier.
bool HandleComment(Token &Token, SourceRange Comment)
void ReadToEndOfLine(SmallVectorImpl< char > *Result=nullptr)
ReadToEndOfLine - Read the rest of the current preprocessor line as an uninterpreted string...
Defines the clang::SourceLocation class and associated facilities.
DiagnosticsEngine & getDiagnostics() const
StringRef getRawIdentifier() const
getRawIdentifier - For a raw identifier token (i.e., an identifier lexed in raw mode), returns a reference to the text substring in the buffer if known.
Not within a conflict marker.
static char DecodeTrigraphChar(const char *CP, Lexer *L)
DecodeTrigraphChar - If the specified character is a legal trigraph when prefixed with ??...
static const char * FindConflictEnd(const char *CurPtr, const char *BufferEnd, ConflictMarkerKind CMK)
Find the end of a version control conflict marker.
static void StringifyImpl(T &Str, char Quote)
static FixItHint CreateReplacement(CharSourceRange RemoveRange, StringRef Code)
Create a code modification hint that replaces the given source range with the given code string...
void SetCommentRetentionState(bool Mode)
SetCommentRetentionState - Change the comment retention mode of the lexer to the specified mode...
bool needsCleaning() const
Return true if this token has trigraphs or escaped newlines in it.
static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a)
Copies the values of the most significant bits from each 8-bit element in a 128-bit integer vector of...
bool isIgnored(unsigned DiagID, SourceLocation Loc) const
Determine whether the diagnostic is known to be ignored.
A normal or diff3 conflict marker, initiated by at least 7 "<"s, separated by at least 7 "="s or "|"s...
A trivial tuple used to represent a source range.
void clearFlag(TokenFlags Flag)
Unset the specified flag.
bool hasUCN() const
Returns true if this token contains a universal character name.
bool isPreprocessedOutput() const
Returns true if the preprocessor is responsible for generating output, false if it is producing token...
void SetKeepWhitespaceMode(bool Val)
SetKeepWhitespaceMode - This method lets clients enable or disable whitespace retention mode...
LLVM_READONLY bool isPreprocessingNumberBody(unsigned char c)
Return true if this is the body character of a C preprocessing number, which is [a-zA-Z0-9_.].
bool hasLeadingSpace() const
Return true if this token has whitespace before it.
DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const
Forwarding function for diagnostics.
void setRecordedPreambleConditionalStack(ArrayRef< PPConditionalInfo > s)
This class handles loading and caching of source files into memory.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi8(__m128i __a, __m128i __b)
Compares each of the corresponding 8-bit values of the 128-bit integer vectors for equality...
void startToken()
Reset all flags to cleared.
std::pair< FileID, unsigned > getDecomposedLoc(SourceLocation Loc) const
Decompose the specified location into a raw FileID + Offset pair.
static std::string Stringify(StringRef Str, bool Charify=false)
Stringify - Convert the specified string into a C string by i) escaping '\' and " characters and ii) ...
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
bool LexEditorPlaceholders
When enabled, the preprocessor will construct editor placeholder tokens.