14 #ifndef LLVM_CLANG_LEX_LEXER_H 15 #define LLVM_CLANG_LEX_LEXER_H 22 #include "llvm/ADT/Optional.h" 23 #include "llvm/ADT/SmallVector.h" 24 #include "llvm/ADT/StringRef.h" 37 class DiagnosticBuilder;
70 : Size(Size), PreambleEndsAtStartOfLine(PreambleEndsAtStartOfLine) {}
80 void anchor()
override;
86 const char *BufferStart;
89 const char *BufferEnd;
111 unsigned char ExtendedTokenMode;
120 const char *BufferPtr;
124 bool IsAtStartOfLine;
126 bool IsAtPhysicalStartOfLine;
128 bool HasLeadingSpace;
130 bool HasLeadingEmptyMacro;
135 void InitLexer(
const char *BufStart,
const char *BufPtr,
const char *BufEnd);
148 const char *BufStart,
const char *BufPtr,
const char *BufEnd);
153 Lexer(
FileID FID,
const llvm::MemoryBuffer *FromFile,
180 bool Lex(
Token &Result);
189 void IndirectLex(
Token &Result)
override { Lex(Result); }
196 assert(LexingRawMode &&
"Not already in raw mode!");
200 return BufferPtr == BufferEnd;
208 return ExtendedTokenMode > 1;
214 assert((!Val || LexingRawMode || LangOpts.TraditionalCPP) &&
215 "Can only retain whitespace in raw mode or -traditional-cpp");
216 ExtendedTokenMode = Val ? 2 : 0;
222 return ExtendedTokenMode > 0;
229 assert(!isKeepWhitespaceMode() &&
230 "Can't play with comment retention state when retaining whitespace");
231 ExtendedTokenMode = Mode ? 1 : 0;
240 void resetExtendedTokenMode();
244 return StringRef(BufferStart, BufferEnd - BufferStart);
258 SourceLocation getSourceLocation(
const char *Loc,
unsigned TokLen = 1)
const;
263 return getSourceLocation(BufferPtr);
272 static std::string Stringify(StringRef Str,
bool Charify =
false);
288 static unsigned getSpelling(
const Token &
Tok,
const char *&Buffer,
291 bool *Invalid =
nullptr);
298 static std::string getSpelling(
const Token &Tok,
301 bool *Invalid =
nullptr);
315 bool *invalid =
nullptr);
330 bool IgnoreWhiteSpace =
false);
354 getTokenPrefixLength(TokStart, Characters, SM, LangOpts));
385 : CharSourceRange::getCharRange(
450 bool *Invalid =
nullptr);
480 static StringRef getImmediateMacroNameForDiagnostics(
500 unsigned MaxLines = 0);
518 bool SkipTrailingWhitespaceAndNewLine);
521 static bool isIdentifierBodyChar(
char c,
const LangOptions &LangOpts);
525 static bool isNewLineEscaped(
const char *BufferStart,
const char *Str);
533 if (isObviouslySimpleCharacter(Ptr[0])) {
539 return getCharAndSizeSlowNoWarn(Ptr, Size, LangOpts);
554 bool LexTokenInternal(
Token &Result,
bool TokAtPhysicalStartOfLine);
556 bool CheckUnicodeWhitespace(
Token &Result, uint32_t
C,
const char *CurPtr);
561 bool LexUnicode(
Token &Result, uint32_t C,
const char *CurPtr);
568 void FormTokenWithChars(
Token &Result,
const char *TokEnd,
570 unsigned TokLen = TokEnd-BufferPtr;
572 Result.
setLocation(getSourceLocation(BufferPtr, TokLen));
580 unsigned isNextPPTokenLParen();
604 static bool isObviouslySimpleCharacter(
char C) {
605 return C !=
'?' && C !=
'\\';
612 inline char getAndAdvanceChar(
const char *&Ptr,
Token &Tok) {
615 if (isObviouslySimpleCharacter(Ptr[0]))
return *Ptr++;
618 char C = getCharAndSizeSlow(Ptr, Size, &Tok);
627 const char *ConsumeChar(
const char *Ptr,
unsigned Size,
Token &Tok) {
635 getCharAndSizeSlow(Ptr, Size, &Tok);
643 inline char getCharAndSize(
const char *Ptr,
unsigned &Size) {
646 if (isObviouslySimpleCharacter(Ptr[0])) {
652 return getCharAndSizeSlow(Ptr, Size);
657 char getCharAndSizeSlow(
const char *Ptr,
unsigned &Size,
658 Token *Tok =
nullptr);
663 static unsigned getEscapedNewLineSize(
const char *
P);
668 static const char *SkipEscapedNewLines(
const char *P);
672 static char getCharAndSizeSlowNoWarn(
const char *Ptr,
unsigned &Size,
678 void SetByteOffset(
unsigned Offset,
bool StartOfLine);
680 void PropagateLineStartLeadingSpaceInfo(
Token &Result);
682 const char *LexUDSuffix(
Token &Result,
const char *CurPtr,
683 bool IsStringLiteral);
686 bool LexIdentifier (
Token &Result,
const char *CurPtr);
687 bool LexNumericConstant (
Token &Result,
const char *CurPtr);
688 bool LexStringLiteral (
Token &Result,
const char *CurPtr,
690 bool LexRawStringLiteral (
Token &Result,
const char *CurPtr,
692 bool LexAngledStringLiteral(
Token &Result,
const char *CurPtr);
693 bool LexCharConstant (
Token &Result,
const char *CurPtr,
695 bool LexEndOfFile (
Token &Result,
const char *CurPtr);
696 bool SkipWhitespace (
Token &Result,
const char *CurPtr,
697 bool &TokAtPhysicalStartOfLine);
698 bool SkipLineComment (
Token &Result,
const char *CurPtr,
699 bool &TokAtPhysicalStartOfLine);
700 bool SkipBlockComment (
Token &Result,
const char *CurPtr,
701 bool &TokAtPhysicalStartOfLine);
702 bool SaveLineComment (
Token &Result,
const char *CurPtr);
704 bool IsStartOfConflictMarker(
const char *CurPtr);
705 bool HandleEndOfConflictMarker(
const char *CurPtr);
707 bool lexEditorPlaceholder(
Token &Result,
const char *CurPtr);
709 bool isCodeCompletionPoint(
const char *CurPtr)
const;
710 void cutOffLexing() { BufferPtr = BufferEnd; }
712 bool isHexaLiteral(
const char *Start,
const LangOptions &LangOpts);
714 void codeCompleteIncludedFile(
const char *PathStart,
715 const char *CompletionPoint,
bool IsAngled);
729 uint32_t tryReadUCN(
const char *&StartPtr,
const char *SlashLoc,
Token *Result);
742 bool tryConsumeIdentifierUCN(
const char *&CurPtr,
unsigned Size,
750 bool tryConsumeIdentifierUTF8Char(
const char *&CurPtr);
755 #endif // LLVM_CLANG_LEX_LEXER_H Describes the bounds (start, size) of the preamble and a flag required by PreprocessorOptions::Precom...
static DiagnosticBuilder Diag(DiagnosticsEngine *Diags, const LangOptions &Features, FullSourceLoc TokLoc, const char *TokBegin, const char *TokRangeBegin, const char *TokRangeEnd, unsigned DiagID)
Produce a diagnostic highlighting some portion of a literal.
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens...
SourceLocation getLocWithOffset(int Offset) const
Return a source location with the specified offset from this SourceLocation.
const char * getBufferLocation() const
Return the current location in the buffer.
DominatorTree GraphTraits specialization so the DominatorTree can be iterable by generic graph iterat...
bool LexFromRawLexer(Token &Result)
LexFromRawLexer - Lex a token from a designated raw lexer (one with no associated preprocessor object...
static char getCharAndSizeNoWarn(const char *Ptr, unsigned &Size, const LangOptions &LangOpts)
getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever emit a warning.
ConflictMarkerKind
ConflictMarkerKind - Kinds of conflict marker which the lexer might be recovering from...
static CharSourceRange getAsCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Token - This structure provides full information about a lexed token.
void setKind(tok::TokenKind K)
SourceLocation getFileLoc() const
getFileLoc - Return the File Location for the file we are lexing out of.
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
bool isPragmaLexer() const
isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.
A Perforce-style conflict marker, initiated by 4 ">"s, separated by 4 "="s, and terminated by 4 "<"s...
SourceLocation getSourceLocation() override
getSourceLocation - Return a source location for the next character in the current file...
StringRef getBuffer() const
Gets source code buffer.
static SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart, unsigned Characters, const SourceManager &SM, const LangOptions &LangOpts)
AdvanceToTokenCharacter - If the current SourceLocation specifies a location at the start of a token...
bool PreambleEndsAtStartOfLine
Whether the preamble ends at the start of a new line.
A little helper class used to produce diagnostics.
const LangOptions & getLangOpts() const
getLangOpts - Return the language features currently enabled.
Defines the clang::LangOptions interface.
Represents a character-granular source range.
bool isKeepWhitespaceMode() const
isKeepWhitespaceMode - Return true if the lexer should return tokens for every character in the file...
SourceLocation getEnd() const
Encodes a location in the source.
void setLength(unsigned Len)
PreambleBounds(unsigned Size, bool PreambleEndsAtStartOfLine)
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
bool inKeepCommentMode() const
inKeepCommentMode - Return true if the lexer should return comments as tokens.
bool isTokenRange() const
Return true if the end of this range specifies the start of the last token.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
SourceRange getAsRange() const
Dataflow Directional Tag Classes.
static CharSourceRange getAsCharRange(SourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Given a token range, produce a corresponding CharSourceRange that is not a token range.
unsigned Size
Size of the preamble in bytes.
Defines the clang::TokenKind enum and support functions.
Defines the clang::SourceLocation class and associated facilities.
Not within a conflict marker.
void SetCommentRetentionState(bool Mode)
SetCommentRetentionMode - Change the comment retention mode of the lexer to the specified mode...
void setLocation(SourceLocation L)
A normal or diff3 conflict marker, initiated by at least 7 "<"s, separated by at least 7 "="s or "|"s...
A trivial tuple used to represent a source range.
Defines the PreprocessorLexer interface.
void SetKeepWhitespaceMode(bool Val)
SetKeepWhitespaceMode - This method lets clients enable or disable whitespace retention mode...
SourceLocation getBegin() const
This class handles loading and caching of source files into memory.
Engages in a tight little dance with the lexer to efficiently preprocess tokens.