20 #include "llvm/ADT/ArrayRef.h" 21 #include "llvm/ADT/None.h" 22 #include "llvm/ADT/Optional.h" 23 #include "llvm/ADT/STLExtras.h" 24 #include "llvm/Support/Debug.h" 25 #include "llvm/Support/ErrorHandling.h" 26 #include "llvm/Support/FormatVariadic.h" 27 #include "llvm/Support/raw_ostream.h" 35 using namespace clang;
40 : Location(Location), Length(Length), Kind(Kind) {
53 return llvm::StringRef(Start,
length());
57 assert(
location().isFileID() &&
"must be a spelled token");
67 auto F = First.
range(SM);
68 auto L = Last.
range(SM);
69 assert(F.file() == L.file() &&
"tokens from different files");
70 assert(F.endOffset() <= L.beginOffset() &&
"wrong order of tokens");
71 return FileRange(F.file(), F.beginOffset(), L.endOffset());
79 : File(File),
Begin(BeginOffset),
End(EndOffset) {
81 assert(BeginOffset <= EndOffset);
107 return OS << llvm::formatv(
"FileRange(file = {0}, offsets = {1}-{2})",
113 bool Invalid =
false;
117 assert(Begin <= Text.size());
118 assert(End <= Text.size());
119 return Text.substr(Begin,
length());
122 std::pair<const syntax::Token *, const TokenBuffer::Mapping *>
123 TokenBuffer::spelledForExpandedToken(
const syntax::Token *Expanded)
const {
125 assert(ExpandedTokens.data() <= Expanded &&
126 Expanded < ExpandedTokens.data() + ExpandedTokens.size());
128 auto FileIt = Files.find(
129 SourceMgr->getFileID(SourceMgr->getExpansionLoc(Expanded->
location())));
130 assert(FileIt != Files.end() &&
"no file for an expanded token");
132 const MarkedFile &File = FileIt->second;
134 unsigned ExpandedIndex = Expanded - ExpandedTokens.data();
136 auto It = llvm::partition_point(File.Mappings, [&](
const Mapping &M) {
137 return M.BeginExpanded <= ExpandedIndex;
140 if (It == File.Mappings.begin()) {
142 return {&File.SpelledTokens[ExpandedIndex - File.BeginExpanded],
nullptr};
147 if (ExpandedIndex < It->EndExpanded)
148 return {&File.SpelledTokens[It->BeginSpelled], &*It};
153 &File.SpelledTokens[It->EndSpelled + (ExpandedIndex - It->EndExpanded)],
158 auto It = Files.find(FID);
159 assert(It != Files.end());
160 return It->second.SpelledTokens;
163 std::string TokenBuffer::Mapping::str()
const {
164 return llvm::formatv(
"spelled tokens: [{0},{1}), expanded tokens: [{2},{3})",
165 BeginSpelled, EndSpelled, BeginExpanded, EndExpanded);
172 if (Expanded.empty())
178 const Mapping *BeginMapping;
179 std::tie(BeginSpelled, BeginMapping) =
180 spelledForExpandedToken(&Expanded.front());
183 const Mapping *LastMapping;
184 std::tie(LastSpelled, LastMapping) =
185 spelledForExpandedToken(&Expanded.back());
189 if (FID != SourceMgr->getFileID(LastSpelled->location()))
192 const MarkedFile &File = Files.find(FID)->second;
195 unsigned BeginExpanded = Expanded.begin() - ExpandedTokens.data();
196 unsigned EndExpanded = Expanded.end() - ExpandedTokens.data();
197 if (BeginMapping && BeginMapping->BeginExpanded < BeginExpanded)
199 if (LastMapping && EndExpanded < LastMapping->EndExpanded)
202 return llvm::makeArrayRef(
203 BeginMapping ? File.SpelledTokens.data() + BeginMapping->BeginSpelled
205 LastMapping ? File.SpelledTokens.data() + LastMapping->EndSpelled
213 auto FileIt = Files.find(SourceMgr->getFileID(Spelled->
location()));
214 assert(FileIt != Files.end() &&
"file not tracked by token buffer");
216 auto &File = FileIt->second;
217 assert(File.SpelledTokens.data() <= Spelled &&
218 Spelled < (File.SpelledTokens.data() + File.SpelledTokens.size()));
220 unsigned SpelledIndex = Spelled - File.SpelledTokens.data();
221 auto M = llvm::partition_point(File.Mappings, [&](
const Mapping &M) {
222 return M.BeginSpelled < SpelledIndex;
224 if (M == File.Mappings.end() || M->BeginSpelled != SpelledIndex)
228 E.
Spelled = llvm::makeArrayRef(File.SpelledTokens.data() + M->BeginSpelled,
229 File.SpelledTokens.data() + M->EndSpelled);
230 E.
Expanded = llvm::makeArrayRef(ExpandedTokens.data() + M->BeginExpanded,
231 ExpandedTokens.data() + M->EndExpanded);
237 std::vector<syntax::Token> Tokens;
241 if (T.getKind() == tok::raw_identifier && !T.needsCleaning() &&
244 T.setIdentifierInfo(&II);
253 while (!L.LexFromRawLexer(T))
281 (LastExpansionEnd.isValid() &&
282 Collector->PP.getSourceManager().isBeforeInTranslationUnit(
283 Range.
getBegin(), LastExpansionEnd)))
286 LastExpansionEnd = Range.
getEnd();
313 DEBUG_WITH_TYPE(
"collect-tokens", llvm::dbgs()
324 auto CB = llvm::make_unique<CollectPPExpansions>(*this);
325 this->Collector = CB.get();
333 Builder(std::vector<syntax::Token> Expanded, PPExpansions CollectedExpansions,
335 : Result(SM), CollectedExpansions(
std::move(CollectedExpansions)), SM(SM),
337 Result.ExpandedTokens = std::move(Expanded);
341 buildSpelledTokens();
351 assert(!Result.ExpandedTokens.empty());
352 assert(Result.ExpandedTokens.back().kind() ==
tok::eof);
353 for (
unsigned I = 0; I < Result.ExpandedTokens.size() - 1; ++I) {
355 processExpandedToken(I);
360 SM.
getFileID(Result.ExpandedTokens.back().location()));
362 Result.ExpandedTokens.back().location(),
363 Result.ExpandedTokens.size() - 1);
364 Result.Files[
SM.
getMainFileID()].EndExpanded = Result.ExpandedTokens.size();
368 fillGapsAtEndOfFiles();
370 return std::move(Result);
377 void processExpandedToken(
unsigned &I) {
378 auto L = Result.ExpandedTokens[I].location();
385 TokenBuffer::MarkedFile &File = Result.Files[FID];
387 fillGapUntil(File, L, I);
390 assert(File.SpelledTokens[NextSpelled[FID]].location() == L &&
391 "no corresponding token in the spelled stream");
401 void processMacroExpansion(
CharSourceRange SpelledRange,
unsigned &I) {
404 TokenBuffer::MarkedFile &File = Result.Files[FID];
406 fillGapUntil(File, SpelledRange.
getBegin(), I);
409 unsigned BeginExpanded = I;
410 for (; I + 1 < Result.ExpandedTokens.size(); ++I) {
411 auto NextL = Result.ExpandedTokens[I + 1].location();
412 if (!NextL.isMacroID() ||
416 unsigned EndExpanded = I + 1;
418 EndExpanded, NextSpelled[FID]);
423 void buildSpelledTokens() {
424 for (
unsigned I = 0; I < Result.ExpandedTokens.size(); ++I) {
427 auto It = Result.Files.try_emplace(FID);
428 TokenBuffer::MarkedFile &File = It.first->second;
430 File.EndExpanded = I + 1;
435 File.BeginExpanded = I;
436 File.SpelledTokens =
tokenize(FID,
SM, LangOpts);
440 void consumeEmptyMapping(TokenBuffer::MarkedFile &File,
unsigned EndOffset,
441 unsigned ExpandedIndex,
unsigned &SpelledIndex) {
442 consumeMapping(File, EndOffset, ExpandedIndex, ExpandedIndex, SpelledIndex);
448 void consumeMapping(TokenBuffer::MarkedFile &File,
unsigned EndOffset,
449 unsigned BeginExpanded,
unsigned EndExpanded,
450 unsigned &SpelledIndex) {
452 unsigned MappingBegin = SpelledIndex;
456 tryConsumeSpelledUntil(File, EndOffset + 1, SpelledIndex).hasValue();
458 assert(!HitMapping &&
"recursive macro expansion?");
460 TokenBuffer::Mapping M;
461 M.BeginExpanded = BeginExpanded;
462 M.EndExpanded = EndExpanded;
463 M.BeginSpelled = MappingBegin;
464 M.EndSpelled = SpelledIndex;
466 File.Mappings.push_back(M);
472 void fillGapUntil(TokenBuffer::MarkedFile &File,
SourceLocation L,
473 unsigned ExpandedIndex) {
479 unsigned &SpelledIndex = NextSpelled[FID];
480 unsigned MappingBegin = SpelledIndex;
482 auto EndLoc = tryConsumeSpelledUntil(File, Offset, SpelledIndex);
483 if (SpelledIndex != MappingBegin) {
484 TokenBuffer::Mapping M;
485 M.BeginSpelled = MappingBegin;
486 M.EndSpelled = SpelledIndex;
487 M.BeginExpanded = M.EndExpanded = ExpandedIndex;
488 File.Mappings.push_back(M);
495 MappingBegin = SpelledIndex;
506 tryConsumeSpelledUntil(TokenBuffer::MarkedFile &File,
unsigned Offset,
507 unsigned &NextSpelled) {
508 for (; NextSpelled < File.SpelledTokens.size(); ++NextSpelled) {
509 auto L = File.SpelledTokens[NextSpelled].location();
513 if (Mapping != CollectedExpansions.end())
514 return Mapping->second;
520 void fillGapsAtEndOfFiles() {
521 for (
auto &F : Result.Files) {
522 if (F.second.SpelledTokens.empty())
524 fillGapUntil(F.second, F.second.SpelledTokens.back().endLocation(),
525 F.second.EndExpanded);
531 llvm::DenseMap<FileID, unsigned> NextSpelled;
532 PPExpansions CollectedExpansions;
538 PP.setTokenWatcher(
nullptr);
539 Collector->disable();
540 return Builder(std::move(Expanded), std::move(Expansions),
541 PP.getSourceManager(), PP.getLangOpts())
555 auto PrintToken = [
this](
const syntax::Token &T) -> std::string {
558 return T.text(*SourceMgr);
561 auto DumpTokens = [
this, &PrintToken](llvm::raw_ostream &OS,
563 if (Tokens.empty()) {
567 OS << Tokens[0].text(*SourceMgr);
568 for (
unsigned I = 1; I < Tokens.size(); ++I) {
571 OS <<
" " << PrintToken(Tokens[I]);
576 llvm::raw_string_ostream OS(Dump);
578 OS <<
"expanded tokens:\n" 581 DumpTokens(OS, llvm::makeArrayRef(ExpandedTokens).drop_back());
584 std::vector<FileID> Keys;
586 Keys.push_back(F.first);
590 const MarkedFile &File = Files.find(
ID)->second;
591 auto *Entry = SourceMgr->getFileEntryForID(
ID);
594 OS << llvm::formatv(
"file '{0}'\n", Entry->getName())
595 <<
" spelled tokens:\n" 600 if (File.Mappings.empty()) {
601 OS <<
" no mappings.\n";
604 OS <<
" mappings:\n";
605 for (
auto &M : File.Mappings) {
607 " ['{0}'_{1}, '{2}'_{3}) => ['{4}'_{5}, '{6}'_{7})\n",
608 PrintToken(File.SpelledTokens[M.BeginSpelled]), M.BeginSpelled,
609 M.EndSpelled == File.SpelledTokens.size()
611 : PrintToken(File.SpelledTokens[M.EndSpelled]),
612 M.EndSpelled, PrintToken(ExpandedTokens[M.BeginExpanded]),
613 M.BeginExpanded, PrintToken(ExpandedTokens[M.EndExpanded]),
llvm::StringRef text(const SourceManager &SM) const
Gets the substring that this FileRange refers to.
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens...
unsigned getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it...
Defines the SourceManager interface.
llvm::raw_ostream & operator<<(llvm::raw_ostream &OS, NodeKind K)
For debugging purposes.
const char * getCharacterData(SourceLocation SL, bool *Invalid=nullptr) const
Return a pointer to the start of the specified location in the appropriate spelling MemoryBuffer...
A description of the current definition of a macro.
std::string str() const
For debugging purposes.
StringRef getBufferData(FileID FID, bool *Invalid=nullptr) const
Return a StringRef to the source buffer data for the specified FileID.
A token coming directly from a file or from a macro invocation.
FileRange(FileID File, unsigned BeginOffset, unsigned EndOffset)
EXPECTS: File.isValid() && Begin <= End.
llvm::Optional< llvm::ArrayRef< syntax::Token > > spelledForExpanded(llvm::ArrayRef< syntax::Token > Expanded) const
Find the subrange of spelled tokens that produced the corresponding Expanded tokens.
This interface provides a way to observe the actions of the preprocessor as it does its thing...
bool isAnnotation() const
Return true if this is any of tok::annot_* kind tokens.
Builder(std::vector< syntax::Token > Expanded, PPExpansions CollectedExpansions, const SourceManager &SM, const LangOptions &LangOpts)
One of these records is kept for each identifier that is lexed.
SourceLocation getBegin() const
unsigned getHashValue() const
Records information required to construct mappings for the token buffer that we are collecting...
Token - This structure provides full information about a lexed token.
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
llvm::ArrayRef< syntax::Token > Spelled
void disable()
Disabled instance will stop reporting anything to TokenCollector.
llvm::Optional< Expansion > expansionStartingAt(const syntax::Token *Spelled) const
If Spelled starts a mapping (e.g.
tok::TokenKind getTokenID() const
If this is a source-language token (e.g.
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified...
SourceLocation getExpansionLoc(SourceLocation Loc) const
Given a SourceLocation object Loc, return the expansion location referenced by the ID...
SourceLocation location() const
Location of the first character of a token.
Defines the Diagnostic-related interfaces.
void setTokenWatcher(llvm::unique_function< void(const clang::Token &)> F)
Register a function that would be called on each token in the final expanded token stream...
LLVM_NODISCARD TokenBuffer consume() &&
Finalizes token collection.
Dump out preprocessed tokens.
Defines the clang::LangOptions interface.
llvm::StringRef text(const SourceManager &SM) const
Get the substring covered by the token.
Represents a character-granular source range.
Implements an efficient mapping from strings to IdentifierInfo nodes.
MacroArgs - An instance of this class captures information about the formal arguments specified to a ...
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file. ...
Defines the clang::Preprocessor interface.
Defines the clang::IdentifierInfo, clang::IdentifierTable, and clang::Selector interfaces.
SourceLocation getEnd() const
float __ovld __cnfn length(float p)
Return the length of vector p, i.e., $\sqrt{p.x^2 + p.y^2 + \cdots}$
SourceManager & getSourceManager() const
TokenCollector(Preprocessor &P)
Adds the hooks to collect the tokens.
unsigned getFileOffset(SourceLocation SpellingLoc) const
Returns the offset from the start of the file that the specified SourceLocation represents.
Encodes a location in the source.
IdentifierInfo & get(StringRef Name)
Return the identifier token info for the specified named identifier.
std::string dumpForTests(const SourceManager &SM) const
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
const llvm::MemoryBuffer * getBuffer(FileID FID, SourceLocation Loc, bool *Invalid=nullptr) const
Return the buffer for the specified FileID.
unsigned beginOffset() const
Start is a start offset (inclusive) in the corresponding file.
Dataflow Directional Tag Classes.
bool isValid() const
Return true if this is a valid SourceLocation object.
A half-open character range inside a particular file, the start offset is included and the end offset...
FileID getMainFileID() const
Returns the FileID of the main source file.
FileID getFileID(SourceLocation SpellingLoc) const
Return the FileID for a SourceLocation.
CollectPPExpansions(TokenCollector &C)
Collects tokens for the main file while running the frontend action.
std::vector< syntax::Token > tokenize(FileID FID, const SourceManager &SM, const LangOptions &LO)
Lex the text buffer, corresponding to FID, in raw mode and record the resulting spelled tokens...
SourceLocation getEnd() const
CharSourceRange getExpansionRange(SourceLocation Loc) const
Given a SourceLocation object, return the range of tokens covered by the expansion in the ultimate fi...
Defines the PPCallbacks interface.
Defines the clang::TokenKind enum and support functions.
Defines the clang::SourceLocation class and associated facilities.
Builds mappings and spelled tokens in the TokenBuffer based on the expanded token stream...
An expansion produced by the preprocessor, includes macro expansions and preprocessor directives...
Token(SourceLocation Location, unsigned Length, tok::TokenKind Kind)
unsigned kind
All of the diagnostics that can be emitted by the frontend.
const char * getTokenName(TokenKind Kind) LLVM_READNONE
Determines the name of a token as used within the front end.
static Decl::Kind getKind(const Decl *D)
std::string dumpForTests() const
llvm::ArrayRef< syntax::Token > spelledTokens(FileID FID) const
Lexed tokens of a file before preprocessing.
A list of tokens obtained by preprocessing a text buffer and operations to map between the expanded a...
FileRange range(const SourceManager &SM) const
Gets a range of this token.
A trivial tuple used to represent a source range.
llvm::ArrayRef< syntax::Token > Expanded
unsigned endOffset() const
End offset (exclusive) in the corresponding file.
SourceLocation getBegin() const
void addPPCallbacks(std::unique_ptr< PPCallbacks > C)
This class handles loading and caching of source files into memory.
std::pair< FileID, unsigned > getDecomposedLoc(SourceLocation Loc) const
Decompose the specified location into a raw FileID + Offset pair.
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
void MacroExpands(const clang::Token &MacroNameTok, const MacroDefinition &MD, SourceRange Range, const MacroArgs *Args) override
Called by Preprocessor::HandleMacroExpandedIdentifier when a macro invocation is found.