21 #include "llvm/ADT/StringSwitch.h" 22 #include "llvm/Support/MemoryBuffer.h" 25 using namespace clang;
39 : Out(Out), Tokens(Tokens), Input(Input), Diags(Diags),
40 InputSourceLoc(InputSourceLoc) {}
56 LLVM_NODISCARD IdInfo lexIdentifier(
const char *First,
const char *
const End);
57 LLVM_NODISCARD
bool isNextIdentifier(StringRef
Id,
const char *&First,
58 const char *
const End);
59 LLVM_NODISCARD
bool minimizeImpl(
const char *First,
const char *
const End);
60 LLVM_NODISCARD
bool lexPPLine(
const char *&First,
const char *
const End);
61 LLVM_NODISCARD
bool lexAt(
const char *&First,
const char *
const End);
62 LLVM_NODISCARD
bool lexDefine(
const char *&First,
const char *
const End);
63 LLVM_NODISCARD
bool lexPragma(
const char *&First,
const char *
const End);
64 LLVM_NODISCARD
bool lexEndif(
const char *&First,
const char *
const End);
66 const char *&First,
const char *
const End);
68 Tokens.emplace_back(K, Out.size());
72 Out.resize(Tokens.back().Offset);
77 Minimizer &put(
char Byte) {
81 Minimizer &append(StringRef S) {
return append(S.begin(), S.end()); }
82 Minimizer &append(
const char *First,
const char *Last) {
83 Out.append(First, Last);
87 void printToNewline(
const char *&First,
const char *
const End);
88 void printAdjacentModuleNameParts(
const char *&First,
const char *
const End);
89 LLVM_NODISCARD
bool printAtImportBody(
const char *&First,
90 const char *
const End);
91 void printDirectiveBody(
const char *&First,
const char *
const End);
92 void printAdjacentMacroArgs(
const char *&First,
const char *
const End);
93 LLVM_NODISCARD
bool printMacroArgs(
const char *&First,
const char *
const End);
97 bool reportError(
const char *CurPtr,
unsigned Err);
99 StringMap<char> SplitIds;
107 bool Minimizer::reportError(
const char *CurPtr,
unsigned Err) {
110 assert(CurPtr >= Input.data() &&
"invalid buffer ptr");
111 Diags->Report(InputSourceLoc.getLocWithOffset(CurPtr - Input.data()), Err);
121 const char *Current) {
122 assert(First <= Current);
125 if (*Current !=
'"' || First == Current)
136 if (*Current ==
'u' || *Current ==
'U' || *Current ==
'L')
140 if (*Current !=
'8' || First == Current || *Current-- !=
'u')
146 assert(First[0] ==
'"');
147 assert(First[-1] ==
'R');
149 const char *
Last = ++First;
150 while (Last != End && *Last !=
'(')
157 StringRef Terminator(First, Last - First);
161 while (First != End && *First !=
')')
169 while (Last != End &&
size_t(Last - First) < Terminator.size() &&
170 Terminator[Last - First] == *Last)
178 if (
size_t(Last - First) < Terminator.size())
188 assert(*First ==
'\'' || *First ==
'"');
189 const char Terminator = *First;
190 for (++First; First != End && *First != Terminator; ++First)
221 if (First[-1] !=
'\\')
229 assert(First <= Last);
236 assert(First[0] ==
'/' && First[1] ==
'/');
242 assert(First[0] ==
'/' && First[1] ==
'*');
243 if (End - First < 4) {
247 for (First += 3; First !=
End; ++First)
248 if (First[-1] ==
'*' && First[0] ==
'/') {
257 const char *
const Cur,
258 const char *
const End) {
259 assert(*Cur ==
'\'' &&
"expected quotation character");
267 char Prev = *(Cur - 1);
268 if (Prev ==
'L' || Prev ==
'U' || Prev ==
'u')
270 if (Prev ==
'8' && (Cur - 1 != Start) && *(Cur - 2) ==
'u')
280 assert(First <= End);
288 const char *Start = First;
301 if (*First !=
'/' || End - First < 2) {
306 if (First[1] ==
'/') {
312 if (First[1] !=
'*') {
326 }
while (First[-2] ==
'\\');
330 const char *
const End) {
331 if (llvm::StringSwitch<bool>(Name)
332 .Case(
"warning",
true)
341 void Minimizer::printToNewline(
const char *&
First,
const char *
const End) {
343 const char *
Last = First;
346 if (*Last ==
'"' || *Last ==
'\'') {
353 if (*Last !=
'/' || End - Last < 2) {
358 if (Last[1] !=
'/' && Last[1] !=
'*') {
367 if (Last[1] ==
'/') {
379 if (Last == End || Last == First || Last[-1] !=
'\\') {
395 assert(First <= End);
411 if (First[1] ==
'/') {
425 void Minimizer::printAdjacentModuleNameParts(
const char *&First,
426 const char *
const End) {
428 const char *
Last = First;
436 bool Minimizer::printAtImportBody(
const char *&First,
const char *
const End) {
449 put(*First++).put(
'\n');
457 printAdjacentModuleNameParts(First, End);
461 void Minimizer::printDirectiveBody(
const char *&First,
const char *
const End) {
463 printToNewline(First, End);
464 while (Out.back() ==
' ')
470 const char *
const End) {
472 const char *
Last = First + 1;
478 LLVM_NODISCARD
static const char *
490 Minimizer::IdInfo Minimizer::lexIdentifier(
const char *First,
491 const char *
const End) {
494 if (LLVM_LIKELY(!Next))
495 return IdInfo{Last, StringRef(First, Last - First)};
501 Id.append(Next, Last);
506 SplitIds.try_emplace(StringRef(Id.begin(), Id.size()), 0).first->first()};
509 void Minimizer::printAdjacentMacroArgs(
const char *&First,
510 const char *
const End) {
512 const char *
Last = First;
515 while (Last != End &&
521 bool Minimizer::printMacroArgs(
const char *&First,
const char *
const End) {
522 assert(*First ==
'(');
538 printAdjacentMacroArgs(First, End);
546 bool Minimizer::isNextIdentifier(StringRef
Id,
const char *&First,
547 const char *
const End) {
552 IdInfo FoundId = lexIdentifier(First, End);
553 First = FoundId.Last;
554 return FoundId.Name ==
Id;
557 bool Minimizer::lexAt(
const char *&First,
const char *
const End) {
559 const char *ImportLoc = First++;
560 if (!isNextIdentifier(
"import", First, End)) {
566 if (printAtImportBody(First, End))
568 ImportLoc, diag::err_dep_source_minimizer_missing_sema_after_at_import);
574 ImportLoc, diag::err_dep_source_minimizer_unexpected_tokens_at_import);
579 bool Minimizer::lexDefine(
const char *&First,
const char *
const End) {
585 return reportError(First, diag::err_pp_macro_not_identifier);
587 IdInfo Id = lexIdentifier(First, End);
588 const char *
Last = Id.Last;
593 size_t Size = Out.size();
594 if (printMacroArgs(Last, End)) {
598 append(
"(/* invalid */\n");
608 printDirectiveBody(Last, End);
613 bool Minimizer::lexPragma(
const char *&First,
const char *
const End) {
615 if (!isNextIdentifier(
"clang", First, End)) {
621 if (!isNextIdentifier(
"module", First, End)) {
627 if (!isNextIdentifier(
"import", First, End)) {
634 append(
"#pragma clang module import ");
635 printDirectiveBody(First, End);
639 bool Minimizer::lexEndif(
const char *&First,
const char *
const End) {
655 return lexDefault(
pp_endif,
"endif", First, End);
659 const char *&First,
const char *
const End) {
661 put(
'#').append(Directive).put(
' ');
662 printDirectiveBody(First, End);
666 bool Minimizer::lexPPLine(
const char *&First,
const char *
const End) {
667 assert(First != End);
670 assert(First <= End);
674 if (*First !=
'#' && *First !=
'@') {
676 assert(First <= End);
682 return lexAt(First, End);
689 return reportError(First, diag::err_pp_expected_eol);
697 IdInfo Id = lexIdentifier(First, End);
699 auto Kind = llvm::StringSwitch<TokenKind>(Id.Name)
720 return lexEndif(First, End);
723 return lexDefine(First, End);
726 return lexPragma(First, End);
729 return lexDefault(Kind, Id.Name, First, End);
732 bool Minimizer::minimizeImpl(
const char *First,
const char *
const End) {
734 if (lexPPLine(First, End))
739 bool Minimizer::minimize() {
740 bool Error = minimizeImpl(Input.begin(), Input.end());
744 if (!Out.empty() && Out.back() !=
'\n')
762 return Minimizer(Output, Tokens, Input, Diags, InputSourceLoc).minimize();
bool minimizeSourceToDependencyDirectives(llvm::StringRef Input, llvm::SmallVectorImpl< char > &Output, llvm::SmallVectorImpl< minimize_source_to_dependency_directives::Token > &Tokens, DiagnosticsEngine *Diags=nullptr, SourceLocation InputSourceLoc=SourceLocation())
Minimize the input down to the preprocessor directives that might have an effect on the dependencies ...
static bool isQuoteCppDigitSeparator(const char *const Start, const char *const Cur, const char *const End)
Specialize PointerLikeTypeTraits to allow LazyGenerationalUpdatePtr to be placed into a PointerUnion...
LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)
Returns true if this character is horizontal ASCII whitespace: ' ', '\t', '\f', '\v'.
static void skipNewline(const char *&First, const char *End)
Represents a simplified token that's lexed as part of the source minimization.
static void skipRawString(const char *&First, const char *const End)
Concrete class used by the front-end to report problems and issues.
Defines the Diagnostic-related interfaces.
static LLVM_NODISCARD const char * getIdentifierContinuation(const char *First, const char *const End)
static LLVM_NODISCARD bool isRawStringLiteral(const char *First, const char *Current)
LLVM_READONLY bool isIdentifierHead(unsigned char c, bool AllowDollar=false)
Returns true if this is a valid first character of a C identifier, which is [a-zA-Z_].
TokenKind
Represents the kind of preprocessor directive or a module declaration that is tracked by the source m...
static void skipWhitespace(const char *&First, const char *const End)
static void skipLineComment(const char *&First, const char *const End)
Encodes a location in the source.
static void skipBlockComment(const char *&First, const char *const End)
static void skipDirective(StringRef Name, const char *&First, const char *const End)
Dataflow Directional Tag Classes.
LLVM_READONLY bool isVerticalWhitespace(unsigned char c)
Returns true if this character is vertical ASCII whitespace: '\n', '\r'.
LLVM_READONLY bool isIdentifierBody(unsigned char c, bool AllowDollar=false)
Returns true if this is a body character of a C identifier, which is [a-zA-Z0-9_].
static const char * reverseOverSpaces(const char *First, const char *Last)
static LLVM_NODISCARD const char * lexRawIdentifier(const char *First, const char *const End)
static void skipOverSpaces(const char *&First, const char *const End)
static void skipToNewlineRaw(const char *&First, const char *const End)
static void skipLine(const char *&First, const char *const End)
This is the interface for minimizing header and source files to the minimum necessary preprocessor di...
Directive - Abstract class representing a parsed verify directive.
static void skipString(const char *&First, const char *const End)
LLVM_READONLY bool isPreprocessingNumberBody(unsigned char c)
Return true if this is the body character of a C preprocessing number, which is [a-zA-Z0-9_.].