clang  5.0.0
PrintPreprocessedOutput.cpp
Go to the documentation of this file.
1 //===--- PrintPreprocessedOutput.cpp - Implement the -E mode --------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This code simply runs the preprocessor on the input file and prints out the
11 // result. This is the traditional behavior of the -E option.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "clang/Frontend/Utils.h"
16 #include "clang/Basic/CharInfo.h"
17 #include "clang/Basic/Diagnostic.h"
20 #include "clang/Lex/MacroInfo.h"
21 #include "clang/Lex/PPCallbacks.h"
22 #include "clang/Lex/Pragma.h"
23 #include "clang/Lex/Preprocessor.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/ADT/SmallString.h"
27 #include "llvm/ADT/StringRef.h"
28 #include "llvm/Support/ErrorHandling.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include <cstdio>
31 using namespace clang;
32 
33 /// PrintMacroDefinition - Print a macro definition in a form that will be
34 /// properly accepted back as a definition.
35 static void PrintMacroDefinition(const IdentifierInfo &II, const MacroInfo &MI,
36  Preprocessor &PP, raw_ostream &OS) {
37  OS << "#define " << II.getName();
38 
39  if (MI.isFunctionLike()) {
40  OS << '(';
41  if (!MI.param_empty()) {
43  for (; AI+1 != E; ++AI) {
44  OS << (*AI)->getName();
45  OS << ',';
46  }
47 
48  // Last argument.
49  if ((*AI)->getName() == "__VA_ARGS__")
50  OS << "...";
51  else
52  OS << (*AI)->getName();
53  }
54 
55  if (MI.isGNUVarargs())
56  OS << "..."; // #define foo(x...)
57 
58  OS << ')';
59  }
60 
61  // GCC always emits a space, even if the macro body is empty. However, do not
62  // want to emit two spaces if the first token has a leading space.
63  if (MI.tokens_empty() || !MI.tokens_begin()->hasLeadingSpace())
64  OS << ' ';
65 
66  SmallString<128> SpellingBuffer;
67  for (const auto &T : MI.tokens()) {
68  if (T.hasLeadingSpace())
69  OS << ' ';
70 
71  OS << PP.getSpelling(T, SpellingBuffer);
72  }
73 }
74 
75 //===----------------------------------------------------------------------===//
76 // Preprocessed token printer
77 //===----------------------------------------------------------------------===//
78 
79 namespace {
80 class PrintPPOutputPPCallbacks : public PPCallbacks {
81  Preprocessor &PP;
83  TokenConcatenation ConcatInfo;
84 public:
85  raw_ostream &OS;
86 private:
87  unsigned CurLine;
88 
89  bool EmittedTokensOnThisLine;
90  bool EmittedDirectiveOnThisLine;
92  SmallString<512> CurFilename;
93  bool Initialized;
94  bool DisableLineMarkers;
95  bool DumpDefines;
96  bool DumpIncludeDirectives;
97  bool UseLineDirectives;
98  bool IsFirstFileEntered;
99 public:
100  PrintPPOutputPPCallbacks(Preprocessor &pp, raw_ostream &os, bool lineMarkers,
101  bool defines, bool DumpIncludeDirectives,
102  bool UseLineDirectives)
103  : PP(pp), SM(PP.getSourceManager()), ConcatInfo(PP), OS(os),
104  DisableLineMarkers(lineMarkers), DumpDefines(defines),
105  DumpIncludeDirectives(DumpIncludeDirectives),
106  UseLineDirectives(UseLineDirectives) {
107  CurLine = 0;
108  CurFilename += "<uninit>";
109  EmittedTokensOnThisLine = false;
110  EmittedDirectiveOnThisLine = false;
111  FileType = SrcMgr::C_User;
112  Initialized = false;
113  IsFirstFileEntered = false;
114  }
115 
116  void setEmittedTokensOnThisLine() { EmittedTokensOnThisLine = true; }
117  bool hasEmittedTokensOnThisLine() const { return EmittedTokensOnThisLine; }
118 
119  void setEmittedDirectiveOnThisLine() { EmittedDirectiveOnThisLine = true; }
120  bool hasEmittedDirectiveOnThisLine() const {
121  return EmittedDirectiveOnThisLine;
122  }
123 
124  bool startNewLineIfNeeded(bool ShouldUpdateCurrentLine = true);
125 
126  void FileChanged(SourceLocation Loc, FileChangeReason Reason,
128  FileID PrevFID) override;
129  void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
130  StringRef FileName, bool IsAngled,
131  CharSourceRange FilenameRange, const FileEntry *File,
132  StringRef SearchPath, StringRef RelativePath,
133  const Module *Imported) override;
134  void Ident(SourceLocation Loc, StringRef str) override;
135  void PragmaMessage(SourceLocation Loc, StringRef Namespace,
136  PragmaMessageKind Kind, StringRef Str) override;
137  void PragmaDebug(SourceLocation Loc, StringRef DebugType) override;
138  void PragmaDiagnosticPush(SourceLocation Loc, StringRef Namespace) override;
139  void PragmaDiagnosticPop(SourceLocation Loc, StringRef Namespace) override;
140  void PragmaDiagnostic(SourceLocation Loc, StringRef Namespace,
141  diag::Severity Map, StringRef Str) override;
142  void PragmaWarning(SourceLocation Loc, StringRef WarningSpec,
143  ArrayRef<int> Ids) override;
144  void PragmaWarningPush(SourceLocation Loc, int Level) override;
145  void PragmaWarningPop(SourceLocation Loc) override;
146 
147  bool HandleFirstTokOnLine(Token &Tok);
148 
149  /// Move to the line of the provided source location. This will
150  /// return true if the output stream required adjustment or if
151  /// the requested location is on the first line.
152  bool MoveToLine(SourceLocation Loc) {
153  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
154  if (PLoc.isInvalid())
155  return false;
156  return MoveToLine(PLoc.getLine()) || (PLoc.getLine() == 1);
157  }
158  bool MoveToLine(unsigned LineNo);
159 
160  bool AvoidConcat(const Token &PrevPrevTok, const Token &PrevTok,
161  const Token &Tok) {
162  return ConcatInfo.AvoidConcat(PrevPrevTok, PrevTok, Tok);
163  }
164  void WriteLineInfo(unsigned LineNo, const char *Extra=nullptr,
165  unsigned ExtraLen=0);
166  bool LineMarkersAreDisabled() const { return DisableLineMarkers; }
167  void HandleNewlinesInToken(const char *TokStr, unsigned Len);
168 
169  /// MacroDefined - This hook is called whenever a macro definition is seen.
170  void MacroDefined(const Token &MacroNameTok,
171  const MacroDirective *MD) override;
172 
173  /// MacroUndefined - This hook is called whenever a macro #undef is seen.
174  void MacroUndefined(const Token &MacroNameTok,
175  const MacroDefinition &MD,
176  const MacroDirective *Undef) override;
177 
178  void BeginModule(const Module *M);
179  void EndModule(const Module *M);
180 };
181 } // end anonymous namespace
182 
183 void PrintPPOutputPPCallbacks::WriteLineInfo(unsigned LineNo,
184  const char *Extra,
185  unsigned ExtraLen) {
186  startNewLineIfNeeded(/*ShouldUpdateCurrentLine=*/false);
187 
188  // Emit #line directives or GNU line markers depending on what mode we're in.
189  if (UseLineDirectives) {
190  OS << "#line" << ' ' << LineNo << ' ' << '"';
191  OS.write_escaped(CurFilename);
192  OS << '"';
193  } else {
194  OS << '#' << ' ' << LineNo << ' ' << '"';
195  OS.write_escaped(CurFilename);
196  OS << '"';
197 
198  if (ExtraLen)
199  OS.write(Extra, ExtraLen);
200 
201  if (FileType == SrcMgr::C_System)
202  OS.write(" 3", 2);
203  else if (FileType == SrcMgr::C_ExternCSystem)
204  OS.write(" 3 4", 4);
205  }
206  OS << '\n';
207 }
208 
209 /// MoveToLine - Move the output to the source line specified by the location
210 /// object. We can do this by emitting some number of \n's, or be emitting a
211 /// #line directive. This returns false if already at the specified line, true
212 /// if some newlines were emitted.
213 bool PrintPPOutputPPCallbacks::MoveToLine(unsigned LineNo) {
214  // If this line is "close enough" to the original line, just print newlines,
215  // otherwise print a #line directive.
216  if (LineNo-CurLine <= 8) {
217  if (LineNo-CurLine == 1)
218  OS << '\n';
219  else if (LineNo == CurLine)
220  return false; // Spelling line moved, but expansion line didn't.
221  else {
222  const char *NewLines = "\n\n\n\n\n\n\n\n";
223  OS.write(NewLines, LineNo-CurLine);
224  }
225  } else if (!DisableLineMarkers) {
226  // Emit a #line or line marker.
227  WriteLineInfo(LineNo, nullptr, 0);
228  } else {
229  // Okay, we're in -P mode, which turns off line markers. However, we still
230  // need to emit a newline between tokens on different lines.
231  startNewLineIfNeeded(/*ShouldUpdateCurrentLine=*/false);
232  }
233 
234  CurLine = LineNo;
235  return true;
236 }
237 
238 bool
239 PrintPPOutputPPCallbacks::startNewLineIfNeeded(bool ShouldUpdateCurrentLine) {
240  if (EmittedTokensOnThisLine || EmittedDirectiveOnThisLine) {
241  OS << '\n';
242  EmittedTokensOnThisLine = false;
243  EmittedDirectiveOnThisLine = false;
244  if (ShouldUpdateCurrentLine)
245  ++CurLine;
246  return true;
247  }
248 
249  return false;
250 }
251 
252 /// FileChanged - Whenever the preprocessor enters or exits a #include file
253 /// it invokes this handler. Update our conception of the current source
254 /// position.
255 void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc,
256  FileChangeReason Reason,
257  SrcMgr::CharacteristicKind NewFileType,
258  FileID PrevFID) {
259  // Unless we are exiting a #include, make sure to skip ahead to the line the
260  // #include directive was at.
262 
263  PresumedLoc UserLoc = SourceMgr.getPresumedLoc(Loc);
264  if (UserLoc.isInvalid())
265  return;
266 
267  unsigned NewLine = UserLoc.getLine();
268 
269  if (Reason == PPCallbacks::EnterFile) {
270  SourceLocation IncludeLoc = UserLoc.getIncludeLoc();
271  if (IncludeLoc.isValid())
272  MoveToLine(IncludeLoc);
273  } else if (Reason == PPCallbacks::SystemHeaderPragma) {
274  // GCC emits the # directive for this directive on the line AFTER the
275  // directive and emits a bunch of spaces that aren't needed. This is because
276  // otherwise we will emit a line marker for THIS line, which requires an
277  // extra blank line after the directive to avoid making all following lines
278  // off by one. We can do better by simply incrementing NewLine here.
279  NewLine += 1;
280  }
281 
282  CurLine = NewLine;
283 
284  CurFilename.clear();
285  CurFilename += UserLoc.getFilename();
286  FileType = NewFileType;
287 
288  if (DisableLineMarkers) {
289  startNewLineIfNeeded(/*ShouldUpdateCurrentLine=*/false);
290  return;
291  }
292 
293  if (!Initialized) {
294  WriteLineInfo(CurLine);
295  Initialized = true;
296  }
297 
298  // Do not emit an enter marker for the main file (which we expect is the first
299  // entered file). This matches gcc, and improves compatibility with some tools
300  // which track the # line markers as a way to determine when the preprocessed
301  // output is in the context of the main file.
302  if (Reason == PPCallbacks::EnterFile && !IsFirstFileEntered) {
303  IsFirstFileEntered = true;
304  return;
305  }
306 
307  switch (Reason) {
309  WriteLineInfo(CurLine, " 1", 2);
310  break;
312  WriteLineInfo(CurLine, " 2", 2);
313  break;
316  WriteLineInfo(CurLine);
317  break;
318  }
319 }
320 
321 void PrintPPOutputPPCallbacks::InclusionDirective(SourceLocation HashLoc,
322  const Token &IncludeTok,
323  StringRef FileName,
324  bool IsAngled,
325  CharSourceRange FilenameRange,
326  const FileEntry *File,
327  StringRef SearchPath,
328  StringRef RelativePath,
329  const Module *Imported) {
330  // In -dI mode, dump #include directives prior to dumping their content or
331  // interpretation.
332  if (DumpIncludeDirectives) {
333  startNewLineIfNeeded();
334  MoveToLine(HashLoc);
335  const std::string TokenText = PP.getSpelling(IncludeTok);
336  assert(!TokenText.empty());
337  OS << "#" << TokenText << " "
338  << (IsAngled ? '<' : '"') << FileName << (IsAngled ? '>' : '"')
339  << " /* clang -E -dI */";
340  setEmittedDirectiveOnThisLine();
341  startNewLineIfNeeded();
342  }
343 
344  // When preprocessing, turn implicit imports into module import pragmas.
345  if (Imported) {
346  switch (IncludeTok.getIdentifierInfo()->getPPKeywordID()) {
347  case tok::pp_include:
348  case tok::pp_import:
349  case tok::pp_include_next:
350  startNewLineIfNeeded();
351  MoveToLine(HashLoc);
352  OS << "#pragma clang module import " << Imported->getFullModuleName(true)
353  << " /* clang -E: implicit import for "
354  << "#" << PP.getSpelling(IncludeTok) << " "
355  << (IsAngled ? '<' : '"') << FileName << (IsAngled ? '>' : '"')
356  << " */";
357  // Since we want a newline after the pragma, but not a #<line>, start a
358  // new line immediately.
359  EmittedTokensOnThisLine = true;
360  startNewLineIfNeeded();
361  break;
362 
363  case tok::pp___include_macros:
364  // #__include_macros has no effect on a user of a preprocessed source
365  // file; the only effect is on preprocessing.
366  //
367  // FIXME: That's not *quite* true: it causes the module in question to
368  // be loaded, which can affect downstream diagnostics.
369  break;
370 
371  default:
372  llvm_unreachable("unknown include directive kind");
373  break;
374  }
375  }
376 }
377 
378 /// Handle entering the scope of a module during a module compilation.
379 void PrintPPOutputPPCallbacks::BeginModule(const Module *M) {
380  startNewLineIfNeeded();
381  OS << "#pragma clang module begin " << M->getFullModuleName(true);
382  setEmittedDirectiveOnThisLine();
383 }
384 
385 /// Handle leaving the scope of a module during a module compilation.
386 void PrintPPOutputPPCallbacks::EndModule(const Module *M) {
387  startNewLineIfNeeded();
388  OS << "#pragma clang module end /*" << M->getFullModuleName(true) << "*/";
389  setEmittedDirectiveOnThisLine();
390 }
391 
392 /// Ident - Handle #ident directives when read by the preprocessor.
393 ///
394 void PrintPPOutputPPCallbacks::Ident(SourceLocation Loc, StringRef S) {
395  MoveToLine(Loc);
396 
397  OS.write("#ident ", strlen("#ident "));
398  OS.write(S.begin(), S.size());
399  EmittedTokensOnThisLine = true;
400 }
401 
402 /// MacroDefined - This hook is called whenever a macro definition is seen.
403 void PrintPPOutputPPCallbacks::MacroDefined(const Token &MacroNameTok,
404  const MacroDirective *MD) {
405  const MacroInfo *MI = MD->getMacroInfo();
406  // Only print out macro definitions in -dD mode.
407  if (!DumpDefines ||
408  // Ignore __FILE__ etc.
409  MI->isBuiltinMacro()) return;
410 
411  MoveToLine(MI->getDefinitionLoc());
412  PrintMacroDefinition(*MacroNameTok.getIdentifierInfo(), *MI, PP, OS);
413  setEmittedDirectiveOnThisLine();
414 }
415 
416 void PrintPPOutputPPCallbacks::MacroUndefined(const Token &MacroNameTok,
417  const MacroDefinition &MD,
418  const MacroDirective *Undef) {
419  // Only print out macro definitions in -dD mode.
420  if (!DumpDefines) return;
421 
422  MoveToLine(MacroNameTok.getLocation());
423  OS << "#undef " << MacroNameTok.getIdentifierInfo()->getName();
424  setEmittedDirectiveOnThisLine();
425 }
426 
427 static void outputPrintable(raw_ostream &OS, StringRef Str) {
428  for (unsigned char Char : Str) {
429  if (isPrintable(Char) && Char != '\\' && Char != '"')
430  OS << (char)Char;
431  else // Output anything hard as an octal escape.
432  OS << '\\'
433  << (char)('0' + ((Char >> 6) & 7))
434  << (char)('0' + ((Char >> 3) & 7))
435  << (char)('0' + ((Char >> 0) & 7));
436  }
437 }
438 
439 void PrintPPOutputPPCallbacks::PragmaMessage(SourceLocation Loc,
440  StringRef Namespace,
441  PragmaMessageKind Kind,
442  StringRef Str) {
443  startNewLineIfNeeded();
444  MoveToLine(Loc);
445  OS << "#pragma ";
446  if (!Namespace.empty())
447  OS << Namespace << ' ';
448  switch (Kind) {
449  case PMK_Message:
450  OS << "message(\"";
451  break;
452  case PMK_Warning:
453  OS << "warning \"";
454  break;
455  case PMK_Error:
456  OS << "error \"";
457  break;
458  }
459 
460  outputPrintable(OS, Str);
461  OS << '"';
462  if (Kind == PMK_Message)
463  OS << ')';
464  setEmittedDirectiveOnThisLine();
465 }
466 
467 void PrintPPOutputPPCallbacks::PragmaDebug(SourceLocation Loc,
468  StringRef DebugType) {
469  startNewLineIfNeeded();
470  MoveToLine(Loc);
471 
472  OS << "#pragma clang __debug ";
473  OS << DebugType;
474 
475  setEmittedDirectiveOnThisLine();
476 }
477 
478 void PrintPPOutputPPCallbacks::
479 PragmaDiagnosticPush(SourceLocation Loc, StringRef Namespace) {
480  startNewLineIfNeeded();
481  MoveToLine(Loc);
482  OS << "#pragma " << Namespace << " diagnostic push";
483  setEmittedDirectiveOnThisLine();
484 }
485 
486 void PrintPPOutputPPCallbacks::
487 PragmaDiagnosticPop(SourceLocation Loc, StringRef Namespace) {
488  startNewLineIfNeeded();
489  MoveToLine(Loc);
490  OS << "#pragma " << Namespace << " diagnostic pop";
491  setEmittedDirectiveOnThisLine();
492 }
493 
494 void PrintPPOutputPPCallbacks::PragmaDiagnostic(SourceLocation Loc,
495  StringRef Namespace,
497  StringRef Str) {
498  startNewLineIfNeeded();
499  MoveToLine(Loc);
500  OS << "#pragma " << Namespace << " diagnostic ";
501  switch (Map) {
503  OS << "remark";
504  break;
506  OS << "warning";
507  break;
509  OS << "error";
510  break;
512  OS << "ignored";
513  break;
515  OS << "fatal";
516  break;
517  }
518  OS << " \"" << Str << '"';
519  setEmittedDirectiveOnThisLine();
520 }
521 
522 void PrintPPOutputPPCallbacks::PragmaWarning(SourceLocation Loc,
523  StringRef WarningSpec,
524  ArrayRef<int> Ids) {
525  startNewLineIfNeeded();
526  MoveToLine(Loc);
527  OS << "#pragma warning(" << WarningSpec << ':';
528  for (ArrayRef<int>::iterator I = Ids.begin(), E = Ids.end(); I != E; ++I)
529  OS << ' ' << *I;
530  OS << ')';
531  setEmittedDirectiveOnThisLine();
532 }
533 
534 void PrintPPOutputPPCallbacks::PragmaWarningPush(SourceLocation Loc,
535  int Level) {
536  startNewLineIfNeeded();
537  MoveToLine(Loc);
538  OS << "#pragma warning(push";
539  if (Level >= 0)
540  OS << ", " << Level;
541  OS << ')';
542  setEmittedDirectiveOnThisLine();
543 }
544 
545 void PrintPPOutputPPCallbacks::PragmaWarningPop(SourceLocation Loc) {
546  startNewLineIfNeeded();
547  MoveToLine(Loc);
548  OS << "#pragma warning(pop)";
549  setEmittedDirectiveOnThisLine();
550 }
551 
552 /// HandleFirstTokOnLine - When emitting a preprocessed file in -E mode, this
553 /// is called for the first token on each new line. If this really is the start
554 /// of a new logical line, handle it and return true, otherwise return false.
555 /// This may not be the start of a logical line because the "start of line"
556 /// marker is set for spelling lines, not expansion ones.
557 bool PrintPPOutputPPCallbacks::HandleFirstTokOnLine(Token &Tok) {
558  // Figure out what line we went to and insert the appropriate number of
559  // newline characters.
560  if (!MoveToLine(Tok.getLocation()))
561  return false;
562 
563  // Print out space characters so that the first token on a line is
564  // indented for easy reading.
565  unsigned ColNo = SM.getExpansionColumnNumber(Tok.getLocation());
566 
567  // The first token on a line can have a column number of 1, yet still expect
568  // leading white space, if a macro expansion in column 1 starts with an empty
569  // macro argument, or an empty nested macro expansion. In this case, move the
570  // token to column 2.
571  if (ColNo == 1 && Tok.hasLeadingSpace())
572  ColNo = 2;
573 
574  // This hack prevents stuff like:
575  // #define HASH #
576  // HASH define foo bar
577  // From having the # character end up at column 1, which makes it so it
578  // is not handled as a #define next time through the preprocessor if in
579  // -fpreprocessed mode.
580  if (ColNo <= 1 && Tok.is(tok::hash))
581  OS << ' ';
582 
583  // Otherwise, indent the appropriate number of spaces.
584  for (; ColNo > 1; --ColNo)
585  OS << ' ';
586 
587  return true;
588 }
589 
590 void PrintPPOutputPPCallbacks::HandleNewlinesInToken(const char *TokStr,
591  unsigned Len) {
592  unsigned NumNewlines = 0;
593  for (; Len; --Len, ++TokStr) {
594  if (*TokStr != '\n' &&
595  *TokStr != '\r')
596  continue;
597 
598  ++NumNewlines;
599 
600  // If we have \n\r or \r\n, skip both and count as one line.
601  if (Len != 1 &&
602  (TokStr[1] == '\n' || TokStr[1] == '\r') &&
603  TokStr[0] != TokStr[1]) {
604  ++TokStr;
605  --Len;
606  }
607  }
608 
609  if (NumNewlines == 0) return;
610 
611  CurLine += NumNewlines;
612 }
613 
614 
615 namespace {
616 struct UnknownPragmaHandler : public PragmaHandler {
617  const char *Prefix;
618  PrintPPOutputPPCallbacks *Callbacks;
619 
620  // Set to true if tokens should be expanded
621  bool ShouldExpandTokens;
622 
623  UnknownPragmaHandler(const char *prefix, PrintPPOutputPPCallbacks *callbacks,
624  bool RequireTokenExpansion)
625  : Prefix(prefix), Callbacks(callbacks),
626  ShouldExpandTokens(RequireTokenExpansion) {}
627  void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer,
628  Token &PragmaTok) override {
629  // Figure out what line we went to and insert the appropriate number of
630  // newline characters.
631  Callbacks->startNewLineIfNeeded();
632  Callbacks->MoveToLine(PragmaTok.getLocation());
633  Callbacks->OS.write(Prefix, strlen(Prefix));
634 
635  if (ShouldExpandTokens) {
636  // The first token does not have expanded macros. Expand them, if
637  // required.
638  auto Toks = llvm::make_unique<Token[]>(1);
639  Toks[0] = PragmaTok;
640  PP.EnterTokenStream(std::move(Toks), /*NumToks=*/1,
641  /*DisableMacroExpansion=*/false);
642  PP.Lex(PragmaTok);
643  }
644  Token PrevToken;
645  Token PrevPrevToken;
646  PrevToken.startToken();
647  PrevPrevToken.startToken();
648 
649  // Read and print all of the pragma tokens.
650  while (PragmaTok.isNot(tok::eod)) {
651  if (PragmaTok.hasLeadingSpace() ||
652  Callbacks->AvoidConcat(PrevPrevToken, PrevToken, PragmaTok))
653  Callbacks->OS << ' ';
654  std::string TokSpell = PP.getSpelling(PragmaTok);
655  Callbacks->OS.write(&TokSpell[0], TokSpell.size());
656 
657  PrevPrevToken = PrevToken;
658  PrevToken = PragmaTok;
659 
660  if (ShouldExpandTokens)
661  PP.Lex(PragmaTok);
662  else
663  PP.LexUnexpandedToken(PragmaTok);
664  }
665  Callbacks->setEmittedDirectiveOnThisLine();
666  }
667 };
668 } // end anonymous namespace
669 
670 
672  PrintPPOutputPPCallbacks *Callbacks,
673  raw_ostream &OS) {
674  bool DropComments = PP.getLangOpts().TraditionalCPP &&
676 
677  char Buffer[256];
678  Token PrevPrevTok, PrevTok;
679  PrevPrevTok.startToken();
680  PrevTok.startToken();
681  while (1) {
682  if (Callbacks->hasEmittedDirectiveOnThisLine()) {
683  Callbacks->startNewLineIfNeeded();
684  Callbacks->MoveToLine(Tok.getLocation());
685  }
686 
687  // If this token is at the start of a line, emit newlines if needed.
688  if (Tok.isAtStartOfLine() && Callbacks->HandleFirstTokOnLine(Tok)) {
689  // done.
690  } else if (Tok.hasLeadingSpace() ||
691  // If we haven't emitted a token on this line yet, PrevTok isn't
692  // useful to look at and no concatenation could happen anyway.
693  (Callbacks->hasEmittedTokensOnThisLine() &&
694  // Don't print "-" next to "-", it would form "--".
695  Callbacks->AvoidConcat(PrevPrevTok, PrevTok, Tok))) {
696  OS << ' ';
697  }
698 
699  if (DropComments && Tok.is(tok::comment)) {
700  // Skip comments. Normally the preprocessor does not generate
701  // tok::comment nodes at all when not keeping comments, but under
702  // -traditional-cpp the lexer keeps /all/ whitespace, including comments.
703  SourceLocation StartLoc = Tok.getLocation();
704  Callbacks->MoveToLine(StartLoc.getLocWithOffset(Tok.getLength()));
705  } else if (Tok.is(tok::annot_module_include)) {
706  // PrintPPOutputPPCallbacks::InclusionDirective handles producing
707  // appropriate output here. Ignore this token entirely.
708  PP.Lex(Tok);
709  continue;
710  } else if (Tok.is(tok::annot_module_begin)) {
711  // FIXME: We retrieve this token after the FileChanged callback, and
712  // retrieve the module_end token before the FileChanged callback, so
713  // we render this within the file and render the module end outside the
714  // file, but this is backwards from the token locations: the module_begin
715  // token is at the include location (outside the file) and the module_end
716  // token is at the EOF location (within the file).
717  Callbacks->BeginModule(
718  reinterpret_cast<Module *>(Tok.getAnnotationValue()));
719  PP.Lex(Tok);
720  continue;
721  } else if (Tok.is(tok::annot_module_end)) {
722  Callbacks->EndModule(
723  reinterpret_cast<Module *>(Tok.getAnnotationValue()));
724  PP.Lex(Tok);
725  continue;
726  } else if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
727  OS << II->getName();
728  } else if (Tok.isLiteral() && !Tok.needsCleaning() &&
729  Tok.getLiteralData()) {
730  OS.write(Tok.getLiteralData(), Tok.getLength());
731  } else if (Tok.getLength() < 256) {
732  const char *TokPtr = Buffer;
733  unsigned Len = PP.getSpelling(Tok, TokPtr);
734  OS.write(TokPtr, Len);
735 
736  // Tokens that can contain embedded newlines need to adjust our current
737  // line number.
738  if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown)
739  Callbacks->HandleNewlinesInToken(TokPtr, Len);
740  } else {
741  std::string S = PP.getSpelling(Tok);
742  OS.write(&S[0], S.size());
743 
744  // Tokens that can contain embedded newlines need to adjust our current
745  // line number.
746  if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown)
747  Callbacks->HandleNewlinesInToken(&S[0], S.size());
748  }
749  Callbacks->setEmittedTokensOnThisLine();
750 
751  if (Tok.is(tok::eof)) break;
752 
753  PrevPrevTok = PrevTok;
754  PrevTok = Tok;
755  PP.Lex(Tok);
756  }
757 }
758 
759 typedef std::pair<const IdentifierInfo *, MacroInfo *> id_macro_pair;
760 static int MacroIDCompare(const id_macro_pair *LHS, const id_macro_pair *RHS) {
761  return LHS->first->getName().compare(RHS->first->getName());
762 }
763 
764 static void DoPrintMacros(Preprocessor &PP, raw_ostream *OS) {
765  // Ignore unknown pragmas.
766  PP.IgnorePragmas();
767 
768  // -dM mode just scans and ignores all tokens in the files, then dumps out
769  // the macro table at the end.
770  PP.EnterMainSourceFile();
771 
772  Token Tok;
773  do PP.Lex(Tok);
774  while (Tok.isNot(tok::eof));
775 
778  I != E; ++I) {
779  auto *MD = I->second.getLatest();
780  if (MD && MD->isDefined())
781  MacrosByID.push_back(id_macro_pair(I->first, MD->getMacroInfo()));
782  }
783  llvm::array_pod_sort(MacrosByID.begin(), MacrosByID.end(), MacroIDCompare);
784 
785  for (unsigned i = 0, e = MacrosByID.size(); i != e; ++i) {
786  MacroInfo &MI = *MacrosByID[i].second;
787  // Ignore computed macros like __LINE__ and friends.
788  if (MI.isBuiltinMacro()) continue;
789 
790  PrintMacroDefinition(*MacrosByID[i].first, MI, PP, *OS);
791  *OS << '\n';
792  }
793 }
794 
795 /// DoPrintPreprocessedInput - This implements -E mode.
796 ///
798  const PreprocessorOutputOptions &Opts) {
799  // Show macros with no output is handled specially.
800  if (!Opts.ShowCPP) {
801  assert(Opts.ShowMacros && "Not yet implemented!");
802  DoPrintMacros(PP, OS);
803  return;
804  }
805 
806  // Inform the preprocessor whether we want it to retain comments or not, due
807  // to -C or -CC.
809 
810  PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks(
811  PP, *OS, !Opts.ShowLineMarkers, Opts.ShowMacros,
813 
814  // Expand macros in pragmas with -fms-extensions. The assumption is that
815  // the majority of pragmas in such a file will be Microsoft pragmas.
816  // Remember the handlers we will add so that we can remove them later.
817  std::unique_ptr<UnknownPragmaHandler> MicrosoftExtHandler(
818  new UnknownPragmaHandler(
819  "#pragma", Callbacks,
820  /*RequireTokenExpansion=*/PP.getLangOpts().MicrosoftExt));
821 
822  std::unique_ptr<UnknownPragmaHandler> GCCHandler(new UnknownPragmaHandler(
823  "#pragma GCC", Callbacks,
824  /*RequireTokenExpansion=*/PP.getLangOpts().MicrosoftExt));
825 
826  std::unique_ptr<UnknownPragmaHandler> ClangHandler(new UnknownPragmaHandler(
827  "#pragma clang", Callbacks,
828  /*RequireTokenExpansion=*/PP.getLangOpts().MicrosoftExt));
829 
830  PP.AddPragmaHandler(MicrosoftExtHandler.get());
831  PP.AddPragmaHandler("GCC", GCCHandler.get());
832  PP.AddPragmaHandler("clang", ClangHandler.get());
833 
834  // The tokens after pragma omp need to be expanded.
835  //
836  // OpenMP [2.1, Directive format]
837  // Preprocessing tokens following the #pragma omp are subject to macro
838  // replacement.
839  std::unique_ptr<UnknownPragmaHandler> OpenMPHandler(
840  new UnknownPragmaHandler("#pragma omp", Callbacks,
841  /*RequireTokenExpansion=*/true));
842  PP.AddPragmaHandler("omp", OpenMPHandler.get());
843 
844  PP.addPPCallbacks(std::unique_ptr<PPCallbacks>(Callbacks));
845 
846  // After we have configured the preprocessor, enter the main file.
847  PP.EnterMainSourceFile();
848 
849  // Consume all of the tokens that come from the predefines buffer. Those
850  // should not be emitted into the output and are guaranteed to be at the
851  // start.
852  const SourceManager &SourceMgr = PP.getSourceManager();
853  Token Tok;
854  do {
855  PP.Lex(Tok);
856  if (Tok.is(tok::eof) || !Tok.getLocation().isFileID())
857  break;
858 
859  PresumedLoc PLoc = SourceMgr.getPresumedLoc(Tok.getLocation());
860  if (PLoc.isInvalid())
861  break;
862 
863  if (strcmp(PLoc.getFilename(), "<built-in>"))
864  break;
865  } while (true);
866 
867  // Read all the preprocessed tokens, printing them out to the stream.
868  PrintPreprocessedTokens(PP, Tok, Callbacks, *OS);
869  *OS << '\n';
870 
871  // Remove the handlers we just added to leave the preprocessor in a sane state
872  // so that it can be reused (for example by a clang::Parser instance).
873  PP.RemovePragmaHandler(MicrosoftExtHandler.get());
874  PP.RemovePragmaHandler("GCC", GCCHandler.get());
875  PP.RemovePragmaHandler("clang", ClangHandler.get());
876  PP.RemovePragmaHandler("omp", OpenMPHandler.get());
877 }
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
Definition: Token.h:266
SourceManager & getSourceManager() const
Definition: Preprocessor.h:729
static void PrintMacroDefinition(const IdentifierInfo &II, const MacroInfo &MI, Preprocessor &PP, raw_ostream &OS)
PrintMacroDefinition - Print a macro definition in a form that will be properly accepted back as a de...
param_iterator param_begin() const
Definition: MacroInfo.h:174
std::pair< const IdentifierInfo *, MacroInfo * > id_macro_pair
void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler)
Add the specified pragma handler to this preprocessor.
Definition: Pragma.cpp:880
bool isInvalid() const
Return true if this object is invalid or uninitialized.
Defines the SourceManager interface.
TokenConcatenation class, which answers the question of "Is it safe to emit two tokens without a whit...
Defines the clang::MacroInfo and clang::MacroDirective classes.
bool hasLeadingSpace() const
Return true if this token has whitespace before it.
Definition: Token.h:270
A description of the current definition of a macro.
Definition: MacroInfo.h:542
std::unique_ptr< llvm::MemoryBuffer > Buffer
macro_iterator macro_begin(bool IncludeExternalMacros=true) const
bool needsCleaning() const
Return true if this token has trigraphs or escaped newlines in it.
Definition: Token.h:283
MacroMap::const_iterator macro_iterator
Definition: Preprocessor.h:949
void IgnorePragmas()
Install empty handlers for all pragmas (making them ignored).
Definition: Pragma.cpp:1832
Severity
Enum values that allow the client to map NOTEs, WARNINGs, and EXTENSIONs to either Ignore (nothing)...
Definition: DiagnosticIDs.h:63
SourceLocation getDefinitionLoc() const
Return the location that the macro was defined at.
Definition: MacroInfo.h:117
CharacteristicKind
Indicates whether a file or directory holds normal user code, system code, or system code which is im...
Definition: SourceManager.h:82
This interface provides a way to observe the actions of the preprocessor as it does its thing...
Definition: PPCallbacks.h:36
StringRef getSpelling(SourceLocation loc, SmallVectorImpl< char > &buffer, bool *invalid=nullptr) const
Return the 'spelling' of the token at the given location; does not go up to the spelling location or ...
One of these records is kept for each identifier that is lexed.
bool isFileID() const
const LangOptions & getLangOpts() const
Definition: Preprocessor.h:725
std::string getFullModuleName(bool AllowStringLiterals=false) const
Retrieve the full name of this module, including the path from its top-level module.
Definition: Module.cpp:158
Token - This structure provides full information about a lexed token.
Definition: Token.h:35
void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments)
Control whether the preprocessor retains comments in output.
Definition: Preprocessor.h:764
bool tokens_empty() const
Definition: MacroInfo.h:238
Describes a module or submodule.
Definition: Module.h:57
MacroInfo * getMacroInfo() const
Get the MacroInfo that should be used for this definition.
Definition: MacroInfo.h:558
unsigned getExpansionColumnNumber(SourceLocation Loc, bool *Invalid=nullptr) const
SourceLocation getLocWithOffset(int Offset) const
Return a source location with the specified offset from this SourceLocation.
bool getCommentRetentionState() const
Definition: Preprocessor.h:769
Defines the Diagnostic-related interfaces.
tokens_iterator tokens_begin() const
Definition: MacroInfo.h:236
Present this diagnostic as an error.
tok::TokenKind getKind() const
Definition: Token.h:90
unsigned getLine() const
Return the presumed line number of this location.
detail::InMemoryDirectory::const_iterator I
PragmaIntroducerKind
Describes how the pragma was introduced, e.g., with #pragma, _Pragma, or __pragma.
Definition: Pragma.h:32
SourceLocation getIncludeLoc() const
Return the presumed include location of this location.
ArrayRef< Token > tokens() const
Definition: MacroInfo.h:239
void LexUnexpandedToken(Token &Result)
Just like Lex, but disables macro expansion of identifier tokens.
void * getAnnotationValue() const
Definition: Token.h:224
param_iterator param_end() const
Definition: MacroInfo.h:175
PreprocessorOutputOptions - Options for controlling the C preprocessor output (e.g., -E).
void DoPrintPreprocessedInput(Preprocessor &PP, raw_ostream *OS, const PreprocessorOutputOptions &Opts)
DoPrintPreprocessedInput - Implement -E mode.
static void DoPrintMacros(Preprocessor &PP, raw_ostream *OS)
StringRef getName() const
Return the actual identifier string.
Represents a character-granular source range.
static void outputPrintable(raw_ostream &OS, StringRef Str)
void EnterMainSourceFile()
Enter the specified FileID as the main source file, which implicitly adds the builtin defines etc.
Defines the clang::Preprocessor interface.
void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler)
Remove the specific pragma handler from this preprocessor.
Definition: Pragma.cpp:911
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file. ...
Definition: Token.h:124
bool isNot(tok::TokenKind K) const
Definition: Token.h:96
unsigned ShowMacros
Print macro definitions.
Record the location of an inclusion directive, such as an #include or #import statement.
Represents an unpacked "presumed" location which can be presented to the user.
unsigned Map[FirstTargetAddressSpace]
The type of a lookup table which maps from language-specific address spaces to target-specific ones...
Definition: AddressSpaces.h:53
const SourceManager & SM
Definition: Format.cpp:1293
unsigned ShowIncludeDirectives
Print includes, imports etc. within preprocessed output.
const char * getLiteralData() const
getLiteralData - For a literal token (numeric constant, string, etc), this returns a pointer to the s...
Definition: Token.h:215
Encapsulates changes to the "macros namespace" (the location where the macro name became active...
Definition: MacroInfo.h:286
StringRef FileName
Definition: Format.cpp:1465
Kind
static int MacroIDCompare(const id_macro_pair *LHS, const id_macro_pair *RHS)
const char * getFilename() const
Return the presumed filename of this location.
Encodes a location in the source.
IdentifierInfo *const * param_iterator
Parameters - The list of parameters for a function-like macro.
Definition: MacroInfo.h:172
bool isValid() const
Return true if this is a valid SourceLocation object.
macro_iterator macro_end(bool IncludeExternalMacros=true) const
Cached information about one file (either on disk or in the virtual file system). ...
Definition: FileManager.h:59
void Lex(Token &Result)
Lex the next token for this preprocessor.
bool param_empty() const
Definition: MacroInfo.h:173
unsigned UseLineDirectives
Use #line instead of GCC-style # N.
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {...}".
Definition: Token.h:95
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
static LLVM_READONLY bool isPrintable(unsigned char c)
Return true if this character is an ASCII printable character; that is, a character that should take ...
Definition: CharInfo.h:140
Present this diagnostic as a remark.
bool isGNUVarargs() const
Definition: MacroInfo.h:200
PragmaHandler - Instances of this interface are defined to handle the various pragmas that the language front-end uses.
Definition: Pragma.h:59
const MacroInfo * getMacroInfo() const
Definition: MacroInfo.h:382
detail::InMemoryDirectory::const_iterator E
SourceMgr(SourceMgr)
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Definition: Token.h:113
bool isFunctionLike() const
Definition: MacroInfo.h:193
Encapsulates the data about a macro definition (e.g.
Definition: MacroInfo.h:34
bool isBuiltinMacro() const
Return true if this macro requires processing before expansion.
Definition: MacroInfo.h:209
Defines the PPCallbacks interface.
unsigned ShowMacroComments
Show comments, even in macros.
Do not present this diagnostic, ignore it.
unsigned ShowLineMarkers
Show #line markers.
unsigned ShowCPP
Print normal preprocessed output.
unsigned getLength() const
Definition: Token.h:127
Present this diagnostic as a fatal error.
Present this diagnostic as a warning.
void addPPCallbacks(std::unique_ptr< PPCallbacks > C)
Definition: Preprocessor.h:822
This class handles loading and caching of source files into memory.
void startToken()
Reset all flags to cleared.
Definition: Token.h:169
static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok, PrintPPOutputPPCallbacks *Callbacks, raw_ostream &OS)
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
Definition: Preprocessor.h:98
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:177
tok::PPKeywordKind getPPKeywordID() const
Return the preprocessor keyword ID for this identifier.
PresumedLoc getPresumedLoc(SourceLocation Loc, bool UseLineDirectives=true) const
Returns the "presumed" location that a SourceLocation specifies.