clang  5.0.0
TokenLexer.cpp
Go to the documentation of this file.
1 //===--- TokenLexer.cpp - Lex from a token stream -------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the TokenLexer interface.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/Lex/TokenLexer.h"
17 #include "clang/Lex/MacroArgs.h"
18 #include "clang/Lex/MacroInfo.h"
19 #include "clang/Lex/Preprocessor.h"
20 #include "llvm/ADT/SmallString.h"
21 
22 using namespace clang;
23 
24 /// Create a TokenLexer for the specified macro with the specified actual
25 /// arguments. Note that this ctor takes ownership of the ActualArgs pointer.
27  MacroArgs *Actuals) {
28  // If the client is reusing a TokenLexer, make sure to free any memory
29  // associated with it.
30  destroy();
31 
32  Macro = MI;
33  ActualArgs = Actuals;
34  CurToken = 0;
35 
36  ExpandLocStart = Tok.getLocation();
37  ExpandLocEnd = ELEnd;
38  AtStartOfLine = Tok.isAtStartOfLine();
39  HasLeadingSpace = Tok.hasLeadingSpace();
40  NextTokGetsSpace = false;
41  Tokens = &*Macro->tokens_begin();
42  OwnsTokens = false;
43  DisableMacroExpansion = false;
44  NumTokens = Macro->tokens_end()-Macro->tokens_begin();
45  MacroExpansionStart = SourceLocation();
46 
48  MacroStartSLocOffset = SM.getNextLocalOffset();
49 
50  if (NumTokens > 0) {
51  assert(Tokens[0].getLocation().isValid());
52  assert((Tokens[0].getLocation().isFileID() || Tokens[0].is(tok::comment)) &&
53  "Macro defined in macro?");
54  assert(ExpandLocStart.isValid());
55 
56  // Reserve a source location entry chunk for the length of the macro
57  // definition. Tokens that get lexed directly from the definition will
58  // have their locations pointing inside this chunk. This is to avoid
59  // creating separate source location entries for each token.
60  MacroDefStart = SM.getExpansionLoc(Tokens[0].getLocation());
61  MacroDefLength = Macro->getDefinitionLength(SM);
62  MacroExpansionStart = SM.createExpansionLoc(MacroDefStart,
63  ExpandLocStart,
64  ExpandLocEnd,
65  MacroDefLength);
66  }
67 
68  // If this is a function-like macro, expand the arguments and change
69  // Tokens to point to the expanded tokens.
70  if (Macro->isFunctionLike() && Macro->getNumParams())
71  ExpandFunctionArguments();
72 
73  // Mark the macro as currently disabled, so that it is not recursively
74  // expanded. The macro must be disabled only after argument pre-expansion of
75  // function-like macro arguments occurs.
76  Macro->DisableMacro();
77 }
78 
79 /// Create a TokenLexer for the specified token stream. This does not
80 /// take ownership of the specified token vector.
81 void TokenLexer::Init(const Token *TokArray, unsigned NumToks,
82  bool disableMacroExpansion, bool ownsTokens) {
83  // If the client is reusing a TokenLexer, make sure to free any memory
84  // associated with it.
85  destroy();
86 
87  Macro = nullptr;
88  ActualArgs = nullptr;
89  Tokens = TokArray;
90  OwnsTokens = ownsTokens;
91  DisableMacroExpansion = disableMacroExpansion;
92  NumTokens = NumToks;
93  CurToken = 0;
94  ExpandLocStart = ExpandLocEnd = SourceLocation();
95  AtStartOfLine = false;
96  HasLeadingSpace = false;
97  NextTokGetsSpace = false;
98  MacroExpansionStart = SourceLocation();
99 
100  // Set HasLeadingSpace/AtStartOfLine so that the first token will be
101  // returned unmodified.
102  if (NumToks != 0) {
103  AtStartOfLine = TokArray[0].isAtStartOfLine();
104  HasLeadingSpace = TokArray[0].hasLeadingSpace();
105  }
106 }
107 
108 void TokenLexer::destroy() {
109  // If this was a function-like macro that actually uses its arguments, delete
110  // the expanded tokens.
111  if (OwnsTokens) {
112  delete [] Tokens;
113  Tokens = nullptr;
114  OwnsTokens = false;
115  }
116 
117  // TokenLexer owns its formal arguments.
118  if (ActualArgs) ActualArgs->destroy(PP);
119 }
120 
121 bool TokenLexer::MaybeRemoveCommaBeforeVaArgs(
122  SmallVectorImpl<Token> &ResultToks, bool HasPasteOperator, MacroInfo *Macro,
123  unsigned MacroArgNo, Preprocessor &PP) {
124  // Is the macro argument __VA_ARGS__?
125  if (!Macro->isVariadic() || MacroArgNo != Macro->getNumParams()-1)
126  return false;
127 
128  // In Microsoft-compatibility mode, a comma is removed in the expansion
129  // of " ... , __VA_ARGS__ " if __VA_ARGS__ is empty. This extension is
130  // not supported by gcc.
131  if (!HasPasteOperator && !PP.getLangOpts().MSVCCompat)
132  return false;
133 
134  // GCC removes the comma in the expansion of " ... , ## __VA_ARGS__ " if
135  // __VA_ARGS__ is empty, but not in strict C99 mode where there are no
136  // named arguments, where it remains. In all other modes, including C99
137  // with GNU extensions, it is removed regardless of named arguments.
138  // Microsoft also appears to support this extension, unofficially.
139  if (PP.getLangOpts().C99 && !PP.getLangOpts().GNUMode
140  && Macro->getNumParams() < 2)
141  return false;
142 
143  // Is a comma available to be removed?
144  if (ResultToks.empty() || !ResultToks.back().is(tok::comma))
145  return false;
146 
147  // Issue an extension diagnostic for the paste operator.
148  if (HasPasteOperator)
149  PP.Diag(ResultToks.back().getLocation(), diag::ext_paste_comma);
150 
151  // Remove the comma.
152  ResultToks.pop_back();
153 
154  if (!ResultToks.empty()) {
155  // If the comma was right after another paste (e.g. "X##,##__VA_ARGS__"),
156  // then removal of the comma should produce a placemarker token (in C99
157  // terms) which we model by popping off the previous ##, giving us a plain
158  // "X" when __VA_ARGS__ is empty.
159  if (ResultToks.back().is(tok::hashhash))
160  ResultToks.pop_back();
161 
162  // Remember that this comma was elided.
163  ResultToks.back().setFlag(Token::CommaAfterElided);
164  }
165 
166  // Never add a space, even if the comma, ##, or arg had a space.
167  NextTokGetsSpace = false;
168  return true;
169 }
170 
171 /// Expand the arguments of a function-like macro so that we can quickly
172 /// return preexpanded tokens from Tokens.
173 void TokenLexer::ExpandFunctionArguments() {
174  SmallVector<Token, 128> ResultToks;
175 
176  // Loop through 'Tokens', expanding them into ResultToks. Keep
177  // track of whether we change anything. If not, no need to keep them. If so,
178  // we install the newly expanded sequence as the new 'Tokens' list.
179  bool MadeChange = false;
180 
181  for (unsigned i = 0, e = NumTokens; i != e; ++i) {
182  // If we found the stringify operator, get the argument stringified. The
183  // preprocessor already verified that the following token is a macro name
184  // when the #define was parsed.
185  const Token &CurTok = Tokens[i];
186  // We don't want a space for the next token after a paste
187  // operator. In valid code, the token will get smooshed onto the
188  // preceding one anyway. In assembler-with-cpp mode, invalid
189  // pastes are allowed through: in this case, we do not want the
190  // extra whitespace to be added. For example, we want ". ## foo"
191  // -> ".foo" not ". foo".
192  if (i != 0 && !Tokens[i-1].is(tok::hashhash) && CurTok.hasLeadingSpace())
193  NextTokGetsSpace = true;
194 
195  if (CurTok.isOneOf(tok::hash, tok::hashat)) {
196  int ArgNo = Macro->getParameterNum(Tokens[i+1].getIdentifierInfo());
197  assert(ArgNo != -1 && "Token following # is not an argument?");
198 
199  SourceLocation ExpansionLocStart =
200  getExpansionLocForMacroDefLoc(CurTok.getLocation());
201  SourceLocation ExpansionLocEnd =
202  getExpansionLocForMacroDefLoc(Tokens[i+1].getLocation());
203 
204  Token Res;
205  if (CurTok.is(tok::hash)) // Stringify
206  Res = ActualArgs->getStringifiedArgument(ArgNo, PP,
207  ExpansionLocStart,
208  ExpansionLocEnd);
209  else {
210  // 'charify': don't bother caching these.
211  Res = MacroArgs::StringifyArgument(ActualArgs->getUnexpArgument(ArgNo),
212  PP, true,
213  ExpansionLocStart,
214  ExpansionLocEnd);
215  }
217 
218  // The stringified/charified string leading space flag gets set to match
219  // the #/#@ operator.
220  if (NextTokGetsSpace)
222 
223  ResultToks.push_back(Res);
224  MadeChange = true;
225  ++i; // Skip arg name.
226  NextTokGetsSpace = false;
227  continue;
228  }
229 
230  // Find out if there is a paste (##) operator before or after the token.
231  bool NonEmptyPasteBefore =
232  !ResultToks.empty() && ResultToks.back().is(tok::hashhash);
233  bool PasteBefore = i != 0 && Tokens[i-1].is(tok::hashhash);
234  bool PasteAfter = i+1 != e && Tokens[i+1].is(tok::hashhash);
235  assert(!NonEmptyPasteBefore || PasteBefore);
236 
237  // Otherwise, if this is not an argument token, just add the token to the
238  // output buffer.
239  IdentifierInfo *II = CurTok.getIdentifierInfo();
240  int ArgNo = II ? Macro->getParameterNum(II) : -1;
241  if (ArgNo == -1) {
242  // This isn't an argument, just add it.
243  ResultToks.push_back(CurTok);
244 
245  if (NextTokGetsSpace) {
246  ResultToks.back().setFlag(Token::LeadingSpace);
247  NextTokGetsSpace = false;
248  } else if (PasteBefore && !NonEmptyPasteBefore)
249  ResultToks.back().clearFlag(Token::LeadingSpace);
250 
251  continue;
252  }
253 
254  // An argument is expanded somehow, the result is different than the
255  // input.
256  MadeChange = true;
257 
258  // Otherwise, this is a use of the argument.
259 
260  // In Microsoft mode, remove the comma before __VA_ARGS__ to ensure there
261  // are no trailing commas if __VA_ARGS__ is empty.
262  if (!PasteBefore && ActualArgs->isVarargsElidedUse() &&
263  MaybeRemoveCommaBeforeVaArgs(ResultToks,
264  /*HasPasteOperator=*/false,
265  Macro, ArgNo, PP))
266  continue;
267 
268  // If it is not the LHS/RHS of a ## operator, we must pre-expand the
269  // argument and substitute the expanded tokens into the result. This is
270  // C99 6.10.3.1p1.
271  if (!PasteBefore && !PasteAfter) {
272  const Token *ResultArgToks;
273 
274  // Only preexpand the argument if it could possibly need it. This
275  // avoids some work in common cases.
276  const Token *ArgTok = ActualArgs->getUnexpArgument(ArgNo);
277  if (ActualArgs->ArgNeedsPreexpansion(ArgTok, PP))
278  ResultArgToks = &ActualArgs->getPreExpArgument(ArgNo, Macro, PP)[0];
279  else
280  ResultArgToks = ArgTok; // Use non-preexpanded tokens.
281 
282  // If the arg token expanded into anything, append it.
283  if (ResultArgToks->isNot(tok::eof)) {
284  size_t FirstResult = ResultToks.size();
285  unsigned NumToks = MacroArgs::getArgLength(ResultArgToks);
286  ResultToks.append(ResultArgToks, ResultArgToks+NumToks);
287 
288  // In Microsoft-compatibility mode, we follow MSVC's preprocessing
289  // behavior by not considering single commas from nested macro
290  // expansions as argument separators. Set a flag on the token so we can
291  // test for this later when the macro expansion is processed.
292  if (PP.getLangOpts().MSVCCompat && NumToks == 1 &&
293  ResultToks.back().is(tok::comma))
294  ResultToks.back().setFlag(Token::IgnoredComma);
295 
296  // If the '##' came from expanding an argument, turn it into 'unknown'
297  // to avoid pasting.
298  for (Token &Tok : llvm::make_range(ResultToks.begin() + FirstResult,
299  ResultToks.end())) {
300  if (Tok.is(tok::hashhash))
301  Tok.setKind(tok::unknown);
302  }
303 
304  if(ExpandLocStart.isValid()) {
305  updateLocForMacroArgTokens(CurTok.getLocation(),
306  ResultToks.begin()+FirstResult,
307  ResultToks.end());
308  }
309 
310  // If any tokens were substituted from the argument, the whitespace
311  // before the first token should match the whitespace of the arg
312  // identifier.
313  ResultToks[FirstResult].setFlagValue(Token::LeadingSpace,
314  NextTokGetsSpace);
315  ResultToks[FirstResult].setFlagValue(Token::StartOfLine, false);
316  NextTokGetsSpace = false;
317  }
318  continue;
319  }
320 
321  // Okay, we have a token that is either the LHS or RHS of a paste (##)
322  // argument. It gets substituted as its non-pre-expanded tokens.
323  const Token *ArgToks = ActualArgs->getUnexpArgument(ArgNo);
324  unsigned NumToks = MacroArgs::getArgLength(ArgToks);
325  if (NumToks) { // Not an empty argument?
326  bool VaArgsPseudoPaste = false;
327  // If this is the GNU ", ## __VA_ARGS__" extension, and we just learned
328  // that __VA_ARGS__ expands to multiple tokens, avoid a pasting error when
329  // the expander trys to paste ',' with the first token of the __VA_ARGS__
330  // expansion.
331  if (NonEmptyPasteBefore && ResultToks.size() >= 2 &&
332  ResultToks[ResultToks.size()-2].is(tok::comma) &&
333  (unsigned)ArgNo == Macro->getNumParams()-1 &&
334  Macro->isVariadic()) {
335  VaArgsPseudoPaste = true;
336  // Remove the paste operator, report use of the extension.
337  PP.Diag(ResultToks.pop_back_val().getLocation(), diag::ext_paste_comma);
338  }
339 
340  ResultToks.append(ArgToks, ArgToks+NumToks);
341 
342  // If the '##' came from expanding an argument, turn it into 'unknown'
343  // to avoid pasting.
344  for (Token &Tok : llvm::make_range(ResultToks.end() - NumToks,
345  ResultToks.end())) {
346  if (Tok.is(tok::hashhash))
347  Tok.setKind(tok::unknown);
348  }
349 
350  if (ExpandLocStart.isValid()) {
351  updateLocForMacroArgTokens(CurTok.getLocation(),
352  ResultToks.end()-NumToks, ResultToks.end());
353  }
354 
355  // Transfer the leading whitespace information from the token
356  // (the macro argument) onto the first token of the
357  // expansion. Note that we don't do this for the GNU
358  // pseudo-paste extension ", ## __VA_ARGS__".
359  if (!VaArgsPseudoPaste) {
360  ResultToks[ResultToks.size() - NumToks].setFlagValue(Token::StartOfLine,
361  false);
362  ResultToks[ResultToks.size() - NumToks].setFlagValue(
363  Token::LeadingSpace, NextTokGetsSpace);
364  }
365 
366  NextTokGetsSpace = false;
367  continue;
368  }
369 
370  // If an empty argument is on the LHS or RHS of a paste, the standard (C99
371  // 6.10.3.3p2,3) calls for a bunch of placemarker stuff to occur. We
372  // implement this by eating ## operators when a LHS or RHS expands to
373  // empty.
374  if (PasteAfter) {
375  // Discard the argument token and skip (don't copy to the expansion
376  // buffer) the paste operator after it.
377  ++i;
378  continue;
379  }
380 
381  // If this is on the RHS of a paste operator, we've already copied the
382  // paste operator to the ResultToks list, unless the LHS was empty too.
383  // Remove it.
384  assert(PasteBefore);
385  if (NonEmptyPasteBefore) {
386  assert(ResultToks.back().is(tok::hashhash));
387  ResultToks.pop_back();
388  }
389 
390  // If this is the __VA_ARGS__ token, and if the argument wasn't provided,
391  // and if the macro had at least one real argument, and if the token before
392  // the ## was a comma, remove the comma. This is a GCC extension which is
393  // disabled when using -std=c99.
394  if (ActualArgs->isVarargsElidedUse())
395  MaybeRemoveCommaBeforeVaArgs(ResultToks,
396  /*HasPasteOperator=*/true,
397  Macro, ArgNo, PP);
398  }
399 
400  // If anything changed, install this as the new Tokens list.
401  if (MadeChange) {
402  assert(!OwnsTokens && "This would leak if we already own the token list");
403  // This is deleted in the dtor.
404  NumTokens = ResultToks.size();
405  // The tokens will be added to Preprocessor's cache and will be removed
406  // when this TokenLexer finishes lexing them.
407  Tokens = PP.cacheMacroExpandedTokens(this, ResultToks);
408 
409  // The preprocessor cache of macro expanded tokens owns these tokens,not us.
410  OwnsTokens = false;
411  }
412 }
413 
414 /// \brief Checks if two tokens form wide string literal.
415 static bool isWideStringLiteralFromMacro(const Token &FirstTok,
416  const Token &SecondTok) {
417  return FirstTok.is(tok::identifier) &&
418  FirstTok.getIdentifierInfo()->isStr("L") && SecondTok.isLiteral() &&
419  SecondTok.stringifiedInMacro();
420 }
421 
422 /// Lex - Lex and return a token from this macro stream.
423 ///
424 bool TokenLexer::Lex(Token &Tok) {
425  // Lexing off the end of the macro, pop this macro off the expansion stack.
426  if (isAtEnd()) {
427  // If this is a macro (not a token stream), mark the macro enabled now
428  // that it is no longer being expanded.
429  if (Macro) Macro->EnableMacro();
430 
431  Tok.startToken();
432  Tok.setFlagValue(Token::StartOfLine , AtStartOfLine);
433  Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace || NextTokGetsSpace);
434  if (CurToken == 0)
436  return PP.HandleEndOfTokenLexer(Tok);
437  }
438 
440 
441  // If this is the first token of the expanded result, we inherit spacing
442  // properties later.
443  bool isFirstToken = CurToken == 0;
444 
445  // Get the next token to return.
446  Tok = Tokens[CurToken++];
447 
448  bool TokenIsFromPaste = false;
449 
450  // If this token is followed by a token paste (##) operator, paste the tokens!
451  // Note that ## is a normal token when not expanding a macro.
452  if (!isAtEnd() && Macro &&
453  (Tokens[CurToken].is(tok::hashhash) ||
454  // Special processing of L#x macros in -fms-compatibility mode.
455  // Microsoft compiler is able to form a wide string literal from
456  // 'L#macro_arg' construct in a function-like macro.
457  (PP.getLangOpts().MSVCCompat &&
458  isWideStringLiteralFromMacro(Tok, Tokens[CurToken])))) {
459  // When handling the microsoft /##/ extension, the final token is
460  // returned by PasteTokens, not the pasted token.
461  if (PasteTokens(Tok))
462  return true;
463 
464  TokenIsFromPaste = true;
465  }
466 
467  // The token's current location indicate where the token was lexed from. We
468  // need this information to compute the spelling of the token, but any
469  // diagnostics for the expanded token should appear as if they came from
470  // ExpansionLoc. Pull this information together into a new SourceLocation
471  // that captures all of this.
472  if (ExpandLocStart.isValid() && // Don't do this for token streams.
473  // Check that the token's location was not already set properly.
474  SM.isBeforeInSLocAddrSpace(Tok.getLocation(), MacroStartSLocOffset)) {
475  SourceLocation instLoc;
476  if (Tok.is(tok::comment)) {
477  instLoc = SM.createExpansionLoc(Tok.getLocation(),
478  ExpandLocStart,
479  ExpandLocEnd,
480  Tok.getLength());
481  } else {
482  instLoc = getExpansionLocForMacroDefLoc(Tok.getLocation());
483  }
484 
485  Tok.setLocation(instLoc);
486  }
487 
488  // If this is the first token, set the lexical properties of the token to
489  // match the lexical properties of the macro identifier.
490  if (isFirstToken) {
491  Tok.setFlagValue(Token::StartOfLine , AtStartOfLine);
492  Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace);
493  } else {
494  // If this is not the first token, we may still need to pass through
495  // leading whitespace if we've expanded a macro.
496  if (AtStartOfLine) Tok.setFlag(Token::StartOfLine);
497  if (HasLeadingSpace) Tok.setFlag(Token::LeadingSpace);
498  }
499  AtStartOfLine = false;
500  HasLeadingSpace = false;
501 
502  // Handle recursive expansion!
503  if (!Tok.isAnnotation() && Tok.getIdentifierInfo() != nullptr) {
504  // Change the kind of this identifier to the appropriate token kind, e.g.
505  // turning "for" into a keyword.
506  IdentifierInfo *II = Tok.getIdentifierInfo();
507  Tok.setKind(II->getTokenID());
508 
509  // If this identifier was poisoned and from a paste, emit an error. This
510  // won't be handled by Preprocessor::HandleIdentifier because this is coming
511  // from a macro expansion.
512  if (II->isPoisoned() && TokenIsFromPaste) {
513  PP.HandlePoisonedIdentifier(Tok);
514  }
515 
516  if (!DisableMacroExpansion && II->isHandleIdentifierCase())
517  return PP.HandleIdentifier(Tok);
518  }
519 
520  // Otherwise, return a normal token.
521  return true;
522 }
523 
524 /// PasteTokens - Tok is the LHS of a ## operator, and CurToken is the ##
525 /// operator. Read the ## and RHS, and paste the LHS/RHS together. If there
526 /// are more ## after it, chomp them iteratively. Return the result as Tok.
527 /// If this returns true, the caller should immediately return the token.
528 bool TokenLexer::PasteTokens(Token &Tok) {
529  // MSVC: If previous token was pasted, this must be a recovery from an invalid
530  // paste operation. Ignore spaces before this token to mimic MSVC output.
531  // Required for generating valid UUID strings in some MS headers.
532  if (PP.getLangOpts().MicrosoftExt && (CurToken >= 2) &&
533  Tokens[CurToken - 2].is(tok::hashhash))
535 
537  const char *ResultTokStrPtr = nullptr;
538  SourceLocation StartLoc = Tok.getLocation();
539  SourceLocation PasteOpLoc;
540  do {
541  // Consume the ## operator if any.
542  PasteOpLoc = Tokens[CurToken].getLocation();
543  if (Tokens[CurToken].is(tok::hashhash))
544  ++CurToken;
545  assert(!isAtEnd() && "No token on the RHS of a paste operator!");
546 
547  // Get the RHS token.
548  const Token &RHS = Tokens[CurToken];
549 
550  // Allocate space for the result token. This is guaranteed to be enough for
551  // the two tokens.
552  Buffer.resize(Tok.getLength() + RHS.getLength());
553 
554  // Get the spelling of the LHS token in Buffer.
555  const char *BufPtr = &Buffer[0];
556  bool Invalid = false;
557  unsigned LHSLen = PP.getSpelling(Tok, BufPtr, &Invalid);
558  if (BufPtr != &Buffer[0]) // Really, we want the chars in Buffer!
559  memcpy(&Buffer[0], BufPtr, LHSLen);
560  if (Invalid)
561  return true;
562 
563  BufPtr = Buffer.data() + LHSLen;
564  unsigned RHSLen = PP.getSpelling(RHS, BufPtr, &Invalid);
565  if (Invalid)
566  return true;
567  if (RHSLen && BufPtr != &Buffer[LHSLen])
568  // Really, we want the chars in Buffer!
569  memcpy(&Buffer[LHSLen], BufPtr, RHSLen);
570 
571  // Trim excess space.
572  Buffer.resize(LHSLen+RHSLen);
573 
574  // Plop the pasted result (including the trailing newline and null) into a
575  // scratch buffer where we can lex it.
576  Token ResultTokTmp;
577  ResultTokTmp.startToken();
578 
579  // Claim that the tmp token is a string_literal so that we can get the
580  // character pointer back from CreateString in getLiteralData().
581  ResultTokTmp.setKind(tok::string_literal);
582  PP.CreateString(Buffer, ResultTokTmp);
583  SourceLocation ResultTokLoc = ResultTokTmp.getLocation();
584  ResultTokStrPtr = ResultTokTmp.getLiteralData();
585 
586  // Lex the resultant pasted token into Result.
587  Token Result;
588 
589  if (Tok.isAnyIdentifier() && RHS.isAnyIdentifier()) {
590  // Common paste case: identifier+identifier = identifier. Avoid creating
591  // a lexer and other overhead.
592  PP.IncrementPasteCounter(true);
593  Result.startToken();
594  Result.setKind(tok::raw_identifier);
595  Result.setRawIdentifierData(ResultTokStrPtr);
596  Result.setLocation(ResultTokLoc);
597  Result.setLength(LHSLen+RHSLen);
598  } else {
599  PP.IncrementPasteCounter(false);
600 
601  assert(ResultTokLoc.isFileID() &&
602  "Should be a raw location into scratch buffer");
604  FileID LocFileID = SourceMgr.getFileID(ResultTokLoc);
605 
606  bool Invalid = false;
607  const char *ScratchBufStart
608  = SourceMgr.getBufferData(LocFileID, &Invalid).data();
609  if (Invalid)
610  return false;
611 
612  // Make a lexer to lex this string from. Lex just this one token.
613  // Make a lexer object so that we lex and expand the paste result.
614  Lexer TL(SourceMgr.getLocForStartOfFile(LocFileID),
615  PP.getLangOpts(), ScratchBufStart,
616  ResultTokStrPtr, ResultTokStrPtr+LHSLen+RHSLen);
617 
618  // Lex a token in raw mode. This way it won't look up identifiers
619  // automatically, lexing off the end will return an eof token, and
620  // warnings are disabled. This returns true if the result token is the
621  // entire buffer.
622  bool isInvalid = !TL.LexFromRawLexer(Result);
623 
624  // If we got an EOF token, we didn't form even ONE token. For example, we
625  // did "/ ## /" to get "//".
626  isInvalid |= Result.is(tok::eof);
627 
628  // If pasting the two tokens didn't form a full new token, this is an
629  // error. This occurs with "x ## +" and other stuff. Return with Tok
630  // unmodified and with RHS as the next token to lex.
631  if (isInvalid) {
632  // Explicitly convert the token location to have proper expansion
633  // information so that the user knows where it came from.
635  SourceLocation Loc =
636  SM.createExpansionLoc(PasteOpLoc, ExpandLocStart, ExpandLocEnd, 2);
637 
638  // Test for the Microsoft extension of /##/ turning into // here on the
639  // error path.
640  if (PP.getLangOpts().MicrosoftExt && Tok.is(tok::slash) &&
641  RHS.is(tok::slash)) {
642  HandleMicrosoftCommentPaste(Tok, Loc);
643  return true;
644  }
645 
646  // Do not emit the error when preprocessing assembler code.
647  if (!PP.getLangOpts().AsmPreprocessor) {
648  // If we're in microsoft extensions mode, downgrade this from a hard
649  // error to an extension that defaults to an error. This allows
650  // disabling it.
651  PP.Diag(Loc, PP.getLangOpts().MicrosoftExt ? diag::ext_pp_bad_paste_ms
652  : diag::err_pp_bad_paste)
653  << Buffer;
654  }
655 
656  // An error has occurred so exit loop.
657  break;
658  }
659 
660  // Turn ## into 'unknown' to avoid # ## # from looking like a paste
661  // operator.
662  if (Result.is(tok::hashhash))
663  Result.setKind(tok::unknown);
664  }
665 
666  // Transfer properties of the LHS over the Result.
669 
670  // Finally, replace LHS with the result, consume the RHS, and iterate.
671  ++CurToken;
672  Tok = Result;
673  } while (!isAtEnd() && Tokens[CurToken].is(tok::hashhash));
674 
675  SourceLocation EndLoc = Tokens[CurToken - 1].getLocation();
676 
677  // The token's current location indicate where the token was lexed from. We
678  // need this information to compute the spelling of the token, but any
679  // diagnostics for the expanded token should appear as if the token was
680  // expanded from the full ## expression. Pull this information together into
681  // a new SourceLocation that captures all of this.
682  SourceManager &SM = PP.getSourceManager();
683  if (StartLoc.isFileID())
684  StartLoc = getExpansionLocForMacroDefLoc(StartLoc);
685  if (EndLoc.isFileID())
686  EndLoc = getExpansionLocForMacroDefLoc(EndLoc);
687  FileID MacroFID = SM.getFileID(MacroExpansionStart);
688  while (SM.getFileID(StartLoc) != MacroFID)
689  StartLoc = SM.getImmediateExpansionRange(StartLoc).first;
690  while (SM.getFileID(EndLoc) != MacroFID)
691  EndLoc = SM.getImmediateExpansionRange(EndLoc).second;
692 
693  Tok.setLocation(SM.createExpansionLoc(Tok.getLocation(), StartLoc, EndLoc,
694  Tok.getLength()));
695 
696  // Now that we got the result token, it will be subject to expansion. Since
697  // token pasting re-lexes the result token in raw mode, identifier information
698  // isn't looked up. As such, if the result is an identifier, look up id info.
699  if (Tok.is(tok::raw_identifier)) {
700  // Look up the identifier info for the token. We disabled identifier lookup
701  // by saying we're skipping contents, so we need to do this manually.
702  PP.LookUpIdentifierInfo(Tok);
703  }
704  return false;
705 }
706 
707 /// isNextTokenLParen - If the next token lexed will pop this macro off the
708 /// expansion stack, return 2. If the next unexpanded token is a '(', return
709 /// 1, otherwise return 0.
711  // Out of tokens?
712  if (isAtEnd())
713  return 2;
714  return Tokens[CurToken].is(tok::l_paren);
715 }
716 
717 /// isParsingPreprocessorDirective - Return true if we are in the middle of a
718 /// preprocessor directive.
720  return Tokens[NumTokens-1].is(tok::eod) && !isAtEnd();
721 }
722 
723 /// HandleMicrosoftCommentPaste - In microsoft compatibility mode, /##/ pastes
724 /// together to form a comment that comments out everything in the current
725 /// macro, other active macros, and anything left on the current physical
726 /// source line of the expanded buffer. Handle this by returning the
727 /// first token on the next line.
728 void TokenLexer::HandleMicrosoftCommentPaste(Token &Tok, SourceLocation OpLoc) {
729  PP.Diag(OpLoc, diag::ext_comment_paste_microsoft);
730 
731  // We 'comment out' the rest of this macro by just ignoring the rest of the
732  // tokens that have not been lexed yet, if any.
733 
734  // Since this must be a macro, mark the macro enabled now that it is no longer
735  // being expanded.
736  assert(Macro && "Token streams can't paste comments");
737  Macro->EnableMacro();
738 
740 }
741 
742 /// \brief If \arg loc is a file ID and points inside the current macro
743 /// definition, returns the appropriate source location pointing at the
744 /// macro expansion source location entry, otherwise it returns an invalid
745 /// SourceLocation.
747 TokenLexer::getExpansionLocForMacroDefLoc(SourceLocation loc) const {
748  assert(ExpandLocStart.isValid() && MacroExpansionStart.isValid() &&
749  "Not appropriate for token streams");
750  assert(loc.isValid() && loc.isFileID());
751 
752  SourceManager &SM = PP.getSourceManager();
753  assert(SM.isInSLocAddrSpace(loc, MacroDefStart, MacroDefLength) &&
754  "Expected loc to come from the macro definition");
755 
756  unsigned relativeOffset = 0;
757  SM.isInSLocAddrSpace(loc, MacroDefStart, MacroDefLength, &relativeOffset);
758  return MacroExpansionStart.getLocWithOffset(relativeOffset);
759 }
760 
761 /// \brief Finds the tokens that are consecutive (from the same FileID)
762 /// creates a single SLocEntry, and assigns SourceLocations to each token that
763 /// point to that SLocEntry. e.g for
764 /// assert(foo == bar);
765 /// There will be a single SLocEntry for the "foo == bar" chunk and locations
766 /// for the 'foo', '==', 'bar' tokens will point inside that chunk.
767 ///
768 /// \arg begin_tokens will be updated to a position past all the found
769 /// consecutive tokens.
771  SourceLocation InstLoc,
772  Token *&begin_tokens,
773  Token * end_tokens) {
774  assert(begin_tokens < end_tokens);
775 
776  SourceLocation FirstLoc = begin_tokens->getLocation();
777  SourceLocation CurLoc = FirstLoc;
778 
779  // Compare the source location offset of tokens and group together tokens that
780  // are close, even if their locations point to different FileIDs. e.g.
781  //
782  // |bar | foo | cake | (3 tokens from 3 consecutive FileIDs)
783  // ^ ^
784  // |bar foo cake| (one SLocEntry chunk for all tokens)
785  //
786  // we can perform this "merge" since the token's spelling location depends
787  // on the relative offset.
788 
789  Token *NextTok = begin_tokens + 1;
790  for (; NextTok < end_tokens; ++NextTok) {
791  SourceLocation NextLoc = NextTok->getLocation();
792  if (CurLoc.isFileID() != NextLoc.isFileID())
793  break; // Token from different kind of FileID.
794 
795  int RelOffs;
796  if (!SM.isInSameSLocAddrSpace(CurLoc, NextLoc, &RelOffs))
797  break; // Token from different local/loaded location.
798  // Check that token is not before the previous token or more than 50
799  // "characters" away.
800  if (RelOffs < 0 || RelOffs > 50)
801  break;
802 
803  if (CurLoc.isMacroID() && !SM.isWrittenInSameFile(CurLoc, NextLoc))
804  break; // Token from a different macro.
805 
806  CurLoc = NextLoc;
807  }
808 
809  // For the consecutive tokens, find the length of the SLocEntry to contain
810  // all of them.
811  Token &LastConsecutiveTok = *(NextTok-1);
812  int LastRelOffs = 0;
813  SM.isInSameSLocAddrSpace(FirstLoc, LastConsecutiveTok.getLocation(),
814  &LastRelOffs);
815  unsigned FullLength = LastRelOffs + LastConsecutiveTok.getLength();
816 
817  // Create a macro expansion SLocEntry that will "contain" all of the tokens.
818  SourceLocation Expansion =
819  SM.createMacroArgExpansionLoc(FirstLoc, InstLoc,FullLength);
820 
821  // Change the location of the tokens from the spelling location to the new
822  // expanded location.
823  for (; begin_tokens < NextTok; ++begin_tokens) {
824  Token &Tok = *begin_tokens;
825  int RelOffs = 0;
826  SM.isInSameSLocAddrSpace(FirstLoc, Tok.getLocation(), &RelOffs);
827  Tok.setLocation(Expansion.getLocWithOffset(RelOffs));
828  }
829 }
830 
831 /// \brief Creates SLocEntries and updates the locations of macro argument
832 /// tokens to their new expanded locations.
833 ///
834 /// \param ArgIdDefLoc the location of the macro argument id inside the macro
835 /// definition.
836 /// \param Tokens the macro argument tokens to update.
837 void TokenLexer::updateLocForMacroArgTokens(SourceLocation ArgIdSpellLoc,
838  Token *begin_tokens,
839  Token *end_tokens) {
840  SourceManager &SM = PP.getSourceManager();
841 
842  SourceLocation InstLoc =
843  getExpansionLocForMacroDefLoc(ArgIdSpellLoc);
844 
845  while (begin_tokens < end_tokens) {
846  // If there's only one token just create a SLocEntry for it.
847  if (end_tokens - begin_tokens == 1) {
848  Token &Tok = *begin_tokens;
850  InstLoc,
851  Tok.getLength()));
852  return;
853  }
854 
855  updateConsecutiveMacroArgTokens(SM, InstLoc, begin_tokens, end_tokens);
856  }
857 }
858 
859 void TokenLexer::PropagateLineStartLeadingSpaceInfo(Token &Result) {
860  AtStartOfLine = Result.isAtStartOfLine();
861  HasLeadingSpace = Result.hasLeadingSpace();
862 }
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
Definition: Token.h:266
SourceManager & getSourceManager() const
Definition: Preprocessor.h:729
bool isPoisoned() const
Return true if this token has been poisoned.
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens...
Definition: Lexer.h:46
void DisableMacro()
Definition: MacroInfo.h:259
void Init(Token &Tok, SourceLocation ILEnd, MacroInfo *MI, MacroArgs *ActualArgs)
Init - Initialize this TokenLexer to expand from the specified macro with the specified argument info...
Definition: TokenLexer.cpp:26
bool isMacroID() const
void setFlagValue(TokenFlags Flag, bool Val)
Set a flag to either true or false.
Definition: Token.h:257
unsigned isNextTokenLParen() const
isNextTokenLParen - If the next token lexed will pop this macro off the expansion stack...
Definition: TokenLexer.cpp:710
Defines the SourceManager interface.
unsigned getNextLocalOffset() const
static void updateConsecutiveMacroArgTokens(SourceManager &SM, SourceLocation InstLoc, Token *&begin_tokens, Token *end_tokens)
Finds the tokens that are consecutive (from the same FileID), creates a single SLocEntry for them, and assigns to each token a SourceLocation that points into that SLocEntry.
Definition: TokenLexer.cpp:770
bool isParsingPreprocessorDirective() const
isParsingPreprocessorDirective - Return true if we are in the middle of a preprocessor directive...
Definition: TokenLexer.cpp:719
Defines the clang::MacroInfo and clang::MacroDirective classes.
bool hasLeadingSpace() const
Return true if this token has whitespace before it.
Definition: Token.h:270
std::unique_ptr< llvm::MemoryBuffer > Buffer
void setFlag(TokenFlags Flag)
Set the specified flag.
Definition: Token.h:234
bool isVarargsElidedUse() const
isVarargsElidedUse - Return true if this is a C99 style varargs macro invocation and there was no arg...
Definition: MacroArgs.h:109
bool isAnyIdentifier() const
Return true if this is a raw identifier (when lexing in raw mode) or a non-keyword identifier (when l...
Definition: Token.h:107
StringRef getSpelling(SourceLocation loc, SmallVectorImpl< char > &buffer, bool *invalid=nullptr) const
Return the 'spelling' of the token at the given location; does not go up to the spelling location or ...
bool HandleEndOfTokenLexer(Token &Result)
Callback invoked when the current TokenLexer hits the end of its token stream.
One of these records is kept for each identifier that is lexed.
bool stringifiedInMacro() const
Returns true if this token is formed by macro by stringizing or charizing operator.
Definition: Token.h:298
void setRawIdentifierData(const char *Ptr)
Definition: Token.h:207
bool isFileID() const
const LangOptions & getLangOpts() const
Definition: Preprocessor.h:725
Token - This structure provides full information about a lexed token.
Definition: Token.h:35
unsigned getDefinitionLength(const SourceManager &SM) const
Get length in characters of the macro definition.
Definition: MacroInfo.h:126
void setKind(tok::TokenKind K)
Definition: Token.h:91
bool Lex(Token &Tok)
Lex - Lex and return a token from this macro stream.
Definition: TokenLexer.cpp:424
SourceLocation getLocWithOffset(int Offset) const
Return a source location with the specified offset from this SourceLocation.
tok::TokenKind getTokenID() const
If this is a source-language token (e.g.
const std::vector< Token > & getPreExpArgument(unsigned Arg, const MacroInfo *MI, Preprocessor &PP)
getPreExpArgument - Return the pre-expanded form of the specified argument.
Definition: MacroArgs.cpp:149
void destroy(Preprocessor &PP)
destroy - Destroy and deallocate the memory for this object.
Definition: MacroArgs.cpp:75
const Token * getUnexpArgument(unsigned Arg) const
getUnexpArgument - Return a pointer to the first token of the unexpanded token list for the specified...
Definition: MacroArgs.cpp:115
tokens_iterator tokens_begin() const
Definition: MacroInfo.h:236
unsigned getNumParams() const
Definition: MacroInfo.h:176
bool isVariadic() const
Definition: MacroInfo.h:201
DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const
Forwarding function for diagnostics.
const Token & getStringifiedArgument(unsigned ArgNo, Preprocessor &PP, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd)
getStringifiedArgument - Compute, cache, and return the specified argument that has been 'stringified...
Definition: MacroArgs.cpp:299
static bool isWideStringLiteralFromMacro(const Token &FirstTok, const Token &SecondTok)
Checks if two tokens form wide string literal.
Definition: TokenLexer.cpp:415
FileID getFileID(SourceLocation SpellingLoc) const
Return the FileID for a SourceLocation.
static Token StringifyArgument(const Token *ArgToks, Preprocessor &PP, bool Charify, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd)
StringifyArgument - Implement C99 6.10.3.2p2, converting a sequence of tokens into the literal string...
Definition: MacroArgs.cpp:196
bool isWrittenInSameFile(SourceLocation Loc1, SourceLocation Loc2) const
Returns true if the spelling locations for both SourceLocations are part of the same file buffer...
MacroArgs - An instance of this class captures information about the formal arguments specified to a ...
Definition: MacroArgs.h:29
SourceLocation createMacroArgExpansionLoc(SourceLocation Loc, SourceLocation ExpansionLoc, unsigned TokLength)
Return a new SourceLocation that encodes the fact that a token from SpellingLoc should actually be re...
Defines the clang::Preprocessor interface.
void HandleMicrosoftCommentPaste(Token &Tok)
When the macro expander pastes together a comment (/##/) in Microsoft mode, this method handles updat...
void IncrementPasteCounter(bool isFast)
Increment the counters for the number of token paste operations performed.
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file. ...
Definition: Token.h:124
bool isNot(tok::TokenKind K) const
Definition: Token.h:96
SourceLocation createExpansionLoc(SourceLocation Loc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLength, int LoadedID=0, unsigned LoadedOffset=0)
Return a new SourceLocation that encodes the fact that a token from SpellingLoc should actually be re...
The result type of a method or function.
bool isBeforeInSLocAddrSpace(SourceLocation LHS, SourceLocation RHS) const
Determines the order of 2 source locations in the "source location address space".
const SourceManager & SM
Definition: Format.cpp:1293
const char * getLiteralData() const
getLiteralData - For a literal token (numeric constant, string, etc), this returns a pointer to the s...
Definition: Token.h:215
bool isHandleIdentifierCase() const
Return true if the Preprocessor::HandleIdentifier must be called on a token of this identifier...
Encodes a location in the source.
void setLength(unsigned Len)
Definition: Token.h:133
bool isValid() const
Return true if this is a valid SourceLocation object.
bool ArgNeedsPreexpansion(const Token *ArgTok, Preprocessor &PP) const
ArgNeedsPreexpansion - If we can prove that the argument won't be affected by pre-expansion, return false.
Definition: MacroArgs.cpp:133
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {...
Definition: Token.h:95
bool isStr(const char(&Str)[StrLen]) const
Return true if this is the identifier for the specified string.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
static bool isInvalid(LocType Loc, bool *Invalid)
bool isInSLocAddrSpace(SourceLocation Loc, SourceLocation Start, unsigned Length, unsigned *RelativeOffset=nullptr) const
Returns true if Loc is inside the [Start, +Length) chunk of the source location address space...
std::pair< SourceLocation, SourceLocation > getImmediateExpansionRange(SourceLocation Loc) const
Return the start/end of the expansion information for an expansion location.
static unsigned getArgLength(const Token *ArgPtr)
getArgLength - Given a pointer to an expanded or unexpanded argument, return the number of tokens...
Definition: MacroArgs.cpp:105
SourceMgr(SourceMgr)
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Definition: Token.h:113
bool isFunctionLike() const
Definition: MacroInfo.h:193
Encapsulates the data about a macro definition (e.g.
Definition: MacroInfo.h:34
bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const
Definition: Token.h:97
bool isInSameSLocAddrSpace(SourceLocation LHS, SourceLocation RHS, int *RelativeOffset) const
Return true if both LHS and RHS are in the local source location address space or the loaded one...
bool HandleIdentifier(Token &Identifier)
Callback invoked when the lexer reads an identifier and has filled in the tokens IdentifierInfo membe...
void CreateString(StringRef Str, Token &Tok, SourceLocation ExpansionLocStart=SourceLocation(), SourceLocation ExpansionLocEnd=SourceLocation())
Plop the specified string into a scratch buffer and set the specified token's location and length to ...
tokens_iterator tokens_end() const
Definition: MacroInfo.h:237
void HandlePoisonedIdentifier(Token &Tok)
Display reason for poisoned identifier.
IdentifierInfo * LookUpIdentifierInfo(Token &Identifier) const
Given a tok::raw_identifier token, look up the identifier information for the token and install it in...
int getParameterNum(const IdentifierInfo *Arg) const
Return the parameter number of the specified identifier, or -1 if the identifier is not a formal para...
Definition: MacroInfo.h:183
unsigned getLength() const
Definition: Token.h:127
void setLocation(SourceLocation L)
Definition: Token.h:132
void EnableMacro()
Definition: MacroInfo.h:254
void clearFlag(TokenFlags Flag)
Unset the specified flag.
Definition: Token.h:244
SourceLocation getExpansionLoc(SourceLocation Loc) const
Given a SourceLocation object Loc, return the expansion location referenced by the ID...
bool isAnnotation() const
Return true if this is any of tok::annot_* kind tokens.
Definition: Token.h:118
This class handles loading and caching of source files into memory.
void startToken()
Reset all flags to cleared.
Definition: Token.h:169
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
Definition: Preprocessor.h:98
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:177