clang  5.0.0
Token.h
Go to the documentation of this file.
1 //===--- Token.h - Token interface ------------------------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the Token interface.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_CLANG_LEX_TOKEN_H
15 #define LLVM_CLANG_LEX_TOKEN_H
16 
18 #include "clang/Basic/TokenKinds.h"
19 #include "llvm/ADT/StringRef.h"
20 #include <cassert>
21 
22 namespace clang {
23 
24 class IdentifierInfo;
25 
26 /// Token - This structure provides full information about a lexed token.
27 /// It is not intended to be space efficient; it is intended to return as much
28 /// information as possible about each returned token. This is expected to be
29 /// compressed into a smaller form if memory footprint is important.
30 ///
31 /// The parser can create a special "annotation token" representing a stream of
32 /// tokens that were parsed and semantically resolved, e.g.: "foo::MyClass<int>"
33 /// can be represented by a single typename annotation token that carries
34 /// information about the SourceRange of the tokens and the type object.
35 class Token {
36  /// The location of the token. This is actually a SourceLocation.
37  unsigned Loc;
38 
39  // Conceptually these next two fields could be in a union. However, this
40  // causes gcc 4.2 to pessimize LexTokenInternal, a very performance critical
41  // routine. Keeping as separate members with casts until a more beautiful fix
42  // presents itself.
43 
44  /// UintData - This holds either the length of the token text, when
45  /// a normal token, or the end of the SourceRange when an annotation
46  /// token.
47  unsigned UintData;
48 
49  /// PtrData - This is a union of four different pointer types, which depends
50  /// on what type of token this is:
51  /// Identifiers, keywords, etc:
52  /// This is an IdentifierInfo*, which contains the uniqued identifier
53  /// spelling.
54  /// Literals: isLiteral() returns true.
55  /// This is a pointer to the start of the token in a text buffer, which
56  /// may be dirty (have trigraphs / escaped newlines).
57  /// Annotations (resolved type names, C++ scopes, etc): isAnnotation().
58  /// This is a pointer to sema-specific data for the annotation token.
59  /// Eof:
60 /// This is a pointer to a Decl.
61  /// Other:
62  /// This is null.
63  void *PtrData;
64 
65  /// Kind - The actual flavor of token this is.
67 
68  /// Flags - Bits we track about this token, members of the TokenFlags enum.
69  unsigned short Flags;
70 
71 public:
72  // Various flags set per token:
73  enum TokenFlags {
74  StartOfLine = 0x01, // At start of line or only after whitespace
75  // (considering the line after macro expansion).
76  LeadingSpace = 0x02, // Whitespace exists before this token (considering
77  // whitespace after macro expansion).
78  DisableExpand = 0x04, // This identifier may never be macro expanded.
79  NeedsCleaning = 0x08, // Contained an escaped newline or trigraph.
80  LeadingEmptyMacro = 0x10, // Empty macro exists before this token.
81  HasUDSuffix = 0x20, // This string or character literal has a ud-suffix.
82  HasUCN = 0x40, // This identifier contains a UCN.
83  IgnoredComma = 0x80, // This comma is not a macro argument separator (MS).
84  StringifiedInMacro = 0x100, // This string or character literal is formed by
85  // macro stringizing or charizing operator.
86  CommaAfterElided = 0x200, // The comma following this token was elided (MS).
87  IsEditorPlaceholder = 0x400, // This identifier is a placeholder.
88  };
89 
90  tok::TokenKind getKind() const { return Kind; }
91  void setKind(tok::TokenKind K) { Kind = K; }
92 
93  /// is/isNot - Predicates to check if this token is a specific kind, as in
94  /// "if (Tok.is(tok::l_brace)) {...}".
95  bool is(tok::TokenKind K) const { return Kind == K; }
96  bool isNot(tok::TokenKind K) const { return Kind != K; }
97  bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const {
98  return is(K1) || is(K2);
99  }
100  template <typename... Ts>
101  bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, Ts... Ks) const {
102  return is(K1) || isOneOf(K2, Ks...);
103  }
104 
105  /// \brief Return true if this is a raw identifier (when lexing
106  /// in raw mode) or a non-keyword identifier (when lexing in non-raw mode).
107  bool isAnyIdentifier() const {
108  return tok::isAnyIdentifier(getKind());
109  }
110 
111  /// \brief Return true if this is a "literal", like a numeric
112  /// constant, string, etc.
113  bool isLiteral() const {
114  return tok::isLiteral(getKind());
115  }
116 
117  /// \brief Return true if this is any of tok::annot_* kind tokens.
118  bool isAnnotation() const {
119  return tok::isAnnotation(getKind());
120  }
121 
122  /// \brief Return a source location identifier for the specified
123  /// offset in the current file.
126  }
127  unsigned getLength() const {
128  assert(!isAnnotation() && "Annotation tokens have no length field");
129  return UintData;
130  }
131 
133  void setLength(unsigned Len) {
134  assert(!isAnnotation() && "Annotation tokens have no length field");
135  UintData = Len;
136  }
137 
139  assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token");
140  return SourceLocation::getFromRawEncoding(UintData ? UintData : Loc);
141  }
143  assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token");
144  UintData = L.getRawEncoding();
145  }
146 
149  }
150 
152  return isAnnotation() ? getAnnotationEndLoc()
154  }
155 
156  /// \brief SourceRange of the group of tokens that this annotation token
157  /// represents.
160  }
162  setLocation(R.getBegin());
164  }
165 
166  const char *getName() const { return tok::getTokenName(Kind); }
167 
168 /// \brief Reset every field (kind, flags, pointer data, length, location) to its cleared state.
169  void startToken() {
170  Kind = tok::unknown;
171  Flags = 0;
172  PtrData = nullptr;
173  UintData = 0;
174  Loc = SourceLocation().getRawEncoding();
175  }
176 
178  assert(isNot(tok::raw_identifier) &&
179  "getIdentifierInfo() on a tok::raw_identifier token!");
180  assert(!isAnnotation() &&
181  "getIdentifierInfo() on an annotation token!");
182  if (isLiteral()) return nullptr;
183  if (is(tok::eof)) return nullptr;
184  return (IdentifierInfo*) PtrData;
185  }
187  PtrData = (void*) II;
188  }
189 
190  const void *getEofData() const {
191  assert(is(tok::eof));
192  return reinterpret_cast<const void *>(PtrData);
193  }
194  void setEofData(const void *D) {
195  assert(is(tok::eof));
196  assert(!PtrData);
197  PtrData = const_cast<void *>(D);
198  }
199 
200  /// getRawIdentifier - For a raw identifier token (i.e., an identifier
201  /// lexed in raw mode), returns a reference to the text substring in the
202  /// buffer if known.
203  StringRef getRawIdentifier() const {
204  assert(is(tok::raw_identifier));
205  return StringRef(reinterpret_cast<const char *>(PtrData), getLength());
206  }
207  void setRawIdentifierData(const char *Ptr) {
208  assert(is(tok::raw_identifier));
209  PtrData = const_cast<char*>(Ptr);
210  }
211 
212  /// getLiteralData - For a literal token (numeric constant, string, etc), this
213  /// returns a pointer to the start of it in the text buffer if known, null
214  /// otherwise.
215  const char *getLiteralData() const {
216  assert(isLiteral() && "Cannot get literal data of non-literal");
217  return reinterpret_cast<const char*>(PtrData);
218  }
219  void setLiteralData(const char *Ptr) {
220  assert(isLiteral() && "Cannot set literal data of non-literal");
221  PtrData = const_cast<char*>(Ptr);
222  }
223 
224  void *getAnnotationValue() const {
225  assert(isAnnotation() && "Used AnnotVal on non-annotation token");
226  return PtrData;
227  }
228  void setAnnotationValue(void *val) {
229  assert(isAnnotation() && "Used AnnotVal on non-annotation token");
230  PtrData = val;
231  }
232 
233  /// \brief Set the specified flag.
234  void setFlag(TokenFlags Flag) {
235  Flags |= Flag;
236  }
237 
238  /// \brief Get the specified flag.
239  bool getFlag(TokenFlags Flag) const {
240  return (Flags & Flag) != 0;
241  }
242 
243  /// \brief Unset the specified flag.
244  void clearFlag(TokenFlags Flag) {
245  Flags &= ~Flag;
246  }
247 
248 /// \brief Return the internal representation of the flags.
249  ///
250  /// This is only intended for low-level operations such as writing tokens to
251  /// disk.
252  unsigned getFlags() const {
253  return Flags;
254  }
255 
256  /// \brief Set a flag to either true or false.
257  void setFlagValue(TokenFlags Flag, bool Val) {
258  if (Val)
259  setFlag(Flag);
260  else
261  clearFlag(Flag);
262  }
263 
264  /// isAtStartOfLine - Return true if this token is at the start of a line.
265  ///
266  bool isAtStartOfLine() const { return getFlag(StartOfLine); }
267 
268  /// \brief Return true if this token has whitespace before it.
269  ///
270  bool hasLeadingSpace() const { return getFlag(LeadingSpace); }
271 
272  /// \brief Return true if this identifier token should never
273  /// be expanded in the future, due to C99 6.10.3.4p2.
274  bool isExpandDisabled() const { return getFlag(DisableExpand); }
275 
276  /// \brief Return true if we have an ObjC keyword identifier.
277  bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const;
278 
279  /// \brief Return the ObjC keyword kind.
281 
282  /// \brief Return true if this token has trigraphs or escaped newlines in it.
283  bool needsCleaning() const { return getFlag(NeedsCleaning); }
284 
285  /// \brief Return true if this token has an empty macro before it.
286  ///
288 
289  /// \brief Return true if this token is a string or character literal which
290  /// has a ud-suffix.
291  bool hasUDSuffix() const { return getFlag(HasUDSuffix); }
292 
293  /// Returns true if this token contains a universal character name.
294  bool hasUCN() const { return getFlag(HasUCN); }
295 
296 /// Returns true if this token was formed by a macro's stringizing or charizing
297 /// operator.
299 
300  /// Returns true if the comma after this token was elided.
301  bool commaAfterElided() const { return getFlag(CommaAfterElided); }
302 
303  /// Returns true if this token is an editor placeholder.
304  ///
305  /// Editor placeholders are produced by the code-completion engine and are
306  /// represented as characters between '<#' and '#>' in the source code. The
307  /// lexer uses identifier tokens to represent placeholders.
309 };
310 
311 /// \brief Information about the conditional stack (\#if directives)
312 /// currently active.
314  /// \brief Location where the conditional started.
316 
317  /// \brief True if this was contained in a skipping directive, e.g.,
318  /// in a "\#if 0" block.
320 
321  /// \brief True if we have emitted tokens already, and now we're in
322  /// an \#else block or something. Only useful in Skipping blocks.
324 
325  /// \brief True if we've seen a \#else in this block. If so,
326  /// \#elif/\#else directives are not allowed.
327  bool FoundElse;
328 };
329 
330 } // end namespace clang
331 
332 namespace llvm {
333  template <>
334  struct isPodLike<clang::Token> { static const bool value = true; };
335 } // end namespace llvm
336 
337 #endif // LLVM_CLANG_LEX_TOKEN_H
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
Definition: Token.h:266
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
Definition: Lexer.cpp:55
SourceLocation getEnd() const
void setFlagValue(TokenFlags Flag, bool Val)
Set a flag to either true or false.
Definition: Token.h:257
const char * getName() const
Definition: Token.h:166
bool hasLeadingSpace() const
Return true if this token has whitespace before it.
Definition: Token.h:270
bool hasUCN() const
Returns true if this token contains a universal character name.
Definition: Token.h:294
void setFlag(TokenFlags Flag)
Set the specified flag.
Definition: Token.h:234
unsigned getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it...
bool needsCleaning() const
Return true if this token has trigraphs or escaped newlines in it.
Definition: Token.h:283
bool isAnyIdentifier() const
Return true if this is a raw identifier (when lexing in raw mode) or a non-keyword identifier (when l...
Definition: Token.h:107
One of these records is kept for each identifier that is lexed.
bool stringifiedInMacro() const
Returns true if this token was formed by a macro's stringizing or charizing operator.
Definition: Token.h:298
void setRawIdentifierData(const char *Ptr)
Definition: Token.h:207
static SourceLocation getFromRawEncoding(unsigned Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
Token - This structure provides full information about a lexed token.
Definition: Token.h:35
void setKind(tok::TokenKind K)
Definition: Token.h:91
SourceLocation getLocWithOffset(int Offset) const
Return a source location with the specified offset from this SourceLocation.
tok::TokenKind getKind() const
Definition: Token.h:90
bool FoundNonSkip
True if we have emitted tokens already, and now we're in an #else block or something.
Definition: Token.h:323
void * getAnnotationValue() const
Definition: Token.h:224
StringRef getRawIdentifier() const
getRawIdentifier - For a raw identifier token (i.e., an identifier lexed in raw mode), returns a reference to the text substring in the buffer if known.
Definition: Token.h:203
const void * getEofData() const
Definition: Token.h:190
bool isEditorPlaceholder() const
Returns true if this token is an editor placeholder.
Definition: Token.h:308
void setAnnotationRange(SourceRange R)
Definition: Token.h:161
SourceRange getAnnotationRange() const
SourceRange of the group of tokens that this annotation token represents.
Definition: Token.h:158
void setAnnotationValue(void *val)
Definition: Token.h:228
void setEofData(const void *D)
Definition: Token.h:194
bool hasUDSuffix() const
Return true if this token is a string or character literal which has a ud-suffix. ...
Definition: Token.h:291
FormatToken * Token
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file. ...
Definition: Token.h:124
bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, Ts...Ks) const
Definition: Token.h:101
bool isNot(tok::TokenKind K) const
Definition: Token.h:96
Information about the conditional stack (#if directives) currently active.
Definition: Token.h:313
SourceLocation getAnnotationEndLoc() const
Definition: Token.h:138
ObjCKeywordKind
Provides a namespace for Objective-C keywords which start with an '@'.
Definition: TokenKinds.h:41
const char * getLiteralData() const
getLiteralData - For a literal token (numeric constant, string, etc), this returns a pointer to the s...
Definition: Token.h:215
Kind
bool WasSkipping
True if this was contained in a skipping directive, e.g., in a "\#if 0" block.
Definition: Token.h:319
Encodes a location in the source.
void setLength(unsigned Len)
Definition: Token.h:133
bool isAnnotation(TokenKind K)
Return true if this is any of tok::annot_* kinds.
Definition: TokenKinds.h:95
SourceLocation getEndLoc() const
Definition: Token.h:151
void setAnnotationEndLoc(SourceLocation L)
Definition: Token.h:142
bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const
Return true if we have an ObjC keyword identifier.
Definition: Lexer.cpp:46
bool getFlag(TokenFlags Flag) const
Get the specified flag.
Definition: Token.h:239
void setIdentifierInfo(IdentifierInfo *II)
Definition: Token.h:186
bool isLiteral(TokenKind K)
Return true if this is a "literal" kind, like a numeric constant, string, etc.
Definition: TokenKinds.h:87
SourceLocation getLastLoc() const
Definition: Token.h:147
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:25
SourceLocation getBegin() const
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {...
Definition: Token.h:95
bool commaAfterElided() const
Returns true if the comma after this token was elided.
Definition: Token.h:301
SourceLocation IfLoc
Location where the conditional started.
Definition: Token.h:315
unsigned getFlags() const
Return the internal representation of the flags.
Definition: Token.h:252
void setLiteralData(const char *Ptr)
Definition: Token.h:219
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Definition: Token.h:113
bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const
Definition: Token.h:97
bool hasLeadingEmptyMacro() const
Return true if this token has an empty macro before it.
Definition: Token.h:287
Defines the clang::TokenKind enum and support functions.
Defines the clang::SourceLocation class and associated facilities.
const char * getTokenName(TokenKind Kind) LLVM_READNONE
Determines the name of a token as used within the front end.
Definition: TokenKinds.cpp:25
unsigned getLength() const
Definition: Token.h:127
void setLocation(SourceLocation L)
Definition: Token.h:132
A trivial tuple used to represent a source range.
void clearFlag(TokenFlags Flag)
Unset the specified flag.
Definition: Token.h:244
bool FoundElse
True if we've seen a #else in this block.
Definition: Token.h:327
bool isAnnotation() const
Return true if this is any of tok::annot_* kind tokens.
Definition: Token.h:118
bool isExpandDisabled() const
Return true if this identifier token should never be expanded in the future, due to C99 6...
Definition: Token.h:274
bool isAnyIdentifier(TokenKind K)
Return true if this is a raw identifier or an identifier kind.
Definition: TokenKinds.h:73
void startToken()
Reset every field (kind, flags, pointer data, length, location) to its cleared state.
Definition: Token.h:169
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:177