LLVM  9.0.0svn
TGLexer.h
Go to the documentation of this file.
1 //===- TGLexer.h - Lexer for TableGen Files ---------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This class represents the Lexer for tablegen files.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef LLVM_LIB_TABLEGEN_TGLEXER_H
14 #define LLVM_LIB_TABLEGEN_TGLEXER_H
15 
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/StringRef.h"
18 #include "llvm/ADT/StringSet.h"
19 #include "llvm/Support/DataTypes.h"
20 #include "llvm/Support/SMLoc.h"
21 #include <cassert>
22 #include <map>
23 #include <memory>
24 #include <string>
25 
26 namespace llvm {
27 class SourceMgr;
28 class SMLoc;
29 class Twine;
30 
31 namespace tgtok {
32  enum TokKind {
33  // Markers
35 
36  // Tokens with no info.
37  minus, plus, // - +
38  l_square, r_square, // [ ]
39  l_brace, r_brace, // { }
40  l_paren, r_paren, // ( )
41  less, greater, // < >
42  colon, semi, // : ;
43  comma, period, // , .
44  equal, question, // = ?
45  paste, // #
46 
47  // Keywords.
50 
51  // !keywords.
54  XNe, XLe, XLt, XGe, XGt,
55 
56  // Integer value.
58 
59  // Binary constant. Note that these are sized according to the number of
60  // bits given.
62 
63  // String valued tokens.
65 
66  // Preprocessing tokens for internal usage by the lexer.
67  // They are never returned as a result of Lex().
69  };
70 }
71 
72 /// TGLexer - TableGen Lexer class.
73 class TGLexer {
75 
76  const char *CurPtr;
77  StringRef CurBuf;
78 
79  // Information about the current token.
80  const char *TokStart;
81  tgtok::TokKind CurCode;
82  std::string CurStrVal; // This is valid for ID, STRVAL, VARNAME, CODEFRAGMENT
83  int64_t CurIntVal; // This is valid for INTVAL.
84 
85  /// CurBuffer - This is the current buffer index we're lexing from as managed
86  /// by the SourceMgr object.
87  unsigned CurBuffer;
88 
89 public:
90  typedef std::map<std::string, SMLoc> DependenciesMapTy;
91 private:
92  /// Dependencies - This is the list of all included files.
93  DependenciesMapTy Dependencies;
94 
95 public:
96  TGLexer(SourceMgr &SrcMgr, ArrayRef<std::string> Macros);
97 
99  return CurCode = LexToken(CurPtr == CurBuf.begin());
100  }
101 
// Read-only access to the Dependencies map (std::string -> SMLoc), which the
// member's own comment describes as the list of all included files.
102 const DependenciesMapTy &getDependencies() const {
103 return Dependencies;
104 }
105 
// Returns CurCode, the kind of the current token.
106 tgtok::TokKind getCode() const { return CurCode; }
107 
// Returns the string value (CurStrVal) of the current token.
// Asserts that the current token kind is one that carries a string:
// Id, StrVal, VarName or CodeFragment.
108 const std::string &getCurStrVal() const {
109 assert((CurCode == tgtok::Id || CurCode == tgtok::StrVal ||
110 CurCode == tgtok::VarName || CurCode == tgtok::CodeFragment) &&
111 "This token doesn't have a string value");
112 return CurStrVal;
113 }
// Returns the integer value (CurIntVal) of the current token.
// Asserts that the current token kind is IntVal.
114 int64_t getCurIntVal() const {
115 assert(CurCode == tgtok::IntVal && "This token isn't an integer");
116 return CurIntVal;
117 }
// Returns the current binary-integer token as a (value, width) pair, where
// the second element is the token's length in characters, (CurPtr - TokStart),
// minus two. Asserts that the current token kind is BinaryIntVal.
// NOTE(review): the two subtracted characters are presumably the "0b" prefix,
// which would make the second element the number of bits given - confirm
// against LexNumber().
118 std::pair<int64_t, unsigned> getCurBinaryIntVal() const {
119 assert(CurCode == tgtok::BinaryIntVal &&
120 "This token isn't a binary integer");
121 return std::make_pair(CurIntVal, (CurPtr - TokStart)-2);
122 }
123 
124  SMLoc getLoc() const;
125 
126 private:
127  /// LexToken - Read the next token and return its code.
128  tgtok::TokKind LexToken(bool FileOrLineStart = false);
129 
130  tgtok::TokKind ReturnError(SMLoc Loc, const Twine &Msg);
131  tgtok::TokKind ReturnError(const char *Loc, const Twine &Msg);
132 
133  int getNextChar();
134  int peekNextChar(int Index) const;
135  void SkipBCPLComment();
136  bool SkipCComment();
137  tgtok::TokKind LexIdentifier();
138  bool LexInclude();
139  tgtok::TokKind LexString();
140  tgtok::TokKind LexVarName();
141  tgtok::TokKind LexNumber();
142  tgtok::TokKind LexBracket();
143  tgtok::TokKind LexExclaim();
144 
145  // Process EOF encountered in LexToken().
146  // If EOF is met in an include file, then the method will update
147  // CurPtr, CurBuf and preprocessing include stack, and return true.
148  // If EOF is met in the top-level file, then the method will
149  // update and check the preprocessing include stack, and return false.
150  bool processEOF();
151 
152  // *** Structures and methods for preprocessing support ***
153 
154  // A set of macro names that are defined either via command line or
155  // by using:
156  // #define NAME
157  StringSet<> DefinedMacros;
158 
159  // Each of #ifdef and #else directives has a descriptor associated
160  // with it.
161  //
162  // An ordered list of preprocessing controls defined by #ifdef/#else
163  // directives that are in effect currently is called preprocessing
164  // control stack. It is represented as a vector of PreprocessorControlDesc's.
165  //
166  // The control stack is updated according to the following rules:
167  //
168  // For each #ifdef we add an element to the control stack.
169  // For each #else we replace the top element with a descriptor
170  // with an inverted IsDefined value.
171  // For each #endif we pop the top element from the control stack.
172  //
173  // When CurPtr reaches the current buffer's end, the control stack
174  // must be empty, i.e. #ifdef and the corresponding #endif
175  // must be located in the same file.
176  struct PreprocessorControlDesc {
177  // Either tgtok::Ifdef or tgtok::Else.
179 
180  // True, if the condition for this directive is true, false - otherwise.
181  // Examples:
182  // #ifdef NAME : true, if NAME is defined, false - otherwise.
183  // ...
184  // #else : false, if NAME is defined, true - otherwise.
185  bool IsDefined;
186 
187  // Pointer into CurBuf to the beginning of the preprocessing directive
188  // word, e.g.:
189  // #ifdef NAME
190  // ^ - SrcPos
191  SMLoc SrcPos;
192  };
193 
194  // We want to disallow code like this:
195  // file1.td:
196  // #define NAME
197  // #ifdef NAME
198  // include "file2.td"
199  // EOF
200  // file2.td:
201  // #endif
202  // EOF
203  //
204  // To do this, we clear the preprocessing control stack on entry
205 // to each included file. PrepIncludeStack is used to store
206  // preprocessing control stacks for the current file and all its
207  // parent files. The back() element is the preprocessing control
208  // stack for the current file.
209  std::vector<std::unique_ptr<std::vector<PreprocessorControlDesc>>>
210  PrepIncludeStack;
211 
212  // Validate that the current preprocessing control stack is empty,
213  // since we are about to exit a file, and pop the include stack.
214  //
215  // If IncludeStackMustBeEmpty is true, the include stack must be empty
216  // after the popping, otherwise, the include stack must not be empty
217  // after the popping. Basically, the include stack must be empty
218  // only if we exit the "top-level" file (i.e. finish lexing).
219  //
220  // The method returns false, if the current preprocessing control stack
221  // is not empty (e.g. there is an unterminated #ifdef/#else),
222  // true - otherwise.
223  bool prepExitInclude(bool IncludeStackMustBeEmpty);
224 
225  // Look ahead for a preprocessing directive starting from CurPtr. The caller
226  // must only call this method, if *(CurPtr - 1) is '#'. If the method matches
227  // a preprocessing directive word followed by a whitespace, then it returns
228  // one of the internal token kinds, i.e. Ifdef, Else, Endif, Define.
229  //
230  // CurPtr is not adjusted by this method.
231  tgtok::TokKind prepIsDirective() const;
232 
233  // Given a preprocessing token kind, adjusts CurPtr to the end
234  // of the preprocessing directive word. Returns true, unless
235  // an unsupported token kind is passed in.
236  //
237  // We use look-ahead prepIsDirective() and prepEatPreprocessorDirective()
238  // to avoid adjusting CurPtr before we are sure that '#' is followed
239  // by a preprocessing directive. If it is not, then we fall back to
240  // tgtok::paste interpretation of '#'.
241  bool prepEatPreprocessorDirective(tgtok::TokKind Kind);
242 
243  // The main "exit" point from the token parsing to preprocessor.
244  //
245 // The method is called for CurPtr, when prepIsDirective() returns one
246 // of the directive token kinds. The first parameter is that result,
247 // denoting the actual preprocessor directive to be processed.
248  //
249  // If the preprocessing directive disables the tokens processing, e.g.:
250  // #ifdef NAME // NAME is undefined
251  // then lexPreprocessor() enters the lines-skipping mode.
252  // In this mode, it does not parse any tokens, because the code under
253  // the #ifdef may not even be a correct tablegen code. The preprocessor
254  // looks for lines containing other preprocessing directives, which
255  // may be prepended with whitespaces and C-style comments. If the line
256  // does not contain a preprocessing directive, it is skipped completely.
257  // Otherwise, the preprocessing directive is processed by recursively
258  // calling lexPreprocessor(). The processing of the encountered
259  // preprocessing directives includes updating preprocessing control stack
260  // and adding new macros into DefinedMacros set.
261  //
262  // The second parameter controls whether lexPreprocessor() is called from
263  // LexToken() (true) or recursively from lexPreprocessor() (false).
264  //
265  // If ReturnNextLiveToken is true, the method returns the next
266  // LEX token following the current directive or following the end
267  // of the disabled preprocessing region corresponding to this directive.
268  // If ReturnNextLiveToken is false, the method returns the first parameter,
269  // unless there were errors encountered in the disabled preprocessing
270  // region - in this case, it returns tgtok::Error.
271  tgtok::TokKind lexPreprocessor(tgtok::TokKind Kind,
272  bool ReturnNextLiveToken = true);
273 
274  // Worker method for lexPreprocessor() to skip lines after some
275  // preprocessing directive up to the buffer end or to the directive
276  // that re-enables token processing. The method returns true
277  // upon processing the next directive that re-enables tokens
278  // processing. False is returned if an error was encountered.
279  //
280  // Note that prepSkipRegion() calls lexPreprocessor() to process
281  // encountered preprocessing directives. In this case, the second
282  // parameter to lexPreprocessor() is set to false. Being passed
283  // false ReturnNextLiveToken, lexPreprocessor() must never call
284  // prepSkipRegion(). We assert this by passing ReturnNextLiveToken
285  // to prepSkipRegion() and checking that it is never set to false.
286  bool prepSkipRegion(bool MustNeverBeFalse);
287 
288  // Lex name of the macro after either #ifdef or #define. We could have used
289  // LexIdentifier(), but it has special handling of "include" word, which
290  // could result in awkward diagnostic errors. Consider:
291  // ----
292  // #ifdef include
293  // class ...
294  // ----
295  // LexIdentifier() will engage LexInclude(), which will complain about
296  // missing file with name "class". Instead, prepLexMacroName() will treat
297  // "include" as a normal macro name.
298  //
299  // On entry, CurPtr points to the end of a preprocessing directive word.
300  // The method allows for whitespaces between the preprocessing directive
301  // and the macro name. The allowed whitespaces are ' ' and '\t'.
302  //
303  // If the first non-whitespace symbol after the preprocessing directive
304  // is a valid start symbol for an identifier (i.e. [a-zA-Z_]), then
305  // the method updates TokStart to the position of the first non-whitespace
306  // symbol, sets CurPtr to the position of the macro name's last symbol,
307  // and returns a string reference to the macro name. Otherwise,
308  // TokStart is set to the first non-whitespace symbol after the preprocessing
309  // directive, and the method returns an empty string reference.
310  //
311  // In all cases, TokStart may be used to point to the word following
312  // the preprocessing directive.
313  StringRef prepLexMacroName();
314 
315  // Skip any whitespaces starting from CurPtr. The method is used
316  // only in the lines-skipping mode to find the first non-whitespace
317  // symbol after or at CurPtr. Allowed whitespaces are ' ', '\t', '\n'
318  // and '\r'. The method skips C-style comments as well, because
319  // it is used to find the beginning of the preprocessing directive.
320  // If we do not handle C-style comments the following code would
321  // result in incorrect detection of a preprocessing directive:
322  // /*
323  // #ifdef NAME
324  // */
325  // As long as we skip C-style comments, the following code is correctly
326  // recognized as a preprocessing directive:
327  // /* first line comment
328  // second line comment */ #ifdef NAME
329  //
330  // The method returns true upon reaching the first non-whitespace symbol
331  // or EOF, CurPtr is set to point to this symbol. The method returns false,
332 // if an error occurred during skipping of a C-style comment.
333  bool prepSkipLineBegin();
334 
335  // Skip any whitespaces or comments after a preprocessing directive.
336  // The method returns true upon reaching either end of the line
337  // or end of the file. If there is a multiline C-style comment
338  // after the preprocessing directive, the method skips
339  // the comment, so the final CurPtr may point to one of the next lines.
340 // The method returns false, if an error occurred during skipping
341  // C- or C++-style comment, or a non-whitespace symbol appears
342  // after the preprocessing directive.
343  //
344 // The method may be called both during lines-skipping and tokens
345 // processing. It actually verifies that only whitespaces and/or
346  // comments follow a preprocessing directive.
347  //
348 // After the execution of this method, CurPtr points either to a new line
349 // symbol, the buffer end or a non-whitespace symbol following the preprocessing
350  // directive.
351  bool prepSkipDirectiveEnd();
352 
353  // Skip all symbols to the end of the line/file.
354  // The method adjusts CurPtr, so that it points to either new line
355  // symbol in the current line or the buffer end.
356  void prepSkipToLineEnd();
357 
358  // Return true, if the current preprocessor control stack is such that
359  // we should allow lexer to process the next token, false - otherwise.
360  //
361  // In particular, the method returns true, if all the #ifdef/#else
362  // controls on the stack have their IsDefined member set to true.
363  bool prepIsProcessingEnabled();
364 
365  // Report an error, if we reach EOF with non-empty preprocessing control
366  // stack. This means there is no matching #endif for the previous
367  // #ifdef/#else.
368  void prepReportPreprocessorStackError();
369 };
370 
371 } // end namespace llvm
372 
373 #endif
This class represents lattice values for constants.
Definition: AllocatorList.h:23
SourceMgr SrcMgr
Definition: Error.cpp:23
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
const std::string & getCurStrVal() const
Definition: TGLexer.h:108
This owns the files read by a parser, handles include stacks, and handles diagnostic wrangling...
Definition: SourceMgr.h:41
tgtok::TokKind Lex()
Definition: TGLexer.h:98
iterator begin() const
Definition: StringRef.h:101
std::map< std::string, SMLoc > DependenciesMapTy
Definition: TGLexer.h:90
std::pair< int64_t, unsigned > getCurBinaryIntVal() const
Definition: TGLexer.h:118
tgtok::TokKind getCode() const
Definition: TGLexer.h:106
const unsigned Kind
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
const DependenciesMapTy & getDependencies() const
Definition: TGLexer.h:102
int64_t getCurIntVal() const
Definition: TGLexer.h:114
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition: StringSet.h:27
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
Represents a location in source code.
Definition: SMLoc.h:23
TGLexer - TableGen Lexer class.
Definition: TGLexer.h:73