LLVM 22.0.0git
TGLexer.h
Go to the documentation of this file.
1//===- TGLexer.h - Lexer for TableGen Files ---------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This class represents the Lexer for tablegen files.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_LIB_TABLEGEN_TGLEXER_H
14#define LLVM_LIB_TABLEGEN_TGLEXER_H
15
17#include "llvm/ADT/StringRef.h"
18#include "llvm/ADT/StringSet.h"
20#include "llvm/Support/SMLoc.h"
21#include <cassert>
22#include <set>
23#include <string>
24
25namespace llvm {
26template <typename T> class ArrayRef;
27class SourceMgr;
28class Twine;
29
30namespace tgtok {
31enum TokKind {
32 // Markers
35
36 // Tokens with no info.
37 minus, // -
38 plus, // +
41 l_brace, // {
42 r_brace, // }
43 l_paren, // (
44 r_paren, // )
45 less, // <
46 greater, // >
47 colon, // :
48 semi, // ;
49 comma, // ,
50 dot, // .
51 equal, // =
53 paste, // #
54 dotdotdot, // ...
55
56 // Boolean literals.
59
60 // Integer value.
62
63 // Binary constant. Note that these are sized according to the number of
64 // bits given.
66
67 // Preprocessing tokens for internal usage by the lexer.
68 // They are never returned as a result of Lex().
74
75 // Reserved keywords. ('ElseKW' is named to distinguish it from the
76 // existing 'Else' that means the preprocessor #else.)
89
90 // Object start tokens.
105
106 // Bang operators.
165
166 // String valued tokens.
173};
174
175/// isBangOperator - Return true if this is a bang operator.
176static inline bool isBangOperator(tgtok::TokKind Kind) {
177 return tgtok::BANG_OPERATOR_FIRST <= Kind && Kind <= BANG_OPERATOR_LAST;
178}
179
180/// isObjectStart - Return true if this is a valid first token for a statement.
181static inline bool isObjectStart(tgtok::TokKind Kind) {
182 return tgtok::OBJECT_START_FIRST <= Kind && Kind <= OBJECT_START_LAST;
183}
184
185/// isStringValue - Return true if this is a string value.
186static inline bool isStringValue(tgtok::TokKind Kind) {
187 return tgtok::STRING_VALUE_FIRST <= Kind && Kind <= STRING_VALUE_LAST;
188}
189} // namespace tgtok
190
191/// TGLexer - TableGen Lexer class.
192class TGLexer {
193 SourceMgr &SrcMgr;
194
195 const char *CurPtr = nullptr;
196 StringRef CurBuf;
197
198 // Information about the current token.
199 const char *TokStart = nullptr;
201 std::string CurStrVal; // This is valid for Id, StrVal, VarName, CodeFragment
202 int64_t CurIntVal = 0; // This is valid for IntVal.
203
204 /// CurBuffer - This is the current buffer index we're lexing from as managed
205 /// by the SourceMgr object.
206 unsigned CurBuffer = 0;
207
208public:
209 typedef std::set<std::string> DependenciesSetTy;
210
211private:
212 /// Dependencies - This is the list of all included files.
213 DependenciesSetTy Dependencies;
214
215public:
216 TGLexer(SourceMgr &SrcMgr, ArrayRef<std::string> Macros);
217
218 tgtok::TokKind Lex() { return CurCode = LexToken(CurPtr == CurBuf.begin()); }
219
220 const DependenciesSetTy &getDependencies() const { return Dependencies; }
221
222 tgtok::TokKind getCode() const { return CurCode; }
223
224 const std::string &getCurStrVal() const {
225 assert(tgtok::isStringValue(CurCode) &&
226 "This token doesn't have a string value");
227 return CurStrVal;
228 }
229 int64_t getCurIntVal() const {
230 assert(CurCode == tgtok::IntVal && "This token isn't an integer");
231 return CurIntVal;
232 }
233 std::pair<int64_t, unsigned> getCurBinaryIntVal() const {
234 assert(CurCode == tgtok::BinaryIntVal &&
235 "This token isn't a binary integer");
236 return {CurIntVal, (CurPtr - TokStart) - 2};
237 }
238
239 SMLoc getLoc() const;
240 SMRange getLocRange() const;
241
242private:
243 /// LexToken - Read the next token and return its code.
244 tgtok::TokKind LexToken(bool FileOrLineStart = false);
245
246 tgtok::TokKind ReturnError(SMLoc Loc, const Twine &Msg);
247 tgtok::TokKind ReturnError(const char *Loc, const Twine &Msg);
248
249 int getNextChar();
250 int peekNextChar(int Index) const;
251 void SkipBCPLComment();
252 bool SkipCComment();
253 tgtok::TokKind LexIdentifier();
254 bool LexInclude();
255 tgtok::TokKind LexString();
256 tgtok::TokKind LexVarName();
257 tgtok::TokKind LexNumber();
258 tgtok::TokKind LexBracket();
259 tgtok::TokKind LexExclaim();
260
261 // Process EOF encountered in LexToken().
262 // If EOF is met in an include file, then the method will update
263 // CurPtr, CurBuf and preprocessing include stack, and return true.
264 // If EOF is met in the top-level file, then the method will
265 // update and check the preprocessing include stack, and return false.
266 bool processEOF();
267
268 // *** Structures and methods for preprocessing support ***
269
270 // A set of macro names that are defined either via command line or
271 // by using:
272 // #define NAME
273 StringSet<> DefinedMacros;
274
275 // Each of #ifdef and #else directives has a descriptor associated
276 // with it.
277 //
278 // An ordered list of preprocessing controls defined by #ifdef/#else
279 // directives that are in effect currently is called preprocessing
280 // control stack. It is represented as a vector of PreprocessorControlDesc's.
281 //
282 // The control stack is updated according to the following rules:
283 //
284 // For each #ifdef we add an element to the control stack.
285 // For each #else we replace the top element with a descriptor
286 // with an inverted IsDefined value.
287 // For each #endif we pop the top element from the control stack.
288 //
289 // When CurPtr reaches the current buffer's end, the control stack
290 // must be empty, i.e. #ifdef and the corresponding #endif
291 // must be located in the same file.
292 struct PreprocessorControlDesc {
293 // Either tgtok::Ifdef or tgtok::Else.
294 tgtok::TokKind Kind;
295
296 // True, if the condition for this directive is true, false - otherwise.
297 // Examples:
298 // #ifdef NAME : true, if NAME is defined, false - otherwise.
299 // ...
300 // #else : false, if NAME is defined, true - otherwise.
301 bool IsDefined;
302
303 // Pointer into CurBuf to the beginning of the preprocessing directive
304 // word, e.g.:
305 // #ifdef NAME
306 // ^ - SrcPos
307 SMLoc SrcPos;
308 };
309
310 // We want to disallow code like this:
311 // file1.td:
312 // #define NAME
313 // #ifdef NAME
314 // include "file2.td"
315 // EOF
316 // file2.td:
317 // #endif
318 // EOF
319 //
320 // To do this, we clear the preprocessing control stack on entry
321 // to each of the included files. PrepIncludeStack is used to store
322 // preprocessing control stacks for the current file and all its
323 // parent files. The back() element is the preprocessing control
324 // stack for the current file.
326
327 // Validate that the current preprocessing control stack is empty,
328 // since we are about to exit a file, and pop the include stack.
329 //
330 // If IncludeStackMustBeEmpty is true, the include stack must be empty
331 // after the popping, otherwise, the include stack must not be empty
332 // after the popping. Basically, the include stack must be empty
333 // only if we exit the "top-level" file (i.e. finish lexing).
334 //
335 // The method returns false, if the current preprocessing control stack
336 // is not empty (e.g. there is an unterminated #ifdef/#else),
337 // true - otherwise.
338 bool prepExitInclude(bool IncludeStackMustBeEmpty);
339
340 // Look ahead for a preprocessing directive starting from CurPtr. The caller
341 // must only call this method, if *(CurPtr - 1) is '#'. If the method matches
342 // a preprocessing directive word followed by a whitespace, then it returns
343 // one of the internal token kinds, i.e. Ifdef, Else, Endif, Define.
344 //
345 // CurPtr is not adjusted by this method.
346 tgtok::TokKind prepIsDirective() const;
347
348 // Given a preprocessing token kind, adjusts CurPtr to the end
349 // of the preprocessing directive word.
350 //
351 // We use look-ahead prepIsDirective() and prepEatPreprocessorDirective()
352 // to avoid adjusting CurPtr before we are sure that '#' is followed
353 // by a preprocessing directive. If it is not, then we fall back to
354 // tgtok::paste interpretation of '#'.
355 void prepEatPreprocessorDirective(tgtok::TokKind Kind);
356
357 // The main "exit" point from the token parsing to preprocessor.
358 //
359 // The method is called for CurPtr, when prepIsDirective() returns
360 // true. The first parameter matches the result of prepIsDirective(),
361 // denoting the actual preprocessor directive to be processed.
362 //
363 // If the preprocessing directive disables the tokens processing, e.g.:
364 // #ifdef NAME // NAME is undefined
365 // then lexPreprocessor() enters the lines-skipping mode.
366 // In this mode, it does not parse any tokens, because the code under
367 // the #ifdef may not even be a correct tablegen code. The preprocessor
368 // looks for lines containing other preprocessing directives, which
369 // may be prepended with whitespaces and C-style comments. If the line
370 // does not contain a preprocessing directive, it is skipped completely.
371 // Otherwise, the preprocessing directive is processed by recursively
372 // calling lexPreprocessor(). The processing of the encountered
373 // preprocessing directives includes updating preprocessing control stack
374 // and adding new macros into DefinedMacros set.
375 //
376 // The second parameter controls whether lexPreprocessor() is called from
377 // LexToken() (true) or recursively from lexPreprocessor() (false).
378 //
379 // If ReturnNextLiveToken is true, the method returns the next
380 // LEX token following the current directive or following the end
381 // of the disabled preprocessing region corresponding to this directive.
382 // If ReturnNextLiveToken is false, the method returns the first parameter,
383 // unless there were errors encountered in the disabled preprocessing
384 // region - in this case, it returns tgtok::Error.
385 tgtok::TokKind lexPreprocessor(tgtok::TokKind Kind,
386 bool ReturnNextLiveToken = true);
387
388 // Worker method for lexPreprocessor() to skip lines after some
389 // preprocessing directive up to the buffer end or to the directive
390 // that re-enables token processing. The method returns true
391 // upon processing the next directive that re-enables tokens
392 // processing. False is returned if an error was encountered.
393 //
394 // Note that prepSkipRegion() calls lexPreprocessor() to process
395 // encountered preprocessing directives. In this case, the second
396 // parameter to lexPreprocessor() is set to false. Being passed
397 // false ReturnNextLiveToken, lexPreprocessor() must never call
398 // prepSkipRegion(). We assert this by passing ReturnNextLiveToken
399 // to prepSkipRegion() and checking that it is never set to false.
400 bool prepSkipRegion(bool MustNeverBeFalse);
401
402 // Lex name of the macro after either #ifdef or #define. We could have used
403 // LexIdentifier(), but it has special handling of "include" word, which
404 // could result in awkward diagnostic errors. Consider:
405 // ----
406 // #ifdef include
407 // class ...
408 // ----
409 // LexIdentifier() will engage LexInclude(), which will complain about
410 // missing file with name "class". Instead, prepLexMacroName() will treat
411 // "include" as a normal macro name.
412 //
413 // On entry, CurPtr points to the end of a preprocessing directive word.
414 // The method allows for whitespaces between the preprocessing directive
415 // and the macro name. The allowed whitespaces are ' ' and '\t'.
416 //
417 // If the first non-whitespace symbol after the preprocessing directive
418 // is a valid start symbol for an identifier (i.e. [a-zA-Z_]), then
419 // the method updates TokStart to the position of the first non-whitespace
420 // symbol, sets CurPtr to the position of the macro name's last symbol,
421 // and returns a string reference to the macro name. Otherwise,
422 // TokStart is set to the first non-whitespace symbol after the preprocessing
423 // directive, and the method returns an empty string reference.
424 //
425 // In all cases, TokStart may be used to point to the word following
426 // the preprocessing directive.
427 StringRef prepLexMacroName();
428
429 // Skip any whitespaces starting from CurPtr. The method is used
430 // only in the lines-skipping mode to find the first non-whitespace
431 // symbol after or at CurPtr. Allowed whitespaces are ' ', '\t', '\n'
432 // and '\r'. The method skips C-style comments as well, because
433 // it is used to find the beginning of the preprocessing directive.
434 // If we do not handle C-style comments the following code would
435 // result in incorrect detection of a preprocessing directive:
436 // /*
437 // #ifdef NAME
438 // */
439 // As long as we skip C-style comments, the following code is correctly
440 // recognized as a preprocessing directive:
441 // /* first line comment
442 // second line comment */ #ifdef NAME
443 //
444 // The method returns true upon reaching the first non-whitespace symbol
445 // or EOF, CurPtr is set to point to this symbol. The method returns false,
446 // if an error occurred during skipping of a C-style comment.
447 bool prepSkipLineBegin();
448
449 // Skip any whitespaces or comments after a preprocessing directive.
450 // The method returns true upon reaching either end of the line
451 // or end of the file. If there is a multiline C-style comment
452 // after the preprocessing directive, the method skips
453 // the comment, so the final CurPtr may point to one of the next lines.
454 // The method returns false, if an error occurred during skipping
455 // C- or C++-style comment, or a non-whitespace symbol appears
456 // after the preprocessing directive.
457 //
458 // The method may be called both during lines-skipping and tokens
459 // processing. It actually verifies that only whitespaces or/and
460 // comments follow a preprocessing directive.
461 //
462 // After the execution of this method, CurPtr points either to new line
463 // symbol, buffer end or non-whitespace symbol following the preprocessing
464 // directive.
465 bool prepSkipDirectiveEnd();
466
467 // Return true, if the current preprocessor control stack is such that
468 // we should allow lexer to process the next token, false - otherwise.
469 //
470 // In particular, the method returns true, if all the #ifdef/#else
471 // controls on the stack have their IsDefined member set to true.
472 bool prepIsProcessingEnabled();
473
474 // Report an error, if we reach EOF with non-empty preprocessing control
475 // stack. This means there is no matching #endif for the previous
476 // #ifdef/#else.
477 void prepReportPreprocessorStackError();
478};
479
480} // end namespace llvm
481
482#endif
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file defines the SmallVector class.
StringSet - A set-like wrapper for the StringMap.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
Represents a location in source code.
Definition SMLoc.h:22
Represents a range in source code.
Definition SMLoc.h:47
This owns the files read by a parser, handles include stacks, and handles diagnostic wrangling.
Definition SourceMgr.h:37
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition StringSet.h:25
SMRange getLocRange() const
Definition TGLexer.cpp:98
tgtok::TokKind Lex()
Definition TGLexer.h:218
int64_t getCurIntVal() const
Definition TGLexer.h:229
std::pair< int64_t, unsigned > getCurBinaryIntVal() const
Definition TGLexer.h:233
const std::string & getCurStrVal() const
Definition TGLexer.h:224
tgtok::TokKind getCode() const
Definition TGLexer.h:222
SMLoc getLoc() const
Definition TGLexer.cpp:96
TGLexer(SourceMgr &SrcMgr, ArrayRef< std::string > Macros)
Definition TGLexer.cpp:75
std::set< std::string > DependenciesSetTy
Definition TGLexer.h:209
const DependenciesSetTy & getDependencies() const
Definition TGLexer.h:220
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static bool isBangOperator(tgtok::TokKind Kind)
isBangOperator - Return true if this is a bang operator.
Definition TGLexer.h:176
@ BANG_OPERATOR_FIRST
Definition TGLexer.h:107
@ STRING_VALUE_FIRST
Definition TGLexer.h:167
@ XSetDagOpName
Definition TGLexer.h:152
@ BinaryIntVal
Definition TGLexer.h:65
@ BANG_OPERATOR_LAST
Definition TGLexer.h:164
@ OBJECT_START_LAST
Definition TGLexer.h:104
@ STRING_VALUE_LAST
Definition TGLexer.h:172
@ OBJECT_START_FIRST
Definition TGLexer.h:91
@ XGetDagOpName
Definition TGLexer.h:153
static bool isObjectStart(tgtok::TokKind Kind)
isObjectStart - Return true if this is a valid first token for a statement.
Definition TGLexer.h:181
static bool isStringValue(tgtok::TokKind Kind)
isStringValue - Return true if this is a string value.
Definition TGLexer.h:186
This is an optimization pass for GlobalISel generic memory operations.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...