LLVM 17.0.0git
TGLexer.h
Go to the documentation of this file.
1//===- TGLexer.h - Lexer for TableGen Files ---------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This class represents the Lexer for tablegen files.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_LIB_TABLEGEN_TGLEXER_H
14#define LLVM_LIB_TABLEGEN_TGLEXER_H
15
16#include "llvm/ADT/StringRef.h"
17#include "llvm/ADT/StringSet.h"
19#include "llvm/Support/SMLoc.h"
20#include <cassert>
21#include <memory>
22#include <set>
23#include <string>
24#include <vector>
25
26namespace llvm {
27template <typename T> class ArrayRef;
28class SourceMgr;
29class Twine;
30
31namespace tgtok {
32enum TokKind {
33 // Markers
36
37 // Tokens with no info.
38 minus, // -
39 plus, // +
42 l_brace, // {
43 r_brace, // }
44 l_paren, // (
45 r_paren, // )
46 less, // <
47 greater, // >
48 colon, // :
49 semi, // ;
50 comma, // ,
51 dot, // .
52 equal, // =
54 paste, // #
55 dotdotdot, // ...
56
57 // Reserved keywords. ('ElseKW' is named to distinguish it from the
58 // existing 'Else' that means the preprocessor #else.)
83
84 // Bang operators.
132
133 // Boolean literals.
136
137 // Integer value.
139
140 // Binary constant. Note that these are sized according to the number of
141 // bits given.
143
144 // String valued tokens.
149
150 // Preprocessing tokens for internal usage by the lexer.
151 // They are never returned as a result of Lex().
156 Define
158}
159
160/// TGLexer - TableGen Lexer class.
161class TGLexer {
162 SourceMgr &SrcMgr;
163
164 const char *CurPtr = nullptr;
165 StringRef CurBuf;
166
167 // Information about the current token.
168 const char *TokStart = nullptr;
170 std::string CurStrVal; // This is valid for Id, StrVal, VarName, CodeFragment
171 int64_t CurIntVal = 0; // This is valid for IntVal.
172
173 /// CurBuffer - This is the current buffer index we're lexing from as managed
174 /// by the SourceMgr object.
175 unsigned CurBuffer = 0;
176
177public:
178 typedef std::set<std::string> DependenciesSetTy;
179
180private:
181 /// Dependencies - This is the list of all included files.
182 DependenciesSetTy Dependencies;
183
184public:
185 TGLexer(SourceMgr &SrcMgr, ArrayRef<std::string> Macros);
186
188 return CurCode = LexToken(CurPtr == CurBuf.begin());
189 }
190
192 return Dependencies;
193 }
194
/// Return the kind of the current token, i.e. the value of CurCode
/// (Lex() stores the result of LexToken() there).
195 tgtok::TokKind getCode() const { return CurCode; }
196
/// Return a const reference to CurStrVal, the string value of the current
/// token. Only meaningful while the current token is an Id, StrVal, VarName
/// or CodeFragment; any other token kind fails the assertion below (in
/// builds where assertions are enabled).
197 const std::string &getCurStrVal() const {
198 assert((CurCode == tgtok::Id || CurCode == tgtok::StrVal ||
199 CurCode == tgtok::VarName || CurCode == tgtok::CodeFragment) &&
200 "This token doesn't have a string value");
201 return CurStrVal;
202 }
/// Return CurIntVal, the integer value of the current token.
/// Asserts that the current token is an IntVal.
203 int64_t getCurIntVal() const {
204 assert(CurCode == tgtok::IntVal && "This token isn't an integer");
205 return CurIntVal;
206 }
/// Return the current binary-integer token as a (value, width) pair, where
/// the value is CurIntVal. Asserts that the current token is a BinaryIntVal.
207 std::pair<int64_t, unsigned> getCurBinaryIntVal() const {
208 assert(CurCode == tgtok::BinaryIntVal &&
209 "This token isn't a binary integer");
// The width is the distance from TokStart to CurPtr minus 2.
// NOTE(review): the 2 is presumably the two-character "0b" prefix, leaving
// one bit per remaining digit - confirm against LexNumber().
210 return std::make_pair(CurIntVal, (CurPtr - TokStart)-2);
211 }
212
213 SMLoc getLoc() const;
214 SMRange getLocRange() const;
215
216private:
217 /// LexToken - Read the next token and return its code.
218 tgtok::TokKind LexToken(bool FileOrLineStart = false);
219
220 tgtok::TokKind ReturnError(SMLoc Loc, const Twine &Msg);
221 tgtok::TokKind ReturnError(const char *Loc, const Twine &Msg);
222
223 int getNextChar();
224 int peekNextChar(int Index) const;
225 void SkipBCPLComment();
226 bool SkipCComment();
227 tgtok::TokKind LexIdentifier();
228 bool LexInclude();
229 tgtok::TokKind LexString();
230 tgtok::TokKind LexVarName();
231 tgtok::TokKind LexNumber();
232 tgtok::TokKind LexBracket();
233 tgtok::TokKind LexExclaim();
234
235 // Process EOF encountered in LexToken().
236 // If EOF is met in an include file, then the method will update
237 // CurPtr, CurBuf and preprocessing include stack, and return true.
238 // If EOF is met in the top-level file, then the method will
239 // update and check the preprocessing include stack, and return false.
240 bool processEOF();
241
242 // *** Structures and methods for preprocessing support ***
243
244 // A set of macro names that are defined either via command line or
245 // by using:
246 // #define NAME
247 StringSet<> DefinedMacros;
248
249 // Each of #ifdef and #else directives has a descriptor associated
250 // with it.
251 //
252 // An ordered list of preprocessing controls defined by #ifdef/#else
253 // directives that are in effect currently is called preprocessing
254 // control stack. It is represented as a vector of PreprocessorControlDesc's.
255 //
256 // The control stack is updated according to the following rules:
257 //
258 // For each #ifdef we add an element to the control stack.
259 // For each #else we replace the top element with a descriptor
260 // with an inverted IsDefined value.
261 // For each #endif we pop the top element from the control stack.
262 //
263 // When CurPtr reaches the current buffer's end, the control stack
264 // must be empty, i.e. #ifdef and the corresponding #endif
265 // must be located in the same file.
// Descriptor of a single #ifdef or #else directive: which directive it is,
// whether its condition holds, and where the directive is located.
266 struct PreprocessorControlDesc {
267 // Either tgtok::Ifdef or tgtok::Else.
268 tgtok::TokKind Kind;
269
270 // True, if the condition for this directive is true, false - otherwise.
271 // Examples:
272 // #ifdef NAME : true, if NAME is defined, false - otherwise.
273 // ...
274 // #else : false, if NAME is defined, true - otherwise.
275 bool IsDefined;
276
277 // Pointer into CurBuf to the beginning of the preprocessing directive
278 // word, e.g.:
279 // #ifdef NAME
280 // ^ - SrcPos
281 SMLoc SrcPos;
282 };
283
284 // We want to disallow code like this:
285 // file1.td:
286 // #define NAME
287 // #ifdef NAME
288 // include "file2.td"
289 // EOF
290 // file2.td:
291 // #endif
292 // EOF
293 //
294 // To do this, we clear the preprocessing control stack on entry
295 // to each of the included files. PrepIncludeStack is used to store
296 // preprocessing control stacks for the current file and all its
297 // parent files. The back() element is the preprocessing control
298 // stack for the current file.
299 std::vector<std::unique_ptr<std::vector<PreprocessorControlDesc>>>
300 PrepIncludeStack;
301
302 // Validate that the current preprocessing control stack is empty,
303 // since we are about to exit a file, and pop the include stack.
304 //
305 // If IncludeStackMustBeEmpty is true, the include stack must be empty
306 // after the popping, otherwise, the include stack must not be empty
307 // after the popping. Basically, the include stack must be empty
308 // only if we exit the "top-level" file (i.e. finish lexing).
309 //
310 // The method returns false, if the current preprocessing control stack
311 // is not empty (e.g. there is an unterminated #ifdef/#else),
312 // true - otherwise.
313 bool prepExitInclude(bool IncludeStackMustBeEmpty);
314
315 // Look ahead for a preprocessing directive starting from CurPtr. The caller
316 // must only call this method, if *(CurPtr - 1) is '#'. If the method matches
317 // a preprocessing directive word followed by a whitespace, then it returns
318 // one of the internal token kinds, i.e. Ifdef, Else, Endif, Define.
319 //
320 // CurPtr is not adjusted by this method.
321 tgtok::TokKind prepIsDirective() const;
322
323 // Given a preprocessing token kind, adjusts CurPtr to the end
324 // of the preprocessing directive word. Returns true, unless
325 // an unsupported token kind is passed in.
326 //
327 // We use look-ahead prepIsDirective() and prepEatPreprocessorDirective()
328 // to avoid adjusting CurPtr before we are sure that '#' is followed
329 // by a preprocessing directive. If it is not, then we fall back to
330 // tgtok::paste interpretation of '#'.
331 bool prepEatPreprocessorDirective(tgtok::TokKind Kind);
332
333 // The main "exit" point from the token parsing to preprocessor.
334 //
335 // The method is called for CurPtr, when prepIsDirective() recognizes
336 // a directive. The first parameter matches the result of prepIsDirective(),
337 // denoting the actual preprocessor directive to be processed.
338 //
339 // If the preprocessing directive disables the tokens processing, e.g.:
340 // #ifdef NAME // NAME is undefined
341 // then lexPreprocessor() enters the lines-skipping mode.
342 // In this mode, it does not parse any tokens, because the code under
343 // the #ifdef may not even be a correct tablegen code. The preprocessor
344 // looks for lines containing other preprocessing directives, which
345 // may be prepended with whitespaces and C-style comments. If the line
346 // does not contain a preprocessing directive, it is skipped completely.
347 // Otherwise, the preprocessing directive is processed by recursively
348 // calling lexPreprocessor(). The processing of the encountered
349 // preprocessing directives includes updating preprocessing control stack
350 // and adding new macros into DefinedMacros set.
351 //
352 // The second parameter controls whether lexPreprocessor() is called from
353 // LexToken() (true) or recursively from lexPreprocessor() (false).
354 //
355 // If ReturnNextLiveToken is true, the method returns the next
356 // LEX token following the current directive or following the end
357 // of the disabled preprocessing region corresponding to this directive.
358 // If ReturnNextLiveToken is false, the method returns the first parameter,
359 // unless there were errors encountered in the disabled preprocessing
360 // region - in this case, it returns tgtok::Error.
361 tgtok::TokKind lexPreprocessor(tgtok::TokKind Kind,
362 bool ReturnNextLiveToken = true);
363
364 // Worker method for lexPreprocessor() to skip lines after some
365 // preprocessing directive up to the buffer end or to the directive
366 // that re-enables token processing. The method returns true
367 // upon processing the next directive that re-enables tokens
368 // processing. False is returned if an error was encountered.
369 //
370 // Note that prepSkipRegion() calls lexPreprocessor() to process
371 // encountered preprocessing directives. In this case, the second
372 // parameter to lexPreprocessor() is set to false. Being passed
373 // false ReturnNextLiveToken, lexPreprocessor() must never call
374 // prepSkipRegion(). We assert this by passing ReturnNextLiveToken
375 // to prepSkipRegion() and checking that it is never set to false.
376 bool prepSkipRegion(bool MustNeverBeFalse);
377
378 // Lex name of the macro after either #ifdef or #define. We could have used
379 // LexIdentifier(), but it has special handling of "include" word, which
380 // could result in awkward diagnostic errors. Consider:
381 // ----
382 // #ifdef include
383 // class ...
384 // ----
385 // LexIdentifier() will engage LexInclude(), which will complain about
386 // missing file with name "class". Instead, prepLexMacroName() will treat
387 // "include" as a normal macro name.
388 //
389 // On entry, CurPtr points to the end of a preprocessing directive word.
390 // The method allows for whitespaces between the preprocessing directive
391 // and the macro name. The allowed whitespaces are ' ' and '\t'.
392 //
393 // If the first non-whitespace symbol after the preprocessing directive
394 // is a valid start symbol for an identifier (i.e. [a-zA-Z_]), then
395 // the method updates TokStart to the position of the first non-whitespace
396 // symbol, sets CurPtr to the position of the macro name's last symbol,
397 // and returns a string reference to the macro name. Otherwise,
398 // TokStart is set to the first non-whitespace symbol after the preprocessing
399 // directive, and the method returns an empty string reference.
400 //
401 // In all cases, TokStart may be used to point to the word following
402 // the preprocessing directive.
403 StringRef prepLexMacroName();
404
405 // Skip any whitespaces starting from CurPtr. The method is used
406 // only in the lines-skipping mode to find the first non-whitespace
407 // symbol after or at CurPtr. Allowed whitespaces are ' ', '\t', '\n'
408 // and '\r'. The method skips C-style comments as well, because
409 // it is used to find the beginning of the preprocessing directive.
410 // If we do not handle C-style comments the following code would
411 // result in incorrect detection of a preprocessing directive:
412 // /*
413 // #ifdef NAME
414 // */
415 // As long as we skip C-style comments, the following code is correctly
416 // recognized as a preprocessing directive:
417 // /* first line comment
418 // second line comment */ #ifdef NAME
419 //
420 // The method returns true upon reaching the first non-whitespace symbol
421 // or EOF, CurPtr is set to point to this symbol. The method returns false,
422 // if an error occurred during skipping of a C-style comment.
423 bool prepSkipLineBegin();
424
425 // Skip any whitespaces or comments after a preprocessing directive.
426 // The method returns true upon reaching either end of the line
427 // or end of the file. If there is a multiline C-style comment
428 // after the preprocessing directive, the method skips
429 // the comment, so the final CurPtr may point to one of the next lines.
430 // The method returns false, if an error occurred during skipping
431 // C- or C++-style comment, or a non-whitespace symbol appears
432 // after the preprocessing directive.
433 //
434 // The method may be called both during lines-skipping and tokens
435 // processing. It actually verifies that only whitespaces or/and
436 // comments follow a preprocessing directive.
437 //
438 // After the execution of this method, CurPtr points either to new line
439 // symbol, buffer end or non-whitespace symbol following the preprocessing
440 // directive.
441 bool prepSkipDirectiveEnd();
442
443 // Skip all symbols to the end of the line/file.
444 // The method adjusts CurPtr, so that it points to either new line
445 // symbol in the current line or the buffer end.
446 void prepSkipToLineEnd();
447
448 // Return true, if the current preprocessor control stack is such that
449 // we should allow lexer to process the next token, false - otherwise.
450 //
451 // In particular, the method returns true, if all the #ifdef/#else
452 // controls on the stack have their IsDefined member set to true.
453 bool prepIsProcessingEnabled();
454
455 // Report an error, if we reach EOF with non-empty preprocessing control
456 // stack. This means there is no matching #endif for the previous
457 // #ifdef/#else.
458 void prepReportPreprocessorStackError();
459};
460
461} // end namespace llvm
462
463#endif
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
StringSet - A set-like wrapper for the StringMap.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
Represents a location in source code.
Definition: SMLoc.h:23
Represents a range in source code.
Definition: SMLoc.h:48
This owns the files read by a parser, handles include stacks, and handles diagnostic wrangling.
Definition: SourceMgr.h:31
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
iterator begin() const
Definition: StringRef.h:111
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition: StringSet.h:23
TGLexer - TableGen Lexer class.
Definition: TGLexer.h:161
SMRange getLocRange() const
Definition: TGLexer.cpp:66
tgtok::TokKind Lex()
Definition: TGLexer.h:187
int64_t getCurIntVal() const
Definition: TGLexer.h:203
std::pair< int64_t, unsigned > getCurBinaryIntVal() const
Definition: TGLexer.h:207
const std::string & getCurStrVal() const
Definition: TGLexer.h:197
tgtok::TokKind getCode() const
Definition: TGLexer.h:195
SMLoc getLoc() const
Definition: TGLexer.cpp:62
std::set< std::string > DependenciesSetTy
Definition: TGLexer.h:178
const DependenciesSetTy & getDependencies() const
Definition: TGLexer.h:191
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
@ r_square
Definition: TGLexer.h:41
@ XListSplat
Definition: TGLexer.h:99
@ XGetDagName
Definition: TGLexer.h:131
@ l_square
Definition: TGLexer.h:40
@ CodeFragment
Definition: TGLexer.h:148
@ XInterleave
Definition: TGLexer.h:101
@ MultiClass
Definition: TGLexer.h:79
@ BinaryIntVal
Definition: TGLexer.h:142
@ XGetDagArg
Definition: TGLexer.h:130
@ XListConcat
Definition: TGLexer.h:98
@ XStrConcat
Definition: TGLexer.h:100
@ dotdotdot
Definition: TGLexer.h:55
@ question
Definition: TGLexer.h:53
@ XListRemove
Definition: TGLexer.h:126
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
ArrayRef(const T &OneElt) -> ArrayRef< T >