LLVM 20.0.0git
TGLexer.cpp
Go to the documentation of this file.
1//===- TGLexer.cpp - Lexer for TableGen -----------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Implement the Lexer for TableGen.
10//
11//===----------------------------------------------------------------------===//
12
13#include "TGLexer.h"
14#include "llvm/ADT/ArrayRef.h"
17#include "llvm/ADT/Twine.h"
18#include "llvm/Config/config.h" // for strtoull()/strtoll() define
22#include "llvm/TableGen/Error.h"
23#include <algorithm>
24#include <cerrno>
25#include <cstdint>
26#include <cstdio>
27#include <cstdlib>
28#include <cstring>
29
30using namespace llvm;
31
32namespace {
33// A list of supported preprocessing directives with their
34// internal token kinds and names.
35struct PreprocessorDir {
38};
39} // end anonymous namespace
40
41/// Returns true if `C` is a valid character in an identifier. If `First` is
42/// true, returns true if `C` is a valid first character of an identifier,
43/// else returns true if `C` is a valid non-first character of an identifier.
44/// Identifiers match the following regular expression:
45/// [a-zA-Z_][0-9a-zA-Z_]*
46static bool isValidIDChar(char C, bool First) {
47 if (C == '_' || isAlpha(C))
48 return true;
49 return !First && isDigit(C);
50}
51
52constexpr PreprocessorDir PreprocessorDirs[] = {{tgtok::Ifdef, "ifdef"},
53 {tgtok::Ifndef, "ifndef"},
54 {tgtok::Else, "else"},
55 {tgtok::Endif, "endif"},
56 {tgtok::Define, "define"}};
57
58// Returns a pointer past the end of a valid macro name at the start of `Str`.
59// Valid macro names match the regular expression [a-zA-Z_][0-9a-zA-Z_]*.
60static const char *lexMacroName(StringRef Str) {
61 assert(!Str.empty());
62
63 // Macro names start with [a-zA-Z_].
64 const char *Next = Str.begin();
65 if (!isValidIDChar(*Next, /*First=*/true))
66 return Next;
67 // Eat the first character of the name.
68 ++Next;
69
70 // Match the rest of the identifier regex: [0-9a-zA-Z_]*
71 const char *End = Str.end();
72 while (Next != End && isValidIDChar(*Next, /*First=*/false))
73 ++Next;
74 return Next;
75}
76
78 CurBuffer = SrcMgr.getMainFileID();
79 CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
80 CurPtr = CurBuf.begin();
81 TokStart = nullptr;
82
83 // Pretend that we enter the "top-level" include file.
84 PrepIncludeStack.emplace_back();
85
86 // Add all macros defined on the command line to the DefinedMacros set.
87 // Check invalid macro names and print fatal error if we find one.
88 for (StringRef MacroName : Macros) {
89 const char *End = lexMacroName(MacroName);
90 if (End != MacroName.end())
91 PrintFatalError("invalid macro name `" + MacroName +
92 "` specified on command line");
93
94 DefinedMacros.insert(MacroName);
95 }
96}
97
99 return SMLoc::getFromPointer(TokStart);
100}
101
103 return {getLoc(), SMLoc::getFromPointer(CurPtr)};
104}
105
106/// ReturnError - Set the error to the specified string at the specified
107/// location. This is defined to always return tgtok::Error.
108tgtok::TokKind TGLexer::ReturnError(SMLoc Loc, const Twine &Msg) {
109 PrintError(Loc, Msg);
110 return tgtok::Error;
111}
112
113tgtok::TokKind TGLexer::ReturnError(const char *Loc, const Twine &Msg) {
114 return ReturnError(SMLoc::getFromPointer(Loc), Msg);
115}
116
117bool TGLexer::processEOF() {
118 SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
119 if (ParentIncludeLoc != SMLoc()) {
120 // If prepExitInclude() detects a problem with the preprocessing
121 // control stack, it will return false. Pretend that we reached
122 // the final EOF and stop lexing more tokens by returning false
123 // to LexToken().
124 if (!prepExitInclude(false))
125 return false;
126
127 CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc);
128 CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
129 CurPtr = ParentIncludeLoc.getPointer();
130 // Make sure TokStart points into the parent file's buffer.
131 // LexToken() assigns to it before calling getNextChar(),
132 // so it is pointing into the included file now.
133 TokStart = CurPtr;
134 return true;
135 }
136
137 // Pretend that we exit the "top-level" include file.
138 // Note that in case of an error (e.g. control stack imbalance)
139 // the routine will issue a fatal error.
140 prepExitInclude(true);
141 return false;
142}
143
144int TGLexer::getNextChar() {
145 char CurChar = *CurPtr++;
146 switch (CurChar) {
147 default:
148 return (unsigned char)CurChar;
149
150 case 0: {
151 // A NUL character in the stream is either the end of the current buffer or
152 // a spurious NUL in the file. Disambiguate that here.
153 if (CurPtr - 1 == CurBuf.end()) {
154 --CurPtr; // Arrange for another call to return EOF again.
155 return EOF;
156 }
158 "NUL character is invalid in source; treated as space");
159 return ' ';
160 }
161
162 case '\n':
163 case '\r':
164 // Handle the newline character by ignoring it and incrementing the line
165 // count. However, be careful about 'dos style' files with \n\r in them.
166 // Only treat a \n\r or \r\n as a single line.
167 if ((*CurPtr == '\n' || (*CurPtr == '\r')) &&
168 *CurPtr != CurChar)
169 ++CurPtr; // Eat the two char newline sequence.
170 return '\n';
171 }
172}
173
174int TGLexer::peekNextChar(int Index) const {
175 return *(CurPtr + Index);
176}
177
178tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) {
179 TokStart = CurPtr;
180 // This always consumes at least one character.
181 int CurChar = getNextChar();
182
183 switch (CurChar) {
184 default:
185 // Handle letters: [a-zA-Z_]
186 if (isValidIDChar(CurChar, /*First=*/true))
187 return LexIdentifier();
188
189 // Unknown character, emit an error.
190 return ReturnError(TokStart, "unexpected character");
191 case EOF:
192 // Lex next token, if we just left an include file.
193 // Note that leaving an include file means that the next
194 // symbol is located at the end of the 'include "..."'
195 // construct, so LexToken() is called with default
196 // false parameter.
197 if (processEOF())
198 return LexToken();
199
200 // Return EOF denoting the end of lexing.
201 return tgtok::Eof;
202
203 case ':': return tgtok::colon;
204 case ';': return tgtok::semi;
205 case ',': return tgtok::comma;
206 case '<': return tgtok::less;
207 case '>': return tgtok::greater;
208 case ']': return tgtok::r_square;
209 case '{': return tgtok::l_brace;
210 case '}': return tgtok::r_brace;
211 case '(': return tgtok::l_paren;
212 case ')': return tgtok::r_paren;
213 case '=': return tgtok::equal;
214 case '?': return tgtok::question;
215 case '#':
216 if (FileOrLineStart) {
217 tgtok::TokKind Kind = prepIsDirective();
218 if (Kind != tgtok::Error)
219 return lexPreprocessor(Kind);
220 }
221
222 return tgtok::paste;
223
224 // The period is a separate case so we can recognize the "..."
225 // range punctuator.
226 case '.':
227 if (peekNextChar(0) == '.') {
228 ++CurPtr; // Eat second dot.
229 if (peekNextChar(0) == '.') {
230 ++CurPtr; // Eat third dot.
231 return tgtok::dotdotdot;
232 }
233 return ReturnError(TokStart, "invalid '..' punctuation");
234 }
235 return tgtok::dot;
236
237 case '\r':
238 llvm_unreachable("getNextChar() must never return '\r'");
239
240 case ' ':
241 case '\t':
242 // Ignore whitespace.
243 return LexToken(FileOrLineStart);
244 case '\n':
245 // Ignore whitespace, and identify the new line.
246 return LexToken(true);
247 case '/':
248 // If this is the start of a // comment, skip until the end of the line or
249 // the end of the buffer.
250 if (*CurPtr == '/')
251 SkipBCPLComment();
252 else if (*CurPtr == '*') {
253 if (SkipCComment())
254 return tgtok::Error;
255 } else // Otherwise, this is an error.
256 return ReturnError(TokStart, "unexpected character");
257 return LexToken(FileOrLineStart);
258 case '-': case '+':
259 case '0': case '1': case '2': case '3': case '4': case '5': case '6':
260 case '7': case '8': case '9': {
261 int NextChar = 0;
262 if (isDigit(CurChar)) {
263 // Allow identifiers to start with a number if it is followed by
264 // an identifier. This can happen with paste operations like
265 // foo#8i.
266 int i = 0;
267 do {
268 NextChar = peekNextChar(i++);
269 } while (isDigit(NextChar));
270
271 if (NextChar == 'x' || NextChar == 'b') {
272 // If this is [0-9]b[01] or [0-9]x[0-9A-fa-f] this is most
273 // likely a number.
274 int NextNextChar = peekNextChar(i);
275 switch (NextNextChar) {
276 default:
277 break;
278 case '0': case '1':
279 if (NextChar == 'b')
280 return LexNumber();
281 [[fallthrough]];
282 case '2': case '3': case '4': case '5':
283 case '6': case '7': case '8': case '9':
284 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
285 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
286 if (NextChar == 'x')
287 return LexNumber();
288 break;
289 }
290 }
291 }
292
293 if (isValidIDChar(NextChar, /*First=*/true))
294 return LexIdentifier();
295
296 return LexNumber();
297 }
298 case '"': return LexString();
299 case '$': return LexVarName();
300 case '[': return LexBracket();
301 case '!': return LexExclaim();
302 }
303}
304
305/// LexString - Lex "[^"]*"
306tgtok::TokKind TGLexer::LexString() {
307 const char *StrStart = CurPtr;
308
309 CurStrVal = "";
310
311 while (*CurPtr != '"') {
312 // If we hit the end of the buffer, report an error.
313 if (*CurPtr == 0 && CurPtr == CurBuf.end())
314 return ReturnError(StrStart, "end of file in string literal");
315
316 if (*CurPtr == '\n' || *CurPtr == '\r')
317 return ReturnError(StrStart, "end of line in string literal");
318
319 if (*CurPtr != '\\') {
320 CurStrVal += *CurPtr++;
321 continue;
322 }
323
324 ++CurPtr;
325
326 switch (*CurPtr) {
327 case '\\': case '\'': case '"':
328 // These turn into their literal character.
329 CurStrVal += *CurPtr++;
330 break;
331 case 't':
332 CurStrVal += '\t';
333 ++CurPtr;
334 break;
335 case 'n':
336 CurStrVal += '\n';
337 ++CurPtr;
338 break;
339
340 case '\n':
341 case '\r':
342 return ReturnError(CurPtr, "escaped newlines not supported in tblgen");
343
344 // If we hit the end of the buffer, report an error.
345 case '\0':
346 if (CurPtr == CurBuf.end())
347 return ReturnError(StrStart, "end of file in string literal");
348 [[fallthrough]];
349 default:
350 return ReturnError(CurPtr, "invalid escape in string literal");
351 }
352 }
353
354 ++CurPtr;
355 return tgtok::StrVal;
356}
357
358tgtok::TokKind TGLexer::LexVarName() {
359 if (!isValidIDChar(CurPtr[0], /*First=*/true))
360 return ReturnError(TokStart, "invalid variable name");
361
362 // Otherwise, we're ok, consume the rest of the characters.
363 const char *VarNameStart = CurPtr++;
364
365 while (isValidIDChar(*CurPtr, /*First=*/false))
366 ++CurPtr;
367
368 CurStrVal.assign(VarNameStart, CurPtr);
369 return tgtok::VarName;
370}
371
372tgtok::TokKind TGLexer::LexIdentifier() {
373 // The first letter is [a-zA-Z_].
374 const char *IdentStart = TokStart;
375
376 // Match the rest of the identifier regex: [0-9a-zA-Z_]*
377 while (isValidIDChar(*CurPtr, /*First=*/false))
378 ++CurPtr;
379
380 // Check to see if this identifier is a reserved keyword.
381 StringRef Str(IdentStart, CurPtr-IdentStart);
382
384 .Case("int", tgtok::Int)
385 .Case("bit", tgtok::Bit)
386 .Case("bits", tgtok::Bits)
387 .Case("string", tgtok::String)
388 .Case("list", tgtok::List)
389 .Case("code", tgtok::Code)
390 .Case("dag", tgtok::Dag)
391 .Case("class", tgtok::Class)
392 .Case("def", tgtok::Def)
393 .Case("true", tgtok::TrueVal)
394 .Case("false", tgtok::FalseVal)
395 .Case("foreach", tgtok::Foreach)
396 .Case("defm", tgtok::Defm)
397 .Case("defset", tgtok::Defset)
398 .Case("deftype", tgtok::Deftype)
399 .Case("multiclass", tgtok::MultiClass)
400 .Case("field", tgtok::Field)
401 .Case("let", tgtok::Let)
402 .Case("in", tgtok::In)
403 .Case("defvar", tgtok::Defvar)
404 .Case("include", tgtok::Include)
405 .Case("if", tgtok::If)
406 .Case("then", tgtok::Then)
407 .Case("else", tgtok::ElseKW)
408 .Case("assert", tgtok::Assert)
409 .Case("dump", tgtok::Dump)
411
412 // A couple of tokens require special processing.
413 switch (Kind) {
414 case tgtok::Include:
415 if (LexInclude()) return tgtok::Error;
416 return Lex();
417 case tgtok::Id:
418 CurStrVal.assign(Str.begin(), Str.end());
419 break;
420 default:
421 break;
422 }
423
424 return Kind;
425}
426
427/// LexInclude - We just read the "include" token. Get the string token that
428/// comes next and enter the include.
429bool TGLexer::LexInclude() {
430 // The token after the include must be a string.
431 tgtok::TokKind Tok = LexToken();
432 if (Tok == tgtok::Error) return true;
433 if (Tok != tgtok::StrVal) {
434 PrintError(getLoc(), "expected filename after include");
435 return true;
436 }
437
438 // Get the string.
439 std::string Filename = CurStrVal;
440 std::string IncludedFile;
441
442 CurBuffer = SrcMgr.AddIncludeFile(Filename, SMLoc::getFromPointer(CurPtr),
443 IncludedFile);
444 if (!CurBuffer) {
445 PrintError(getLoc(), "could not find include file '" + Filename + "'");
446 return true;
447 }
448
449 Dependencies.insert(IncludedFile);
450 // Save the line number and lex buffer of the includer.
451 CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
452 CurPtr = CurBuf.begin();
453
454 PrepIncludeStack.emplace_back();
455 return false;
456}
457
458/// SkipBCPLComment - Skip over the comment by finding the next CR or LF.
459/// Or we may end up at the end of the buffer.
460void TGLexer::SkipBCPLComment() {
461 ++CurPtr; // skip the second slash.
462 auto EOLPos = CurBuf.find_first_of("\r\n", CurPtr - CurBuf.data());
463 CurPtr = (EOLPos == StringRef::npos) ? CurBuf.end() : CurBuf.data() + EOLPos;
464}
465
466/// SkipCComment - This skips C-style /**/ comments. The only difference from C
467/// is that we allow nesting.
468bool TGLexer::SkipCComment() {
469 ++CurPtr; // skip the star.
470 unsigned CommentDepth = 1;
471
472 while (true) {
473 int CurChar = getNextChar();
474 switch (CurChar) {
475 case EOF:
476 PrintError(TokStart, "unterminated comment");
477 return true;
478 case '*':
479 // End of the comment?
480 if (CurPtr[0] != '/') break;
481
482 ++CurPtr; // End the */.
483 if (--CommentDepth == 0)
484 return false;
485 break;
486 case '/':
487 // Start of a nested comment?
488 if (CurPtr[0] != '*') break;
489 ++CurPtr;
490 ++CommentDepth;
491 break;
492 }
493 }
494}
495
496/// LexNumber - Lex:
497/// [-+]?[0-9]+
498/// 0x[0-9a-fA-F]+
499/// 0b[01]+
500tgtok::TokKind TGLexer::LexNumber() {
501 unsigned Base = 0;
502 const char *NumStart;
503
504 // Check if it's a hex or a binary value.
505 if (CurPtr[-1] == '0') {
506 NumStart = CurPtr + 1;
507 if (CurPtr[0] == 'x') {
508 Base = 16;
509 do
510 ++CurPtr;
511 while (isHexDigit(CurPtr[0]));
512 } else if (CurPtr[0] == 'b') {
513 Base = 2;
514 do
515 ++CurPtr;
516 while (CurPtr[0] == '0' || CurPtr[0] == '1');
517 }
518 }
519
520 // For a hex or binary value, we always convert it to an unsigned value.
521 bool IsMinus = false;
522
523 // Check if it's a decimal value.
524 if (Base == 0) {
525 // Check for a sign without a digit.
526 if (!isDigit(CurPtr[0])) {
527 if (CurPtr[-1] == '-')
528 return tgtok::minus;
529 else if (CurPtr[-1] == '+')
530 return tgtok::plus;
531 }
532
533 Base = 10;
534 NumStart = TokStart;
535 IsMinus = CurPtr[-1] == '-';
536
537 while (isDigit(CurPtr[0]))
538 ++CurPtr;
539 }
540
541 // Requires at least one digit.
542 if (CurPtr == NumStart)
543 return ReturnError(TokStart, "invalid number");
544
545 errno = 0;
546 if (IsMinus)
547 CurIntVal = strtoll(NumStart, nullptr, Base);
548 else
549 CurIntVal = strtoull(NumStart, nullptr, Base);
550
551 if (errno == EINVAL)
552 return ReturnError(TokStart, "invalid number");
553 if (errno == ERANGE)
554 return ReturnError(TokStart, "number out of range");
555
556 return Base == 2 ? tgtok::BinaryIntVal : tgtok::IntVal;
557}
558
559/// LexBracket - We just read '['. If this is a code block, return it,
560/// otherwise return the bracket. Match: '[' and '[{ ( [^}]+ | }[^]] )* }]'
561tgtok::TokKind TGLexer::LexBracket() {
562 if (CurPtr[0] != '{')
563 return tgtok::l_square;
564 ++CurPtr;
565 const char *CodeStart = CurPtr;
566 while (true) {
567 int Char = getNextChar();
568 if (Char == EOF) break;
569
570 if (Char != '}') continue;
571
572 Char = getNextChar();
573 if (Char == EOF) break;
574 if (Char == ']') {
575 CurStrVal.assign(CodeStart, CurPtr-2);
576 return tgtok::CodeFragment;
577 }
578 }
579
580 return ReturnError(CodeStart - 2, "unterminated code block");
581}
582
583/// LexExclaim - Lex '!' and '![a-zA-Z]+'.
584tgtok::TokKind TGLexer::LexExclaim() {
585 if (!isAlpha(*CurPtr))
586 return ReturnError(CurPtr - 1, "invalid \"!operator\"");
587
588 const char *Start = CurPtr++;
589 while (isAlpha(*CurPtr))
590 ++CurPtr;
591
592 // Check to see which operator this is.
594 StringSwitch<tgtok::TokKind>(StringRef(Start, CurPtr - Start))
595 .Case("eq", tgtok::XEq)
596 .Case("ne", tgtok::XNe)
597 .Case("le", tgtok::XLe)
598 .Case("lt", tgtok::XLt)
599 .Case("ge", tgtok::XGe)
600 .Case("gt", tgtok::XGt)
601 .Case("if", tgtok::XIf)
602 .Case("cond", tgtok::XCond)
603 .Case("isa", tgtok::XIsA)
604 .Case("head", tgtok::XHead)
605 .Case("tail", tgtok::XTail)
606 .Case("size", tgtok::XSize)
607 .Case("con", tgtok::XConcat)
608 .Case("dag", tgtok::XDag)
609 .Case("add", tgtok::XADD)
610 .Case("sub", tgtok::XSUB)
611 .Case("mul", tgtok::XMUL)
612 .Case("div", tgtok::XDIV)
613 .Case("not", tgtok::XNOT)
614 .Case("logtwo", tgtok::XLOG2)
615 .Case("and", tgtok::XAND)
616 .Case("or", tgtok::XOR)
617 .Case("xor", tgtok::XXOR)
618 .Case("shl", tgtok::XSHL)
619 .Case("sra", tgtok::XSRA)
620 .Case("srl", tgtok::XSRL)
621 .Case("cast", tgtok::XCast)
622 .Case("empty", tgtok::XEmpty)
623 .Case("subst", tgtok::XSubst)
624 .Case("foldl", tgtok::XFoldl)
625 .Case("foreach", tgtok::XForEach)
626 .Case("filter", tgtok::XFilter)
627 .Case("listconcat", tgtok::XListConcat)
628 .Case("listflatten", tgtok::XListFlatten)
629 .Case("listsplat", tgtok::XListSplat)
630 .Case("listremove", tgtok::XListRemove)
631 .Case("range", tgtok::XRange)
632 .Case("strconcat", tgtok::XStrConcat)
633 .Case("initialized", tgtok::XInitialized)
634 .Case("interleave", tgtok::XInterleave)
635 .Case("substr", tgtok::XSubstr)
636 .Case("find", tgtok::XFind)
637 .Cases("setdagop", "setop", tgtok::XSetDagOp) // !setop is deprecated.
638 .Cases("getdagop", "getop", tgtok::XGetDagOp) // !getop is deprecated.
639 .Case("getdagarg", tgtok::XGetDagArg)
640 .Case("getdagname", tgtok::XGetDagName)
641 .Case("setdagarg", tgtok::XSetDagArg)
642 .Case("setdagname", tgtok::XSetDagName)
643 .Case("exists", tgtok::XExists)
644 .Case("tolower", tgtok::XToLower)
645 .Case("toupper", tgtok::XToUpper)
646 .Case("repr", tgtok::XRepr)
648
649 return Kind != tgtok::Error ? Kind
650 : ReturnError(Start - 1, "unknown operator");
651}
652
653bool TGLexer::prepExitInclude(bool IncludeStackMustBeEmpty) {
654 // Report an error, if preprocessor control stack for the current
655 // file is not empty.
656 if (!PrepIncludeStack.back().empty()) {
657 prepReportPreprocessorStackError();
658
659 return false;
660 }
661
662 // Pop the preprocessing controls from the include stack.
663 PrepIncludeStack.pop_back();
664
665 if (IncludeStackMustBeEmpty) {
666 assert(PrepIncludeStack.empty() &&
667 "preprocessor include stack is not empty");
668 } else {
669 assert(!PrepIncludeStack.empty() && "preprocessor include stack is empty");
670 }
671
672 return true;
673}
674
675tgtok::TokKind TGLexer::prepIsDirective() const {
676 for (const auto [Kind, Word] : PreprocessorDirs) {
677 if (StringRef(CurPtr, Word.size()) != Word)
678 continue;
679 int NextChar = peekNextChar(Word.size());
680
681 // Check for whitespace after the directive. If there is no whitespace,
682 // then we do not recognize it as a preprocessing directive.
683
684 // New line and EOF may follow only #else/#endif. It will be reported
685 // as an error for #ifdef/#define after the call to prepLexMacroName().
686 if (NextChar == ' ' || NextChar == '\t' || NextChar == EOF ||
687 NextChar == '\n' ||
688 // It looks like TableGen does not support '\r' as the actual
689 // carriage return, e.g. getNextChar() treats a single '\r'
690 // as '\n'. So we do the same here.
691 NextChar == '\r')
692 return Kind;
693
694 // Allow comments after some directives, e.g.:
695 // #else// OR #else/**/
696 // #endif// OR #endif/**/
697 //
698 // Note that we do allow comments after #ifdef/#define here, e.g.
699 // #ifdef/**/ AND #ifdef//
700 // #define/**/ AND #define//
701 //
702 // These cases will be reported as incorrect after calling
703 // prepLexMacroName(). We could have supported C-style comments
704 // after #ifdef/#define, but this would complicate the code
705 // for little benefit.
706 if (NextChar == '/') {
707 NextChar = peekNextChar(Word.size() + 1);
708
709 if (NextChar == '*' || NextChar == '/')
710 return Kind;
711
712 // Pretend that we do not recognize the directive.
713 }
714 }
715
716 return tgtok::Error;
717}
718
719void TGLexer::prepEatPreprocessorDirective(tgtok::TokKind Kind) {
720 TokStart = CurPtr;
721
722 for (const auto [PKind, PWord] : PreprocessorDirs) {
723 if (PKind == Kind) {
724 // Advance CurPtr to the end of the preprocessing word.
725 CurPtr += PWord.size();
726 return;
727 }
728 }
729
731 "unsupported preprocessing token in prepEatPreprocessorDirective()");
732}
733
734tgtok::TokKind TGLexer::lexPreprocessor(tgtok::TokKind Kind,
735 bool ReturnNextLiveToken) {
736 // We must be looking at a preprocessing directive. Eat it!
737 prepEatPreprocessorDirective(Kind);
738
739 if (Kind == tgtok::Ifdef || Kind == tgtok::Ifndef) {
740 StringRef MacroName = prepLexMacroName();
741 StringRef IfTokName = Kind == tgtok::Ifdef ? "#ifdef" : "#ifndef";
742 if (MacroName.empty())
743 return ReturnError(TokStart, "expected macro name after " + IfTokName);
744
745 bool MacroIsDefined = DefinedMacros.count(MacroName) != 0;
746
747 // Canonicalize ifndef's MacroIsDefined to its ifdef equivalent.
748 if (Kind == tgtok::Ifndef)
749 MacroIsDefined = !MacroIsDefined;
750
751 // Regardless of whether we are processing tokens or not,
752 // we put the #ifdef control on stack.
753 // Note that MacroIsDefined has been canonicalized against ifdef.
754 PrepIncludeStack.back().push_back(
755 {tgtok::Ifdef, MacroIsDefined, SMLoc::getFromPointer(TokStart)});
756
757 if (!prepSkipDirectiveEnd())
758 return ReturnError(CurPtr, "only comments are supported after " +
759 IfTokName + " NAME");
760
761 // If we were not processing tokens before this #ifdef,
762 // then just return back to the lines skipping code.
763 if (!ReturnNextLiveToken)
764 return Kind;
765
766 // If we were processing tokens before this #ifdef,
767 // and the macro is defined, then just return the next token.
768 if (MacroIsDefined)
769 return LexToken();
770
771 // We were processing tokens before this #ifdef, and the macro
772 // is not defined, so we have to start skipping the lines.
773 // If the skipping is successful, it will return the token following
774 // either #else or #endif corresponding to this #ifdef.
775 if (prepSkipRegion(ReturnNextLiveToken))
776 return LexToken();
777
778 return tgtok::Error;
779 } else if (Kind == tgtok::Else) {
780 // Check if this #else is correct before calling prepSkipDirectiveEnd(),
781 // which will move CurPtr away from the beginning of #else.
782 if (PrepIncludeStack.back().empty())
783 return ReturnError(TokStart, "#else without #ifdef or #ifndef");
784
785 PreprocessorControlDesc IfdefEntry = PrepIncludeStack.back().back();
786
787 if (IfdefEntry.Kind != tgtok::Ifdef) {
788 PrintError(TokStart, "double #else");
789 return ReturnError(IfdefEntry.SrcPos, "previous #else is here");
790 }
791
792 // Replace the corresponding #ifdef's control with its negation
793 // on the control stack.
794 PrepIncludeStack.back().back() = {Kind, !IfdefEntry.IsDefined,
795 SMLoc::getFromPointer(TokStart)};
796
797 if (!prepSkipDirectiveEnd())
798 return ReturnError(CurPtr, "only comments are supported after #else");
799
800 // If we were processing tokens before this #else,
801 // we have to start skipping lines until the matching #endif.
802 if (ReturnNextLiveToken) {
803 if (prepSkipRegion(ReturnNextLiveToken))
804 return LexToken();
805
806 return tgtok::Error;
807 }
808
809 // Return to the lines skipping code.
810 return Kind;
811 } else if (Kind == tgtok::Endif) {
812 // Check if this #endif is correct before calling prepSkipDirectiveEnd(),
813 // which will move CurPtr away from the beginning of #endif.
814 if (PrepIncludeStack.back().empty())
815 return ReturnError(TokStart, "#endif without #ifdef");
816
817 [[maybe_unused]] auto &IfdefOrElseEntry = PrepIncludeStack.back().back();
818
819 assert((IfdefOrElseEntry.Kind == tgtok::Ifdef ||
820 IfdefOrElseEntry.Kind == tgtok::Else) &&
821 "invalid preprocessor control on the stack");
822
823 if (!prepSkipDirectiveEnd())
824 return ReturnError(CurPtr, "only comments are supported after #endif");
825
826 PrepIncludeStack.back().pop_back();
827
828 // If we were processing tokens before this #endif, then
829 // we should continue it.
830 if (ReturnNextLiveToken) {
831 return LexToken();
832 }
833
834 // Return to the lines skipping code.
835 return Kind;
836 } else if (Kind == tgtok::Define) {
837 StringRef MacroName = prepLexMacroName();
838 if (MacroName.empty())
839 return ReturnError(TokStart, "expected macro name after #define");
840
841 if (!DefinedMacros.insert(MacroName).second)
843 "duplicate definition of macro: " + Twine(MacroName));
844
845 if (!prepSkipDirectiveEnd())
846 return ReturnError(CurPtr,
847 "only comments are supported after #define NAME");
848
849 assert(ReturnNextLiveToken &&
850 "#define must be ignored during the lines skipping");
851
852 return LexToken();
853 }
854
855 llvm_unreachable("preprocessing directive is not supported");
856}
857
858bool TGLexer::prepSkipRegion(bool MustNeverBeFalse) {
859 assert(MustNeverBeFalse && "invalid recursion.");
860
861 do {
862 // Skip all symbols to the line end.
863 while (*CurPtr != '\n')
864 ++CurPtr;
865
866 // Find the first non-whitespace symbol in the next line(s).
867 if (!prepSkipLineBegin())
868 return false;
869
870 // If the first non-blank/comment symbol on the line is '#',
871 // it may be a start of preprocessing directive.
872 //
873 // If it is not '#' just go to the next line.
874 if (*CurPtr == '#')
875 ++CurPtr;
876 else
877 continue;
878
879 tgtok::TokKind Kind = prepIsDirective();
880
881 // If we did not find a preprocessing directive or it is #define,
882 // then just skip to the next line. We do not have to do anything
883 // for #define in the line-skipping mode.
884 if (Kind == tgtok::Error || Kind == tgtok::Define)
885 continue;
886
887 tgtok::TokKind ProcessedKind = lexPreprocessor(Kind, false);
888
889 // If lexPreprocessor() encountered an error during lexing this
890 // preprocessor idiom, then return false to the calling lexPreprocessor().
891 // This will force tgtok::Error to be returned to the tokens processing.
892 if (ProcessedKind == tgtok::Error)
893 return false;
894
895 assert(Kind == ProcessedKind && "prepIsDirective() and lexPreprocessor() "
896 "returned different token kinds");
897
898 // If this preprocessing directive enables tokens processing,
899 // then return to the lexPreprocessor() and get to the next token.
900 // We can move from line-skipping mode to processing tokens only
901 // due to #else or #endif.
902 if (prepIsProcessingEnabled()) {
903 assert((Kind == tgtok::Else || Kind == tgtok::Endif) &&
904 "tokens processing was enabled by an unexpected preprocessing "
905 "directive");
906
907 return true;
908 }
909 } while (CurPtr != CurBuf.end());
910
911 // We have reached the end of the file, but never left the lines-skipping
912 // mode. This means there is no matching #endif.
913 prepReportPreprocessorStackError();
914 return false;
915}
916
917StringRef TGLexer::prepLexMacroName() {
918 // Skip whitespaces between the preprocessing directive and the macro name.
919 while (*CurPtr == ' ' || *CurPtr == '\t')
920 ++CurPtr;
921
922 TokStart = CurPtr;
923 CurPtr = lexMacroName(StringRef(CurPtr, CurBuf.end() - CurPtr));
924 return StringRef(TokStart, CurPtr - TokStart);
925}
926
927bool TGLexer::prepSkipLineBegin() {
928 while (CurPtr != CurBuf.end()) {
929 switch (*CurPtr) {
930 case ' ':
931 case '\t':
932 case '\n':
933 case '\r':
934 break;
935
936 case '/': {
937 int NextChar = peekNextChar(1);
938 if (NextChar == '*') {
939 // Skip C-style comment.
940 // Note that we do not care about skipping the C++-style comments.
941 // If the line contains "//", it may not contain any processable
942 // preprocessing directive. Just return CurPtr pointing to
943 // the first '/' in this case. We also do not care about
944 // incorrect symbols after the first '/' - we are in lines-skipping
945 // mode, so incorrect code is allowed to some extent.
946
947 // Set TokStart to the beginning of the comment to enable proper
948 // diagnostic printing in case of error in SkipCComment().
949 TokStart = CurPtr;
950
951 // CurPtr must point to '*' before call to SkipCComment().
952 ++CurPtr;
953 if (SkipCComment())
954 return false;
955 } else {
956 // CurPtr points to the non-whitespace '/'.
957 return true;
958 }
959
960 // We must not increment CurPtr after the comment was lexed.
961 continue;
962 }
963
964 default:
965 return true;
966 }
967
968 ++CurPtr;
969 }
970
971 // We have reached the end of the file. Return to the lines skipping
972 // code, and allow it to handle the EOF as needed.
973 return true;
974}
975
976bool TGLexer::prepSkipDirectiveEnd() {
977 while (CurPtr != CurBuf.end()) {
978 switch (*CurPtr) {
979 case ' ':
980 case '\t':
981 break;
982
983 case '\n':
984 case '\r':
985 return true;
986
987 case '/': {
988 int NextChar = peekNextChar(1);
989 if (NextChar == '/') {
990 // Skip C++-style comment.
991 // We may just return true now, but let's skip to the line/buffer end
992 // to simplify the method specification.
993 ++CurPtr;
994 SkipBCPLComment();
995 } else if (NextChar == '*') {
996 // When we are skipping C-style comment at the end of a preprocessing
997 // directive, we can skip several lines. If any meaningful TD token
998 // follows the end of the C-style comment on the same line, it will
999 // be considered as an invalid usage of TD token.
1000 // For example, we want to forbid usages like this one:
1001 // #define MACRO class Class {}
1002 // But with C-style comments we also disallow the following:
1003 // #define MACRO /* This macro is used
1004 // to ... */ class Class {}
1005 // One can argue that this should be allowed, but it does not seem
1006 // to be worth of the complication. Moreover, this matches
1007 // the C preprocessor behavior.
1008
1009 // Set TokStart to the beginning of the comment to enable proper
1010 // diagnostic printer in case of error in SkipCComment().
1011 TokStart = CurPtr;
1012 ++CurPtr;
1013 if (SkipCComment())
1014 return false;
1015 } else {
1016 TokStart = CurPtr;
1017 PrintError(CurPtr, "unexpected character");
1018 return false;
1019 }
1020
1021 // We must not increment CurPtr after the comment was lexed.
1022 continue;
1023 }
1024
1025 default:
1026 // Do not allow any non-whitespaces after the directive.
1027 TokStart = CurPtr;
1028 return false;
1029 }
1030
1031 ++CurPtr;
1032 }
1033
1034 return true;
1035}
1036
1037bool TGLexer::prepIsProcessingEnabled() {
1038 return all_of(PrepIncludeStack.back(),
1039 [](const PreprocessorControlDesc &I) { return I.IsDefined; });
1040}
1041
1042void TGLexer::prepReportPreprocessorStackError() {
1043 auto &PrepControl = PrepIncludeStack.back().back();
1044 PrintError(CurBuf.end(), "reached EOF without matching #endif");
1045 PrintError(PrepControl.SrcPos, "the latest preprocessor control is here");
1046
1047 TokStart = CurPtr;
1048}
bool End
Definition: ELF_riscv.cpp:480
#define I(x, y, z)
Definition: MD5.cpp:58
static bool isDigit(const char C)
static bool isHexDigit(const char C)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some functions that are useful when dealing with strings.
This file implements the StringSwitch template, which mimics a switch() statement whose cases are string literals.
constexpr PreprocessorDir PreprocessorDirs[]
Definition: TGLexer.cpp:52
static bool isValidIDChar(char C, bool First)
Returns true if C is a valid character in an identifier.
Definition: TGLexer.cpp:46
static const char * lexMacroName(StringRef Str)
Definition: TGLexer.cpp:60
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
StringRef getBuffer() const
Definition: MemoryBuffer.h:70
Represents a location in source code.
Definition: SMLoc.h:23
static SMLoc getFromPointer(const char *Ptr)
Definition: SMLoc.h:36
constexpr const char * getPointer() const
Definition: SMLoc.h:34
Represents a range in source code.
Definition: SMLoc.h:48
This owns the files read by a parser, handles include stacks, and handles diagnostic wrangling.
Definition: SourceMgr.h:31
unsigned getMainFileID() const
Definition: SourceMgr.h:132
const MemoryBuffer * getMemoryBuffer(unsigned i) const
Definition: SourceMgr.h:125
SMLoc getParentIncludeLoc(unsigned i) const
Definition: SourceMgr.h:137
unsigned FindBufferContainingLoc(SMLoc Loc) const
Return the ID of the buffer containing the specified location.
Definition: SourceMgr.cpp:73
unsigned AddIncludeFile(const std::string &Filename, SMLoc IncludeLoc, std::string &IncludedFile)
Search for a file with the specified name in the current directory or in one of the IncludeDirs.
Definition: SourceMgr.cpp:41
size_type count(StringRef Key) const
count - Return 1 if the element is in the map, 0 otherwise.
Definition: StringMap.h:276
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:147
iterator begin() const
Definition: StringRef.h:116
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:144
size_t find_first_of(char C, size_t From=0) const
Find the first character in the string that is C, or npos if not found.
Definition: StringRef.h:377
iterator end() const
Definition: StringRef.h:118
static constexpr size_t npos
Definition: StringRef.h:53
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition: StringSet.h:38
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
StringSwitch & Cases(StringLiteral S0, StringLiteral S1, T Value)
Definition: StringSwitch.h:90
SMRange getLocRange() const
Definition: TGLexer.cpp:102
tgtok::TokKind Lex()
Definition: TGLexer.h:215
SMLoc getLoc() const
Definition: TGLexer.cpp:98
TGLexer(SourceMgr &SrcMgr, ArrayRef< std::string > Macros)
Definition: TGLexer.cpp:77
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
support::ulittle32_t Word
Definition: IRSymtab.h:52
@ r_square
Definition: TGLexer.h:41
@ XListSplat
Definition: TGLexer.h:124
@ XSetDagArg
Definition: TGLexer.h:158
@ XGetDagName
Definition: TGLexer.h:157
@ l_square
Definition: TGLexer.h:40
@ CodeFragment
Definition: TGLexer.h:168
@ XInterleave
Definition: TGLexer.h:126
@ MultiClass
Definition: TGLexer.h:104
@ BinaryIntVal
Definition: TGLexer.h:66
@ XSetDagName
Definition: TGLexer.h:159
@ XGetDagArg
Definition: TGLexer.h:156
@ XListConcat
Definition: TGLexer.h:122
@ XInitialized
Definition: TGLexer.h:138
@ XStrConcat
Definition: TGLexer.h:125
@ XListFlatten
Definition: TGLexer.h:123
@ FalseVal
Definition: TGLexer.h:59
@ dotdotdot
Definition: TGLexer.h:55
@ question
Definition: TGLexer.h:53
@ XListRemove
Definition: TGLexer.h:152
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
void PrintFatalError(const Twine &Msg)
Definition: Error.cpp:132
void PrintError(const Twine &Msg)
Definition: Error.cpp:104
SourceMgr SrcMgr
Definition: Error.cpp:24
void PrintWarning(const Twine &Msg)
Definition: Error.cpp:92
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.