LLVM 18.0.0git
AsmLexer.cpp
Go to the documentation of this file.
1//===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This class implements the lexer for assembly files.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/APInt.h"
15#include "llvm/ADT/ArrayRef.h"
17#include "llvm/ADT/StringRef.h"
19#include "llvm/MC/MCAsmInfo.h"
22#include "llvm/Support/SMLoc.h"
24#include <cassert>
25#include <cctype>
26#include <cstdio>
27#include <cstring>
28#include <string>
29#include <tuple>
30#include <utility>
31
32using namespace llvm;
33
/// Construct a lexer bound to the target assembly description \p MAI, which
/// is stored in the member of the same name.
// NOTE(review): listing lines 35-36 (the constructor body) are absent from
// this extract; restore them from the upstream file before building.
34AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) {
37}
38
39AsmLexer::~AsmLexer() = default;
40
41void AsmLexer::setBuffer(StringRef Buf, const char *ptr,
42 bool EndStatementAtEOF) {
43 CurBuf = Buf;
44
45 if (ptr)
46 CurPtr = ptr;
47 else
48 CurPtr = CurBuf.begin();
49
50 TokStart = nullptr;
51 this->EndStatementAtEOF = EndStatementAtEOF;
52}
53
54/// ReturnError - Set the error to the specified string at the specified
55/// location. This is defined to always return AsmToken::Error.
///
/// The returned Error token covers the text from \p Loc up to the current
/// lexing position (CurPtr).
56AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
// NOTE(review): listing line 57 is absent from this extract; presumably it is
// the statement that records Msg at Loc. Restore it from the upstream file.
58
59 return AsmToken(AsmToken::Error, StringRef(Loc, CurPtr - Loc));
60}
61
62int AsmLexer::getNextChar() {
63 if (CurPtr == CurBuf.end())
64 return EOF;
65 return (unsigned char)*CurPtr++;
66}
67
68int AsmLexer::peekNextChar() {
69 if (CurPtr == CurBuf.end())
70 return EOF;
71 return (unsigned char)*CurPtr;
72}
73
74/// The leading integral digit sequence and dot should have already been
75/// consumed, some or all of the fractional digit sequence *can* have been
76/// consumed.
///
/// Accepts the remainder of a decimal floating-point literal: any further
/// fraction digits followed by an optional exponent ([eE][+-]?[0-9]*).
77AsmToken AsmLexer::LexFloatLiteral() {
78 // Skip the fractional digit sequence.
79 while (isDigit(*CurPtr))
80 ++CurPtr;
81
// A sign directly after the digits (i.e. without a preceding 'e'/'E') is
// rejected here.
82 if (*CurPtr == '-' || *CurPtr == '+')
83 return ReturnError(CurPtr, "invalid sign in float literal");
84
85 // Check for exponent
86 if ((*CurPtr == 'e' || *CurPtr == 'E')) {
87 ++CurPtr;
88
89 if (*CurPtr == '-' || *CurPtr == '+')
90 ++CurPtr;
91
// Note that this loop also accepts an exponent with no digits at all.
92 while (isDigit(*CurPtr))
93 ++CurPtr;
94 }
95
// NOTE(review): listing line 96 (the start of the return statement whose
// argument list continues below) is absent from this extract.
97 StringRef(TokStart, CurPtr - TokStart));
98}
99
100/// LexHexFloatLiteral matches essentially (.[0-9a-fA-F]*)?[pP][+-]?[0-9a-fA-F]+
101/// while making sure there are enough actual digits around for the constant to
102/// be valid.
103///
104/// The leading "0x[0-9a-fA-F]*" (i.e. integer part) has already been consumed
105/// before we get here.
///
/// \param NoIntDigits true when the already-consumed integer part contained no
///                    digits; the literal is then only valid if the fraction
///                    supplies at least one.
106AsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits) {
107 assert((*CurPtr == 'p' || *CurPtr == 'P' || *CurPtr == '.') &&
108 "unexpected parse state in floating hex");
109 bool NoFracDigits = true;
110
111 // Skip the fractional part if there is one
112 if (*CurPtr == '.') {
113 ++CurPtr;
114
115 const char *FracStart = CurPtr;
116 while (isHexDigit(*CurPtr))
117 ++CurPtr;
118
119 NoFracDigits = CurPtr == FracStart;
120 }
121
// The significand needs a digit on at least one side of the '.'.
122 if (NoIntDigits && NoFracDigits)
123 return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
124 "expected at least one significand digit");
125
126 // Make sure we do have some kind of proper exponent part
127 if (*CurPtr != 'p' && *CurPtr != 'P')
128 return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
129 "expected exponent part 'p'");
130 ++CurPtr;
131
132 if (*CurPtr == '+' || *CurPtr == '-')
133 ++CurPtr;
134
135 // N.b. exponent digits are *not* hex
136 const char *ExpStart = CurPtr;
137 while (isDigit(*CurPtr))
138 ++CurPtr;
139
140 if (CurPtr == ExpStart)
141 return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
142 "expected at least one exponent digit");
143
// NOTE(review): listing line 144 (the success return) is absent from this
// extract.
145}
146
147/// LexIdentifier: [a-zA-Z_$.@?][a-zA-Z0-9_$.@#?]*
148static bool isIdentifierChar(char C, bool AllowAt, bool AllowHash) {
149 return isAlnum(C) || C == '_' || C == '$' || C == '.' || C == '?' ||
150 (AllowAt && C == '@') || (AllowHash && C == '#');
151}
152
// Lexes an identifier whose first character has already been consumed. A
// leading '.' followed by a digit is first checked for being a floating-point
// literal instead.
// NOTE(review): listing lines 160-161, 166, 171 and 173 are absent from this
// extract, so several statements below are incomplete.
153AsmToken AsmLexer::LexIdentifier() {
154 // Check for floating point literals.
155 if (CurPtr[-1] == '.' && isDigit(*CurPtr)) {
156 // Disambiguate a .1243foo identifier from a floating literal.
157 while (isDigit(*CurPtr))
158 ++CurPtr;
159
// NOTE(review): the opening of this condition (listing lines 160-161) is
// missing; only its tail survives.
162 *CurPtr == 'e' || *CurPtr == 'E')
163 return LexFloatLiteral();
164 }
165
// NOTE(review): the loop header that owns this increment (listing line 166)
// is missing.
167 ++CurPtr;
168
169 // Handle . as a special case.
170 if (CurPtr == TokStart+1 && TokStart[0] == '.')
// NOTE(review): the statement guarded by the condition above (listing line
// 171) and the final return (listing line 173) are missing.
172
174}
175
176/// LexSlash: Slash: /
177/// C-Style Comment: /* ... */
178/// C-style Comment: // ...
///
/// Called after the leading '/' has been consumed (the code inspects *CurPtr
/// for the following '*' or '/').
// NOTE(review): listing lines 180, 182, 194, 208 and 213 are absent from this
// extract, so the guard at the top, the default-case return, the
// CommentConsumer call and the comment-token return are incomplete.
179AsmToken AsmLexer::LexSlash() {
181 IsAtStartOfStatement = false;
183 }
184
185 switch (*CurPtr) {
186 case '*':
187 IsAtStartOfStatement = false;
188 break; // C style comment.
189 case '/':
190 ++CurPtr;
191 return LexLineComment();
192 default:
193 IsAtStartOfStatement = false;
195 }
196
197 // C Style comment.
198 ++CurPtr; // skip the star.
199 const char *CommentTextStart = CurPtr;
// Scan for the closing "*/"; running off the end of the buffer is reported as
// an unterminated comment below.
200 while (CurPtr != CurBuf.end()) {
201 switch (*CurPtr++) {
202 case '*':
203 // End of the comment?
204 if (*CurPtr != '/')
205 break;
206 // If we have a CommentConsumer, notify it about the comment.
207 if (CommentConsumer) {
// The text handed over excludes the trailing '*' (hence the "- 1").
209 SMLoc::getFromPointer(CommentTextStart),
210 StringRef(CommentTextStart, CurPtr - 1 - CommentTextStart));
211 }
212 ++CurPtr; // End the */.
214 StringRef(TokStart, CurPtr - TokStart));
215 }
216 }
217 return ReturnError(TokStart, "unterminated comment");
218}
219
220/// LexLineComment: Comment: #[^\n]*
221/// : //[^\n]*
///
/// Consumes input up to and including the line terminator ("\n", "\r" or
/// "\r\n"), or up to the end of the buffer.
// NOTE(review): listing lines 237, 245 and 249 are absent from this extract,
// so the CommentConsumer call and both return statements are incomplete.
222AsmToken AsmLexer::LexLineComment() {
223 // Mark This as an end of statement with a body of the
224 // comment. While it would be nicer to leave this two tokens,
225 // backwards compatibility with TargetParsers makes keeping this in this form
226 // better.
227 const char *CommentTextStart = CurPtr;
228 int CurChar = getNextChar();
229 while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF)
230 CurChar = getNextChar();
// NewlinePtr is one past the terminator character that stopped the loop.
231 const char *NewlinePtr = CurPtr;
// Treat "\r\n" as a single line terminator.
232 if (CurChar == '\r' && CurPtr != CurBuf.end() && *CurPtr == '\n')
233 ++CurPtr;
234
235 // If we have a CommentConsumer, notify it about the comment.
236 if (CommentConsumer) {
// NOTE(review): the "- 1" drops the terminator character. When the loop
// stopped on EOF no terminator was consumed (getNextChar() does not advance
// at the end of the buffer), so this looks like it drops the last comment
// character instead - confirm against upstream.
238 SMLoc::getFromPointer(CommentTextStart),
239 StringRef(CommentTextStart, NewlinePtr - 1 - CommentTextStart));
240 }
241
242 IsAtStartOfLine = true;
243 // This is a whole line comment. leave newline
244 if (IsAtStartOfStatement)
246 StringRef(TokStart, CurPtr - TokStart));
247 IsAtStartOfStatement = true;
248
250 StringRef(TokStart, CurPtr - 1 - TokStart));
251}
252
/// Advance \p CurPtr past an optional integer-literal type suffix.
///
/// Skips case-insensitive ULL, UL, U, L and LL suffixes, i.e. at most one
/// 'U' followed by at most two 'L's. \p CurPtr is left unchanged when no
/// suffix is present.
static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
  if (*CurPtr == 'U' || *CurPtr == 'u')
    ++CurPtr;

  // Up to two consecutive 'L'/'l' characters.
  for (int LCount = 0; LCount < 2 && (*CurPtr == 'L' || *CurPtr == 'l');
       ++LCount)
    ++CurPtr;
}
262
263// Look ahead to search for first non-hex digit, if it's [hH], then we treat the
264// integer as a hexadecimal, possibly with leading zeroes.
265static unsigned doHexLookAhead(const char *&CurPtr, unsigned DefaultRadix,
266 bool LexHex) {
267 const char *FirstNonDec = nullptr;
268 const char *LookAhead = CurPtr;
269 while (true) {
270 if (isDigit(*LookAhead)) {
271 ++LookAhead;
272 } else {
273 if (!FirstNonDec)
274 FirstNonDec = LookAhead;
275
276 // Keep going if we are looking for a 'h' suffix.
277 if (LexHex && isHexDigit(*LookAhead))
278 ++LookAhead;
279 else
280 break;
281 }
282 }
283 bool isHex = LexHex && (*LookAhead == 'h' || *LookAhead == 'H');
284 CurPtr = isHex || !FirstNonDec ? LookAhead : FirstNonDec;
285 if (isHex)
286 return 16;
287 return DefaultRadix;
288}
289
290static const char *findLastDigit(const char *CurPtr, unsigned DefaultRadix) {
291 while (hexDigitValue(*CurPtr) < DefaultRadix) {
292 ++CurPtr;
293 }
294 return CurPtr;
295}
296
// Helper that builds an integer token; the result depends on whether Value
// fits in 64 bits.
// NOTE(review): listing line 297 (the signature; the index at the end of this
// listing gives it as `static AsmToken intToken(StringRef Ref, APInt &Value)`)
// and lines 299-300 (both return statements) are absent from this extract.
298 if (Value.isIntN(64))
301}
302
/// Return a human-readable name for \p Radix, for use in diagnostics:
/// "binary", "octal", "decimal" or "hexadecimal" for 2, 8, 10 and 16, and
/// "base-N" for any other value.
static std::string radixName(unsigned Radix) {
  switch (Radix) {
  case 2:
    return "binary";
  case 8:
    return "octal";
  case 10:
    return "decimal";
  case 16:
    return "hexadecimal";
  default:
    return "base-" + std::to_string(Radix);
  }
}
317
318/// LexDigit: First character is [0-9].
319/// Local Label: [0-9][:]
320/// Forward/Backward Label: [0-9][fb]
321/// Binary integer: 0b[01]+
322/// Octal integer: 0[0-7]+
323/// Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH]
324/// Decimal integer: [1-9][0-9]*
// NOTE(review): listing lines 370, 402, 409, 420, 422, 476, 485, 495, 506,
// 514, 544, 552 and 562 are absent from this extract. Most of them appear to
// be the statements that define the `Result` string or skip an integer
// suffix; restore them from the upstream file before building.
325AsmToken AsmLexer::LexDigit() {
326 // MASM-flavor binary integer: [01]+[yY] (if DefaultRadix < 16, [bByY])
327 // MASM-flavor octal integer: [0-7]+[oOqQ]
328 // MASM-flavor decimal integer: [0-9]+[tT] (if DefaultRadix < 16, [dDtT])
329 // MASM-flavor hexadecimal integer: [0-9][0-9a-fA-F]*[hH]
330 if (LexMasmIntegers && isdigit(CurPtr[-1])) {
// Track the first character that rules out a binary / decimal reading; these
// positions are used below to recognise a trailing 'b'/'d' radix suffix.
331 const char *FirstNonBinary =
332 (CurPtr[-1] != '0' && CurPtr[-1] != '1') ? CurPtr - 1 : nullptr;
333 const char *FirstNonDecimal =
334 (CurPtr[-1] < '0' || CurPtr[-1] > '9') ? CurPtr - 1 : nullptr;
335 const char *OldCurPtr = CurPtr;
336 while (isHexDigit(*CurPtr)) {
337 switch (*CurPtr) {
338 default:
339 if (!FirstNonDecimal) {
340 FirstNonDecimal = CurPtr;
341 }
342 [[fallthrough]];
343 case '9':
344 case '8':
345 case '7':
346 case '6':
347 case '5':
348 case '4':
349 case '3':
350 case '2':
351 if (!FirstNonBinary) {
352 FirstNonBinary = CurPtr;
353 }
354 break;
355 case '1':
356 case '0':
357 break;
358 }
359 ++CurPtr;
360 }
361 if (*CurPtr == '.') {
362 // MASM float literals (other than hex floats) always contain a ".", and
363 // are always written in decimal.
364 ++CurPtr;
365 return LexFloatLiteral();
366 }
367
368 if (LexMasmHexFloats && (*CurPtr == 'r' || *CurPtr == 'R')) {
369 ++CurPtr;
// NOTE(review): the return for this branch (listing line 370) is missing.
371 }
372
// Radix stays 0 when no explicit radix suffix was recognised.
373 unsigned Radix = 0;
374 if (*CurPtr == 'h' || *CurPtr == 'H') {
375 // hexadecimal number
376 ++CurPtr;
377 Radix = 16;
378 } else if (*CurPtr == 't' || *CurPtr == 'T') {
379 // decimal number
380 ++CurPtr;
381 Radix = 10;
382 } else if (*CurPtr == 'o' || *CurPtr == 'O' || *CurPtr == 'q' ||
383 *CurPtr == 'Q') {
384 // octal number
385 ++CurPtr;
386 Radix = 8;
387 } else if (*CurPtr == 'y' || *CurPtr == 'Y') {
388 // binary number
389 ++CurPtr;
390 Radix = 2;
// 'd'/'b' are themselves hex digits, so the scan above has already consumed
// them; they count as a radix suffix only when they were the last character
// scanned and the default radix is small enough that they cannot be digits.
391 } else if (FirstNonDecimal && FirstNonDecimal + 1 == CurPtr &&
392 DefaultRadix < 14 &&
393 (*FirstNonDecimal == 'd' || *FirstNonDecimal == 'D')) {
394 Radix = 10;
395 } else if (FirstNonBinary && FirstNonBinary + 1 == CurPtr &&
396 DefaultRadix < 12 &&
397 (*FirstNonBinary == 'b' || *FirstNonBinary == 'B')) {
398 Radix = 2;
399 }
400
401 if (Radix) {
// NOTE(review): the definition of `Result` (listing line 402) is missing.
403 APInt Value(128, 0, true);
404
// drop_back() removes the one-character radix suffix before parsing.
405 if (Result.drop_back().getAsInteger(Radix, Value))
406 return ReturnError(TokStart, "invalid " + radixName(Radix) + " number");
407
408 // MSVC accepts and ignores type suffixes on integer literals.
410
411 return intToken(Result, Value);
412 }
413
414 // default-radix integers, or floating point numbers, fall through
415 CurPtr = OldCurPtr;
416 }
417
418 // MASM default-radix integers: [0-9a-fA-F]+
419 // (All other integer literals have a radix specifier.)
// NOTE(review): the `if` that opens this block (listing line 420) and the
// definition of `Result` (listing line 422) are missing.
421 CurPtr = findLastDigit(CurPtr, 16);
423
424 APInt Value(128, 0, true);
425 if (Result.getAsInteger(DefaultRadix, Value)) {
426 return ReturnError(TokStart,
427 "invalid " + radixName(DefaultRadix) + " number");
428 }
429
430 return intToken(Result, Value);
431 }
432
433 // Motorola hex integers: $[0-9a-fA-F]+
434 if (LexMotorolaIntegers && CurPtr[-1] == '$') {
435 const char *NumStart = CurPtr;
436 while (isHexDigit(CurPtr[0]))
437 ++CurPtr;
438
439 APInt Result(128, 0);
440 if (StringRef(NumStart, CurPtr - NumStart).getAsInteger(16, Result))
441 return ReturnError(TokStart, "invalid hexadecimal number");
442
443 return intToken(StringRef(TokStart, CurPtr - TokStart), Result);
444 }
445
446 // Motorola binary integers: %[01]+
447 if (LexMotorolaIntegers && CurPtr[-1] == '%') {
448 const char *NumStart = CurPtr;
449 while (*CurPtr == '0' || *CurPtr == '1')
450 ++CurPtr;
451
452 APInt Result(128, 0);
453 if (StringRef(NumStart, CurPtr - NumStart).getAsInteger(2, Result))
454 return ReturnError(TokStart, "invalid binary number");
455
456 return intToken(StringRef(TokStart, CurPtr - TokStart), Result);
457 }
458
459 // Decimal integer: [1-9][0-9]*
460 // HLASM-flavour decimal integer: [0-9][0-9]*
461 // FIXME: Later on, support for fb for HLASM has to be added in
462 // as they probably would be needed for asm goto
463 if (LexHLASMIntegers || CurPtr[-1] != '0' || CurPtr[0] == '.') {
464 unsigned Radix = doHexLookAhead(CurPtr, 10, LexMasmIntegers);
465
466 if (!LexHLASMIntegers) {
467 bool IsHex = Radix == 16;
468 // Check for floating point literals.
469 if (!IsHex && (*CurPtr == '.' || *CurPtr == 'e' || *CurPtr == 'E')) {
470 if (*CurPtr == '.')
471 ++CurPtr;
472 return LexFloatLiteral();
473 }
474 }
475
// NOTE(review): the definition of `Result` (listing line 476) is missing.
477
478 APInt Value(128, 0, true);
479 if (Result.getAsInteger(Radix, Value))
480 return ReturnError(TokStart, "invalid " + radixName(Radix) + " number");
481
482 if (!LexHLASMIntegers)
483 // The darwin/x86 (and x86-64) assembler accepts and ignores type
484 // suffixes on integer literals.
// NOTE(review): the statement guarded by the `if` above (listing line 485) is
// missing.
486
487 return intToken(Result, Value);
488 }
489
490 if (!LexMasmIntegers && ((*CurPtr == 'b') || (*CurPtr == 'B'))) {
491 ++CurPtr;
492 // See if we actually have "0b" as part of something like "jmp 0b\n"
493 if (!isDigit(CurPtr[0])) {
494 --CurPtr;
// NOTE(review): the definition of `Result` (listing line 495) is missing.
496 return AsmToken(AsmToken::Integer, Result, 0);
497 }
498 const char *NumStart = CurPtr;
499 while (CurPtr[0] == '0' || CurPtr[0] == '1')
500 ++CurPtr;
501
502 // Requires at least one binary digit.
503 if (CurPtr == NumStart)
504 return ReturnError(TokStart, "invalid binary number");
505
// NOTE(review): the definition of `Result` (listing line 506) is missing.
507
508 APInt Value(128, 0, true);
// substr(2) skips the two-character "0b" prefix.
509 if (Result.substr(2).getAsInteger(2, Value))
510 return ReturnError(TokStart, "invalid binary number");
511
512 // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
513 // suffixes on integer literals.
515
516 return intToken(Result, Value);
517 }
518
519 if ((*CurPtr == 'x') || (*CurPtr == 'X')) {
520 ++CurPtr;
521 const char *NumStart = CurPtr;
522 while (isHexDigit(CurPtr[0]))
523 ++CurPtr;
524
525 // "0x.0p0" is valid, and "0x0p0" (but not "0xp0" for example, which will be
526 // diagnosed by LexHexFloatLiteral).
527 if (CurPtr[0] == '.' || CurPtr[0] == 'p' || CurPtr[0] == 'P')
528 return LexHexFloatLiteral(NumStart == CurPtr);
529
530 // Otherwise requires at least one hex digit.
531 if (CurPtr == NumStart)
532 return ReturnError(CurPtr-2, "invalid hexadecimal number");
533
534 APInt Result(128, 0);
535 if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result))
536 return ReturnError(TokStart, "invalid hexadecimal number");
537
538 // Consume the optional [hH].
539 if (LexMasmIntegers && (*CurPtr == 'h' || *CurPtr == 'H'))
540 ++CurPtr;
541
542 // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
543 // suffixes on integer literals.
545
546 return intToken(StringRef(TokStart, CurPtr - TokStart), Result);
547 }
548
549 // Either octal or hexadecimal.
550 APInt Value(128, 0, true);
551 unsigned Radix = doHexLookAhead(CurPtr, 8, LexMasmIntegers);
// NOTE(review): the definition of `Result` (listing line 552) is missing.
553 if (Result.getAsInteger(Radix, Value))
554 return ReturnError(TokStart, "invalid " + radixName(Radix) + " number");
555
556 // Consume the [hH].
557 if (Radix == 16)
558 ++CurPtr;
559
560 // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
561 // suffixes on integer literals.
563
564 return intToken(Result, Value);
565}
566
567/// LexSingleQuote: Integer: 'b'
///
/// Called after the opening single quote has been consumed. Outside the MASM
/// and HLASM string modes, a character literal (optionally written with a
/// backslash escape) is returned as an Integer token holding its value.
// NOTE(review): listing line 589 (the return that ends the MASM branch) is
// absent from this extract.
568AsmToken AsmLexer::LexSingleQuote() {
569 int CurChar = getNextChar();
570
571 if (LexHLASMStrings)
572 return ReturnError(TokStart, "invalid usage of character literals");
573
574 if (LexMasmStrings) {
575 while (CurChar != EOF) {
576 if (CurChar != '\'') {
577 CurChar = getNextChar();
578 } else if (peekNextChar() == '\'') {
579 // In MASM single-quote strings, doubled single-quotes mean an escaped
580 // single quote, so should be lexed in.
581 (void)getNextChar();
582 CurChar = getNextChar();
583 } else {
584 break;
585 }
586 }
587 if (CurChar == EOF)
588 return ReturnError(TokStart, "unterminated string constant");
590 }
591
// A backslash introduces an escape: step over it so that CurChar is the
// escaped character.
592 if (CurChar == '\\')
593 CurChar = getNextChar();
594
595 if (CurChar == EOF)
596 return ReturnError(TokStart, "unterminated single quote");
597
// The character after the literal's single character must be the closing
// quote.
598 CurChar = getNextChar();
599
600 if (CurChar != '\'')
601 return ReturnError(TokStart, "single quote way too long");
602
603 // The idea here being that 'c' is basically just an integral
604 // constant.
605 StringRef Res = StringRef(TokStart,CurPtr - TokStart);
606 long long Value;
607
// Res[2] is the character following the backslash; unrecognised escapes
// evaluate to that character itself.
608 if (Res.startswith("\'\\")) {
609 char theChar = Res[2];
610 switch (theChar) {
611 default: Value = theChar; break;
612 case '\'': Value = '\''; break;
613 case 't': Value = '\t'; break;
614 case 'n': Value = '\n'; break;
615 case 'b': Value = '\b'; break;
616 case 'f': Value = '\f'; break;
617 case 'r': Value = '\r'; break;
618 }
619 } else
620 Value = TokStart[1];
621
622 return AsmToken(AsmToken::Integer, Res, Value);
623}
624
625/// LexQuote: String: "..."
///
/// Called after the opening double quote has been consumed; scans to the
/// matching closing quote.
// NOTE(review): listing lines 646 and 662 (the two success returns) are absent
// from this extract.
626AsmToken AsmLexer::LexQuote() {
627 int CurChar = getNextChar();
628 if (LexHLASMStrings)
629 return ReturnError(TokStart, "invalid usage of string literals");
630
631 if (LexMasmStrings) {
632 while (CurChar != EOF) {
633 if (CurChar != '"') {
634 CurChar = getNextChar();
635 } else if (peekNextChar() == '"') {
636 // In MASM double-quoted strings, doubled double-quotes mean an escaped
637 // double quote, so should be lexed in.
638 (void)getNextChar();
639 CurChar = getNextChar();
640 } else {
641 break;
642 }
643 }
644 if (CurChar == EOF)
645 return ReturnError(TokStart, "unterminated string constant");
647 }
648
649 // TODO: does gas allow multiline string constants?
650 while (CurChar != '"') {
651 if (CurChar == '\\') {
652 // Allow \", etc.
653 CurChar = getNextChar();
654 }
655
// Checked after the escape handling so that a backslash at the very end of the
// input is also reported as unterminated.
656 if (CurChar == EOF)
657 return ReturnError(TokStart, "unterminated string constant");
658
659 CurChar = getNextChar();
660 }
661
663}
664
666 TokStart = CurPtr;
667
668 while (!isAtStartOfComment(CurPtr) && // Start of line comment.
669 !isAtStatementSeparator(CurPtr) && // End of statement marker.
670 *CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) {
671 ++CurPtr;
672 }
673 return StringRef(TokStart, CurPtr-TokStart);
674}
675
676StringRef AsmLexer::LexUntilEndOfLine() {
677 TokStart = CurPtr;
678
679 while (*CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) {
680 ++CurPtr;
681 }
682 return StringRef(TokStart, CurPtr-TokStart);
683}
684
686 bool ShouldSkipSpace) {
687 SaveAndRestore SavedTokenStart(TokStart);
688 SaveAndRestore SavedCurPtr(CurPtr);
689 SaveAndRestore SavedAtStartOfLine(IsAtStartOfLine);
690 SaveAndRestore SavedAtStartOfStatement(IsAtStartOfStatement);
691 SaveAndRestore SavedSkipSpace(SkipSpace, ShouldSkipSpace);
692 SaveAndRestore SavedIsPeeking(IsPeeking, true);
693 std::string SavedErr = getErr();
694 SMLoc SavedErrLoc = getErrLoc();
695
696 size_t ReadCount;
697 for (ReadCount = 0; ReadCount < Buf.size(); ++ReadCount) {
698 AsmToken Token = LexToken();
699
700 Buf[ReadCount] = Token;
701
702 if (Token.is(AsmToken::Eof))
703 break;
704 }
705
706 SetError(SavedErrLoc, SavedErr);
707 return ReadCount;
708}
709
710bool AsmLexer::isAtStartOfComment(const char *Ptr) {
711 if (MAI.getRestrictCommentStringToStartOfStatement() && !IsAtStartOfStatement)
712 return false;
713
714 StringRef CommentString = MAI.getCommentString();
715
716 if (CommentString.size() == 1)
717 return CommentString[0] == Ptr[0];
718
719 // Allow # preprocessor comments also be counted as comments for "##" cases
720 if (CommentString[1] == '#')
721 return CommentString[0] == Ptr[0];
722
723 return strncmp(Ptr, CommentString.data(), CommentString.size()) == 0;
724}
725
726bool AsmLexer::isAtStatementSeparator(const char *Ptr) {
727 return strncmp(Ptr, MAI.getSeparatorString(),
728 strlen(MAI.getSeparatorString())) == 0;
729}
730
// Body of the main token dispatcher: reads one character and either returns a
// single-character token directly or hands off to one of the Lex* helpers.
// NOTE(review): the signature (listing line 731; the index at the end of this
// listing gives it as `AsmToken LexToken()`) and many statements - mostly
// `return AsmToken(...)` lines - are absent from this extract, e.g. listing
// lines 749, 752, 763, 772, 793, 803, 810, 816, 831, 833, 836, 838, 840, 842,
// 844, 846, 851, 853, 857, 859, 863, 865, 870, 872, 876, 878, 885, 889, 915,
// 917, 922, 935, 938, 941, 943, 949, 952 and 954. Restore them from the
// upstream file before building.
732 TokStart = CurPtr;
733 // This always consumes at least one character.
734 int CurChar = getNextChar();
735
736 if (!IsPeeking && CurChar == '#' && IsAtStartOfStatement) {
737 // If this starts with a '#', this may be a cpp
738 // hash directive and otherwise a line comment.
739 AsmToken TokenBuf[2];
740 MutableArrayRef<AsmToken> Buf(TokenBuf, 2);
741 size_t num = peekTokens(Buf, true);
742 // There cannot be a space preceding this
743 if (IsAtStartOfLine && num == 2 && TokenBuf[0].is(AsmToken::Integer) &&
744 TokenBuf[1].is(AsmToken::String)) {
745 CurPtr = TokStart; // reset curPtr;
746 StringRef s = LexUntilEndOfLine();
747 UnLex(TokenBuf[1]);
748 UnLex(TokenBuf[0]);
750 }
751
753 return LexLineComment();
754 }
755
756 if (isAtStartOfComment(TokStart))
757 return LexLineComment();
758
759 if (isAtStatementSeparator(TokStart)) {
// One separator character was already consumed by getNextChar() above, hence
// the "- 1".
760 CurPtr += strlen(MAI.getSeparatorString()) - 1;
761 IsAtStartOfLine = true;
762 IsAtStartOfStatement = true;
764 StringRef(TokStart, strlen(MAI.getSeparatorString())));
765 }
766
767 // If we're missing a newline at EOF, make sure we still get an
768 // EndOfStatement token before the Eof token.
769 if (CurChar == EOF && !IsAtStartOfStatement && EndStatementAtEOF) {
770 IsAtStartOfLine = true;
771 IsAtStartOfStatement = true;
773 }
774 IsAtStartOfLine = false;
// Remember the flag so that whitespace and '/' handling below can restore it.
775 bool OldIsAtStartOfStatement = IsAtStartOfStatement;
776 IsAtStartOfStatement = false;
777 switch (CurChar) {
778 default:
779 // Handle identifier: [a-zA-Z_.$@#?][a-zA-Z0-9_.$@#?]*
780 // Whether or not the lexer accepts '$', '@', '#' and '?' at the start of
781 // an identifier is target-dependent. These characters are handled in the
782 // respective switch cases.
783 if (isalpha(CurChar) || CurChar == '_' || CurChar == '.')
784 return LexIdentifier();
785
786 // Unknown character, emit an error.
787 return ReturnError(TokStart, "invalid character in input");
788 case EOF:
789 if (EndStatementAtEOF) {
790 IsAtStartOfLine = true;
791 IsAtStartOfStatement = true;
792 }
794 case 0:
795 case ' ':
796 case '\t':
797 IsAtStartOfStatement = OldIsAtStartOfStatement;
798 while (*CurPtr == ' ' || *CurPtr == '\t')
799 CurPtr++;
800 if (SkipSpace)
801 return LexToken(); // Ignore whitespace.
802 else
804 case '\r': {
805 IsAtStartOfLine = true;
806 IsAtStartOfStatement = true;
807 // If this is a CR followed by LF, treat that as one token.
808 if (CurPtr != CurBuf.end() && *CurPtr == '\n')
809 ++CurPtr;
811 StringRef(TokStart, CurPtr - TokStart));
812 }
813 case '\n':
814 IsAtStartOfLine = true;
815 IsAtStartOfStatement = true;
817 case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1));
818 case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1));
819 case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1));
820 case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1));
821 case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1));
822 case '[': return AsmToken(AsmToken::LBrac, StringRef(TokStart, 1));
823 case ']': return AsmToken(AsmToken::RBrac, StringRef(TokStart, 1));
824 case '{': return AsmToken(AsmToken::LCurly, StringRef(TokStart, 1));
825 case '}': return AsmToken(AsmToken::RCurly, StringRef(TokStart, 1));
826 case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1));
827 case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1));
828 case '$': {
829 if (LexMotorolaIntegers && isHexDigit(*CurPtr))
830 return LexDigit();
832 return LexIdentifier();
834 }
835 case '@':
837 return LexIdentifier();
839 case '#':
841 return LexIdentifier();
843 case '?':
845 return LexIdentifier();
847 case '\\': return AsmToken(AsmToken::BackSlash, StringRef(TokStart, 1));
848 case '=':
849 if (*CurPtr == '=') {
850 ++CurPtr;
852 }
854 case '-':
855 if (*CurPtr == '>') {
856 ++CurPtr;
858 }
860 case '|':
861 if (*CurPtr == '|') {
862 ++CurPtr;
864 }
866 case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1));
867 case '&':
868 if (*CurPtr == '&') {
869 ++CurPtr;
871 }
873 case '!':
874 if (*CurPtr == '=') {
875 ++CurPtr;
877 }
879 case '%':
880 if (LexMotorolaIntegers && (*CurPtr == '0' || *CurPtr == '1')) {
881 return LexDigit();
882 }
883
884 if (MAI.hasMipsExpressions()) {
886 unsigned OperatorLength;
887
// Each length below is the operator name plus the leading '%', which has
// already been consumed. Longer names are listed before their prefixes
// ("got_disp" before "got", "higher" before "hi").
888 std::tie(Operator, OperatorLength) =
890 StringRef(CurPtr))
891 .StartsWith("call16", {AsmToken::PercentCall16, 7})
892 .StartsWith("call_hi", {AsmToken::PercentCall_Hi, 8})
893 .StartsWith("call_lo", {AsmToken::PercentCall_Lo, 8})
894 .StartsWith("dtprel_hi", {AsmToken::PercentDtprel_Hi, 10})
895 .StartsWith("dtprel_lo", {AsmToken::PercentDtprel_Lo, 10})
896 .StartsWith("got_disp", {AsmToken::PercentGot_Disp, 9})
897 .StartsWith("got_hi", {AsmToken::PercentGot_Hi, 7})
898 .StartsWith("got_lo", {AsmToken::PercentGot_Lo, 7})
899 .StartsWith("got_ofst", {AsmToken::PercentGot_Ofst, 9})
900 .StartsWith("got_page", {AsmToken::PercentGot_Page, 9})
901 .StartsWith("gottprel", {AsmToken::PercentGottprel, 9})
902 .StartsWith("got", {AsmToken::PercentGot, 4})
903 .StartsWith("gp_rel", {AsmToken::PercentGp_Rel, 7})
904 .StartsWith("higher", {AsmToken::PercentHigher, 7})
905 .StartsWith("highest", {AsmToken::PercentHighest, 8})
906 .StartsWith("hi", {AsmToken::PercentHi, 3})
907 .StartsWith("lo", {AsmToken::PercentLo, 3})
908 .StartsWith("neg", {AsmToken::PercentNeg, 4})
909 .StartsWith("pcrel_hi", {AsmToken::PercentPcrel_Hi, 9})
910 .StartsWith("pcrel_lo", {AsmToken::PercentPcrel_Lo, 9})
911 .StartsWith("tlsgd", {AsmToken::PercentTlsgd, 6})
912 .StartsWith("tlsldm", {AsmToken::PercentTlsldm, 7})
913 .StartsWith("tprel_hi", {AsmToken::PercentTprel_Hi, 9})
914 .StartsWith("tprel_lo", {AsmToken::PercentTprel_Lo, 9})
916
918 CurPtr += OperatorLength - 1;
919 return AsmToken(Operator, StringRef(TokStart, OperatorLength));
920 }
921 }
923 case '/':
924 IsAtStartOfStatement = OldIsAtStartOfStatement;
925 return LexSlash();
926 case '\'': return LexSingleQuote();
927 case '"': return LexQuote();
928 case '0': case '1': case '2': case '3': case '4':
929 case '5': case '6': case '7': case '8': case '9':
930 return LexDigit();
931 case '<':
932 switch (*CurPtr) {
933 case '<':
934 ++CurPtr;
936 case '=':
937 ++CurPtr;
939 case '>':
940 ++CurPtr;
942 default:
944 }
945 case '>':
946 switch (*CurPtr) {
947 case '>':
948 ++CurPtr;
950 case '=':
951 ++CurPtr;
953 default:
955 }
956
957 // TODO: Quoted identifiers (objc methods etc)
958 // local labels: [0-9][:]
959 // Forward/backward labels: [0-9][fb]
960 // Integers, fp constants, character constants.
961 }
962}
This file implements a class to represent arbitrary precision integral constant values and operations...
static std::string radixName(unsigned Radix)
Definition: AsmLexer.cpp:303
static void SkipIgnoredIntegerSuffix(const char *&CurPtr)
Definition: AsmLexer.cpp:253
static unsigned doHexLookAhead(const char *&CurPtr, unsigned DefaultRadix, bool LexHex)
Definition: AsmLexer.cpp:265
static AsmToken intToken(StringRef Ref, APInt &Value)
Definition: AsmLexer.cpp:297
static const char * findLastDigit(const char *CurPtr, unsigned DefaultRadix)
Definition: AsmLexer.cpp:290
static bool isIdentifierChar(char C)
Return true if the given character satisfies the following regular expression: [-a-zA-Z$....
Definition: MILexer.cpp:118
static bool isDigit(const char C)
static bool isHexDigit(const char C)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file provides utility classes that use RAII to save and restore values.
This file contains some functions that are useful when dealing with strings.
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
Class for arbitrary precision integers.
Definition: APInt.h:76
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
virtual void HandleComment(SMLoc Loc, StringRef CommentText)=0
Callback function for when a comment is lexed.
~AsmLexer() override
size_t peekTokens(MutableArrayRef< AsmToken > Buf, bool ShouldSkipSpace=true) override
Look ahead an arbitrary number of tokens.
Definition: AsmLexer.cpp:685
AsmLexer(const MCAsmInfo &MAI)
Definition: AsmLexer.cpp:34
StringRef LexUntilEndOfStatement() override
Definition: AsmLexer.cpp:665
void setBuffer(StringRef Buf, const char *ptr=nullptr, bool EndStatementAtEOF=true)
Definition: AsmLexer.cpp:41
AsmToken LexToken() override
LexToken - Read the next token and return its code.
Definition: AsmLexer.cpp:731
Target independent representation for an assembler token.
Definition: MCAsmMacro.h:21
bool is(TokenKind K) const
Definition: MCAsmMacro.h:82
This class is intended to be used as a base class for asm properties and features specific to the tar...
Definition: MCAsmInfo.h:56
bool getRestrictCommentStringToStartOfStatement() const
Definition: MCAsmInfo.h:656
bool doesAllowDollarAtStartOfIdentifier() const
Definition: MCAsmInfo.h:695
bool hasMipsExpressions() const
Definition: MCAsmInfo.h:883
bool shouldUseMotorolaIntegers() const
Definition: MCAsmInfo.h:885
StringRef getCommentString() const
Definition: MCAsmInfo.h:655
const char * getSeparatorString() const
Definition: MCAsmInfo.h:649
bool doesAllowAtAtStartOfIdentifier() const
Definition: MCAsmInfo.h:692
bool doesAllowHashAtStartOfIdentifier() const
Definition: MCAsmInfo.h:698
bool shouldAllowAdditionalComments() const
Definition: MCAsmInfo.h:659
bool doesAllowQuestionAtStartOfIdentifier() const
Definition: MCAsmInfo.h:689
void UnLex(AsmToken const &Token)
Definition: MCAsmLexer.h:93
bool LexMasmHexFloats
Definition: MCAsmLexer.h:51
bool LexHLASMStrings
Definition: MCAsmLexer.h:58
bool UseMasmDefaultRadix
Definition: MCAsmLexer.h:55
bool LexHLASMIntegers
Definition: MCAsmLexer.h:57
AsmCommentConsumer * CommentConsumer
Definition: MCAsmLexer.h:59
bool LexMasmIntegers
Definition: MCAsmLexer.h:52
bool LexMotorolaIntegers
Definition: MCAsmLexer.h:54
SMLoc getErrLoc()
Get the current error location.
Definition: MCAsmLexer.h:128
const char * TokStart
Definition: MCAsmLexer.h:46
unsigned DefaultRadix
Definition: MCAsmLexer.h:56
bool AllowAtInIdentifier
Definition: MCAsmLexer.h:48
bool is(AsmToken::TokenKind K) const
Check if the current token has kind K.
Definition: MCAsmLexer.h:141
void SetError(SMLoc errLoc, const std::string &err)
Definition: MCAsmLexer.h:65
bool AllowHashInIdentifier
Definition: MCAsmLexer.h:49
const std::string & getErr()
Get the current error string.
Definition: MCAsmLexer.h:133
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
This is a utility class that provides an abstraction for the common functionality between Instruction...
Definition: Operator.h:31
Represents a location in source code.
Definition: SMLoc.h:23
static SMLoc getFromPointer(const char *Ptr)
Definition: SMLoc.h:36
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
iterator begin() const
Definition: StringRef.h:111
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
bool startswith(StringRef Prefix) const
Definition: StringRef.h:261
iterator end() const
Definition: StringRef.h:113
const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
LLVM Value Representation.
Definition: Value.h:74
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Ref
The access may reference the value stored in memory.
@ Default
The result values are uniform if and only if all operands are uniform.
A utility class that uses RAII to save and restore the value of a variable.