LLVM  13.0.0git
AsmLexer.cpp
Go to the documentation of this file.
1 //===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This class implements the lexer for assembly files.
10 //
11 //===----------------------------------------------------------------------===//
12 
14 #include "llvm/ADT/APInt.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/StringExtras.h"
17 #include "llvm/ADT/StringRef.h"
18 #include "llvm/ADT/StringSwitch.h"
19 #include "llvm/MC/MCAsmInfo.h"
21 #include "llvm/Support/Compiler.h"
22 #include "llvm/Support/SMLoc.h"
24 #include <cassert>
25 #include <cctype>
26 #include <cstdio>
27 #include <cstring>
28 #include <string>
29 #include <tuple>
30 #include <utility>
31 
32 using namespace llvm;
33 
// Construct a lexer bound to the target assembly description MAI; the
// reference is stored in the MAI member and queried later for the comment
// string, the statement separator and identifier rules.
// NOTE(review): the embedded listing numbers jump from 34 to 36, so one body
// line appears to be missing from this copy — confirm against upstream.
34 AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) {
36 }
37 
// Out-of-line defaulted destructor.
38 AsmLexer::~AsmLexer() = default;
39 
40 void AsmLexer::setBuffer(StringRef Buf, const char *ptr,
41  bool EndStatementAtEOF) {
42  CurBuf = Buf;
43 
44  if (ptr)
45  CurPtr = ptr;
46  else
47  CurPtr = CurBuf.begin();
48 
49  TokStart = nullptr;
50  this->EndStatementAtEOF = EndStatementAtEOF;
51 }
52 
53 /// ReturnError - Set the error to the specified string at the specified
54 /// location. This is defined to always return AsmToken::Error.
55 AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
57 
58  return AsmToken(AsmToken::Error, StringRef(Loc, CurPtr - Loc));
59 }
60 
61 int AsmLexer::getNextChar() {
62  if (CurPtr == CurBuf.end())
63  return EOF;
64  return (unsigned char)*CurPtr++;
65 }
66 
67 int AsmLexer::peekNextChar() {
68  if (CurPtr == CurBuf.end())
69  return EOF;
70  return (unsigned char)*CurPtr;
71 }
72 
73 /// The leading integral digit sequence and dot should have already been
74 /// consumed, some or all of the fractional digit sequence *can* have been
75 /// consumed.
76 AsmToken AsmLexer::LexFloatLiteral() {
77  // Skip the fractional digit sequence.
78  while (isDigit(*CurPtr))
79  ++CurPtr;
80 
81  if (*CurPtr == '-' || *CurPtr == '+')
82  return ReturnError(CurPtr, "Invalid sign in float literal");
83 
84  // Check for exponent
85  if ((*CurPtr == 'e' || *CurPtr == 'E')) {
86  ++CurPtr;
87 
88  if (*CurPtr == '-' || *CurPtr == '+')
89  ++CurPtr;
90 
91  while (isDigit(*CurPtr))
92  ++CurPtr;
93  }
94 
95  return AsmToken(AsmToken::Real,
96  StringRef(TokStart, CurPtr - TokStart));
97 }
98 
99 /// LexHexFloatLiteral matches essentially (.[0-9a-fA-F]*)?[pP][+-]?[0-9a-fA-F]+
100 /// while making sure there are enough actual digits around for the constant to
101 /// be valid.
102 ///
103 /// The leading "0x[0-9a-fA-F]*" (i.e. integer part) has already been consumed
104 /// before we get here.
105 AsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits) {
106  assert((*CurPtr == 'p' || *CurPtr == 'P' || *CurPtr == '.') &&
107  "unexpected parse state in floating hex");
108  bool NoFracDigits = true;
109 
110  // Skip the fractional part if there is one
111  if (*CurPtr == '.') {
112  ++CurPtr;
113 
114  const char *FracStart = CurPtr;
115  while (isHexDigit(*CurPtr))
116  ++CurPtr;
117 
118  NoFracDigits = CurPtr == FracStart;
119  }
120 
121  if (NoIntDigits && NoFracDigits)
122  return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
123  "expected at least one significand digit");
124 
125  // Make sure we do have some kind of proper exponent part
126  if (*CurPtr != 'p' && *CurPtr != 'P')
127  return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
128  "expected exponent part 'p'");
129  ++CurPtr;
130 
131  if (*CurPtr == '+' || *CurPtr == '-')
132  ++CurPtr;
133 
134  // N.b. exponent digits are *not* hex
135  const char *ExpStart = CurPtr;
136  while (isDigit(*CurPtr))
137  ++CurPtr;
138 
139  if (CurPtr == ExpStart)
140  return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
141  "expected at least one exponent digit");
142 
143  return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart));
144 }
145 
146 /// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@?]*
147 static bool IsIdentifierChar(char c, bool AllowAt) {
148  return isAlnum(c) || c == '_' || c == '$' || c == '.' ||
149  (c == '@' && AllowAt) || c == '?';
150 }
151 
152 AsmToken AsmLexer::LexIdentifier() {
153  // Check for floating point literals.
154  if (CurPtr[-1] == '.' && isDigit(*CurPtr)) {
155  // Disambiguate a .1243foo identifier from a floating literal.
156  while (isDigit(*CurPtr))
157  ++CurPtr;
158 
159  if (!IsIdentifierChar(*CurPtr, AllowAtInIdentifier) ||
160  *CurPtr == 'e' || *CurPtr == 'E')
161  return LexFloatLiteral();
162  }
163 
164  while (IsIdentifierChar(*CurPtr, AllowAtInIdentifier))
165  ++CurPtr;
166 
167  // Handle . as a special case.
168  if (CurPtr == TokStart+1 && TokStart[0] == '.')
170 
172 }
173 
174 /// LexSlash: Slash: /
175 /// C-Style Comment: /* ... */
176 AsmToken AsmLexer::LexSlash() {
177  switch (*CurPtr) {
178  case '*':
179  IsAtStartOfStatement = false;
180  break; // C style comment.
181  case '/':
182  ++CurPtr;
183  return LexLineComment();
184  default:
185  IsAtStartOfStatement = false;
187  }
188 
189  // C Style comment.
190  ++CurPtr; // skip the star.
191  const char *CommentTextStart = CurPtr;
192  while (CurPtr != CurBuf.end()) {
193  switch (*CurPtr++) {
194  case '*':
195  // End of the comment?
196  if (*CurPtr != '/')
197  break;
198  // If we have a CommentConsumer, notify it about the comment.
199  if (CommentConsumer) {
201  SMLoc::getFromPointer(CommentTextStart),
202  StringRef(CommentTextStart, CurPtr - 1 - CommentTextStart));
203  }
204  ++CurPtr; // End the */.
206  StringRef(TokStart, CurPtr - TokStart));
207  }
208  }
209  return ReturnError(TokStart, "unterminated comment");
210 }
211 
212 /// LexLineComment: Comment: #[^\n]*
213 /// : //[^\n]*
214 AsmToken AsmLexer::LexLineComment() {
215  // Mark This as an end of statement with a body of the
216  // comment. While it would be nicer to leave this two tokens,
217  // backwards compatability with TargetParsers makes keeping this in this form
218  // better.
219  const char *CommentTextStart = CurPtr;
220  int CurChar = getNextChar();
221  while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF)
222  CurChar = getNextChar();
223  if (CurChar == '\r' && CurPtr != CurBuf.end() && *CurPtr == '\n')
224  ++CurPtr;
225 
226  // If we have a CommentConsumer, notify it about the comment.
227  if (CommentConsumer) {
229  SMLoc::getFromPointer(CommentTextStart),
230  StringRef(CommentTextStart, CurPtr - 1 - CommentTextStart));
231  }
232 
233  IsAtStartOfLine = true;
234  // This is a whole line comment. leave newline
235  if (IsAtStartOfStatement)
237  StringRef(TokStart, CurPtr - TokStart));
238  IsAtStartOfStatement = true;
239 
241  StringRef(TokStart, CurPtr - 1 - TokStart));
242 }
243 
/// Advance \p CurPtr past an integer type suffix that is accepted but
/// ignored: an optional 'U' followed by at most two 'L's (U, L, LL, UL, ULL).
/// Only upper-case letters are recognised.
static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
  if (*CurPtr == 'U')
    ++CurPtr;

  // Up to two consecutive 'L's.
  for (int Budget = 2; Budget > 0 && *CurPtr == 'L'; --Budget)
    ++CurPtr;
}
253 
254 // Look ahead to search for first non-hex digit, if it's [hH], then we treat the
255 // integer as a hexadecimal, possibly with leading zeroes.
256 static unsigned doHexLookAhead(const char *&CurPtr, unsigned DefaultRadix,
257  bool LexHex) {
258  const char *FirstNonDec = nullptr;
259  const char *LookAhead = CurPtr;
260  while (true) {
261  if (isDigit(*LookAhead)) {
262  ++LookAhead;
263  } else {
264  if (!FirstNonDec)
265  FirstNonDec = LookAhead;
266 
267  // Keep going if we are looking for a 'h' suffix.
268  if (LexHex && isHexDigit(*LookAhead))
269  ++LookAhead;
270  else
271  break;
272  }
273  }
274  bool isHex = LexHex && (*LookAhead == 'h' || *LookAhead == 'H');
275  CurPtr = isHex || !FirstNonDec ? LookAhead : FirstNonDec;
276  if (isHex)
277  return 16;
278  return DefaultRadix;
279 }
280 
281 static const char *findLastDigit(const char *CurPtr, unsigned DefaultRadix) {
282  while (hexDigitValue(*CurPtr) < DefaultRadix) {
283  ++CurPtr;
284  }
285  return CurPtr;
286 }
287 
289  if (Value.isIntN(64))
292 }
293 
// Human-readable name of a numeric base, used when building diagnostics.
// Bases 2, 8, 10 and 16 get their conventional names; any other value is
// rendered as "base-<N>".
static std::string radixName(unsigned Radix) {
  if (Radix == 2)
    return "binary";
  if (Radix == 8)
    return "octal";
  if (Radix == 10)
    return "decimal";
  if (Radix == 16)
    return "hexadecimal";
  return "base-" + std::to_string(Radix);
}
308 
// Lexes a numeric literal. The first digit has already been consumed and is
// available as CurPtr[-1]; TokStart points at it. Produces an integer token
// (via intToken), a Real token for floating-point forms, or an Error token
// when the digits do not parse in the selected radix.
// NOTE(review): the embedded listing numbers skip 333 (inside the switch,
// between the default case and case '9') and 411 (directly before the
// "findLastDigit" block, whose closing brace is on listing line 422), so two
// source lines appear to be missing from this copy — confirm against upstream.
309 /// LexDigit: First character is [0-9].
310 /// Local Label: [0-9][:]
311 /// Forward/Backward Label: [0-9][fb]
312 /// Binary integer: 0b[01]+
313 /// Octal integer: 0[0-7]+
314 /// Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH]
315 /// Decimal integer: [1-9][0-9]*
316 AsmToken AsmLexer::LexDigit() {
317  // MASM-flavor binary integer: [01]+[yY] (if DefaultRadix < 16, [bByY])
318  // MASM-flavor octal integer: [0-7]+[oOqQ]
319  // MASM-flavor decimal integer: [0-9]+[tT] (if DefaultRadix < 16, [dDtT])
320  // MASM-flavor hexadecimal integer: [0-9][0-9a-fA-F]*[hH]
// NOTE(review): the code below gates the [dD] suffix on DefaultRadix < 14 and
// the [bB] suffix on DefaultRadix < 12, not on < 16 as the comment above says.
321  if (LexMasmIntegers && isdigit(CurPtr[-1])) {
// Track the first character that rules out a binary / decimal reading.
322  const char *FirstNonBinary =
323  (CurPtr[-1] != '0' && CurPtr[-1] != '1') ? CurPtr - 1 : nullptr;
324  const char *FirstNonDecimal =
325  (CurPtr[-1] < '0' || CurPtr[-1] > '9') ? CurPtr - 1 : nullptr;
// Remembered so the scan can be undone if no radix suffix is found.
326  const char *OldCurPtr = CurPtr;
327  while (isHexDigit(*CurPtr)) {
328  switch (*CurPtr) {
329  default:
330  if (!FirstNonDecimal) {
331  FirstNonDecimal = CurPtr;
332  }
334  case '9':
335  case '8':
336  case '7':
337  case '6':
338  case '5':
339  case '4':
340  case '3':
341  case '2':
342  if (!FirstNonBinary) {
343  FirstNonBinary = CurPtr;
344  }
345  break;
346  case '1':
347  case '0':
348  break;
349  }
350  ++CurPtr;
351  }
352  if (*CurPtr == '.') {
353  // MASM float literals (other than hex floats) always contain a ".", and
354  // are always written in decimal.
355  ++CurPtr;
356  return LexFloatLiteral();
357  }
358 
// Hex digits followed by [rR] are returned as a Real token when
// LexMasmHexFloats is set.
359  if (LexMasmHexFloats && (*CurPtr == 'r' || *CurPtr == 'R')) {
360  ++CurPtr;
361  return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart));
362  }
363 
// Radix stays 0 when no radix suffix was recognised.
364  unsigned Radix = 0;
365  if (*CurPtr == 'h' || *CurPtr == 'H') {
366  // hexadecimal number
367  ++CurPtr;
368  Radix = 16;
369  } else if (*CurPtr == 't' || *CurPtr == 'T') {
370  // decimal number
371  ++CurPtr;
372  Radix = 10;
373  } else if (*CurPtr == 'o' || *CurPtr == 'O' || *CurPtr == 'q' ||
374  *CurPtr == 'Q') {
375  // octal number
376  ++CurPtr;
377  Radix = 8;
378  } else if (*CurPtr == 'y' || *CurPtr == 'Y') {
379  // binary number
380  ++CurPtr;
381  Radix = 2;
// 'd' is itself a hex digit (value 13), so the scan above already consumed
// it; it is treated as a decimal suffix only when it is the last character
// scanned and the default radix is below 14.
382  } else if (FirstNonDecimal && FirstNonDecimal + 1 == CurPtr &&
383  DefaultRadix < 14 &&
384  (*FirstNonDecimal == 'd' || *FirstNonDecimal == 'D')) {
385  Radix = 10;
// Likewise 'b' (hex value 11) is a binary suffix only when the default radix
// is below 12.
386  } else if (FirstNonBinary && FirstNonBinary + 1 == CurPtr &&
387  DefaultRadix < 12 &&
388  (*FirstNonBinary == 'b' || *FirstNonBinary == 'B')) {
389  Radix = 2;
390  }
391 
392  if (Radix) {
393  StringRef Result(TokStart, CurPtr - TokStart);
394  APInt Value(128, 0, true);
395 
// drop_back() removes the one-character radix suffix before parsing.
396  if (Result.drop_back().getAsInteger(Radix, Value))
397  return ReturnError(TokStart, "invalid " + radixName(Radix) + " number");
398 
399  // MSVC accepts and ignores type suffixes on integer literals.
400  SkipIgnoredIntegerSuffix(CurPtr);
401 
402  return intToken(Result, Value);
403  }
404 
405  // default-radix integers, or floating point numbers, fall through
406  CurPtr = OldCurPtr;
407  }
408 
409  // MASM default-radix integers: [0-9a-fA-F]+
410  // (All other integer literals have a radix specifier.)
412  CurPtr = findLastDigit(CurPtr, 16);
413  StringRef Result(TokStart, CurPtr - TokStart);
414 
415  APInt Value(128, 0, true);
416  if (Result.getAsInteger(DefaultRadix, Value)) {
417  return ReturnError(TokStart,
418  "invalid " + radixName(DefaultRadix) + " number");
419  }
420 
421  return intToken(Result, Value);
422  }
423 
424  // Decimal integer: [1-9][0-9]*
// Also taken for "0." so that a literal such as 0.5 is lexed as a float.
425  if (CurPtr[-1] != '0' || CurPtr[0] == '.') {
426  unsigned Radix = doHexLookAhead(CurPtr, 10, LexMasmIntegers);
427  bool isHex = Radix == 16;
428  // Check for floating point literals.
429  if (!isHex && (*CurPtr == '.' || *CurPtr == 'e' || *CurPtr == 'E')) {
430  if (*CurPtr == '.')
431  ++CurPtr;
432  return LexFloatLiteral();
433  }
434 
435  StringRef Result(TokStart, CurPtr - TokStart);
436 
437  APInt Value(128, 0, true);
438  if (Result.getAsInteger(Radix, Value)) {
439  return ReturnError(TokStart, "invalid " + radixName(Radix) + " number");
440  }
441 
442  // The darwin/x86 (and x86-64) assembler accepts and ignores type
443  // suffixes on integer literals.
444  SkipIgnoredIntegerSuffix(CurPtr);
445 
446  return intToken(Result, Value);
447  }
448 
// From here on the first digit is '0' and is not followed by '.'.
449  if (!LexMasmIntegers && ((*CurPtr == 'b') || (*CurPtr == 'B'))) {
450  ++CurPtr;
451  // See if we actually have "0b" as part of something like "jmp 0b\n"
452  if (!isDigit(CurPtr[0])) {
// Back up so the 'b' is left for the next token; only "0" is returned.
453  --CurPtr;
454  StringRef Result(TokStart, CurPtr - TokStart);
455  return AsmToken(AsmToken::Integer, Result, 0);
456  }
457  const char *NumStart = CurPtr;
458  while (CurPtr[0] == '0' || CurPtr[0] == '1')
459  ++CurPtr;
460 
461  // Requires at least one binary digit.
462  if (CurPtr == NumStart)
463  return ReturnError(TokStart, "invalid binary number");
464 
465  StringRef Result(TokStart, CurPtr - TokStart);
466 
467  APInt Value(128, 0, true);
// substr(2) skips the "0b" prefix.
468  if (Result.substr(2).getAsInteger(2, Value))
469  return ReturnError(TokStart, "invalid binary number");
470 
471  // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
472  // suffixes on integer literals.
473  SkipIgnoredIntegerSuffix(CurPtr);
474 
475  return intToken(Result, Value);
476  }
477 
478  if ((*CurPtr == 'x') || (*CurPtr == 'X')) {
479  ++CurPtr;
480  const char *NumStart = CurPtr;
481  while (isHexDigit(CurPtr[0]))
482  ++CurPtr;
483 
484  // "0x.0p0" is valid, and "0x0p0" (but not "0xp0" for example, which will be
485  // diagnosed by LexHexFloatLiteral).
486  if (CurPtr[0] == '.' || CurPtr[0] == 'p' || CurPtr[0] == 'P')
487  return LexHexFloatLiteral(NumStart == CurPtr);
488 
489  // Otherwise requires at least one hex digit.
490  if (CurPtr == NumStart)
491  return ReturnError(CurPtr-2, "invalid hexadecimal number");
492 
493  APInt Result(128, 0);
// Radix 0 is passed here with the "0x" prefix still in the text.
// NOTE(review): presumably getAsInteger auto-detects the radix from the
// prefix in that case — confirm against the StringRef documentation.
494  if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result))
495  return ReturnError(TokStart, "invalid hexadecimal number");
496 
497  // Consume the optional [hH].
498  if (LexMasmIntegers && (*CurPtr == 'h' || *CurPtr == 'H'))
499  ++CurPtr;
500 
501  // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
502  // suffixes on integer literals.
503  SkipIgnoredIntegerSuffix(CurPtr);
504 
505  return intToken(StringRef(TokStart, CurPtr - TokStart), Result);
506  }
507 
508  // Either octal or hexadecimal.
509  APInt Value(128, 0, true);
510  unsigned Radix = doHexLookAhead(CurPtr, 8, LexMasmIntegers);
511  StringRef Result(TokStart, CurPtr - TokStart);
512  if (Result.getAsInteger(Radix, Value))
513  return ReturnError(TokStart, "invalid " + radixName(Radix) + " number");
514 
515  // Consume the [hH].
516  if (Radix == 16)
517  ++CurPtr;
518 
519  // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
520  // suffixes on integer literals.
521  SkipIgnoredIntegerSuffix(CurPtr);
522 
523  return intToken(Result, Value);
524 }
525 
526 /// LexSingleQuote: Integer: 'b'
527 AsmToken AsmLexer::LexSingleQuote() {
528  int CurChar = getNextChar();
529 
530  if (LexMasmStrings) {
531  while (CurChar != EOF) {
532  if (CurChar != '\'') {
533  CurChar = getNextChar();
534  } else if (peekNextChar() == '\'') {
535  // In MASM single-quote strings, doubled single-quotes mean an escaped
536  // single quote, so should be lexed in.
537  getNextChar();
538  CurChar = getNextChar();
539  } else {
540  break;
541  }
542  }
543  if (CurChar == EOF)
544  return ReturnError(TokStart, "unterminated string constant");
546  }
547 
548  if (CurChar == '\\')
549  CurChar = getNextChar();
550 
551  if (CurChar == EOF)
552  return ReturnError(TokStart, "unterminated single quote");
553 
554  CurChar = getNextChar();
555 
556  if (CurChar != '\'')
557  return ReturnError(TokStart, "single quote way too long");
558 
559  // The idea here being that 'c' is basically just an integral
560  // constant.
561  StringRef Res = StringRef(TokStart,CurPtr - TokStart);
562  long long Value;
563 
564  if (Res.startswith("\'\\")) {
565  char theChar = Res[2];
566  switch (theChar) {
567  default: Value = theChar; break;
568  case '\'': Value = '\''; break;
569  case 't': Value = '\t'; break;
570  case 'n': Value = '\n'; break;
571  case 'b': Value = '\b'; break;
572  }
573  } else
574  Value = TokStart[1];
575 
576  return AsmToken(AsmToken::Integer, Res, Value);
577 }
578 
579 /// LexQuote: String: "..."
580 AsmToken AsmLexer::LexQuote() {
581  int CurChar = getNextChar();
582  if (LexMasmStrings) {
583  while (CurChar != EOF) {
584  if (CurChar != '"') {
585  CurChar = getNextChar();
586  } else if (peekNextChar() == '"') {
587  // In MASM double-quoted strings, doubled double-quotes mean an escaped
588  // double quote, so should be lexed in.
589  getNextChar();
590  CurChar = getNextChar();
591  } else {
592  break;
593  }
594  }
595  if (CurChar == EOF)
596  return ReturnError(TokStart, "unterminated string constant");
598  }
599 
600  // TODO: does gas allow multiline string constants?
601  while (CurChar != '"') {
602  if (CurChar == '\\') {
603  // Allow \", etc.
604  CurChar = getNextChar();
605  }
606 
607  if (CurChar == EOF)
608  return ReturnError(TokStart, "unterminated string constant");
609 
610  CurChar = getNextChar();
611  }
612 
614 }
615 
617  TokStart = CurPtr;
618 
619  while (!isAtStartOfComment(CurPtr) && // Start of line comment.
620  !isAtStatementSeparator(CurPtr) && // End of statement marker.
621  *CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) {
622  ++CurPtr;
623  }
624  return StringRef(TokStart, CurPtr-TokStart);
625 }
626 
627 StringRef AsmLexer::LexUntilEndOfLine() {
628  TokStart = CurPtr;
629 
630  while (*CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) {
631  ++CurPtr;
632  }
633  return StringRef(TokStart, CurPtr-TokStart);
634 }
635 
637  bool ShouldSkipSpace) {
638  SaveAndRestore<const char *> SavedTokenStart(TokStart);
639  SaveAndRestore<const char *> SavedCurPtr(CurPtr);
640  SaveAndRestore<bool> SavedAtStartOfLine(IsAtStartOfLine);
641  SaveAndRestore<bool> SavedAtStartOfStatement(IsAtStartOfStatement);
642  SaveAndRestore<bool> SavedSkipSpace(SkipSpace, ShouldSkipSpace);
643  SaveAndRestore<bool> SavedIsPeeking(IsPeeking, true);
644  std::string SavedErr = getErr();
645  SMLoc SavedErrLoc = getErrLoc();
646 
647  size_t ReadCount;
648  for (ReadCount = 0; ReadCount < Buf.size(); ++ReadCount) {
649  AsmToken Token = LexToken();
650 
651  Buf[ReadCount] = Token;
652 
653  if (Token.is(AsmToken::Eof))
654  break;
655  }
656 
657  SetError(SavedErrLoc, SavedErr);
658  return ReadCount;
659 }
660 
661 bool AsmLexer::isAtStartOfComment(const char *Ptr) {
662  StringRef CommentString = MAI.getCommentString();
663 
664  if (CommentString.size() == 1)
665  return CommentString[0] == Ptr[0];
666 
667  // Allow # preprocessor commments also be counted as comments for "##" cases
668  if (CommentString[1] == '#')
669  return CommentString[0] == Ptr[0];
670 
671  return strncmp(Ptr, CommentString.data(), CommentString.size()) == 0;
672 }
673 
674 bool AsmLexer::isAtStatementSeparator(const char *Ptr) {
675  return strncmp(Ptr, MAI.getSeparatorString(),
676  strlen(MAI.getSeparatorString())) == 0;
677 }
678 
// Body of the main token dispatcher: records the token start, consumes one
// character, and either handles it directly or hands off to a specialised
// Lex* routine.
// NOTE(review): this copy is missing the function's signature line (the
// embedded listing numbers start at 680) and many statement lines — listing
// numbers such as 697, 709, 718, 743, 760, 766, 784-811, 814, 818, 851 and
// 865-884 are absent — so several branches below have no visible return.
// Confirm every gap against upstream before relying on this text.
680  TokStart = CurPtr;
681  // This always consumes at least one character.
682  int CurChar = getNextChar();
683 
684  if (!IsPeeking && CurChar == '#' && IsAtStartOfStatement) {
685  // If this starts with a '#', this may be a cpp
686  // hash directive and otherwise a line comment.
687  AsmToken TokenBuf[2];
688  MutableArrayRef<AsmToken> Buf(TokenBuf, 2);
689  size_t num = peekTokens(Buf, true);
690  // There cannot be a space preceding this
// A hash directive is recognised when the two peeked tokens are an Integer
// followed by a String.
691  if (IsAtStartOfLine && num == 2 && TokenBuf[0].is(AsmToken::Integer) &&
692  TokenBuf[1].is(AsmToken::String)) {
693  CurPtr = TokStart; // reset curPtr;
694  StringRef s = LexUntilEndOfLine();
695  UnLex(TokenBuf[1]);
696  UnLex(TokenBuf[0]);
698  }
699  return LexLineComment();
700  }
701 
702  if (isAtStartOfComment(TokStart))
703  return LexLineComment();
704 
705  if (isAtStatementSeparator(TokStart)) {
// One separator character was already consumed by getNextChar(); skip the
// remainder of the separator string.
706  CurPtr += strlen(MAI.getSeparatorString()) - 1;
707  IsAtStartOfLine = true;
708  IsAtStartOfStatement = true;
710  StringRef(TokStart, strlen(MAI.getSeparatorString())));
711  }
712 
713  // If we're missing a newline at EOF, make sure we still get an
714  // EndOfStatement token before the Eof token.
715  if (CurChar == EOF && !IsAtStartOfStatement && EndStatementAtEOF) {
716  IsAtStartOfLine = true;
717  IsAtStartOfStatement = true;
719  }
// Default assumption for ordinary tokens; the whitespace and '/' cases below
// restore the previous statement-start flag from OldIsAtStartOfStatement.
720  IsAtStartOfLine = false;
721  bool OldIsAtStartOfStatement = IsAtStartOfStatement;
722  IsAtStartOfStatement = false;
723  switch (CurChar) {
724  default:
725  if (MAI.doesAllowSymbolAtNameStart()) {
726  // Handle Microsoft-style identifier: [a-zA-Z_$.@?][a-zA-Z0-9_$.@?]*
727  if (!isDigit(CurChar) &&
728  IsIdentifierChar(CurChar, MAI.doesAllowAtInName()))
729  return LexIdentifier();
730  } else {
731  // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
732  if (isalpha(CurChar) || CurChar == '_' || CurChar == '.')
733  return LexIdentifier();
734  }
735 
736  // Unknown character, emit an error.
737  return ReturnError(TokStart, "invalid character in input");
738  case EOF:
739  if (EndStatementAtEOF) {
740  IsAtStartOfLine = true;
741  IsAtStartOfStatement = true;
742  }
744  case 0:
745  case ' ':
746  case '\t':
// Whitespace does not change whether we are at the start of a statement.
747  IsAtStartOfStatement = OldIsAtStartOfStatement;
748  while (*CurPtr == ' ' || *CurPtr == '\t')
749  CurPtr++;
750  if (SkipSpace)
751  return LexToken(); // Ignore whitespace.
752  else
753  return AsmToken(AsmToken::Space, StringRef(TokStart, CurPtr - TokStart));
754  case '\r': {
755  IsAtStartOfLine = true;
756  IsAtStartOfStatement = true;
757  // If this is a CR followed by LF, treat that as one token.
758  if (CurPtr != CurBuf.end() && *CurPtr == '\n')
759  ++CurPtr;
761  StringRef(TokStart, CurPtr - TokStart));
762  }
763  case '\n':
764  IsAtStartOfLine = true;
765  IsAtStartOfStatement = true;
767  case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1));
768  case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1));
769  case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1));
770  case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1));
771  case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1));
772  case '[': return AsmToken(AsmToken::LBrac, StringRef(TokStart, 1));
773  case ']': return AsmToken(AsmToken::RBrac, StringRef(TokStart, 1));
774  case '{': return AsmToken(AsmToken::LCurly, StringRef(TokStart, 1));
775  case '}': return AsmToken(AsmToken::RCurly, StringRef(TokStart, 1));
776  case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1));
777  case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1));
778  case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1));
779  case '@': return AsmToken(AsmToken::At, StringRef(TokStart, 1));
780  case '\\': return AsmToken(AsmToken::BackSlash, StringRef(TokStart, 1));
// The cases below check for two-character operators by looking at the
// character after the one just consumed.
781  case '=':
782  if (*CurPtr == '=') {
783  ++CurPtr;
785  }
787  case '-':
788  if (*CurPtr == '>') {
789  ++CurPtr;
791  }
793  case '|':
794  if (*CurPtr == '|') {
795  ++CurPtr;
797  }
799  case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1));
800  case '&':
801  if (*CurPtr == '&') {
802  ++CurPtr;
804  }
806  case '!':
807  if (*CurPtr == '=') {
808  ++CurPtr;
810  }
812  case '%':
813  if (MAI.hasMipsExpressions()) {
815  unsigned OperatorLength;
816 
// Map a %-operator name to its token kind and total length; each length
// counts the leading '%' plus the operator name.
817  std::tie(Operator, OperatorLength) =
819  StringRef(CurPtr))
820  .StartsWith("call16", {AsmToken::PercentCall16, 7})
821  .StartsWith("call_hi", {AsmToken::PercentCall_Hi, 8})
822  .StartsWith("call_lo", {AsmToken::PercentCall_Lo, 8})
823  .StartsWith("dtprel_hi", {AsmToken::PercentDtprel_Hi, 10})
824  .StartsWith("dtprel_lo", {AsmToken::PercentDtprel_Lo, 10})
825  .StartsWith("got_disp", {AsmToken::PercentGot_Disp, 9})
826  .StartsWith("got_hi", {AsmToken::PercentGot_Hi, 7})
827  .StartsWith("got_lo", {AsmToken::PercentGot_Lo, 7})
828  .StartsWith("got_ofst", {AsmToken::PercentGot_Ofst, 9})
829  .StartsWith("got_page", {AsmToken::PercentGot_Page, 9})
830  .StartsWith("gottprel", {AsmToken::PercentGottprel, 9})
831  .StartsWith("got", {AsmToken::PercentGot, 4})
832  .StartsWith("gp_rel", {AsmToken::PercentGp_Rel, 7})
833  .StartsWith("higher", {AsmToken::PercentHigher, 7})
834  .StartsWith("highest", {AsmToken::PercentHighest, 8})
835  .StartsWith("hi", {AsmToken::PercentHi, 3})
836  .StartsWith("lo", {AsmToken::PercentLo, 3})
837  .StartsWith("neg", {AsmToken::PercentNeg, 4})
838  .StartsWith("pcrel_hi", {AsmToken::PercentPcrel_Hi, 9})
839  .StartsWith("pcrel_lo", {AsmToken::PercentPcrel_Lo, 9})
840  .StartsWith("tlsgd", {AsmToken::PercentTlsgd, 6})
841  .StartsWith("tlsldm", {AsmToken::PercentTlsldm, 7})
842  .StartsWith("tprel_hi", {AsmToken::PercentTprel_Hi, 9})
843  .StartsWith("tprel_lo", {AsmToken::PercentTprel_Lo, 9})
844  .Default({AsmToken::Percent, 1});
845 
846  if (Operator != AsmToken::Percent) {
// The '%' itself was already consumed, hence the "- 1".
847  CurPtr += OperatorLength - 1;
848  return AsmToken(Operator, StringRef(TokStart, OperatorLength));
849  }
850  }
852  case '/':
// LexSlash decides for itself whether the statement-start flag is cleared.
853  IsAtStartOfStatement = OldIsAtStartOfStatement;
854  return LexSlash();
855  case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1));
856  case '\'': return LexSingleQuote();
857  case '"': return LexQuote();
858  case '0': case '1': case '2': case '3': case '4':
859  case '5': case '6': case '7': case '8': case '9':
860  return LexDigit();
861  case '<':
862  switch (*CurPtr) {
863  case '<':
864  ++CurPtr;
866  case '=':
867  ++CurPtr;
869  case '>':
870  ++CurPtr;
872  default:
874  }
875  case '>':
876  switch (*CurPtr) {
877  case '>':
878  ++CurPtr;
880  case '=':
881  ++CurPtr;
883  default:
885  }
886 
887  // TODO: Quoted identifiers (objc methods etc)
888  // local labels: [0-9][:]
889  // Forward/backward labels: [0-9][fb]
890  // Integers, fp constants, character constants.
891  }
892 }
llvm::AsmToken::PercentPcrel_Lo
@ PercentPcrel_Lo
Definition: MCAsmMacro.h:61
llvm::StringRef::startswith
LLVM_NODISCARD bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:289
llvm
This class represents lattice values for constants.
Definition: AllocatorList.h:23
llvm::MCAsmInfo::getCommentString
StringRef getCommentString() const
Definition: MCAsmInfo.h:559
llvm::Operator
This is a utility class that provides an abstraction for the common functionality between Instruction...
Definition: Operator.h:30
llvm::AsmToken::is
bool is(TokenKind K) const
Definition: MCAsmMacro.h:82
llvm::MCAsmInfo::getSeparatorString
const char * getSeparatorString() const
Definition: MCAsmInfo.h:553
llvm::AsmCommentConsumer::HandleComment
virtual void HandleComment(SMLoc Loc, StringRef CommentText)=0
Callback function for when a comment is lexed.
llvm::AsmToken::PercentGottprel
@ PercentGottprel
Definition: MCAsmMacro.h:59
llvm::MCAsmInfo::hasMipsExpressions
bool hasMipsExpressions() const
Definition: MCAsmInfo.h:734
IsIdentifierChar
static bool IsIdentifierChar(char c, bool AllowAt)
LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@?]*.
Definition: AsmLexer.cpp:147
llvm::MCAsmLexer::DefaultRadix
unsigned DefaultRadix
Definition: MCAsmLexer.h:56
llvm::AsmToken::PercentNeg
@ PercentNeg
Definition: MCAsmMacro.h:60
llvm::AsmToken::LBrac
@ LBrac
Definition: MCAsmMacro.h:48
StringRef.h
llvm::AsmToken::Dot
@ Dot
Definition: MCAsmMacro.h:49
llvm::AsmToken::EndOfStatement
@ EndOfStatement
Definition: MCAsmMacro.h:42
llvm::MCAsmInfo
This class is intended to be used as a base class for asm properties and features specific to the tar...
Definition: MCAsmInfo.h:56
llvm::MCAsmLexer::SkipSpace
bool SkipSpace
Definition: MCAsmLexer.h:49
radixName
static std::string radixName(unsigned Radix)
Definition: AsmLexer.cpp:294
llvm::AsmToken::Eof
@ Eof
Definition: MCAsmMacro.h:25
llvm::AsmToken::PercentPcrel_Hi
@ PercentPcrel_Hi
Definition: MCAsmMacro.h:60
llvm::AsmToken::PercentHi
@ PercentHi
Definition: MCAsmMacro.h:59
llvm::AsmLexer::LexUntilEndOfStatement
StringRef LexUntilEndOfStatement() override
Definition: AsmLexer.cpp:616
APInt.h
AsmLexer.h
llvm::AsmToken::Integer
@ Integer
Definition: MCAsmMacro.h:32
llvm::MCAsmLexer::LexMasmStrings
bool LexMasmStrings
Definition: MCAsmLexer.h:54
llvm::MCAsmLexer::is
bool is(AsmToken::TokenKind K) const
Check if the current token has kind K.
Definition: MCAsmLexer.h:139
llvm::AsmLexer::setBuffer
void setBuffer(StringRef Buf, const char *ptr=nullptr, bool EndStatementAtEOF=true)
Definition: AsmLexer.cpp:40
llvm::AsmToken
Target independent representation for an assembler token.
Definition: MCAsmMacro.h:21
llvm::AsmToken::PercentTlsldm
@ PercentTlsldm
Definition: MCAsmMacro.h:61
llvm::AsmToken::ExclaimEqual
@ ExclaimEqual
Definition: MCAsmMacro.h:52
llvm::AsmToken::Minus
@ Minus
Definition: MCAsmMacro.h:45
llvm::AsmToken::LParen
@ LParen
Definition: MCAsmMacro.h:48
llvm::AsmToken::Dollar
@ Dollar
Definition: MCAsmMacro.h:49
llvm::AsmToken::PercentGot_Page
@ PercentGot_Page
Definition: MCAsmMacro.h:59
llvm::AsmToken::PercentCall16
@ PercentCall16
Definition: MCAsmMacro.h:57
llvm::MutableArrayRef
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:305
llvm::SMLoc
Represents a location in source code.
Definition: SMLoc.h:23
llvm::ModRefInfo::Ref
@ Ref
The access may reference the value stored in memory.
llvm::AsmLexer::LexToken
AsmToken LexToken() override
LexToken - Read the next token and return its code.
Definition: AsmLexer.cpp:679
llvm::AsmToken::GreaterGreater
@ GreaterGreater
Definition: MCAsmMacro.h:54
llvm::AsmToken::BackSlash
@ BackSlash
Definition: MCAsmMacro.h:47
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::AsmLexer::~AsmLexer
~AsmLexer() override
llvm::AsmToken::PercentTprel_Hi
@ PercentTprel_Hi
Definition: MCAsmMacro.h:61
llvm::AsmToken::Star
@ Star
Definition: MCAsmMacro.h:49
SMLoc.h
llvm::AsmToken::MinusGreater
@ MinusGreater
Definition: MCAsmMacro.h:54
llvm::AsmToken::Percent
@ Percent
Definition: MCAsmMacro.h:52
c
the resulting code requires compare and branches when and if the revised code is with conditional branches instead of More there is a byte word extend before each where there should be only and the condition codes are not remembered when the same two values are compared twice More LSR enhancements i8 and i32 load store addressing modes are identical int int c
Definition: README.txt:418
llvm::AsmToken::PercentDtprel_Lo
@ PercentDtprel_Lo
Definition: MCAsmMacro.h:58
llvm::AsmToken::HashDirective
@ HashDirective
Definition: MCAsmMacro.h:40
llvm::MCAsmLexer::getErr
const std::string & getErr()
Get the current error string.
Definition: MCAsmLexer.h:131
llvm::AsmToken::Error
@ Error
Definition: MCAsmMacro.h:25
llvm::MCAsmLexer::getErrLoc
SMLoc getErrLoc()
Get the current error location.
Definition: MCAsmLexer.h:126
findLastDigit
static const char * findLastDigit(const char *CurPtr, unsigned DefaultRadix)
Definition: AsmLexer.cpp:281
llvm::AsmToken::PercentDtprel_Hi
@ PercentDtprel_Hi
Definition: MCAsmMacro.h:57
llvm::MCAsmInfo::doesAllowSymbolAtNameStart
bool doesAllowSymbolAtNameStart() const
Definition: MCAsmInfo.h:584
llvm::AsmToken::Equal
@ Equal
Definition: MCAsmMacro.h:49
MCAsmLexer.h
llvm::AsmToken::Greater
@ Greater
Definition: MCAsmMacro.h:54
llvm::AsmToken::PercentLo
@ PercentLo
Definition: MCAsmMacro.h:60
SaveAndRestore.h
llvm::StringRef::end
iterator end() const
Definition: StringRef.h:133
llvm::AsmToken::At
@ At
Definition: MCAsmMacro.h:54
llvm::MCAsmLexer::LexMasmIntegers
bool LexMasmIntegers
Definition: MCAsmLexer.h:53
s
multiplies can be turned into SHL s
Definition: README.txt:370
llvm::AsmToken::AmpAmp
@ AmpAmp
Definition: MCAsmMacro.h:52
llvm::AsmToken::Colon
@ Colon
Definition: MCAsmMacro.h:43
llvm::MCAsmLexer::LexMasmHexFloats
bool LexMasmHexFloats
Definition: MCAsmLexer.h:52
llvm::AsmToken::PercentGot_Hi
@ PercentGot_Hi
Definition: MCAsmMacro.h:58
StringExtras.h
llvm::AsmToken::BigNum
@ BigNum
Definition: MCAsmMacro.h:33
ArrayRef.h
llvm::AsmToken::Slash
@ Slash
Definition: MCAsmMacro.h:46
llvm::AsmToken::Exclaim
@ Exclaim
Definition: MCAsmMacro.h:52
llvm::AsmToken::Amp
@ Amp
Definition: MCAsmMacro.h:52
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::AsmToken::PercentTlsgd
@ PercentTlsgd
Definition: MCAsmMacro.h:61
llvm::AsmToken::PercentHighest
@ PercentHighest
Definition: MCAsmMacro.h:60
llvm::AsmToken::EqualEqual
@ EqualEqual
Definition: MCAsmMacro.h:49
llvm::AsmToken::PercentGp_Rel
@ PercentGp_Rel
Definition: MCAsmMacro.h:59
llvm::AsmLexer::AsmLexer
AsmLexer(const MCAsmInfo &MAI)
Definition: AsmLexer.cpp:34
intToken
static AsmToken intToken(StringRef Ref, APInt &Value)
Definition: AsmLexer.cpp:288
llvm::AsmToken::Less
@ Less
Definition: MCAsmMacro.h:53
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:70
llvm::AsmToken::Comma
@ Comma
Definition: MCAsmMacro.h:49
llvm::MCAsmInfo::doesAllowAtInName
bool doesAllowAtInName() const
Definition: MCAsmInfo.h:583
llvm::AsmToken::PercentCall_Lo
@ PercentCall_Lo
Definition: MCAsmMacro.h:57
llvm::AsmToken::GreaterEqual
@ GreaterEqual
Definition: MCAsmMacro.h:54
llvm::AsmToken::PercentTprel_Lo
@ PercentTprel_Lo
Definition: MCAsmMacro.h:62
MCAsmInfo.h
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition: StringRef.h:57
llvm::AsmToken::Plus
@ Plus
Definition: MCAsmMacro.h:45
llvm::MCAsmLexer::AllowAtInIdentifier
bool AllowAtInIdentifier
Definition: MCAsmLexer.h:50
llvm::AsmToken::Space
@ Space
Definition: MCAsmMacro.h:44
Compiler.h
llvm::SaveAndRestore
A utility class that uses RAII to save and restore the value of a variable.
Definition: SaveAndRestore.h:21
llvm::AsmToken::RParen
@ RParen
Definition: MCAsmMacro.h:48
llvm::AsmLexer::peekTokens
size_t peekTokens(MutableArrayRef< AsmToken > Buf, bool ShouldSkipSpace=true) override
Look ahead an arbitrary number of tokens.
Definition: AsmLexer.cpp:636
LLVM_FALLTHROUGH
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:280
SkipIgnoredIntegerSuffix
static void SkipIgnoredIntegerSuffix(const char *&CurPtr)
Definition: AsmLexer.cpp:244
llvm::AsmToken::LessLess
@ LessLess
Definition: MCAsmMacro.h:53
llvm::AsmToken::LessGreater
@ LessGreater
Definition: MCAsmMacro.h:53
llvm::AsmToken::Pipe
@ Pipe
Definition: MCAsmMacro.h:51
llvm::MCAsmLexer::TokStart
const char * TokStart
Definition: MCAsmLexer.h:48
llvm::AsmToken::PercentGot_Ofst
@ PercentGot_Ofst
Definition: MCAsmMacro.h:59
llvm::AsmToken::Tilde
@ Tilde
Definition: MCAsmMacro.h:45
llvm::AsmToken::Real
@ Real
Definition: MCAsmMacro.h:36
llvm::MCAsmLexer::CommentConsumer
AsmCommentConsumer * CommentConsumer
Definition: MCAsmLexer.h:57
llvm::AsmToken::Identifier
@ Identifier
Definition: MCAsmMacro.h:28
llvm::AsmToken::RBrac
@ RBrac
Definition: MCAsmMacro.h:48
llvm::TargetStackID::Value
Value
Definition: TargetFrameLowering.h:27
llvm::AsmToken::PercentCall_Hi
@ PercentCall_Hi
Definition: MCAsmMacro.h:57
StringSwitch.h
llvm::AsmToken::PipePipe
@ PipePipe
Definition: MCAsmMacro.h:51
llvm::AsmToken::TokenKind
TokenKind
Definition: MCAsmMacro.h:23
llvm::AsmToken::Caret
@ Caret
Definition: MCAsmMacro.h:51
llvm::AsmToken::LessEqual
@ LessEqual
Definition: MCAsmMacro.h:53
llvm::to_string
std::string to_string(const T &Value)
Definition: ScopedPrinter.h:62
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:163
llvm::AsmToken::LCurly
@ LCurly
Definition: MCAsmMacro.h:48
llvm::StringRef::data
const LLVM_NODISCARD char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:152
llvm::SMLoc::getFromPointer
static SMLoc getFromPointer(const char *Ptr)
Definition: SMLoc.h:36
llvm::StringSwitch
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:42
llvm::AsmToken::PercentGot
@ PercentGot
Definition: MCAsmMacro.h:58
llvm::AsmToken::String
@ String
Definition: MCAsmMacro.h:29
llvm::AsmToken::RCurly
@ RCurly
Definition: MCAsmMacro.h:48
llvm::AsmToken::Hash
@ Hash
Definition: MCAsmMacro.h:52
llvm::MCAsmLexer::UnLex
void UnLex(AsmToken const &Token)
Definition: MCAsmLexer.h:91
llvm::StringRef::size
LLVM_NODISCARD size_t size() const
size - Get the string size.
Definition: StringRef.h:160
llvm::AsmToken::PercentGot_Disp
@ PercentGot_Disp
Definition: MCAsmMacro.h:58
llvm::StringRef::begin
iterator begin() const
Definition: StringRef.h:131
llvm::AsmToken::PercentHigher
@ PercentHigher
Definition: MCAsmMacro.h:60
llvm::AsmToken::PercentGot_Lo
@ PercentGot_Lo
Definition: MCAsmMacro.h:58
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::MCAsmLexer::SetError
void SetError(SMLoc errLoc, const std::string &err)
Definition: MCAsmLexer.h:63
llvm::AsmToken::Comment
@ Comment
Definition: MCAsmMacro.h:39
llvm::MCAsmLexer::UseMasmDefaultRadix
bool UseMasmDefaultRadix
Definition: MCAsmLexer.h:55
doHexLookAhead
static unsigned doHexLookAhead(const char *&CurPtr, unsigned DefaultRadix, bool LexHex)
Definition: AsmLexer.cpp:256