LLVM  4.0.0
AsmLexer.cpp
Go to the documentation of this file.
1 //===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This class implements the lexer for assembly files.
11 //
12 //===----------------------------------------------------------------------===//
13 
15 #include "llvm/ADT/APInt.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/StringRef.h"
18 #include "llvm/ADT/StringSwitch.h"
19 #include "llvm/MC/MCAsmInfo.h"
21 #include "llvm/Support/SMLoc.h"
23 #include <cassert>
24 #include <cctype>
25 #include <cstdio>
26 #include <cstring>
27 #include <string>
28 #include <tuple>
29 #include <utility>
30 
31 using namespace llvm;
32 
// Construct a lexer for the assembly dialect described by MAI.
// The MAI member is initialised from the argument, the read cursor starts out
// null, the lexer is flagged as being at the start of both a line and a
// statement, and MS-inline-asm parsing and peeking are both switched off.
// NOTE(review): the embedded numbering skips source line 37, so part of the
// constructor body appears to be missing from this extract -- confirm against
// the upstream file.
33 AsmLexer::AsmLexer(const MCAsmInfo &MAI)
34  : MAI(MAI), CurPtr(nullptr), IsAtStartOfLine(true),
35  IsAtStartOfStatement(true), IsParsingMSInlineAsm(false),
36  IsPeeking(false) {
38 }
39 
// NOTE(review): presumably the closing brace of the destructor -- the
// cross-reference index lists `~AsmLexer() override` at source line 40, which
// is absent from this extract. Confirm against the upstream file.
41 }
42 
43 void AsmLexer::setBuffer(StringRef Buf, const char *ptr) {
44  CurBuf = Buf;
45 
46  if (ptr)
47  CurPtr = ptr;
48  else
49  CurPtr = CurBuf.begin();
50 
51  TokStart = nullptr;
52 }
53 
54 /// ReturnError - Set the error to the specified string at the specified
55 /// location. This is defined to always return AsmToken::Error.
// The returned Error token spans from Loc up to the current cursor.
// NOTE(review): the embedded numbering skips source line 57, so the statement
// that actually records Msg is not visible in this extract -- confirm against
// the upstream file.
56 AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
58 
59  return AsmToken(AsmToken::Error, StringRef(Loc, CurPtr - Loc));
60 }
61 
62 int AsmLexer::getNextChar() {
63  if (CurPtr == CurBuf.end())
64  return EOF;
65  return (unsigned char)*CurPtr++;
66 }
67 
68 /// LexFloatLiteral: [0-9]*[.][0-9]*([eE][+-]?[0-9]*)?
69 ///
70 /// The leading integral digit sequence and dot should have already been
71 /// consumed, some or all of the fractional digit sequence *can* have been
72 /// consumed.
73 AsmToken AsmLexer::LexFloatLiteral() {
74  // Skip the fractional digit sequence.
75  while (isdigit(*CurPtr))
76  ++CurPtr;
77 
78  // Check for exponent; we intentionally accept a slighlty wider set of
79  // literals here and rely on the upstream client to reject invalid ones (e.g.,
80  // "1e+").
81  if (*CurPtr == 'e' || *CurPtr == 'E') {
82  ++CurPtr;
83  if (*CurPtr == '-' || *CurPtr == '+')
84  ++CurPtr;
85  while (isdigit(*CurPtr))
86  ++CurPtr;
87  }
88 
89  return AsmToken(AsmToken::Real,
90  StringRef(TokStart, CurPtr - TokStart));
91 }
92 
93 /// LexHexFloatLiteral matches essentially (.[0-9a-fA-F]*)?[pP][+-]?[0-9a-fA-F]+
94 /// while making sure there are enough actual digits around for the constant to
95 /// be valid.
96 ///
97 /// The leading "0x[0-9a-fA-F]*" (i.e. integer part) has already been consumed
98 /// before we get here.
99 AsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits) {
100  assert((*CurPtr == 'p' || *CurPtr == 'P' || *CurPtr == '.') &&
101  "unexpected parse state in floating hex");
102  bool NoFracDigits = true;
103 
104  // Skip the fractional part if there is one
105  if (*CurPtr == '.') {
106  ++CurPtr;
107 
108  const char *FracStart = CurPtr;
109  while (isxdigit(*CurPtr))
110  ++CurPtr;
111 
112  NoFracDigits = CurPtr == FracStart;
113  }
114 
115  if (NoIntDigits && NoFracDigits)
116  return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
117  "expected at least one significand digit");
118 
119  // Make sure we do have some kind of proper exponent part
120  if (*CurPtr != 'p' && *CurPtr != 'P')
121  return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
122  "expected exponent part 'p'");
123  ++CurPtr;
124 
125  if (*CurPtr == '+' || *CurPtr == '-')
126  ++CurPtr;
127 
128  // N.b. exponent digits are *not* hex
129  const char *ExpStart = CurPtr;
130  while (isdigit(*CurPtr))
131  ++CurPtr;
132 
133  if (CurPtr == ExpStart)
134  return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
135  "expected at least one exponent digit");
136 
137  return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart));
138 }
139 
/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@?]*
///
/// \returns true if \p c may appear after the first character of an
/// identifier: an alphanumeric character, '_', '$', '.', '?', or '@' when
/// \p AllowAt is set.
static bool IsIdentifierChar(char c, bool AllowAt) {
  // isalnum() is only defined for EOF and for values representable as
  // unsigned char; a plain char holding a byte >= 0x80 may be negative, so
  // widen it first.
  return isalnum(static_cast<unsigned char>(c)) || c == '_' || c == '$' ||
         c == '.' || (c == '@' && AllowAt) || c == '?';
}
145 
// Lex an identifier whose first character has already been consumed.
// A leading '.' followed by a digit is first checked for being a floating
// point literal; otherwise the cursor advances over every character accepted
// by IsIdentifierChar (with '@' allowed only if AllowAtInIdentifier is set).
// NOTE(review): the embedded numbering skips source lines 153, 162 and 164,
// so the rest of the float-literal condition, the token returned for a lone
// '.', and the final identifier return are not visible in this extract --
// confirm against the upstream file.
146 AsmToken AsmLexer::LexIdentifier() {
147  // Check for floating point literals.
148  if (CurPtr[-1] == '.' && isdigit(*CurPtr)) {
149  // Disambiguate a .1243foo identifier from a floating literal.
150  while (isdigit(*CurPtr))
151  ++CurPtr;
152  if (*CurPtr == 'e' || *CurPtr == 'E' ||
154  return LexFloatLiteral();
155  }
156 
157  while (IsIdentifierChar(*CurPtr, AllowAtInIdentifier))
158  ++CurPtr;
159 
160  // Handle . as a special case.
161  if (CurPtr == TokStart+1 && TokStart[0] == '.')
163 
165 }
166 
167 /// LexSlash: Slash: /
168 /// C-Style Comment: /* ... */
// Entered with the cursor on the character that follows the '/'.
// A following '*' opens a C-style comment, a following '/' hands over to
// LexLineComment, and anything else takes the default case.
// NOTE(review): the embedded numbering skips source lines 179, 193 and 198,
// so the default-case return, the call made on CommentConsumer and the first
// line of the successful return are not visible in this extract -- confirm
// against the upstream file.
169 AsmToken AsmLexer::LexSlash() {
170  switch (*CurPtr) {
171  case '*':
172  IsAtStartOfStatement = false;
173  break; // C style comment.
174  case '/':
 // "//": skip the second slash and lex the remainder as a line comment.
175  ++CurPtr;
176  return LexLineComment();
177  default:
178  IsAtStartOfStatement = false;
180  }
181 
182  // C Style comment.
183  ++CurPtr; // skip the star.
184  const char *CommentTextStart = CurPtr;
 // Scan for the closing "*/"; running off the end of the buffer first is
 // reported as an unterminated comment.
185  while (CurPtr != CurBuf.end()) {
186  switch (*CurPtr++) {
187  case '*':
188  // End of the comment?
189  if (*CurPtr != '/')
190  break;
191  // If we have a CommentConsumer, notify it about the comment.
 // The text passed on excludes the closing '*' (hence the "- 1").
192  if (CommentConsumer) {
194  SMLoc::getFromPointer(CommentTextStart),
195  StringRef(CommentTextStart, CurPtr - 1 - CommentTextStart));
196  }
197  ++CurPtr; // End the */.
199  StringRef(TokStart, CurPtr - TokStart));
200  }
201  }
202  return ReturnError(TokStart, "unterminated comment");
203 }
204 
205 /// LexLineComment: Comment: #[^\n]*
206 /// : //[^\n]*
// Consumes the rest of the line, optionally reports the comment text to
// CommentConsumer, and marks the lexer as being at the start of a line.
// NOTE(review): the embedded numbering skips source lines 219, 227 and 231,
// so the call made on CommentConsumer and the first line of each of the two
// return statements (including the token kinds) are not visible in this
// extract -- confirm against the upstream file.
207 AsmToken AsmLexer::LexLineComment() {
208  // Mark This as an end of statement with a body of the
209  // comment. While it would be nicer to leave this two tokens,
210  // backwards compatibility with TargetParsers makes keeping this in this form
211  // better.
212  const char *CommentTextStart = CurPtr;
 // Read up to and including the first '\n' or '\r', or until EOF.
213  int CurChar = getNextChar();
214  while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF)
215  CurChar = getNextChar();
216 
217  // If we have a CommentConsumer, notify it about the comment.
218  if (CommentConsumer) {
220  SMLoc::getFromPointer(CommentTextStart),
221  StringRef(CommentTextStart, CurPtr - 1 - CommentTextStart));
222  }
223 
224  IsAtStartOfLine = true;
225  // This is a whole line comment. leave newline
 // In this case the token text runs from TokStart to the cursor.
226  if (IsAtStartOfStatement)
228  StringRef(TokStart, CurPtr - TokStart));
229  IsAtStartOfStatement = true;
230 
 // Otherwise the token text stops one character short of the cursor.
232  StringRef(TokStart, CurPtr - 1 - TokStart));
233 }
234 
/// Advance \p CurPtr past an optional integer type suffix: an optional 'U'
/// followed by at most two 'L' characters, which covers U, L, LL, UL and ULL.
/// Only the upper-case spellings are recognised.
static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
  const char *Cursor = CurPtr;
  if (*Cursor == 'U')
    ++Cursor;
  for (int LCount = 0; LCount < 2 && *Cursor == 'L'; ++LCount)
    ++Cursor;
  CurPtr = Cursor;
}
244 
// Look ahead to search for first non-hex digit, if it's [hH], then we treat the
// integer as a hexadecimal, possibly with leading zeroes.
//
// On return CurPtr points at the [hH] suffix when one was found, at the first
// non-decimal hex digit when there is one but no suffix, and otherwise just
// past the run of decimal digits. Returns 16 when the suffix was found and
// DefaultRadix otherwise.
static unsigned doLookAhead(const char *&CurPtr, unsigned DefaultRadix) {
  const char *FirstHex = nullptr;
  const char *LookAhead = CurPtr;
  for (;; ++LookAhead) {
    // The <cctype> classifiers are only defined for EOF and for values
    // representable as unsigned char, so widen the byte before testing it.
    const unsigned char C = static_cast<unsigned char>(*LookAhead);
    if (isdigit(C))
      continue;
    if (!isxdigit(C))
      break;
    // Remember where the digits stop being valid in a decimal/octal number.
    if (!FirstHex)
      FirstHex = LookAhead;
  }
  const bool IsHex = *LookAhead == 'h' || *LookAhead == 'H';
  CurPtr = (IsHex || !FirstHex) ? LookAhead : FirstHex;
  return IsHex ? 16 : DefaultRadix;
}
267 
269 {
 // NOTE(review): the signature (source line 268) is absent from this extract;
 // the cross-reference index lists it as
 // `static AsmToken intToken(StringRef Ref, APInt &Value)`.
 // Wraps a parsed literal in a token: values for which Value.isIntN(64) holds
 // become Integer tokens, anything wider becomes a BigNum token.
270  if (Value.isIntN(64))
271  return AsmToken(AsmToken::Integer, Ref, Value);
272  return AsmToken(AsmToken::BigNum, Ref, Value);
273 }
274 
275 /// LexDigit: First character is [0-9].
276 /// Local Label: [0-9][:]
277 /// Forward/Backward Label: [0-9][fb]
278 /// Binary integer: 0b[01]+
279 /// Octal integer: 0[0-7]+
280 /// Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH]
281 /// Decimal integer: [1-9][0-9]*
282 AsmToken AsmLexer::LexDigit() {
283  // MASM-flavor binary integer: [01]+[bB]
284  // MASM-flavor hexadecimal integer: [0-9][0-9a-fA-F]*[hH]
285  if (IsParsingMSInlineAsm && isdigit(CurPtr[-1])) {
286  const char *FirstNonBinary = (CurPtr[-1] != '0' && CurPtr[-1] != '1') ?
287  CurPtr - 1 : nullptr;
288  const char *OldCurPtr = CurPtr;
289  while (isxdigit(*CurPtr)) {
290  if (*CurPtr != '0' && *CurPtr != '1' && !FirstNonBinary)
291  FirstNonBinary = CurPtr;
292  ++CurPtr;
293  }
294 
295  unsigned Radix = 0;
296  if (*CurPtr == 'h' || *CurPtr == 'H') {
297  // hexadecimal number
298  ++CurPtr;
299  Radix = 16;
300  } else if (FirstNonBinary && FirstNonBinary + 1 == CurPtr &&
301  (*FirstNonBinary == 'b' || *FirstNonBinary == 'B'))
302  Radix = 2;
303 
304  if (Radix == 2 || Radix == 16) {
305  StringRef Result(TokStart, CurPtr - TokStart);
306  APInt Value(128, 0, true);
307 
308  if (Result.drop_back().getAsInteger(Radix, Value))
309  return ReturnError(TokStart, Radix == 2 ? "invalid binary number" :
310  "invalid hexdecimal number");
311 
312  // MSVC accepts and ignores type suffices on integer literals.
313  SkipIgnoredIntegerSuffix(CurPtr);
314 
315  return intToken(Result, Value);
316  }
317 
318  // octal/decimal integers, or floating point numbers, fall through
319  CurPtr = OldCurPtr;
320  }
321 
322  // Decimal integer: [1-9][0-9]*
323  if (CurPtr[-1] != '0' || CurPtr[0] == '.') {
324  unsigned Radix = doLookAhead(CurPtr, 10);
325  bool isHex = Radix == 16;
326  // Check for floating point literals.
327  if (!isHex && (*CurPtr == '.' || *CurPtr == 'e')) {
328  ++CurPtr;
329  return LexFloatLiteral();
330  }
331 
332  StringRef Result(TokStart, CurPtr - TokStart);
333 
334  APInt Value(128, 0, true);
335  if (Result.getAsInteger(Radix, Value))
336  return ReturnError(TokStart, !isHex ? "invalid decimal number" :
337  "invalid hexdecimal number");
338 
339  // Consume the [bB][hH].
340  if (Radix == 2 || Radix == 16)
341  ++CurPtr;
342 
343  // The darwin/x86 (and x86-64) assembler accepts and ignores type
344  // suffices on integer literals.
345  SkipIgnoredIntegerSuffix(CurPtr);
346 
347  return intToken(Result, Value);
348  }
349 
350  if (!IsParsingMSInlineAsm && ((*CurPtr == 'b') || (*CurPtr == 'B'))) {
351  ++CurPtr;
352  // See if we actually have "0b" as part of something like "jmp 0b\n"
353  if (!isdigit(CurPtr[0])) {
354  --CurPtr;
355  StringRef Result(TokStart, CurPtr - TokStart);
356  return AsmToken(AsmToken::Integer, Result, 0);
357  }
358  const char *NumStart = CurPtr;
359  while (CurPtr[0] == '0' || CurPtr[0] == '1')
360  ++CurPtr;
361 
362  // Requires at least one binary digit.
363  if (CurPtr == NumStart)
364  return ReturnError(TokStart, "invalid binary number");
365 
366  StringRef Result(TokStart, CurPtr - TokStart);
367 
368  APInt Value(128, 0, true);
369  if (Result.substr(2).getAsInteger(2, Value))
370  return ReturnError(TokStart, "invalid binary number");
371 
372  // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
373  // suffixes on integer literals.
374  SkipIgnoredIntegerSuffix(CurPtr);
375 
376  return intToken(Result, Value);
377  }
378 
379  if ((*CurPtr == 'x') || (*CurPtr == 'X')) {
380  ++CurPtr;
381  const char *NumStart = CurPtr;
382  while (isxdigit(CurPtr[0]))
383  ++CurPtr;
384 
385  // "0x.0p0" is valid, and "0x0p0" (but not "0xp0" for example, which will be
386  // diagnosed by LexHexFloatLiteral).
387  if (CurPtr[0] == '.' || CurPtr[0] == 'p' || CurPtr[0] == 'P')
388  return LexHexFloatLiteral(NumStart == CurPtr);
389 
390  // Otherwise requires at least one hex digit.
391  if (CurPtr == NumStart)
392  return ReturnError(CurPtr-2, "invalid hexadecimal number");
393 
394  APInt Result(128, 0);
395  if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result))
396  return ReturnError(TokStart, "invalid hexadecimal number");
397 
398  // Consume the optional [hH].
399  if (!IsParsingMSInlineAsm && (*CurPtr == 'h' || *CurPtr == 'H'))
400  ++CurPtr;
401 
402  // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
403  // suffixes on integer literals.
404  SkipIgnoredIntegerSuffix(CurPtr);
405 
406  return intToken(StringRef(TokStart, CurPtr - TokStart), Result);
407  }
408 
409  // Either octal or hexadecimal.
410  APInt Value(128, 0, true);
411  unsigned Radix = doLookAhead(CurPtr, 8);
412  bool isHex = Radix == 16;
413  StringRef Result(TokStart, CurPtr - TokStart);
414  if (Result.getAsInteger(Radix, Value))
415  return ReturnError(TokStart, !isHex ? "invalid octal number" :
416  "invalid hexdecimal number");
417 
418  // Consume the [hH].
419  if (Radix == 16)
420  ++CurPtr;
421 
422  // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
423  // suffixes on integer literals.
424  SkipIgnoredIntegerSuffix(CurPtr);
425 
426  return intToken(Result, Value);
427 }
428 
429 /// LexSingleQuote: Integer: 'b'
430 AsmToken AsmLexer::LexSingleQuote() {
431  int CurChar = getNextChar();
432 
433  if (CurChar == '\\')
434  CurChar = getNextChar();
435 
436  if (CurChar == EOF)
437  return ReturnError(TokStart, "unterminated single quote");
438 
439  CurChar = getNextChar();
440 
441  if (CurChar != '\'')
442  return ReturnError(TokStart, "single quote way too long");
443 
444  // The idea here being that 'c' is basically just an integral
445  // constant.
446  StringRef Res = StringRef(TokStart,CurPtr - TokStart);
447  long long Value;
448 
449  if (Res.startswith("\'\\")) {
450  char theChar = Res[2];
451  switch (theChar) {
452  default: Value = theChar; break;
453  case '\'': Value = '\''; break;
454  case 't': Value = '\t'; break;
455  case 'n': Value = '\n'; break;
456  case 'b': Value = '\b'; break;
457  }
458  } else
459  Value = TokStart[1];
460 
461  return AsmToken(AsmToken::Integer, Res, Value);
462 }
463 
464 /// LexQuote: String: "..."
// Entered after the opening '"' has been consumed; scans forward to the
// closing '"'. Reaching EOF first yields an "unterminated string constant"
// error.
// NOTE(review): the embedded numbering skips source line 480, so the
// statement that returns the string token is not visible in this extract --
// confirm against the upstream file.
465 AsmToken AsmLexer::LexQuote() {
466  int CurChar = getNextChar();
467  // TODO: does gas allow multiline string constants?
468  while (CurChar != '"') {
469  if (CurChar == '\\') {
470  // Allow \", etc.
 // The escaped character is read here and then skipped by the getNextChar
 // call at the bottom of the loop, so an escaped quote does not end the string.
471  CurChar = getNextChar();
472  }
473 
474  if (CurChar == EOF)
475  return ReturnError(TokStart, "unterminated string constant");
476 
477  CurChar = getNextChar();
478  }
479 
481 }
482 
 // NOTE(review): the signature (source line 483) is absent from this extract;
 // the cross-reference index lists it as
 // `StringRef LexUntilEndOfStatement() override`.
 // Returns the text from the current cursor up to, but not including, the
 // next comment start, statement separator, '\n', '\r' or end of buffer.
 // NOTE(review): *CurPtr is read before CurPtr is compared with CurBuf.end();
 // this looks like it relies on the buffer being NUL-terminated -- confirm.
484  TokStart = CurPtr;
485 
486  while (!isAtStartOfComment(CurPtr) && // Start of line comment.
487  !isAtStatementSeparator(CurPtr) && // End of statement marker.
488  *CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) {
489  ++CurPtr;
490  }
491  return StringRef(TokStart, CurPtr-TokStart);
492 }
493 
494 StringRef AsmLexer::LexUntilEndOfLine() {
495  TokStart = CurPtr;
496 
497  while (*CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) {
498  ++CurPtr;
499  }
500  return StringRef(TokStart, CurPtr-TokStart);
501 }
502 
504  bool ShouldSkipSpace) {
 // NOTE(review): the first line of the signature (source line 503) is absent
 // from this extract; the cross-reference index lists the declaration as
 // `size_t peekTokens(MutableArrayRef<AsmToken> Buf, bool ShouldSkipSpace=true) override`.
 //
 // Lexes up to Buf.size() tokens ahead into Buf. The token start, cursor and
 // line/statement flags are wrapped in SaveAndRestore objects; SkipSpace is
 // temporarily set to ShouldSkipSpace and IsPeeking to true.
505  SaveAndRestore<const char *> SavedTokenStart(TokStart);
506  SaveAndRestore<const char *> SavedCurPtr(CurPtr);
507  SaveAndRestore<bool> SavedAtStartOfLine(IsAtStartOfLine);
508  SaveAndRestore<bool> SavedAtStartOfStatement(IsAtStartOfStatement);
509  SaveAndRestore<bool> SavedSkipSpace(SkipSpace, ShouldSkipSpace);
510  SaveAndRestore<bool> SavedIsPeeking(IsPeeking, true);
 // The error state is captured by hand and put back after lexing.
511  std::string SavedErr = getErr();
512  SMLoc SavedErrLoc = getErrLoc();
513 
514  size_t ReadCount;
515  for (ReadCount = 0; ReadCount < Buf.size(); ++ReadCount) {
516  AsmToken Token = LexToken();
517 
518  Buf[ReadCount] = Token;
519 
 // Stop at Eof. The Eof token is stored in Buf, but because the loop
 // increment is skipped it is not included in the returned count.
520  if (Token.is(AsmToken::Eof))
521  break;
522  }
523 
524  SetError(SavedErrLoc, SavedErr);
525  return ReadCount;
526 }
527 
528 bool AsmLexer::isAtStartOfComment(const char *Ptr) {
529  StringRef CommentString = MAI.getCommentString();
530 
531  if (CommentString.size() == 1)
532  return CommentString[0] == Ptr[0];
533 
534  // Allow # preprocessor commments also be counted as comments for "##" cases
535  if (CommentString[1] == '#')
536  return CommentString[0] == Ptr[0];
537 
538  return strncmp(Ptr, CommentString.data(), CommentString.size()) == 0;
539 }
540 
541 bool AsmLexer::isAtStatementSeparator(const char *Ptr) {
542  return strncmp(Ptr, MAI.getSeparatorString(),
543  strlen(MAI.getSeparatorString())) == 0;
544 }
545 
 // NOTE(review): the signature (source line 546) is absent from this extract;
 // the cross-reference index lists it as `AsmToken LexToken() override`
 // ("Read the next token and return its code").
 // NOTE(review): the embedded numbering has many gaps inside this function
 // (564, 576, 585, 601, 616, 635, 637, 641, 643, 648, 650, 654, 656, 659, 663,
 // 696, 710, 713, 716, 718, 724, 727, 729). The statements on those lines --
 // mostly the token returns of the individual cases -- are not visible here,
 // so cases that appear to fall through may not do so in the real file.
 // Confirm against the upstream file before relying on any of them.
 //
 // Visible flow: consume one character, handle '#' at the start of a
 // statement, target comment strings and statement separators, then dispatch
 // on the character that was consumed.
547  TokStart = CurPtr;
548  // This always consumes at least one character.
549  int CurChar = getNextChar();
550 
551  if (!IsPeeking && CurChar == '#' && IsAtStartOfStatement) {
552  // If this starts with a '#', this may be a cpp
553  // hash directive and otherwise a line comment.
554  AsmToken TokenBuf[2];
555  MutableArrayRef<AsmToken> Buf(TokenBuf, 2);
556  size_t num = peekTokens(Buf, true);
557  // There cannot be a space preceding this
 // A '#' at the very start of a line followed by an Integer token and a
 // String token takes the hash-directive path.
558  if (IsAtStartOfLine && num == 2 && TokenBuf[0].is(AsmToken::Integer) &&
559  TokenBuf[1].is(AsmToken::String)) {
560  CurPtr = TokStart; // reset curPtr;
561  StringRef s = LexUntilEndOfLine();
562  UnLex(TokenBuf[1]);
563  UnLex(TokenBuf[0]);
565  }
566  return LexLineComment();
567  }
568 
569  if (isAtStartOfComment(TokStart))
570  return LexLineComment();
571 
572  if (isAtStatementSeparator(TokStart)) {
 // One character of the separator was already consumed above, hence "- 1".
573  CurPtr += strlen(MAI.getSeparatorString()) - 1;
574  IsAtStartOfLine = true;
575  IsAtStartOfStatement = true;
577  StringRef(TokStart, strlen(MAI.getSeparatorString())));
578  }
579 
580  // If we're missing a newline at EOF, make sure we still get an
581  // EndOfStatement token before the Eof token.
582  if (CurChar == EOF && !IsAtStartOfStatement) {
583  IsAtStartOfLine = true;
584  IsAtStartOfStatement = true;
586  }
 // Every remaining case starts from "not at start of line/statement"; the
 // previous statement flag is kept so that cases which do not produce a real
 // token (whitespace, '/') can put it back.
587  IsAtStartOfLine = false;
588  bool OldIsAtStartOfStatement = IsAtStartOfStatement;
589  IsAtStartOfStatement = false;
590  switch (CurChar) {
591  default:
592  // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
593  if (isalpha(CurChar) || CurChar == '_' || CurChar == '.')
594  return LexIdentifier();
595 
596  // Unknown character, emit an error.
597  return ReturnError(TokStart, "invalid character in input");
598  case EOF:
599  IsAtStartOfLine = true;
600  IsAtStartOfStatement = true;
602  case 0:
603  case ' ':
604  case '\t':
 // Whitespace does not end the "start of statement" state.
605  IsAtStartOfStatement = OldIsAtStartOfStatement;
 // Fold a whole run of blanks and tabs into one Space token.
606  while (*CurPtr == ' ' || *CurPtr == '\t')
607  CurPtr++;
608  if (SkipSpace)
609  return LexToken(); // Ignore whitespace.
610  else
611  return AsmToken(AsmToken::Space, StringRef(TokStart, CurPtr - TokStart));
612  case '\n':
613  case '\r':
614  IsAtStartOfLine = true;
615  IsAtStartOfStatement = true;
 // Single-character punctuation tokens.
617  case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1));
618  case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1));
619  case '-': return AsmToken(AsmToken::Minus, StringRef(TokStart, 1));
620  case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1));
621  case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1));
622  case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1));
623  case '[': return AsmToken(AsmToken::LBrac, StringRef(TokStart, 1));
624  case ']': return AsmToken(AsmToken::RBrac, StringRef(TokStart, 1));
625  case '{': return AsmToken(AsmToken::LCurly, StringRef(TokStart, 1));
626  case '}': return AsmToken(AsmToken::RCurly, StringRef(TokStart, 1));
627  case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1));
628  case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1));
629  case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1));
630  case '@': return AsmToken(AsmToken::At, StringRef(TokStart, 1));
631  case '\\': return AsmToken(AsmToken::BackSlash, StringRef(TokStart, 1));
 // The next few cases look one character ahead for a two-character operator
 // ("==", "||", "&&", "!="); their return statements are among the missing
 // lines noted at the top.
632  case '=':
633  if (*CurPtr == '=') {
634  ++CurPtr;
636  }
638  case '|':
639  if (*CurPtr == '|') {
640  ++CurPtr;
642  }
644  case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1));
645  case '&':
646  if (*CurPtr == '&') {
647  ++CurPtr;
649  }
651  case '!':
652  if (*CurPtr == '=') {
653  ++CurPtr;
655  }
657  case '%':
 // On targets with MIPS expressions, "%name" relocation operators are lexed
 // as single tokens. Each length in the table counts the leading '%', which
 // has already been consumed.
658  if (MAI.hasMipsExpressions()) {
660  unsigned OperatorLength;
661 
662  std::tie(Operator, OperatorLength) =
664  StringRef(CurPtr))
665  .StartsWith("call16", {AsmToken::PercentCall16, 7})
666  .StartsWith("call_hi", {AsmToken::PercentCall_Hi, 8})
667  .StartsWith("call_lo", {AsmToken::PercentCall_Lo, 8})
668  .StartsWith("dtprel_hi", {AsmToken::PercentDtprel_Hi, 10})
669  .StartsWith("dtprel_lo", {AsmToken::PercentDtprel_Lo, 10})
670  .StartsWith("got_disp", {AsmToken::PercentGot_Disp, 9})
671  .StartsWith("got_hi", {AsmToken::PercentGot_Hi, 7})
672  .StartsWith("got_lo", {AsmToken::PercentGot_Lo, 7})
673  .StartsWith("got_ofst", {AsmToken::PercentGot_Ofst, 9})
674  .StartsWith("got_page", {AsmToken::PercentGot_Page, 9})
675  .StartsWith("gottprel", {AsmToken::PercentGottprel, 9})
676  .StartsWith("got", {AsmToken::PercentGot, 4})
677  .StartsWith("gp_rel", {AsmToken::PercentGp_Rel, 7})
678  .StartsWith("higher", {AsmToken::PercentHigher, 7})
679  .StartsWith("highest", {AsmToken::PercentHighest, 8})
680  .StartsWith("hi", {AsmToken::PercentHi, 3})
681  .StartsWith("lo", {AsmToken::PercentLo, 3})
682  .StartsWith("neg", {AsmToken::PercentNeg, 4})
683  .StartsWith("pcrel_hi", {AsmToken::PercentPcrel_Hi, 9})
684  .StartsWith("pcrel_lo", {AsmToken::PercentPcrel_Lo, 9})
685  .StartsWith("tlsgd", {AsmToken::PercentTlsgd, 6})
686  .StartsWith("tlsldm", {AsmToken::PercentTlsldm, 7})
687  .StartsWith("tprel_hi", {AsmToken::PercentTprel_Hi, 9})
688  .StartsWith("tprel_lo", {AsmToken::PercentTprel_Lo, 9})
689  .Default({AsmToken::Percent, 1});
690 
691  if (Operator != AsmToken::Percent) {
 // Skip the operator name; the '%' itself was consumed by getNextChar.
692  CurPtr += OperatorLength - 1;
693  return AsmToken(Operator, StringRef(TokStart, OperatorLength));
694  }
695  }
697  case '/':
 // LexSlash decides for itself whether the statement-start state survives.
698  IsAtStartOfStatement = OldIsAtStartOfStatement;
699  return LexSlash();
700  case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1));
701  case '\'': return LexSingleQuote();
702  case '"': return LexQuote();
703  case '0': case '1': case '2': case '3': case '4':
704  case '5': case '6': case '7': case '8': case '9':
705  return LexDigit();
 // '<' and '>' look one character ahead for "<<", "<=", "<>", ">>" and ">=";
 // the return statements are among the missing lines noted at the top.
706  case '<':
707  switch (*CurPtr) {
708  case '<':
709  ++CurPtr;
711  case '=':
712  ++CurPtr;
714  case '>':
715  ++CurPtr;
717  default:
719  }
720  case '>':
721  switch (*CurPtr) {
722  case '>':
723  ++CurPtr;
725  case '=':
726  ++CurPtr;
728  default:
730  }
731 
732  // TODO: Quoted identifiers (objc methods etc)
733  // local labels: [0-9][:]
734  // Forward/backward labels: [0-9][fb]
735  // Integers, fp constants, character constants.
736  }
737 }
AsmCommentConsumer * CommentConsumer
Definition: MCAsmLexer.h:162
uint64_t Token
StringRef getCommentString() const
Definition: MCAsmInfo.h:471
void setBuffer(StringRef Buf, const char *ptr=nullptr)
Definition: AsmLexer.cpp:43
Target independent representation for an assembler token.
Definition: MCAsmLexer.h:25
bool isIntN(unsigned N) const
Check if this APInt has an N-bits unsigned integer value.
Definition: APInt.h:377
This file implements a class to represent arbitrary precision integral constant values and operations...
size_t peekTokens(MutableArrayRef< AsmToken > Buf, bool ShouldSkipSpace=true) override
Look ahead an arbitrary number of tokens.
Definition: AsmLexer.cpp:503
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:264
Function Alias Analysis false
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE size_t size() const
size - Get the string size.
Definition: StringRef.h:135
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:141
iterator begin() const
Definition: StringRef.h:103
This class is intended to be used as a base class for asm properties and features specific to the tar...
Definition: MCAsmInfo.h:57
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:43
void SetError(SMLoc errLoc, const std::string &err)
Definition: MCAsmLexer.h:168
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:283
static AsmToken intToken(StringRef Ref, APInt &Value)
Definition: AsmLexer.cpp:268
const char * TokStart
Definition: MCAsmLexer.h:158
const std::string & getErr()
Get the current error string.
Definition: MCAsmLexer.h:234
bool is(AsmToken::TokenKind K) const
Check if the current token has kind K.
Definition: MCAsmLexer.h:242
bool hasMipsExpressions() const
Definition: MCAsmInfo.h:601
void UnLex(AsmToken const &Token)
Definition: MCAsmLexer.h:194
This is a utility class that provides an abstraction for the common functionality between Instruction...
Definition: Operator.h:33
bool is(TokenKind K) const
Definition: MCAsmLexer.h:86
StringRef LexUntilEndOfStatement() override
Definition: AsmLexer.cpp:483
Class for arbitrary precision integers.
Definition: APInt.h:77
A utility class that uses RAII to save and restore the value of a variable.
static unsigned doLookAhead(const char *&CurPtr, unsigned DefaultRadix)
Definition: AsmLexer.cpp:247
static SMLoc getFromPointer(const char *Ptr)
Definition: SMLoc.h:37
static bool startswith(StringRef Magic, const char(&S)[N])
Definition: Path.cpp:994
Basic Alias true
~AsmLexer() override
Definition: AsmLexer.cpp:40
const char * getSeparatorString() const
Definition: MCAsmInfo.h:465
AsmToken LexToken() override
LexToken - Read the next token and return its code.
Definition: AsmLexer.cpp:546
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:71
SMLoc getErrLoc()
Get the current error location.
Definition: MCAsmLexer.h:229
This file provides utility classes that use RAII to save and restore values.
iterator end() const
Definition: StringRef.h:105
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:125
static void SkipIgnoredIntegerSuffix(const char *&CurPtr)
Definition: AsmLexer.cpp:235
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:47
int * Ptr
Represents a location in source code.
Definition: SMLoc.h:24
static bool IsIdentifierChar(char c, bool AllowAt)
LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@?]*.
Definition: AsmLexer.cpp:141
bool AllowAtInIdentifier
Definition: MCAsmLexer.h:160
virtual void HandleComment(SMLoc Loc, StringRef CommentText)=0
Callback function for when a comment is lexed.