LLVM  6.0.0svn
AsmLexer.cpp
Go to the documentation of this file.
1 //===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This class implements the lexer for assembly files.
11 //
12 //===----------------------------------------------------------------------===//
13 
15 #include "llvm/ADT/APInt.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/StringExtras.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/MC/MCAsmInfo.h"
22 #include "llvm/Support/SMLoc.h"
24 #include <cassert>
25 #include <cctype>
26 #include <cstdio>
27 #include <cstring>
28 #include <string>
29 #include <tuple>
30 #include <utility>
31 
32 using namespace llvm;
33 
// Construct a lexer that keeps a reference to the target assembly description
// MAI (the visible initializer list binds the MAI member to the argument).
// NOTE(review): this listing jumps from source line 34 straight to 36, so one
// line of the constructor body is not shown here -- confirm against the real
// file before relying on the body being empty.
34 AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) {
36 }
37 
// Destructor: explicitly defaulted here in the implementation file.
38 AsmLexer::~AsmLexer() = default;
39 
40 void AsmLexer::setBuffer(StringRef Buf, const char *ptr) {
41  CurBuf = Buf;
42 
43  if (ptr)
44  CurPtr = ptr;
45  else
46  CurPtr = CurBuf.begin();
47 
48  TokStart = nullptr;
49 }
50 
51 /// ReturnError - Set the error to the specified string at the specified
52 /// location. This is defined to always return AsmToken::Error.
53 AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
55 
56  return AsmToken(AsmToken::Error, StringRef(Loc, CurPtr - Loc));
57 }
58 
59 int AsmLexer::getNextChar() {
60  if (CurPtr == CurBuf.end())
61  return EOF;
62  return (unsigned char)*CurPtr++;
63 }
64 
65 /// LexFloatLiteral: [0-9]*[.][0-9]*([eE][+-]?[0-9]*)?
66 ///
67 /// The leading integral digit sequence and dot should have already been
68 /// consumed, some or all of the fractional digit sequence *can* have been
69 /// consumed.
70 AsmToken AsmLexer::LexFloatLiteral() {
71  // Skip the fractional digit sequence.
72  while (isDigit(*CurPtr))
73  ++CurPtr;
74 
75  // Check for exponent; we intentionally accept a slighlty wider set of
76  // literals here and rely on the upstream client to reject invalid ones (e.g.,
77  // "1e+").
78  if (*CurPtr == 'e' || *CurPtr == 'E') {
79  ++CurPtr;
80  if (*CurPtr == '-' || *CurPtr == '+')
81  ++CurPtr;
82  while (isDigit(*CurPtr))
83  ++CurPtr;
84  }
85 
86  return AsmToken(AsmToken::Real,
87  StringRef(TokStart, CurPtr - TokStart));
88 }
89 
90 /// LexHexFloatLiteral matches essentially (.[0-9a-fA-F]*)?[pP][+-]?[0-9a-fA-F]+
91 /// while making sure there are enough actual digits around for the constant to
92 /// be valid.
93 ///
94 /// The leading "0x[0-9a-fA-F]*" (i.e. integer part) has already been consumed
95 /// before we get here.
96 AsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits) {
97  assert((*CurPtr == 'p' || *CurPtr == 'P' || *CurPtr == '.') &&
98  "unexpected parse state in floating hex");
99  bool NoFracDigits = true;
100 
101  // Skip the fractional part if there is one
102  if (*CurPtr == '.') {
103  ++CurPtr;
104 
105  const char *FracStart = CurPtr;
106  while (isHexDigit(*CurPtr))
107  ++CurPtr;
108 
109  NoFracDigits = CurPtr == FracStart;
110  }
111 
112  if (NoIntDigits && NoFracDigits)
113  return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
114  "expected at least one significand digit");
115 
116  // Make sure we do have some kind of proper exponent part
117  if (*CurPtr != 'p' && *CurPtr != 'P')
118  return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
119  "expected exponent part 'p'");
120  ++CurPtr;
121 
122  if (*CurPtr == '+' || *CurPtr == '-')
123  ++CurPtr;
124 
125  // N.b. exponent digits are *not* hex
126  const char *ExpStart = CurPtr;
127  while (isDigit(*CurPtr))
128  ++CurPtr;
129 
130  if (CurPtr == ExpStart)
131  return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
132  "expected at least one exponent digit");
133 
134  return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart));
135 }
136 
137 /// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@?]*
138 static bool IsIdentifierChar(char c, bool AllowAt) {
139  return isAlnum(c) || c == '_' || c == '$' || c == '.' ||
140  (c == '@' && AllowAt) || c == '?';
141 }
142 
// Lexes a token whose first character has already been consumed by the
// caller. When that character was '.' and a digit follows, the text may be a
// floating literal rather than an identifier, and lexing is handed to
// LexFloatLiteral(). Otherwise identifier characters are consumed for as long
// as IsIdentifierChar() accepts them.
//
// NOTE(review): this listing omits source lines 150, 159 and 161. That leaves
// the second operand of the '||' below, the statement guarded by the lone-'.'
// test, and the final return invisible here -- confirm all three against the
// real file; the block as shown is not complete code.
143 AsmToken AsmLexer::LexIdentifier() {
144  // Check for floating point literals.
145  if (CurPtr[-1] == '.' && isDigit(*CurPtr)) {
146  // Disambiguate a .1243foo identifier from a floating literal.
147  while (isDigit(*CurPtr))
148  ++CurPtr;
149  if (*CurPtr == 'e' || *CurPtr == 'E' ||
151  return LexFloatLiteral();
152  }
153 
154  while (IsIdentifierChar(*CurPtr, AllowAtInIdentifier))
155  ++CurPtr;
156 
157  // Handle . as a special case.
     // (The test below is true exactly when the token is the single character '.'.)
158  if (CurPtr == TokStart+1 && TokStart[0] == '.')
160 
162 }
163 
// Called after a '/' has been consumed. Looks at the next character:
//   '*'  -> a C-style comment follows and is scanned up to the closing "*/";
//   '/'  -> a line comment; the second slash is consumed and the work is
//           delegated to LexLineComment();
//   else -> the default case (its visible part only clears
//           IsAtStartOfStatement).
// Reaching the end of the buffer inside a C-style comment yields the
// "unterminated comment" error token.
//
// NOTE(review): this listing omits source lines 176, 190 and 195: the end of
// the default case, the statement that hands the comment text to
// CommentConsumer, and the first line of the return that ends a C-style
// comment. Confirm them against the real file.
164 /// LexSlash: Slash: /
165 /// C-Style Comment: /* ... */
166 AsmToken AsmLexer::LexSlash() {
167  switch (*CurPtr) {
168  case '*':
169  IsAtStartOfStatement = false;
170  break; // C style comment.
171  case '/':
172  ++CurPtr;
173  return LexLineComment();
174  default:
175  IsAtStartOfStatement = false;
177  }
178 
179  // C Style comment.
180  ++CurPtr; // skip the star.
181  const char *CommentTextStart = CurPtr;
182  while (CurPtr != CurBuf.end()) {
183  switch (*CurPtr++) {
184  case '*':
185  // End of the comment?
     // CurPtr already points one past the '*', so this inspects the character
     // that follows it.
186  if (*CurPtr != '/')
187  break;
188  // If we have a CommentConsumer, notify it about the comment.
     // The text passed on runs from just after the opening "/*" to just before
     // the closing '*' (hence the "- 1").
189  if (CommentConsumer) {
191  SMLoc::getFromPointer(CommentTextStart),
192  StringRef(CommentTextStart, CurPtr - 1 - CommentTextStart));
193  }
194  ++CurPtr; // End the */.
196  StringRef(TokStart, CurPtr - TokStart));
197  }
198  }
199  return ReturnError(TokStart, "unterminated comment");
200 }
201 
// Consumes a line comment up to and including its line terminator ('\n',
// '\r', or "\r\n" taken as one unit) or up to the end of the buffer, and
// marks the lexer as being at the start of a line.
//
// NOTE(review): this listing omits source lines 218, 226 and 230: the
// statement that hands the comment text to CommentConsumer and the first line
// of each of the two return statements. The token kind returned is therefore
// not visible here -- confirm against the real file.
202 /// LexLineComment: Comment: #[^\n]*
203 /// : //[^\n]*
204 AsmToken AsmLexer::LexLineComment() {
205  // Mark This as an end of statement with a body of the
206  // comment. While it would be nicer to leave this two tokens,
207  // backwards compatibility with TargetParsers makes keeping this in this form
208  // better.
209  const char *CommentTextStart = CurPtr;
210  int CurChar = getNextChar();
211  while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF)
212  CurChar = getNextChar();
     // Treat a CR immediately followed by LF as a single line terminator.
213  if (CurChar == '\r' && CurPtr != CurBuf.end() && *CurPtr == '\n')
214  ++CurPtr;
215 
216  // If we have a CommentConsumer, notify it about the comment.
217  if (CommentConsumer) {
219  SMLoc::getFromPointer(CommentTextStart),
220  StringRef(CommentTextStart, CurPtr - 1 - CommentTextStart));
221  }
222 
223  IsAtStartOfLine = true;
224  // This is a whole line comment. leave newline
     // In this case the returned text runs up to CurPtr, i.e. it includes the
     // last character consumed above.
225  if (IsAtStartOfStatement)
227  StringRef(TokStart, CurPtr - TokStart));
228  IsAtStartOfStatement = true;
229 
     // Here the returned text stops one character short of CurPtr.
231  StringRef(TokStart, CurPtr - 1 - TokStart));
232 }
233 
/// Advance \p CurPtr past an optional integer type suffix: at most one 'U'
/// followed by at most two 'L' characters (i.e. U, L, UL, LL or ULL). Only
/// upper-case letters are recognised; anything else leaves the pointer where
/// it is.
static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
  if (*CurPtr == 'U')
    ++CurPtr;

  // Up to two 'L's may follow.
  for (int Remaining = 2; Remaining > 0 && *CurPtr == 'L'; --Remaining)
    ++CurPtr;
}
243 
244 // Look ahead to search for first non-hex digit, if it's [hH], then we treat the
245 // integer as a hexadecimal, possibly with leading zeroes.
246 static unsigned doLookAhead(const char *&CurPtr, unsigned DefaultRadix) {
247  const char *FirstHex = nullptr;
248  const char *LookAhead = CurPtr;
249  while (true) {
250  if (isDigit(*LookAhead)) {
251  ++LookAhead;
252  } else if (isHexDigit(*LookAhead)) {
253  if (!FirstHex)
254  FirstHex = LookAhead;
255  ++LookAhead;
256  } else {
257  break;
258  }
259  }
260  bool isHex = *LookAhead == 'h' || *LookAhead == 'H';
261  CurPtr = isHex || !FirstHex ? LookAhead : FirstHex;
262  if (isHex)
263  return 16;
264  return DefaultRadix;
265 }
266 
268 {
269  if (Value.isIntN(64))
270  return AsmToken(AsmToken::Integer, Ref, Value);
271  return AsmToken(AsmToken::BigNum, Ref, Value);
272 }
273 
274 /// LexDigit: First character is [0-9].
275 /// Local Label: [0-9][:]
276 /// Forward/Backward Label: [0-9][fb]
277 /// Binary integer: 0b[01]+
278 /// Octal integer: 0[0-7]+
279 /// Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH]
280 /// Decimal integer: [1-9][0-9]*
281 AsmToken AsmLexer::LexDigit() {
282  // MASM-flavor binary integer: [01]+[bB]
283  // MASM-flavor hexadecimal integer: [0-9][0-9a-fA-F]*[hH]
284  if (IsParsingMSInlineAsm && isdigit(CurPtr[-1])) {
285  const char *FirstNonBinary = (CurPtr[-1] != '0' && CurPtr[-1] != '1') ?
286  CurPtr - 1 : nullptr;
287  const char *OldCurPtr = CurPtr;
288  while (isHexDigit(*CurPtr)) {
289  if (*CurPtr != '0' && *CurPtr != '1' && !FirstNonBinary)
290  FirstNonBinary = CurPtr;
291  ++CurPtr;
292  }
293 
294  unsigned Radix = 0;
295  if (*CurPtr == 'h' || *CurPtr == 'H') {
296  // hexadecimal number
297  ++CurPtr;
298  Radix = 16;
299  } else if (FirstNonBinary && FirstNonBinary + 1 == CurPtr &&
300  (*FirstNonBinary == 'b' || *FirstNonBinary == 'B'))
301  Radix = 2;
302 
303  if (Radix == 2 || Radix == 16) {
304  StringRef Result(TokStart, CurPtr - TokStart);
305  APInt Value(128, 0, true);
306 
307  if (Result.drop_back().getAsInteger(Radix, Value))
308  return ReturnError(TokStart, Radix == 2 ? "invalid binary number" :
309  "invalid hexdecimal number");
310 
311  // MSVC accepts and ignores type suffices on integer literals.
312  SkipIgnoredIntegerSuffix(CurPtr);
313 
314  return intToken(Result, Value);
315  }
316 
317  // octal/decimal integers, or floating point numbers, fall through
318  CurPtr = OldCurPtr;
319  }
320 
321  // Decimal integer: [1-9][0-9]*
322  if (CurPtr[-1] != '0' || CurPtr[0] == '.') {
323  unsigned Radix = doLookAhead(CurPtr, 10);
324  bool isHex = Radix == 16;
325  // Check for floating point literals.
326  if (!isHex && (*CurPtr == '.' || *CurPtr == 'e')) {
327  ++CurPtr;
328  return LexFloatLiteral();
329  }
330 
331  StringRef Result(TokStart, CurPtr - TokStart);
332 
333  APInt Value(128, 0, true);
334  if (Result.getAsInteger(Radix, Value))
335  return ReturnError(TokStart, !isHex ? "invalid decimal number" :
336  "invalid hexdecimal number");
337 
338  // Consume the [bB][hH].
339  if (Radix == 2 || Radix == 16)
340  ++CurPtr;
341 
342  // The darwin/x86 (and x86-64) assembler accepts and ignores type
343  // suffices on integer literals.
344  SkipIgnoredIntegerSuffix(CurPtr);
345 
346  return intToken(Result, Value);
347  }
348 
349  if (!IsParsingMSInlineAsm && ((*CurPtr == 'b') || (*CurPtr == 'B'))) {
350  ++CurPtr;
351  // See if we actually have "0b" as part of something like "jmp 0b\n"
352  if (!isDigit(CurPtr[0])) {
353  --CurPtr;
354  StringRef Result(TokStart, CurPtr - TokStart);
355  return AsmToken(AsmToken::Integer, Result, 0);
356  }
357  const char *NumStart = CurPtr;
358  while (CurPtr[0] == '0' || CurPtr[0] == '1')
359  ++CurPtr;
360 
361  // Requires at least one binary digit.
362  if (CurPtr == NumStart)
363  return ReturnError(TokStart, "invalid binary number");
364 
365  StringRef Result(TokStart, CurPtr - TokStart);
366 
367  APInt Value(128, 0, true);
368  if (Result.substr(2).getAsInteger(2, Value))
369  return ReturnError(TokStart, "invalid binary number");
370 
371  // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
372  // suffixes on integer literals.
373  SkipIgnoredIntegerSuffix(CurPtr);
374 
375  return intToken(Result, Value);
376  }
377 
378  if ((*CurPtr == 'x') || (*CurPtr == 'X')) {
379  ++CurPtr;
380  const char *NumStart = CurPtr;
381  while (isHexDigit(CurPtr[0]))
382  ++CurPtr;
383 
384  // "0x.0p0" is valid, and "0x0p0" (but not "0xp0" for example, which will be
385  // diagnosed by LexHexFloatLiteral).
386  if (CurPtr[0] == '.' || CurPtr[0] == 'p' || CurPtr[0] == 'P')
387  return LexHexFloatLiteral(NumStart == CurPtr);
388 
389  // Otherwise requires at least one hex digit.
390  if (CurPtr == NumStart)
391  return ReturnError(CurPtr-2, "invalid hexadecimal number");
392 
393  APInt Result(128, 0);
394  if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result))
395  return ReturnError(TokStart, "invalid hexadecimal number");
396 
397  // Consume the optional [hH].
398  if (!IsParsingMSInlineAsm && (*CurPtr == 'h' || *CurPtr == 'H'))
399  ++CurPtr;
400 
401  // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
402  // suffixes on integer literals.
403  SkipIgnoredIntegerSuffix(CurPtr);
404 
405  return intToken(StringRef(TokStart, CurPtr - TokStart), Result);
406  }
407 
408  // Either octal or hexadecimal.
409  APInt Value(128, 0, true);
410  unsigned Radix = doLookAhead(CurPtr, 8);
411  bool isHex = Radix == 16;
412  StringRef Result(TokStart, CurPtr - TokStart);
413  if (Result.getAsInteger(Radix, Value))
414  return ReturnError(TokStart, !isHex ? "invalid octal number" :
415  "invalid hexdecimal number");
416 
417  // Consume the [hH].
418  if (Radix == 16)
419  ++CurPtr;
420 
421  // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
422  // suffixes on integer literals.
423  SkipIgnoredIntegerSuffix(CurPtr);
424 
425  return intToken(Result, Value);
426 }
427 
428 /// LexSingleQuote: Integer: 'b'
429 AsmToken AsmLexer::LexSingleQuote() {
430  int CurChar = getNextChar();
431 
432  if (CurChar == '\\')
433  CurChar = getNextChar();
434 
435  if (CurChar == EOF)
436  return ReturnError(TokStart, "unterminated single quote");
437 
438  CurChar = getNextChar();
439 
440  if (CurChar != '\'')
441  return ReturnError(TokStart, "single quote way too long");
442 
443  // The idea here being that 'c' is basically just an integral
444  // constant.
445  StringRef Res = StringRef(TokStart,CurPtr - TokStart);
446  long long Value;
447 
448  if (Res.startswith("\'\\")) {
449  char theChar = Res[2];
450  switch (theChar) {
451  default: Value = theChar; break;
452  case '\'': Value = '\''; break;
453  case 't': Value = '\t'; break;
454  case 'n': Value = '\n'; break;
455  case 'b': Value = '\b'; break;
456  }
457  } else
458  Value = TokStart[1];
459 
460  return AsmToken(AsmToken::Integer, Res, Value);
461 }
462 
// Called after the opening '"' has been consumed. Characters are read until
// the closing '"'. A backslash causes the character after it to be read and
// skipped as well, so an escaped quote does not end the string. Reaching the
// end of input first produces the "unterminated string constant" error token.
//
// NOTE(review): this listing omits source line 479, the statement that
// returns the token for a well-formed string -- confirm it against the real
// file.
463 /// LexQuote: String: "..."
464 AsmToken AsmLexer::LexQuote() {
465  int CurChar = getNextChar();
466  // TODO: does gas allow multiline string constants?
467  while (CurChar != '"') {
468  if (CurChar == '\\') {
469  // Allow \", etc.
470  CurChar = getNextChar();
471  }
472 
     // Checked after the escape handling so that a backslash directly before
     // the end of input is reported as well.
473  if (CurChar == EOF)
474  return ReturnError(TokStart, "unterminated string constant");
475 
476  CurChar = getNextChar();
477  }
478 
480 }
481 
483  TokStart = CurPtr;
484 
485  while (!isAtStartOfComment(CurPtr) && // Start of line comment.
486  !isAtStatementSeparator(CurPtr) && // End of statement marker.
487  *CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) {
488  ++CurPtr;
489  }
490  return StringRef(TokStart, CurPtr-TokStart);
491 }
492 
493 StringRef AsmLexer::LexUntilEndOfLine() {
494  TokStart = CurPtr;
495 
496  while (*CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) {
497  ++CurPtr;
498  }
499  return StringRef(TokStart, CurPtr-TokStart);
500 }
501 
503  bool ShouldSkipSpace) {
504  SaveAndRestore<const char *> SavedTokenStart(TokStart);
505  SaveAndRestore<const char *> SavedCurPtr(CurPtr);
506  SaveAndRestore<bool> SavedAtStartOfLine(IsAtStartOfLine);
507  SaveAndRestore<bool> SavedAtStartOfStatement(IsAtStartOfStatement);
508  SaveAndRestore<bool> SavedSkipSpace(SkipSpace, ShouldSkipSpace);
509  SaveAndRestore<bool> SavedIsPeeking(IsPeeking, true);
510  std::string SavedErr = getErr();
511  SMLoc SavedErrLoc = getErrLoc();
512 
513  size_t ReadCount;
514  for (ReadCount = 0; ReadCount < Buf.size(); ++ReadCount) {
515  AsmToken Token = LexToken();
516 
517  Buf[ReadCount] = Token;
518 
519  if (Token.is(AsmToken::Eof))
520  break;
521  }
522 
523  SetError(SavedErrLoc, SavedErr);
524  return ReadCount;
525 }
526 
527 bool AsmLexer::isAtStartOfComment(const char *Ptr) {
528  StringRef CommentString = MAI.getCommentString();
529 
530  if (CommentString.size() == 1)
531  return CommentString[0] == Ptr[0];
532 
533  // Allow # preprocessor commments also be counted as comments for "##" cases
534  if (CommentString[1] == '#')
535  return CommentString[0] == Ptr[0];
536 
537  return strncmp(Ptr, CommentString.data(), CommentString.size()) == 0;
538 }
539 
540 bool AsmLexer::isAtStatementSeparator(const char *Ptr) {
541  return strncmp(Ptr, MAI.getSeparatorString(),
542  strlen(MAI.getSeparatorString())) == 0;
543 }
544 
// Body of the main token dispatcher. It marks the token start, consumes one
// character, and then decides what to lex: a '#' at the start of a statement
// (cpp hash directive or line comment), a target line comment, a statement
// separator, end of input, whitespace, a line terminator, punctuation and
// operators, numbers, quoted constants, or an identifier.
//
// NOTE(review): this is a lossy listing. The function's signature (source
// line 545) and roughly two dozen interior lines are absent; every jump in
// the line numbers below (for example 562 -> 564, 574 -> 576, 641 -> 643 ->
// 645) marks a dropped statement, most of them the `return` that produces the
// token for that case. Confirm the missing statements against the real file
// before drawing conclusions about fall-through between the cases shown here.
546  TokStart = CurPtr;
547  // This always consumes at least one character.
548  int CurChar = getNextChar();
549 
550  if (!IsPeeking && CurChar == '#' && IsAtStartOfStatement) {
551  // If this starts with a '#', this may be a cpp
552  // hash directive and otherwise a line comment.
553  AsmToken TokenBuf[2];
554  MutableArrayRef<AsmToken> Buf(TokenBuf, 2);
555  size_t num = peekTokens(Buf, true);
556  // There cannot be a space preceding this
     // The two peeked tokens must be an integer followed by a string for the
     // branch below to be taken.
557  if (IsAtStartOfLine && num == 2 && TokenBuf[0].is(AsmToken::Integer) &&
558  TokenBuf[1].is(AsmToken::String)) {
559  CurPtr = TokStart; // reset curPtr;
560  StringRef s = LexUntilEndOfLine();
561  UnLex(TokenBuf[1]);
562  UnLex(TokenBuf[0]);
564  }
565  return LexLineComment();
566  }
567 
568  if (isAtStartOfComment(TokStart))
569  return LexLineComment();
570 
571  if (isAtStatementSeparator(TokStart)) {
     // One character of the separator was already consumed by getNextChar()
     // above, hence the "- 1".
572  CurPtr += strlen(MAI.getSeparatorString()) - 1;
573  IsAtStartOfLine = true;
574  IsAtStartOfStatement = true;
576  StringRef(TokStart, strlen(MAI.getSeparatorString())));
577  }
578 
579  // If we're missing a newline at EOF, make sure we still get an
580  // EndOfStatement token before the Eof token.
581  if (CurChar == EOF && !IsAtStartOfStatement) {
582  IsAtStartOfLine = true;
583  IsAtStartOfStatement = true;
585  }
586  IsAtStartOfLine = false;
     // Remember the flag: the whitespace and '/' cases below restore it.
587  bool OldIsAtStartOfStatement = IsAtStartOfStatement;
588  IsAtStartOfStatement = false;
589  switch (CurChar) {
590  default:
591  // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
592  if (isalpha(CurChar) || CurChar == '_' || CurChar == '.')
593  return LexIdentifier();
594 
595  // Unknown character, emit an error.
596  return ReturnError(TokStart, "invalid character in input");
597  case EOF:
598  IsAtStartOfLine = true;
599  IsAtStartOfStatement = true;
601  case 0:
602  case ' ':
603  case '\t':
604  IsAtStartOfStatement = OldIsAtStartOfStatement;
     // Collapse the whole run of blanks and tabs.
605  while (*CurPtr == ' ' || *CurPtr == '\t')
606  CurPtr++;
607  if (SkipSpace)
608  return LexToken(); // Ignore whitespace.
609  else
610  return AsmToken(AsmToken::Space, StringRef(TokStart, CurPtr - TokStart));
611  case '\r': {
612  IsAtStartOfLine = true;
613  IsAtStartOfStatement = true;
614  // If this is a CR followed by LF, treat that as one token.
615  if (CurPtr != CurBuf.end() && *CurPtr == '\n')
616  ++CurPtr;
618  StringRef(TokStart, CurPtr - TokStart));
619  }
620  case '\n':
621  IsAtStartOfLine = true;
622  IsAtStartOfStatement = true;
624  case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1));
625  case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1));
626  case '-': return AsmToken(AsmToken::Minus, StringRef(TokStart, 1));
627  case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1));
628  case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1));
629  case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1));
630  case '[': return AsmToken(AsmToken::LBrac, StringRef(TokStart, 1));
631  case ']': return AsmToken(AsmToken::RBrac, StringRef(TokStart, 1));
632  case '{': return AsmToken(AsmToken::LCurly, StringRef(TokStart, 1));
633  case '}': return AsmToken(AsmToken::RCurly, StringRef(TokStart, 1));
634  case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1));
635  case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1));
636  case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1));
637  case '@': return AsmToken(AsmToken::At, StringRef(TokStart, 1));
638  case '\\': return AsmToken(AsmToken::BackSlash, StringRef(TokStart, 1));
639  case '=':
640  if (*CurPtr == '=') {
641  ++CurPtr;
643  }
645  case '|':
646  if (*CurPtr == '|') {
647  ++CurPtr;
649  }
651  case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1));
652  case '&':
653  if (*CurPtr == '&') {
654  ++CurPtr;
656  }
658  case '!':
659  if (*CurPtr == '=') {
660  ++CurPtr;
662  }
664  case '%':
665  if (MAI.hasMipsExpressions()) {
667  unsigned OperatorLength;
668 
     // Each entry pairs a token kind with the operator's length including the
     // leading '%'. Longer names are listed before their prefixes ("got_disp"
     // before "got", "higher" before "hi").
669  std::tie(Operator, OperatorLength) =
671  StringRef(CurPtr))
672  .StartsWith("call16", {AsmToken::PercentCall16, 7})
673  .StartsWith("call_hi", {AsmToken::PercentCall_Hi, 8})
674  .StartsWith("call_lo", {AsmToken::PercentCall_Lo, 8})
675  .StartsWith("dtprel_hi", {AsmToken::PercentDtprel_Hi, 10})
676  .StartsWith("dtprel_lo", {AsmToken::PercentDtprel_Lo, 10})
677  .StartsWith("got_disp", {AsmToken::PercentGot_Disp, 9})
678  .StartsWith("got_hi", {AsmToken::PercentGot_Hi, 7})
679  .StartsWith("got_lo", {AsmToken::PercentGot_Lo, 7})
680  .StartsWith("got_ofst", {AsmToken::PercentGot_Ofst, 9})
681  .StartsWith("got_page", {AsmToken::PercentGot_Page, 9})
682  .StartsWith("gottprel", {AsmToken::PercentGottprel, 9})
683  .StartsWith("got", {AsmToken::PercentGot, 4})
684  .StartsWith("gp_rel", {AsmToken::PercentGp_Rel, 7})
685  .StartsWith("higher", {AsmToken::PercentHigher, 7})
686  .StartsWith("highest", {AsmToken::PercentHighest, 8})
687  .StartsWith("hi", {AsmToken::PercentHi, 3})
688  .StartsWith("lo", {AsmToken::PercentLo, 3})
689  .StartsWith("neg", {AsmToken::PercentNeg, 4})
690  .StartsWith("pcrel_hi", {AsmToken::PercentPcrel_Hi, 9})
691  .StartsWith("pcrel_lo", {AsmToken::PercentPcrel_Lo, 9})
692  .StartsWith("tlsgd", {AsmToken::PercentTlsgd, 6})
693  .StartsWith("tlsldm", {AsmToken::PercentTlsldm, 7})
694  .StartsWith("tprel_hi", {AsmToken::PercentTprel_Hi, 9})
695  .StartsWith("tprel_lo", {AsmToken::PercentTprel_Lo, 9})
696  .Default({AsmToken::Percent, 1});
697 
698  if (Operator != AsmToken::Percent) {
     // The '%' itself has already been consumed, hence the "- 1".
699  CurPtr += OperatorLength - 1;
700  return AsmToken(Operator, StringRef(TokStart, OperatorLength));
701  }
702  }
704  case '/':
705  IsAtStartOfStatement = OldIsAtStartOfStatement;
706  return LexSlash();
707  case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1));
708  case '\'': return LexSingleQuote();
709  case '"': return LexQuote();
710  case '0': case '1': case '2': case '3': case '4':
711  case '5': case '6': case '7': case '8': case '9':
712  return LexDigit();
713  case '<':
714  switch (*CurPtr) {
715  case '<':
716  ++CurPtr;
718  case '=':
719  ++CurPtr;
721  case '>':
722  ++CurPtr;
724  default:
726  }
727  case '>':
728  switch (*CurPtr) {
729  case '>':
730  ++CurPtr;
732  case '=':
733  ++CurPtr;
735  default:
737  }
738 
739  // TODO: Quoted identifiers (objc methods etc)
740  // local labels: [0-9][:]
741  // Forward/backward labels: [0-9][fb]
742  // Integers, fp constants, character constants.
743  }
744 }
AsmCommentConsumer * CommentConsumer
Definition: MCAsmLexer.h:162
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
bool is(AsmToken::TokenKind K) const
Check if the current token has kind K.
Definition: MCAsmLexer.h:253
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE size_t size() const
size - Get the string size.
Definition: StringRef.h:138
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:128
bool isAlnum(char C)
Checks whether character C is either a decimal digit or an uppercase or lowercase letter as classifie...
Definition: StringExtras.h:74
void setBuffer(StringRef Buf, const char *ptr=nullptr)
Definition: AsmLexer.cpp:40
Target independent representation for an assembler token.
Definition: MCAsmLexer.h:27
This file implements a class to represent arbitrary precision integral constant values and operations...
static bool startswith(StringRef Magic, const char(&S)[N])
Definition: Magic.cpp:29
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:267
size_t peekTokens(MutableArrayRef< AsmToken > Buf, bool ShouldSkipSpace=true) override
Look ahead an arbitrary number of tokens.
Definition: AsmLexer.cpp:502
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:598
This class is intended to be used as a base class for asm properties and features specific to the tar...
Definition: MCAsmInfo.h:56
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:43
void SetError(SMLoc errLoc, const std::string &err)
Definition: MCAsmLexer.h:169
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:291
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:149
bool isHexDigit(char C)
Checks if character C is a hexadecimal numeric character.
Definition: StringExtras.h:65
const char * getSeparatorString() const
Definition: MCAsmInfo.h:464
static AsmToken intToken(StringRef Ref, APInt &Value)
Definition: AsmLexer.cpp:267
const char * TokStart
Definition: MCAsmLexer.h:158
StringRef getCommentString() const
Definition: MCAsmInfo.h:470
const std::string & getErr()
Get the current error string.
Definition: MCAsmLexer.h:245
bool isIntN(unsigned N) const
Check if this APInt has an N-bits unsigned integer value.
Definition: APInt.h:443
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Definition: StringRef.h:654
std::enable_if< std::numeric_limits< T >::is_signed, bool >::type getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:497
bool hasMipsExpressions() const
Definition: MCAsmInfo.h:618
void UnLex(AsmToken const &Token)
Definition: MCAsmLexer.h:205
This is a utility class that provides an abstraction for the common functionality between Instruction...
Definition: Operator.h:31
StringRef LexUntilEndOfStatement() override
Definition: AsmLexer.cpp:482
bool is(TokenKind K) const
Definition: MCAsmLexer.h:88
Class for arbitrary precision integers.
Definition: APInt.h:69
iterator begin() const
Definition: StringRef.h:106
A utility class that uses RAII to save and restore the value of a variable.
static unsigned doLookAhead(const char *&CurPtr, unsigned DefaultRadix)
Definition: AsmLexer.cpp:246
static SMLoc getFromPointer(const char *Ptr)
Definition: SMLoc.h:37
bool isDigit(char C)
Checks if character C is one of the 10 decimal digits.
Definition: StringExtras.h:62
~AsmLexer() override
AsmToken LexToken() override
LexToken - Read the next token and return its code.
Definition: AsmLexer.cpp:545
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:73
SMLoc getErrLoc()
Get the current error location.
Definition: MCAsmLexer.h:240
This file provides utility classes that use RAII to save and restore values.
static void SkipIgnoredIntegerSuffix(const char *&CurPtr)
Definition: AsmLexer.cpp:234
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
Represents a location in source code.
Definition: SMLoc.h:24
static bool IsIdentifierChar(char c, bool AllowAt)
LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@?]*.
Definition: AsmLexer.cpp:138
iterator end() const
Definition: StringRef.h:108
bool AllowAtInIdentifier
Definition: MCAsmLexer.h:160
virtual void HandleComment(SMLoc Loc, StringRef CommentText)=0
Callback function for when a comment is lexed.
AsmLexer(const MCAsmInfo &MAI)
Definition: AsmLexer.cpp:34