LLVM  9.0.0svn
AsmLexer.cpp
Go to the documentation of this file.
1 //===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This class implements the lexer for assembly files.
10 //
11 //===----------------------------------------------------------------------===//
12 
14 #include "llvm/ADT/APInt.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/StringExtras.h"
17 #include "llvm/ADT/StringRef.h"
18 #include "llvm/ADT/StringSwitch.h"
19 #include "llvm/MC/MCAsmInfo.h"
21 #include "llvm/Support/SMLoc.h"
23 #include <cassert>
24 #include <cctype>
25 #include <cstdio>
26 #include <cstring>
27 #include <string>
28 #include <tuple>
29 #include <utility>
30 
31 using namespace llvm;
32 
33 AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) {
35 }
36 
37 AsmLexer::~AsmLexer() = default;
38 
39 void AsmLexer::setBuffer(StringRef Buf, const char *ptr) {
40  CurBuf = Buf;
41 
42  if (ptr)
43  CurPtr = ptr;
44  else
45  CurPtr = CurBuf.begin();
46 
47  TokStart = nullptr;
48 }
49 
50 /// ReturnError - Set the error to the specified string at the specified
51 /// location. This is defined to always return AsmToken::Error.
52 AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
54 
55  return AsmToken(AsmToken::Error, StringRef(Loc, CurPtr - Loc));
56 }
57 
58 int AsmLexer::getNextChar() {
59  if (CurPtr == CurBuf.end())
60  return EOF;
61  return (unsigned char)*CurPtr++;
62 }
63 
64 /// LexFloatLiteral: [0-9]*[.][0-9]*([eE][+-]?[0-9]*)?
65 ///
66 /// The leading integral digit sequence and dot should have already been
67 /// consumed, some or all of the fractional digit sequence *can* have been
68 /// consumed.
69 AsmToken AsmLexer::LexFloatLiteral() {
70  // Skip the fractional digit sequence.
71  while (isDigit(*CurPtr))
72  ++CurPtr;
73 
74  // Check for exponent; we intentionally accept a slighlty wider set of
75  // literals here and rely on the upstream client to reject invalid ones (e.g.,
76  // "1e+").
77  if (*CurPtr == 'e' || *CurPtr == 'E') {
78  ++CurPtr;
79  if (*CurPtr == '-' || *CurPtr == '+')
80  ++CurPtr;
81  while (isDigit(*CurPtr))
82  ++CurPtr;
83  }
84 
85  return AsmToken(AsmToken::Real,
86  StringRef(TokStart, CurPtr - TokStart));
87 }
88 
89 /// LexHexFloatLiteral matches essentially (.[0-9a-fA-F]*)?[pP][+-]?[0-9a-fA-F]+
90 /// while making sure there are enough actual digits around for the constant to
91 /// be valid.
92 ///
93 /// The leading "0x[0-9a-fA-F]*" (i.e. integer part) has already been consumed
94 /// before we get here.
95 AsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits) {
96  assert((*CurPtr == 'p' || *CurPtr == 'P' || *CurPtr == '.') &&
97  "unexpected parse state in floating hex");
98  bool NoFracDigits = true;
99 
100  // Skip the fractional part if there is one
101  if (*CurPtr == '.') {
102  ++CurPtr;
103 
104  const char *FracStart = CurPtr;
105  while (isHexDigit(*CurPtr))
106  ++CurPtr;
107 
108  NoFracDigits = CurPtr == FracStart;
109  }
110 
111  if (NoIntDigits && NoFracDigits)
112  return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
113  "expected at least one significand digit");
114 
115  // Make sure we do have some kind of proper exponent part
116  if (*CurPtr != 'p' && *CurPtr != 'P')
117  return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
118  "expected exponent part 'p'");
119  ++CurPtr;
120 
121  if (*CurPtr == '+' || *CurPtr == '-')
122  ++CurPtr;
123 
124  // N.b. exponent digits are *not* hex
125  const char *ExpStart = CurPtr;
126  while (isDigit(*CurPtr))
127  ++CurPtr;
128 
129  if (CurPtr == ExpStart)
130  return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
131  "expected at least one exponent digit");
132 
133  return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart));
134 }
135 
136 /// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@?]*
137 static bool IsIdentifierChar(char c, bool AllowAt) {
138  return isAlnum(c) || c == '_' || c == '$' || c == '.' ||
139  (c == '@' && AllowAt) || c == '?';
140 }
141 
142 AsmToken AsmLexer::LexIdentifier() {
143  // Check for floating point literals.
144  if (CurPtr[-1] == '.' && isDigit(*CurPtr)) {
145  // Disambiguate a .1243foo identifier from a floating literal.
146  while (isDigit(*CurPtr))
147  ++CurPtr;
148  if (*CurPtr == 'e' || *CurPtr == 'E' ||
150  return LexFloatLiteral();
151  }
152 
153  while (IsIdentifierChar(*CurPtr, AllowAtInIdentifier))
154  ++CurPtr;
155 
156  // Handle . as a special case.
157  if (CurPtr == TokStart+1 && TokStart[0] == '.')
159 
161 }
162 
163 /// LexSlash: Slash: /
164 /// C-Style Comment: /* ... */
165 AsmToken AsmLexer::LexSlash() {
166  switch (*CurPtr) {
167  case '*':
168  IsAtStartOfStatement = false;
169  break; // C style comment.
170  case '/':
171  ++CurPtr;
172  return LexLineComment();
173  default:
174  IsAtStartOfStatement = false;
176  }
177 
178  // C Style comment.
179  ++CurPtr; // skip the star.
180  const char *CommentTextStart = CurPtr;
181  while (CurPtr != CurBuf.end()) {
182  switch (*CurPtr++) {
183  case '*':
184  // End of the comment?
185  if (*CurPtr != '/')
186  break;
187  // If we have a CommentConsumer, notify it about the comment.
188  if (CommentConsumer) {
190  SMLoc::getFromPointer(CommentTextStart),
191  StringRef(CommentTextStart, CurPtr - 1 - CommentTextStart));
192  }
193  ++CurPtr; // End the */.
195  StringRef(TokStart, CurPtr - TokStart));
196  }
197  }
198  return ReturnError(TokStart, "unterminated comment");
199 }
200 
201 /// LexLineComment: Comment: #[^\n]*
202 /// : //[^\n]*
203 AsmToken AsmLexer::LexLineComment() {
204  // Mark This as an end of statement with a body of the
205  // comment. While it would be nicer to leave this two tokens,
206  // backwards compatability with TargetParsers makes keeping this in this form
207  // better.
208  const char *CommentTextStart = CurPtr;
209  int CurChar = getNextChar();
210  while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF)
211  CurChar = getNextChar();
212  if (CurChar == '\r' && CurPtr != CurBuf.end() && *CurPtr == '\n')
213  ++CurPtr;
214 
215  // If we have a CommentConsumer, notify it about the comment.
216  if (CommentConsumer) {
218  SMLoc::getFromPointer(CommentTextStart),
219  StringRef(CommentTextStart, CurPtr - 1 - CommentTextStart));
220  }
221 
222  IsAtStartOfLine = true;
223  // This is a whole line comment. leave newline
224  if (IsAtStartOfStatement)
226  StringRef(TokStart, CurPtr - TokStart));
227  IsAtStartOfStatement = true;
228 
230  StringRef(TokStart, CurPtr - 1 - TokStart));
231 }
232 
/// Advance \p CurPtr past an optional integer type suffix: at most one 'U'
/// followed by at most two 'L's (i.e. U, L, UL, LL, ULL). Only upper-case
/// letters are recognised.
static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
  if (*CurPtr == 'U')
    ++CurPtr;
  for (int Count = 0; Count < 2 && *CurPtr == 'L'; ++Count)
    ++CurPtr;
}
242 
243 // Look ahead to search for first non-hex digit, if it's [hH], then we treat the
244 // integer as a hexadecimal, possibly with leading zeroes.
245 static unsigned doHexLookAhead(const char *&CurPtr, unsigned DefaultRadix,
246  bool LexHex) {
247  const char *FirstNonDec = nullptr;
248  const char *LookAhead = CurPtr;
249  while (true) {
250  if (isDigit(*LookAhead)) {
251  ++LookAhead;
252  } else {
253  if (!FirstNonDec)
254  FirstNonDec = LookAhead;
255 
256  // Keep going if we are looking for a 'h' suffix.
257  if (LexHex && isHexDigit(*LookAhead))
258  ++LookAhead;
259  else
260  break;
261  }
262  }
263  bool isHex = LexHex && (*LookAhead == 'h' || *LookAhead == 'H');
264  CurPtr = isHex || !FirstNonDec ? LookAhead : FirstNonDec;
265  if (isHex)
266  return 16;
267  return DefaultRadix;
268 }
269 
271 {
272  if (Value.isIntN(64))
273  return AsmToken(AsmToken::Integer, Ref, Value);
274  return AsmToken(AsmToken::BigNum, Ref, Value);
275 }
276 
277 /// LexDigit: First character is [0-9].
278 /// Local Label: [0-9][:]
279 /// Forward/Backward Label: [0-9][fb]
280 /// Binary integer: 0b[01]+
281 /// Octal integer: 0[0-7]+
282 /// Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH]
283 /// Decimal integer: [1-9][0-9]*
284 AsmToken AsmLexer::LexDigit() {
285  // MASM-flavor binary integer: [01]+[bB]
286  // MASM-flavor hexadecimal integer: [0-9][0-9a-fA-F]*[hH]
287  if (LexMasmIntegers && isdigit(CurPtr[-1])) {
288  const char *FirstNonBinary = (CurPtr[-1] != '0' && CurPtr[-1] != '1') ?
289  CurPtr - 1 : nullptr;
290  const char *OldCurPtr = CurPtr;
291  while (isHexDigit(*CurPtr)) {
292  if (*CurPtr != '0' && *CurPtr != '1' && !FirstNonBinary)
293  FirstNonBinary = CurPtr;
294  ++CurPtr;
295  }
296 
297  unsigned Radix = 0;
298  if (*CurPtr == 'h' || *CurPtr == 'H') {
299  // hexadecimal number
300  ++CurPtr;
301  Radix = 16;
302  } else if (FirstNonBinary && FirstNonBinary + 1 == CurPtr &&
303  (*FirstNonBinary == 'b' || *FirstNonBinary == 'B'))
304  Radix = 2;
305 
306  if (Radix == 2 || Radix == 16) {
307  StringRef Result(TokStart, CurPtr - TokStart);
308  APInt Value(128, 0, true);
309 
310  if (Result.drop_back().getAsInteger(Radix, Value))
311  return ReturnError(TokStart, Radix == 2 ? "invalid binary number" :
312  "invalid hexdecimal number");
313 
314  // MSVC accepts and ignores type suffices on integer literals.
315  SkipIgnoredIntegerSuffix(CurPtr);
316 
317  return intToken(Result, Value);
318  }
319 
320  // octal/decimal integers, or floating point numbers, fall through
321  CurPtr = OldCurPtr;
322  }
323 
324  // Decimal integer: [1-9][0-9]*
325  if (CurPtr[-1] != '0' || CurPtr[0] == '.') {
326  unsigned Radix = doHexLookAhead(CurPtr, 10, LexMasmIntegers);
327  bool isHex = Radix == 16;
328  // Check for floating point literals.
329  if (!isHex && (*CurPtr == '.' || *CurPtr == 'e')) {
330  ++CurPtr;
331  return LexFloatLiteral();
332  }
333 
334  StringRef Result(TokStart, CurPtr - TokStart);
335 
336  APInt Value(128, 0, true);
337  if (Result.getAsInteger(Radix, Value))
338  return ReturnError(TokStart, !isHex ? "invalid decimal number" :
339  "invalid hexdecimal number");
340 
341  // Consume the [hH].
342  if (LexMasmIntegers && Radix == 16)
343  ++CurPtr;
344 
345  // The darwin/x86 (and x86-64) assembler accepts and ignores type
346  // suffices on integer literals.
347  SkipIgnoredIntegerSuffix(CurPtr);
348 
349  return intToken(Result, Value);
350  }
351 
352  if (!LexMasmIntegers && ((*CurPtr == 'b') || (*CurPtr == 'B'))) {
353  ++CurPtr;
354  // See if we actually have "0b" as part of something like "jmp 0b\n"
355  if (!isDigit(CurPtr[0])) {
356  --CurPtr;
357  StringRef Result(TokStart, CurPtr - TokStart);
358  return AsmToken(AsmToken::Integer, Result, 0);
359  }
360  const char *NumStart = CurPtr;
361  while (CurPtr[0] == '0' || CurPtr[0] == '1')
362  ++CurPtr;
363 
364  // Requires at least one binary digit.
365  if (CurPtr == NumStart)
366  return ReturnError(TokStart, "invalid binary number");
367 
368  StringRef Result(TokStart, CurPtr - TokStart);
369 
370  APInt Value(128, 0, true);
371  if (Result.substr(2).getAsInteger(2, Value))
372  return ReturnError(TokStart, "invalid binary number");
373 
374  // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
375  // suffixes on integer literals.
376  SkipIgnoredIntegerSuffix(CurPtr);
377 
378  return intToken(Result, Value);
379  }
380 
381  if ((*CurPtr == 'x') || (*CurPtr == 'X')) {
382  ++CurPtr;
383  const char *NumStart = CurPtr;
384  while (isHexDigit(CurPtr[0]))
385  ++CurPtr;
386 
387  // "0x.0p0" is valid, and "0x0p0" (but not "0xp0" for example, which will be
388  // diagnosed by LexHexFloatLiteral).
389  if (CurPtr[0] == '.' || CurPtr[0] == 'p' || CurPtr[0] == 'P')
390  return LexHexFloatLiteral(NumStart == CurPtr);
391 
392  // Otherwise requires at least one hex digit.
393  if (CurPtr == NumStart)
394  return ReturnError(CurPtr-2, "invalid hexadecimal number");
395 
396  APInt Result(128, 0);
397  if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result))
398  return ReturnError(TokStart, "invalid hexadecimal number");
399 
400  // Consume the optional [hH].
401  if (LexMasmIntegers && (*CurPtr == 'h' || *CurPtr == 'H'))
402  ++CurPtr;
403 
404  // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
405  // suffixes on integer literals.
406  SkipIgnoredIntegerSuffix(CurPtr);
407 
408  return intToken(StringRef(TokStart, CurPtr - TokStart), Result);
409  }
410 
411  // Either octal or hexadecimal.
412  APInt Value(128, 0, true);
413  unsigned Radix = doHexLookAhead(CurPtr, 8, LexMasmIntegers);
414  bool isHex = Radix == 16;
415  StringRef Result(TokStart, CurPtr - TokStart);
416  if (Result.getAsInteger(Radix, Value))
417  return ReturnError(TokStart, !isHex ? "invalid octal number" :
418  "invalid hexdecimal number");
419 
420  // Consume the [hH].
421  if (Radix == 16)
422  ++CurPtr;
423 
424  // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
425  // suffixes on integer literals.
426  SkipIgnoredIntegerSuffix(CurPtr);
427 
428  return intToken(Result, Value);
429 }
430 
431 /// LexSingleQuote: Integer: 'b'
432 AsmToken AsmLexer::LexSingleQuote() {
433  int CurChar = getNextChar();
434 
435  if (CurChar == '\\')
436  CurChar = getNextChar();
437 
438  if (CurChar == EOF)
439  return ReturnError(TokStart, "unterminated single quote");
440 
441  CurChar = getNextChar();
442 
443  if (CurChar != '\'')
444  return ReturnError(TokStart, "single quote way too long");
445 
446  // The idea here being that 'c' is basically just an integral
447  // constant.
448  StringRef Res = StringRef(TokStart,CurPtr - TokStart);
449  long long Value;
450 
451  if (Res.startswith("\'\\")) {
452  char theChar = Res[2];
453  switch (theChar) {
454  default: Value = theChar; break;
455  case '\'': Value = '\''; break;
456  case 't': Value = '\t'; break;
457  case 'n': Value = '\n'; break;
458  case 'b': Value = '\b'; break;
459  }
460  } else
461  Value = TokStart[1];
462 
463  return AsmToken(AsmToken::Integer, Res, Value);
464 }
465 
466 /// LexQuote: String: "..."
467 AsmToken AsmLexer::LexQuote() {
468  int CurChar = getNextChar();
469  // TODO: does gas allow multiline string constants?
470  while (CurChar != '"') {
471  if (CurChar == '\\') {
472  // Allow \", etc.
473  CurChar = getNextChar();
474  }
475 
476  if (CurChar == EOF)
477  return ReturnError(TokStart, "unterminated string constant");
478 
479  CurChar = getNextChar();
480  }
481 
483 }
484 
486  TokStart = CurPtr;
487 
488  while (!isAtStartOfComment(CurPtr) && // Start of line comment.
489  !isAtStatementSeparator(CurPtr) && // End of statement marker.
490  *CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) {
491  ++CurPtr;
492  }
493  return StringRef(TokStart, CurPtr-TokStart);
494 }
495 
496 StringRef AsmLexer::LexUntilEndOfLine() {
497  TokStart = CurPtr;
498 
499  while (*CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) {
500  ++CurPtr;
501  }
502  return StringRef(TokStart, CurPtr-TokStart);
503 }
504 
506  bool ShouldSkipSpace) {
507  SaveAndRestore<const char *> SavedTokenStart(TokStart);
508  SaveAndRestore<const char *> SavedCurPtr(CurPtr);
509  SaveAndRestore<bool> SavedAtStartOfLine(IsAtStartOfLine);
510  SaveAndRestore<bool> SavedAtStartOfStatement(IsAtStartOfStatement);
511  SaveAndRestore<bool> SavedSkipSpace(SkipSpace, ShouldSkipSpace);
512  SaveAndRestore<bool> SavedIsPeeking(IsPeeking, true);
513  std::string SavedErr = getErr();
514  SMLoc SavedErrLoc = getErrLoc();
515 
516  size_t ReadCount;
517  for (ReadCount = 0; ReadCount < Buf.size(); ++ReadCount) {
518  AsmToken Token = LexToken();
519 
520  Buf[ReadCount] = Token;
521 
522  if (Token.is(AsmToken::Eof))
523  break;
524  }
525 
526  SetError(SavedErrLoc, SavedErr);
527  return ReadCount;
528 }
529 
530 bool AsmLexer::isAtStartOfComment(const char *Ptr) {
531  StringRef CommentString = MAI.getCommentString();
532 
533  if (CommentString.size() == 1)
534  return CommentString[0] == Ptr[0];
535 
536  // Allow # preprocessor commments also be counted as comments for "##" cases
537  if (CommentString[1] == '#')
538  return CommentString[0] == Ptr[0];
539 
540  return strncmp(Ptr, CommentString.data(), CommentString.size()) == 0;
541 }
542 
543 bool AsmLexer::isAtStatementSeparator(const char *Ptr) {
544  return strncmp(Ptr, MAI.getSeparatorString(),
545  strlen(MAI.getSeparatorString())) == 0;
546 }
547 
549  TokStart = CurPtr;
550  // This always consumes at least one character.
551  int CurChar = getNextChar();
552 
553  if (!IsPeeking && CurChar == '#' && IsAtStartOfStatement) {
554  // If this starts with a '#', this may be a cpp
555  // hash directive and otherwise a line comment.
556  AsmToken TokenBuf[2];
557  MutableArrayRef<AsmToken> Buf(TokenBuf, 2);
558  size_t num = peekTokens(Buf, true);
559  // There cannot be a space preceeding this
560  if (IsAtStartOfLine && num == 2 && TokenBuf[0].is(AsmToken::Integer) &&
561  TokenBuf[1].is(AsmToken::String)) {
562  CurPtr = TokStart; // reset curPtr;
563  StringRef s = LexUntilEndOfLine();
564  UnLex(TokenBuf[1]);
565  UnLex(TokenBuf[0]);
567  }
568  return LexLineComment();
569  }
570 
571  if (isAtStartOfComment(TokStart))
572  return LexLineComment();
573 
574  if (isAtStatementSeparator(TokStart)) {
575  CurPtr += strlen(MAI.getSeparatorString()) - 1;
576  IsAtStartOfLine = true;
577  IsAtStartOfStatement = true;
579  StringRef(TokStart, strlen(MAI.getSeparatorString())));
580  }
581 
582  // If we're missing a newline at EOF, make sure we still get an
583  // EndOfStatement token before the Eof token.
584  if (CurChar == EOF && !IsAtStartOfStatement) {
585  IsAtStartOfLine = true;
586  IsAtStartOfStatement = true;
588  }
589  IsAtStartOfLine = false;
590  bool OldIsAtStartOfStatement = IsAtStartOfStatement;
591  IsAtStartOfStatement = false;
592  switch (CurChar) {
593  default:
594  // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
595  if (isalpha(CurChar) || CurChar == '_' || CurChar == '.')
596  return LexIdentifier();
597 
598  // Unknown character, emit an error.
599  return ReturnError(TokStart, "invalid character in input");
600  case EOF:
601  IsAtStartOfLine = true;
602  IsAtStartOfStatement = true;
604  case 0:
605  case ' ':
606  case '\t':
607  IsAtStartOfStatement = OldIsAtStartOfStatement;
608  while (*CurPtr == ' ' || *CurPtr == '\t')
609  CurPtr++;
610  if (SkipSpace)
611  return LexToken(); // Ignore whitespace.
612  else
613  return AsmToken(AsmToken::Space, StringRef(TokStart, CurPtr - TokStart));
614  case '\r': {
615  IsAtStartOfLine = true;
616  IsAtStartOfStatement = true;
617  // If this is a CR followed by LF, treat that as one token.
618  if (CurPtr != CurBuf.end() && *CurPtr == '\n')
619  ++CurPtr;
621  StringRef(TokStart, CurPtr - TokStart));
622  }
623  case '\n':
624  IsAtStartOfLine = true;
625  IsAtStartOfStatement = true;
627  case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1));
628  case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1));
629  case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1));
630  case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1));
631  case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1));
632  case '[': return AsmToken(AsmToken::LBrac, StringRef(TokStart, 1));
633  case ']': return AsmToken(AsmToken::RBrac, StringRef(TokStart, 1));
634  case '{': return AsmToken(AsmToken::LCurly, StringRef(TokStart, 1));
635  case '}': return AsmToken(AsmToken::RCurly, StringRef(TokStart, 1));
636  case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1));
637  case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1));
638  case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1));
639  case '@': return AsmToken(AsmToken::At, StringRef(TokStart, 1));
640  case '\\': return AsmToken(AsmToken::BackSlash, StringRef(TokStart, 1));
641  case '=':
642  if (*CurPtr == '=') {
643  ++CurPtr;
645  }
647  case '-':
648  if (*CurPtr == '>') {
649  ++CurPtr;
651  }
653  case '|':
654  if (*CurPtr == '|') {
655  ++CurPtr;
657  }
659  case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1));
660  case '&':
661  if (*CurPtr == '&') {
662  ++CurPtr;
664  }
666  case '!':
667  if (*CurPtr == '=') {
668  ++CurPtr;
670  }
672  case '%':
673  if (MAI.hasMipsExpressions()) {
675  unsigned OperatorLength;
676 
677  std::tie(Operator, OperatorLength) =
679  StringRef(CurPtr))
680  .StartsWith("call16", {AsmToken::PercentCall16, 7})
681  .StartsWith("call_hi", {AsmToken::PercentCall_Hi, 8})
682  .StartsWith("call_lo", {AsmToken::PercentCall_Lo, 8})
683  .StartsWith("dtprel_hi", {AsmToken::PercentDtprel_Hi, 10})
684  .StartsWith("dtprel_lo", {AsmToken::PercentDtprel_Lo, 10})
685  .StartsWith("got_disp", {AsmToken::PercentGot_Disp, 9})
686  .StartsWith("got_hi", {AsmToken::PercentGot_Hi, 7})
687  .StartsWith("got_lo", {AsmToken::PercentGot_Lo, 7})
688  .StartsWith("got_ofst", {AsmToken::PercentGot_Ofst, 9})
689  .StartsWith("got_page", {AsmToken::PercentGot_Page, 9})
690  .StartsWith("gottprel", {AsmToken::PercentGottprel, 9})
691  .StartsWith("got", {AsmToken::PercentGot, 4})
692  .StartsWith("gp_rel", {AsmToken::PercentGp_Rel, 7})
693  .StartsWith("higher", {AsmToken::PercentHigher, 7})
694  .StartsWith("highest", {AsmToken::PercentHighest, 8})
695  .StartsWith("hi", {AsmToken::PercentHi, 3})
696  .StartsWith("lo", {AsmToken::PercentLo, 3})
697  .StartsWith("neg", {AsmToken::PercentNeg, 4})
698  .StartsWith("pcrel_hi", {AsmToken::PercentPcrel_Hi, 9})
699  .StartsWith("pcrel_lo", {AsmToken::PercentPcrel_Lo, 9})
700  .StartsWith("tlsgd", {AsmToken::PercentTlsgd, 6})
701  .StartsWith("tlsldm", {AsmToken::PercentTlsldm, 7})
702  .StartsWith("tprel_hi", {AsmToken::PercentTprel_Hi, 9})
703  .StartsWith("tprel_lo", {AsmToken::PercentTprel_Lo, 9})
704  .Default({AsmToken::Percent, 1});
705 
706  if (Operator != AsmToken::Percent) {
707  CurPtr += OperatorLength - 1;
708  return AsmToken(Operator, StringRef(TokStart, OperatorLength));
709  }
710  }
712  case '/':
713  IsAtStartOfStatement = OldIsAtStartOfStatement;
714  return LexSlash();
715  case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1));
716  case '\'': return LexSingleQuote();
717  case '"': return LexQuote();
718  case '0': case '1': case '2': case '3': case '4':
719  case '5': case '6': case '7': case '8': case '9':
720  return LexDigit();
721  case '<':
722  switch (*CurPtr) {
723  case '<':
724  ++CurPtr;
726  case '=':
727  ++CurPtr;
729  case '>':
730  ++CurPtr;
732  default:
734  }
735  case '>':
736  switch (*CurPtr) {
737  case '>':
738  ++CurPtr;
740  case '=':
741  ++CurPtr;
743  default:
745  }
746 
747  // TODO: Quoted identifiers (objc methods etc)
748  // local labels: [0-9][:]
749  // Forward/backward labels: [0-9][fb]
750  // Integers, fp constants, character constants.
751  }
752 }
AsmCommentConsumer * CommentConsumer
Definition: MCAsmLexer.h:53
This class represents lattice values for constants.
Definition: AllocatorList.h:23
bool is(AsmToken::TokenKind K) const
Check if the current token has kind K.
Definition: MCAsmLexer.h:135
LLVM_NODISCARD bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:256
static unsigned doHexLookAhead(const char *&CurPtr, unsigned DefaultRadix, bool LexHex)
Definition: AsmLexer.cpp:245
bool isAlnum(char C)
Checks whether character C is either a decimal digit or an uppercase or lowercase letter as classifie...
Definition: StringExtras.h:88
void setBuffer(StringRef Buf, const char *ptr=nullptr)
Definition: AsmLexer.cpp:39
The access may reference the value stored in memory.
Target independent representation for an assembler token.
Definition: MCAsmMacro.h:21
LLVM_NODISCARD StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:578
This file implements a class to represent arbitrary precision integral constant values and operations...
static bool startswith(StringRef Magic, const char(&S)[N])
Definition: Magic.cpp:29
size_t peekTokens(MutableArrayRef< AsmToken > Buf, bool ShouldSkipSpace=true) override
Look ahead an arbitrary number of tokens.
Definition: AsmLexer.cpp:505
LLVM_NODISCARD size_t size() const
size - Get the string size.
Definition: StringRef.h:130
This class is intended to be used as a base class for asm properties and features specific to the tar...
Definition: MCAsmInfo.h:55
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:42
void SetError(SMLoc errLoc, const std::string &err)
Definition: MCAsmLexer.h:59
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:290
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:148
bool isHexDigit(char C)
Checks if character C is a hexadecimal numeric character.
Definition: StringExtras.h:79
const char * getSeparatorString() const
Definition: MCAsmInfo.h:479
static AsmToken intToken(StringRef Ref, APInt &Value)
Definition: AsmLexer.cpp:270
const char * TokStart
Definition: MCAsmLexer.h:48
StringRef getCommentString() const
Definition: MCAsmInfo.h:485
const std::string & getErr()
Get the current error string.
Definition: MCAsmLexer.h:127
bool isIntN(unsigned N) const
Check if this APInt has an N-bits unsigned integer value.
Definition: APInt.h:449
std::enable_if< std::numeric_limits< T >::is_signed, bool >::type getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:478
bool hasMipsExpressions() const
Definition: MCAsmInfo.h:636
void UnLex(AsmToken const &Token)
Definition: MCAsmLexer.h:87
This is a utility class that provides an abstraction for the common functionality between Instruction...
Definition: Operator.h:30
StringRef LexUntilEndOfStatement() override
Definition: AsmLexer.cpp:485
bool is(TokenKind K) const
Definition: MCAsmMacro.h:82
Class for arbitrary precision integers.
Definition: APInt.h:69
iterator begin() const
Definition: StringRef.h:101
A utility class that uses RAII to save and restore the value of a variable.
static SMLoc getFromPointer(const char *Ptr)
Definition: SMLoc.h:36
bool isDigit(char C)
Checks if character C is one of the 10 decimal digits.
Definition: StringExtras.h:76
~AsmLexer() override
AsmToken LexToken() override
LexToken - Read the next token and return its code.
Definition: AsmLexer.cpp:548
LLVM_NODISCARD const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:122
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:72
LLVM_NODISCARD StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Definition: StringRef.h:628
SMLoc getErrLoc()
Get the current error location.
Definition: MCAsmLexer.h:122
This file provides utility classes that use RAII to save and restore values.
static void SkipIgnoredIntegerSuffix(const char *&CurPtr)
Definition: AsmLexer.cpp:233
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
Represents a location in source code.
Definition: SMLoc.h:23
static bool IsIdentifierChar(char c, bool AllowAt)
LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@?]*.
Definition: AsmLexer.cpp:137
bool LexMasmIntegers
Definition: MCAsmLexer.h:52
iterator end() const
Definition: StringRef.h:103
bool AllowAtInIdentifier
Definition: MCAsmLexer.h:50
virtual void HandleComment(SMLoc Loc, StringRef CommentText)=0
Callback function for when a comment is lexed.
AsmLexer(const MCAsmInfo &MAI)
Definition: AsmLexer.cpp:33