24 AsmLexer::AsmLexer(
const MCAsmInfo &MAI) : MAI(MAI) {
26 isAtStartOfLine =
true;
39 CurPtr = CurBuf.
begin();
46 AsmToken AsmLexer::ReturnError(
const char *Loc,
const std::string &Msg) {
52 int AsmLexer::getNextChar() {
53 char CurChar = *CurPtr++;
56 return (
unsigned char)CurChar;
60 if (CurPtr - 1 != CurBuf.
end())
74 AsmToken AsmLexer::LexFloatLiteral() {
76 while (isdigit(*CurPtr))
82 if (*CurPtr ==
'e' || *CurPtr ==
'E') {
84 if (*CurPtr ==
'-' || *CurPtr ==
'+')
86 while (isdigit(*CurPtr))
100 AsmToken AsmLexer::LexHexFloatLiteral(
bool NoIntDigits) {
101 assert((*CurPtr ==
'p' || *CurPtr ==
'P' || *CurPtr ==
'.') &&
102 "unexpected parse state in floating hex");
103 bool NoFracDigits =
true;
106 if (*CurPtr ==
'.') {
109 const char *FracStart = CurPtr;
110 while (isxdigit(*CurPtr))
113 NoFracDigits = CurPtr == FracStart;
116 if (NoIntDigits && NoFracDigits)
117 return ReturnError(
TokStart,
"invalid hexadecimal floating-point constant: "
118 "expected at least one significand digit");
121 if (*CurPtr !=
'p' && *CurPtr !=
'P')
122 return ReturnError(
TokStart,
"invalid hexadecimal floating-point constant: "
123 "expected exponent part 'p'");
126 if (*CurPtr ==
'+' || *CurPtr ==
'-')
130 const char *ExpStart = CurPtr;
131 while (isdigit(*CurPtr))
134 if (CurPtr == ExpStart)
135 return ReturnError(
TokStart,
"invalid hexadecimal floating-point constant: "
136 "expected at least one exponent digit");
143 return isalnum(c) || c ==
'_' || c ==
'$' || c ==
'.' ||
144 (c ==
'@' && AllowAt) || c ==
'?';
146 AsmToken AsmLexer::LexIdentifier() {
148 if (CurPtr[-1] ==
'.' && isdigit(*CurPtr)) {
150 while (isdigit(*CurPtr))
152 if (*CurPtr ==
'e' || *CurPtr ==
'E' ||
154 return LexFloatLiteral();
172 case '/':
return ++CurPtr, LexLineComment();
179 int CurChar = getNextChar();
182 return ReturnError(
TokStart,
"unterminated comment");
185 if (CurPtr[0] !=
'/')
break;
195 AsmToken AsmLexer::LexLineComment() {
198 int CurChar = getNextChar();
199 while (CurChar !=
'\n' && CurChar !=
'\r' && CurChar != EOF)
200 CurChar = getNextChar();
209 if (CurPtr[0] ==
'U')
211 if (CurPtr[0] ==
'L')
213 if (CurPtr[0] ==
'L')
219 static unsigned doLookAhead(
const char *&CurPtr,
unsigned DefaultRadix) {
220 const char *FirstHex =
nullptr;
221 const char *LookAhead = CurPtr;
223 if (isdigit(*LookAhead)) {
225 }
else if (isxdigit(*LookAhead)) {
227 FirstHex = LookAhead;
233 bool isHex = *LookAhead ==
'h' || *LookAhead ==
'H';
234 CurPtr = isHex || !FirstHex ? LookAhead : FirstHex;
256 if (CurPtr[-1] !=
'0' || CurPtr[0] ==
'.') {
258 bool isHex = Radix == 16;
260 if (!isHex && (*CurPtr ==
'.' || *CurPtr ==
'e')) {
262 return LexFloatLiteral();
268 if (Result.getAsInteger(Radix,
Value))
269 return ReturnError(
TokStart, !isHex ?
"invalid decimal number" :
270 "invalid hexdecimal number");
273 if (Radix == 2 || Radix == 16)
283 if (*CurPtr ==
'b') {
286 if (!isdigit(CurPtr[0])) {
291 const char *NumStart = CurPtr;
292 while (CurPtr[0] ==
'0' || CurPtr[0] ==
'1')
296 if (CurPtr == NumStart)
297 return ReturnError(
TokStart,
"invalid binary number");
302 if (Result.substr(2).getAsInteger(2,
Value))
303 return ReturnError(
TokStart,
"invalid binary number");
312 if (*CurPtr ==
'x') {
314 const char *NumStart = CurPtr;
315 while (isxdigit(CurPtr[0]))
320 if (CurPtr[0] ==
'.' || CurPtr[0] ==
'p' || CurPtr[0] ==
'P')
321 return LexHexFloatLiteral(NumStart == CurPtr);
324 if (CurPtr == NumStart)
325 return ReturnError(CurPtr-2,
"invalid hexadecimal number");
327 APInt Result(128, 0);
329 return ReturnError(
TokStart,
"invalid hexadecimal number");
332 if (*CurPtr ==
'h' || *CurPtr ==
'H')
345 bool isHex = Radix == 16;
347 if (Result.getAsInteger(Radix,
Value))
348 return ReturnError(
TokStart, !isHex ?
"invalid octal number" :
349 "invalid hexdecimal number");
363 AsmToken AsmLexer::LexSingleQuote() {
364 int CurChar = getNextChar();
367 CurChar = getNextChar();
370 return ReturnError(
TokStart,
"unterminated single quote");
372 CurChar = getNextChar();
375 return ReturnError(
TokStart,
"single quote way too long");
383 char theChar = Res[2];
385 default: Value = theChar;
break;
386 case '\'': Value =
'\'';
break;
387 case 't': Value =
'\t';
break;
388 case 'n': Value =
'\n';
break;
389 case 'b': Value =
'\b';
break;
400 int CurChar = getNextChar();
402 while (CurChar !=
'"') {
403 if (CurChar ==
'\\') {
405 CurChar = getNextChar();
409 return ReturnError(
TokStart,
"unterminated string constant");
411 CurChar = getNextChar();
422 *CurPtr !=
'\n' && *CurPtr !=
'\r' &&
423 (*CurPtr != 0 || CurPtr != CurBuf.
end())) {
432 while (*CurPtr !=
'\n' && *CurPtr !=
'\r' &&
433 (*CurPtr != 0 || CurPtr != CurBuf.
end())) {
440 const char *SavedTokStart =
TokStart;
441 const char *SavedCurPtr = CurPtr;
442 bool SavedAtStartOfLine = isAtStartOfLine;
445 std::string SavedErr =
getErr();
454 isAtStartOfLine = SavedAtStartOfLine;
455 CurPtr = SavedCurPtr;
464 if (CommentString[1] ==
'\0')
465 return CommentString[0] == Ptr[0];
468 if (CommentString[1] ==
'#')
469 return CommentString[0] == Ptr[0];
471 return strncmp(Ptr, CommentString, strlen(CommentString)) == 0;
482 int CurChar = getNextChar();
488 if (CurChar ==
'#' && isAtStartOfLine)
490 isAtStartOfLine =
true;
491 return LexLineComment();
501 if (CurChar == EOF && !isAtStartOfLine) {
502 isAtStartOfLine =
true;
506 isAtStartOfLine =
false;
510 if (isalpha(CurChar) || CurChar ==
'_' || CurChar ==
'.')
511 return LexIdentifier();
514 return ReturnError(
TokStart,
"invalid character in input");
524 while (*CurPtr==
' ' || *CurPtr==
'\t') {
532 isAtStartOfLine =
true;
567 case '/':
return LexSlash();
569 case '\'':
return LexSingleQuote();
570 case '"':
return LexQuote();
571 case '0':
case '1':
case '2':
case '3':
case '4':
572 case '5':
case '6':
case '7':
case '8':
case '9':
const AsmToken peekTok(bool ShouldSkipSpace=true) override
Look ahead at the next token to be lexed.
void setBuffer(StringRef Buf, const char *ptr=nullptr)
Target independent representation for an assembler token.
bool isIntN(unsigned N) const
Check if this APInt has an N-bits unsigned integer value.
StringRef LexUntilEndOfLine()
This class is intended to be used as a base class for asm properties and features specific to the target.
static AsmToken intToken(StringRef Ref, APInt &Value)
const std::string & getErr()
Get the current error string.
bool isAtStatementSeparator(const char *Ptr)
bool isAtStartOfComment(const char *Ptr)
bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
const SMLoc & getErrLoc()
Get the current error location.
StringRef LexUntilEndOfStatement() override
Class for arbitrary precision integers.
static unsigned doLookAhead(const char *&CurPtr, unsigned DefaultRadix)
static SMLoc getFromPointer(const char *Ptr)
const char * getSeparatorString() const
AsmToken LexToken() override
LexToken - Read the next token and return its code.
void SetError(const SMLoc &errLoc, const std::string &err)
LLVM Value Representation.
static void SkipIgnoredIntegerSuffix(const char *&CurPtr)
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Represents a location in source code.
static bool IsIdentifierChar(char c, bool AllowAt)
LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@?]*.
const char * getCommentString() const