114 MAI.useAtForSpecifier();
115 LexMotorolaIntegers = MAI.shouldUseMotorolaIntegers();
121 bool EndStatementAtEOF) {
125 "Buffer provided to AsmLexer lacks null terminator.");
132 CurPtr = CurBuf.begin();
135 this->EndStatementAtEOF = EndStatementAtEOF;
/// Signature of AsmLexer::ReturnError: takes a source position (Loc) and a
/// message (Msg) and yields an AsmToken. The body is not part of this listing.
/// NOTE(review): the lexing routines in this file return its result directly
/// together with a diagnostic message, so it presumably records the error and
/// produces an error token - confirm against the full source.
140AsmToken AsmLexer::ReturnError(
const char *
Loc,
const std::string &Msg) {
/// Consumes one character from the buffer: reads the byte at CurPtr, moves
/// CurPtr one past it, and returns the byte as an int.
/// The comparison against CurBuf.end() guards the read; the statement it
/// selects is missing from this listing. NOTE(review): other code in this file
/// compares the result against EOF, so that branch presumably returns EOF -
/// confirm against the full source.
146int AsmLexer::getNextChar() {
147 if (CurPtr == CurBuf.
end())
// Converting through unsigned char makes the returned int non-negative for
// every byte value.
149 return (
unsigned char)*CurPtr++;
/// Returns the byte at CurPtr as an int without moving CurPtr.
/// The comparison against CurBuf.end() guards the read; the statement it
/// selects is missing from this listing. NOTE(review): presumably it returns
/// EOF, mirroring getNextChar - confirm against the full source.
152int AsmLexer::peekNextChar() {
153 if (CurPtr == CurBuf.end())
// Same unsigned char conversion as getNextChar, but CurPtr is left in place.
155 return (
unsigned char)*CurPtr;
/// Lexes a floating-point literal at the current position. Only part of the
/// body appears in this listing; the visible steps are:
///  - a plus or minus sign found directly at CurPtr is rejected with an error
///    located at CurPtr;
///  - an e/E exponent marker at CurPtr opens the exponent handling, inside
///    which a sign at CurPtr is tested for (the action taken is not visible);
///  - the text from TokStart up to CurPtr is wrapped in a StringRef.
/// NOTE(review): the digit-skipping statements and the kind of the token that
/// receives the StringRef are missing here - confirm against the full source.
161AsmToken AsmLexer::LexFloatLiteral() {
166 if (*CurPtr ==
'-' || *CurPtr ==
'+')
167 return ReturnError(CurPtr,
"invalid sign in float literal");
170 if ((*CurPtr ==
'e' || *CurPtr ==
'E')) {
173 if (*CurPtr ==
'-' || *CurPtr ==
'+')
181 StringRef(TokStart, CurPtr - TokStart));
/// Lexes the fractional and exponent parts of a hexadecimal floating-point
/// constant and returns an AsmToken::Real token spanning TokStart..CurPtr.
///
/// \param NoIntDigits true when the caller found no digits before the current
///        position; combined with NoFracDigits below to reject a constant
///        that has no significand digits at all.
///
/// On entry CurPtr must point at a period or at p/P (asserted). Each failure
/// is reported through ReturnError at TokStart: no significand digit, no p/P
/// exponent marker, or no exponent digit.
/// NOTE(review): the statements that advance CurPtr over digits, the period,
/// the exponent marker and the sign are missing from this listing.
190AsmToken AsmLexer::LexHexFloatLiteral(
bool NoIntDigits) {
191 assert((*CurPtr ==
'p' || *CurPtr ==
'P' || *CurPtr ==
'.') &&
192 "unexpected parse state in floating hex");
// Stays true unless a fractional part with at least one character is seen.
193 bool NoFracDigits =
true;
196 if (*CurPtr ==
'.') {
// Remember where the fraction starts so an empty fraction can be detected
// by comparing pointers afterwards.
199 const char *FracStart = CurPtr;
203 NoFracDigits = CurPtr == FracStart;
// A constant with neither integer nor fraction digits has no significand.
206 if (NoIntDigits && NoFracDigits)
207 return ReturnError(TokStart,
"invalid hexadecimal floating-point constant: "
208 "expected at least one significand digit");
// The exponent marker is required at this point.
211 if (*CurPtr !=
'p' && *CurPtr !=
'P')
212 return ReturnError(TokStart,
"invalid hexadecimal floating-point constant: "
213 "expected exponent part 'p'");
216 if (*CurPtr ==
'+' || *CurPtr ==
'-')
220 const char *ExpStart = CurPtr;
// CurPtr still equal to ExpStart means no exponent digit was consumed.
224 if (CurPtr == ExpStart)
225 return ReturnError(TokStart,
"invalid hexadecimal floating-point constant: "
226 "expected at least one exponent digit");
228 return AsmToken(
AsmToken::Real, StringRef(TokStart, CurPtr - TokStart));
233 return isAlnum(
C) ||
C ==
'_' ||
C ==
'$' ||
C ==
'.' ||
C ==
'?' ||
234 (AllowAt &&
C ==
'@') || (AllowHash &&
C ==
'#');
239 if (CurPtr[-1] ==
'.' &&
isDigit(*CurPtr)) {
245 AllowHashInIdentifier) ||
246 *CurPtr ==
'e' || *CurPtr ==
'E')
247 return LexFloatLiteral();
250 while (
isIdentifierChar(*CurPtr, AllowAtInIdentifier, AllowHashInIdentifier))
254 if (CurPtr == TokStart+1 && TokStart[0] ==
'.')
264 if (!MAI.shouldAllowAdditionalComments()) {
265 IsAtStartOfStatement =
false;
271 IsAtStartOfStatement =
false;
275 return LexLineComment();
277 IsAtStartOfStatement =
false;
283 const char *CommentTextStart = CurPtr;
284 while (CurPtr != CurBuf.end()) {
291 if (CommentConsumer) {
292 CommentConsumer->HandleComment(
294 StringRef(CommentTextStart, CurPtr - 1 - CommentTextStart));
298 StringRef(TokStart, CurPtr - TokStart));
301 return ReturnError(TokStart,
"unterminated comment");
/// Lexes a comment that runs to the end of the current line.
///
/// Visible behavior: characters are consumed with getNextChar until a line
/// feed, a carriage return, or EOF is returned. If a CommentConsumer is set,
/// it is handed the text from the position where scanning started up to one
/// character before NewlinePtr. IsAtStartOfLine and IsAtStartOfStatement are
/// set to true. Two token extents are built: TokStart..CurPtr under the
/// IsAtStartOfStatement check, and an extent one character shorter after it.
/// NOTE(review): the token kinds, the first argument passed to HandleComment
/// and the action taken for a carriage return followed by a line feed are
/// missing from this listing - confirm against the full source.
306AsmToken AsmLexer::LexLineComment() {
// Start of the comment text as reported to the consumer.
311 const char *CommentTextStart = CurPtr;
312 int CurChar = getNextChar();
313 while (CurChar !=
'\n' && CurChar !=
'\r' && CurChar != EOF)
314 CurChar = getNextChar();
// Captured before the carriage return / line feed check below, so it does
// not move if that check consumes a further character.
315 const char *NewlinePtr = CurPtr;
316 if (CurChar ==
'\r' && CurPtr != CurBuf.end() && *CurPtr ==
'\n')
320 if (CommentConsumer) {
321 CommentConsumer->HandleComment(
323 StringRef(CommentTextStart, NewlinePtr - 1 - CommentTextStart));
326 IsAtStartOfLine =
true;
328 if (IsAtStartOfStatement)
330 StringRef(TokStart, CurPtr - TokStart));
331 IsAtStartOfStatement =
true;
334 StringRef(TokStart, CurPtr - 1 - TokStart));
339 if (CurPtr[0] ==
'U' || CurPtr[0] ==
'u')
341 if (CurPtr[0] ==
'L' || CurPtr[0] ==
'l')
343 if (CurPtr[0] ==
'L' || CurPtr[0] ==
'l')
351 const char *FirstNonDec =
nullptr;
352 const char *LookAhead = CurPtr;
358 FirstNonDec = LookAhead;
367 bool isHex = LexHex && (*LookAhead ==
'h' || *LookAhead ==
'H');
368 CurPtr = isHex || !FirstNonDec ? LookAhead : FirstNonDec;
/// File-local helper that takes a cursor into the text (CurPtr) and a default
/// radix and returns a pointer. The body is not part of this listing.
/// NOTE(review): the name suggests it returns the position just past the last
/// character that is a valid digit in DefaultRadix - confirm against the full
/// source.
374static const char *
findLastDigit(
const char *CurPtr,
unsigned DefaultRadix) {
382 if (
Value.isIntN(64))
396 return "hexadecimal";
398 return "base-" + std::to_string(Radix);
414 if (LexMasmIntegers && isdigit(CurPtr[-1])) {
415 const char *FirstNonBinary =
416 (CurPtr[-1] !=
'0' && CurPtr[-1] !=
'1') ? CurPtr - 1 : nullptr;
417 const char *FirstNonDecimal =
418 (CurPtr[-1] <
'0' || CurPtr[-1] >
'9') ? CurPtr - 1 : nullptr;
419 const char *OldCurPtr = CurPtr;
423 if (!FirstNonDecimal) {
424 FirstNonDecimal = CurPtr;
435 if (!FirstNonBinary) {
436 FirstNonBinary = CurPtr;
445 if (*CurPtr ==
'.') {
449 return LexFloatLiteral();
452 if (LexMasmHexFloats && (*CurPtr ==
'r' || *CurPtr ==
'R')) {
454 return AsmToken(
AsmToken::Real, StringRef(TokStart, CurPtr - TokStart));
458 if (*CurPtr ==
'h' || *CurPtr ==
'H') {
462 }
else if (*CurPtr ==
't' || *CurPtr ==
'T') {
466 }
else if (*CurPtr ==
'o' || *CurPtr ==
'O' || *CurPtr ==
'q' ||
471 }
else if (*CurPtr ==
'y' || *CurPtr ==
'Y') {
475 }
else if (FirstNonDecimal && FirstNonDecimal + 1 == CurPtr &&
477 (*FirstNonDecimal ==
'd' || *FirstNonDecimal ==
'D')) {
479 }
else if (FirstNonBinary && FirstNonBinary + 1 == CurPtr &&
481 (*FirstNonBinary ==
'b' || *FirstNonBinary ==
'B')) {
486 StringRef
Result(TokStart, CurPtr - TokStart);
487 APInt
Value(128, 0,
true);
490 return ReturnError(TokStart,
"invalid " +
radixName(Radix) +
" number");
504 if (LexMasmIntegers && UseMasmDefaultRadix) {
506 StringRef
Result(TokStart, CurPtr - TokStart);
508 APInt
Value(128, 0,
true);
510 return ReturnError(TokStart,
511 "invalid " +
radixName(DefaultRadix) +
" number");
518 if (LexMotorolaIntegers && CurPtr[-1] ==
'$') {
519 const char *NumStart = CurPtr;
524 if (StringRef(NumStart, CurPtr - NumStart).getAsInteger(16, Result))
525 return ReturnError(TokStart,
"invalid hexadecimal number");
527 return intToken(StringRef(TokStart, CurPtr - TokStart), Result);
531 if (LexMotorolaIntegers && CurPtr[-1] ==
'%') {
532 const char *NumStart = CurPtr;
533 while (*CurPtr ==
'0' || *CurPtr ==
'1')
537 if (StringRef(NumStart, CurPtr - NumStart).getAsInteger(2, Result))
538 return ReturnError(TokStart,
"invalid binary number");
540 return intToken(StringRef(TokStart, CurPtr - TokStart), Result);
547 if (LexHLASMIntegers || CurPtr[-1] !=
'0' || CurPtr[0] ==
'.') {
550 if (!LexHLASMIntegers) {
551 bool IsHex = Radix == 16;
553 if (!IsHex && (*CurPtr ==
'.' || *CurPtr ==
'e' || *CurPtr ==
'E')) {
556 return LexFloatLiteral();
560 StringRef
Result(TokStart, CurPtr - TokStart);
562 APInt
Value(128, 0,
true);
564 return ReturnError(TokStart,
"invalid " +
radixName(Radix) +
" number");
566 if (!LexHLASMIntegers)
574 if (!LexMasmIntegers && ((*CurPtr ==
'b') || (*CurPtr ==
'B'))) {
579 StringRef
Result(TokStart, CurPtr - TokStart);
582 const char *NumStart = CurPtr;
583 while (CurPtr[0] ==
'0' || CurPtr[0] ==
'1')
587 if (CurPtr == NumStart)
588 return ReturnError(TokStart,
"invalid binary number");
590 StringRef
Result(TokStart, CurPtr - TokStart);
592 APInt
Value(128, 0,
true);
594 return ReturnError(TokStart,
"invalid binary number");
603 if ((*CurPtr ==
'x') || (*CurPtr ==
'X')) {
605 const char *NumStart = CurPtr;
611 if (CurPtr[0] ==
'.' || CurPtr[0] ==
'p' || CurPtr[0] ==
'P')
612 return LexHexFloatLiteral(NumStart == CurPtr);
615 if (CurPtr == NumStart)
616 return ReturnError(CurPtr-2,
"invalid hexadecimal number");
619 if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result))
620 return ReturnError(TokStart,
"invalid hexadecimal number");
623 if (LexMasmIntegers && (*CurPtr ==
'h' || *CurPtr ==
'H'))
630 return intToken(StringRef(TokStart, CurPtr - TokStart), Result);
634 APInt
Value(128, 0,
true);
636 StringRef
Result(TokStart, CurPtr - TokStart);
638 return ReturnError(TokStart,
"invalid " +
radixName(Radix) +
" number");
/// Lexes text introduced by a single quote character.
///
/// Visible behavior:
///  - Several error exits report through ReturnError at TokStart: invalid
///    usage of character literals, an unterminated string constant, an
///    unterminated single quote, and a literal that is too long. The
///    conditions guarding these exits are missing from this listing.
///  - When LexMasmStrings is set, characters are consumed until EOF; a single
///    quote that is directly followed by another single quote is handled by a
///    separate branch from other characters.
///  - Otherwise the lexed text TokStart..CurPtr is kept in Res, and a switch
///    on Res[2] maps the letters t, n, b, f and r to the matching control
///    characters, maps a single quote to itself, and passes every other
///    character through unchanged.
/// NOTE(review): the switch presumably runs only when Res[1] is a backslash,
/// and Value is presumably the integer payload of the resulting token - both
/// the guard and the final return are missing here; confirm against the full
/// source.
652AsmToken AsmLexer::LexSingleQuote() {
653 int CurChar = getNextChar();
656 return ReturnError(TokStart,
"invalid usage of character literals");
658 if (LexMasmStrings) {
659 while (CurChar != EOF) {
660 if (CurChar !=
'\'') {
661 CurChar = getNextChar();
662 }
else if (peekNextChar() ==
'\'') {
666 CurChar = getNextChar();
672 return ReturnError(TokStart,
"unterminated string constant");
677 CurChar = getNextChar();
680 return ReturnError(TokStart,
"unterminated single quote");
682 CurChar = getNextChar();
685 return ReturnError(TokStart,
"single quote way too long");
689 StringRef Res = StringRef(TokStart,CurPtr - TokStart);
693 char theChar = Res[2];
695 default:
Value = theChar;
break;
696 case '\'':
Value =
'\'';
break;
697 case 't':
Value =
'\t';
break;
698 case 'n':
Value =
'\n';
break;
699 case 'b':
Value =
'\b';
break;
700 case 'f':
Value =
'\f';
break;
701 case 'r':
Value =
'\r';
break;
711 int CurChar = getNextChar();
713 return ReturnError(TokStart,
"invalid usage of string literals");
715 if (LexMasmStrings) {
716 while (CurChar != EOF) {
717 if (CurChar !=
'"') {
718 CurChar = getNextChar();
719 }
else if (peekNextChar() ==
'"') {
723 CurChar = getNextChar();
729 return ReturnError(TokStart,
"unterminated string constant");
733 while (CurChar !=
'"') {
734 if (CurChar ==
'\\') {
736 CurChar = getNextChar();
740 return ReturnError(TokStart,
"unterminated string constant");
742 CurChar = getNextChar();
751 while (!isAtStartOfComment(CurPtr) &&
752 !isAtStatementSeparator(CurPtr) &&
753 *CurPtr !=
'\n' && *CurPtr !=
'\r' && CurPtr != CurBuf.end()) {
756 return StringRef(TokStart, CurPtr-TokStart);
762 while (*CurPtr !=
'\n' && *CurPtr !=
'\r' && CurPtr != CurBuf.
end()) {
765 return StringRef(TokStart, CurPtr-TokStart);
769 bool ShouldSkipSpace) {
776 std::string SavedErr =
getErr();
780 for (ReadCount = 0; ReadCount < Buf.
size(); ++ReadCount) {
783 Buf[ReadCount] = Token;
791 SetError(SavedErrLoc, SavedErr);
/// Reports whether the text at Ptr begins a comment, judged against
/// CommentString.
///
/// \param Ptr position in the buffer to test.
/// \returns for a one-character CommentString, whether Ptr[0] equals it; for a
///          CommentString whose second character is a hash sign, whether
///          Ptr[0] equals its first character; otherwise whether the text at
///          Ptr starts with the whole CommentString (strncmp over its size).
///
/// NOTE(review): the definition of CommentString and the result of the HLASM
/// check (taken when MAI.isHLASM() holds and the lexer is not at the start of
/// a statement) are missing from this listing. CommentString is presumably
/// MAI.getCommentString() - confirm against the full source.
795bool AsmLexer::isAtStartOfComment(
const char *Ptr) {
796 if (MAI.
isHLASM() && !IsAtStartOfStatement)
801 if (CommentString.
size() == 1)
802 return CommentString[0] == Ptr[0];
// With a hash sign in second position only the first character is compared.
805 if (CommentString[1] ==
'#')
806 return CommentString[0] == Ptr[0];
808 return strncmp(Ptr, CommentString.
data(), CommentString.
size()) == 0;
/// Signature of AsmLexer::isAtStatementSeparator: takes a position in the
/// buffer and returns a bool. The body is not part of this listing.
/// NOTE(review): a caller in this file reacts to a true result by advancing
/// over strlen(MAI.getSeparatorString()) characters, so this presumably tests
/// whether the text at Ptr starts with that separator string - confirm
/// against the full source.
811bool AsmLexer::isAtStatementSeparator(
const char *Ptr) {
819 int CurChar = getNextChar();
821 if (!IsPeeking && CurChar ==
'#' && IsAtStartOfStatement) {
824 AsmToken TokenBuf[2];
831 StringRef s = LexUntilEndOfLine();
837 if (MAI.shouldAllowAdditionalComments())
838 return LexLineComment();
841 if (isAtStartOfComment(TokStart)) {
842 StringRef CommentString = MAI.getCommentString();
846 if (CommentString.
size() > 1 &&
847 StringRef(TokStart, CommentString.
size()) == CommentString) {
848 CurPtr += CommentString.
size() - 1;
850 return LexLineComment();
853 if (isAtStatementSeparator(TokStart)) {
854 CurPtr += strlen(MAI.getSeparatorString()) - 1;
855 IsAtStartOfLine =
true;
856 IsAtStartOfStatement =
true;
858 StringRef(TokStart, strlen(MAI.getSeparatorString())));
863 if (CurChar == EOF && !IsAtStartOfStatement && EndStatementAtEOF) {
864 IsAtStartOfLine =
true;
865 IsAtStartOfStatement =
true;
868 IsAtStartOfLine =
false;
869 bool OldIsAtStartOfStatement = IsAtStartOfStatement;
870 IsAtStartOfStatement =
false;
877 if (isalpha(CurChar) || CurChar ==
'_' || CurChar ==
'.')
878 return LexIdentifier();
881 return ReturnError(TokStart,
"invalid character in input");
883 if (EndStatementAtEOF) {
884 IsAtStartOfLine =
true;
885 IsAtStartOfStatement =
true;
891 IsAtStartOfStatement = OldIsAtStartOfStatement;
892 while (*CurPtr ==
' ' || *CurPtr ==
'\t')
897 return AsmToken(
AsmToken::Space, StringRef(TokStart, CurPtr - TokStart));
899 IsAtStartOfLine =
true;
900 IsAtStartOfStatement =
true;
902 if (CurPtr != CurBuf.end() && *CurPtr ==
'\n')
905 StringRef(TokStart, CurPtr - TokStart));
908 IsAtStartOfLine =
true;
909 IsAtStartOfStatement =
true;
912 case '+':
return AsmToken(
AsmToken::Plus, StringRef(TokStart, 1));
920 case '*':
return AsmToken(
AsmToken::Star, StringRef(TokStart, 1));
923 if (LexMotorolaIntegers &&
isHexDigit(*CurPtr))
925 if (MAI.doesAllowDollarAtStartOfIdentifier())
926 return LexIdentifier();
930 if (MAI.doesAllowAtAtStartOfIdentifier())
931 return LexIdentifier();
935 return LexIdentifier();
938 if (MAI.doesAllowQuestionAtStartOfIdentifier())
939 return LexIdentifier();
943 if (*CurPtr ==
'=') {
949 if (*CurPtr ==
'>') {
955 if (*CurPtr ==
'|') {
962 if (*CurPtr ==
'&') {
968 if (*CurPtr ==
'=') {
974 if (LexMotorolaIntegers && (*CurPtr ==
'0' || *CurPtr ==
'1')) {
979 IsAtStartOfStatement = OldIsAtStartOfStatement;
981 case '\'':
return LexSingleQuote();
982 case '"':
return LexQuote();
983 case '0':
case '1':
case '2':
case '3':
case '4':
984 case '5':
case '6':
case '7':
case '8':
case '9':
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
static std::string radixName(unsigned Radix)
static void SkipIgnoredIntegerSuffix(const char *&CurPtr)
static unsigned doHexLookAhead(const char *&CurPtr, unsigned DefaultRadix, bool LexHex)
static AsmToken intToken(StringRef Ref, APInt &Value)
static const char * findLastDigit(const char *CurPtr, unsigned DefaultRadix)
static bool isIdentifierChar(char C)
Return true if the given character satisfies the following regular expression: [-a-zA-Z$....
This file provides utility classes that use RAII to save and restore values.
Class for arbitrary precision integers.
size_t size() const
size - Get the array size.
LLVM_ABI AsmLexer(const MCAsmInfo &MAI)
void UnLex(AsmToken const &Token)
bool is(AsmToken::TokenKind K) const
Check if the current token has kind K.
SMLoc getErrLoc()
Get the current error location.
const std::string & getErr()
Get the current error string.
LLVM_ABI StringRef LexUntilEndOfStatement()
LLVM_ABI void setBuffer(StringRef Buf, const char *ptr=nullptr, bool EndStatementAtEOF=true)
Set buffer to be lexed.
LLVM_ABI size_t peekTokens(MutableArrayRef< AsmToken > Buf, bool ShouldSkipSpace=true)
Look ahead an arbitrary number of tokens.
Target independent representation for an assembler token.
LLVM_ABI SMLoc getLoc() const
StringRef getString() const
Get the string for the current token, this includes all characters (for example, the quotes on string...
bool is(TokenKind K) const
LLVM_ABI SMLoc getEndLoc() const
LLVM_ABI void dump(raw_ostream &OS) const
LLVM_ABI SMRange getLocRange() const
This class is intended to be used as a base class for asm properties and features specific to the tar...
StringRef getCommentString() const
const char * getSeparatorString() const
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Represents a location in source code.
static SMLoc getFromPointer(const char *Ptr)
Represents a range in source code.
StringRef - Represent a constant reference to a string, i.e.
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
constexpr size_t size() const
size - Get the string size.
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
LLVM Value Representation.
This class implements an extremely fast bulk output stream that can only output to a stream.
raw_ostream & write_escaped(StringRef Str, bool UseHexEscapes=false)
Output Str, turning '\\', '\t', '\n', '"', and anything that doesn't satisfy llvm::isPrint into an escape sequence.
@ C
The default llvm calling convention, compatible with C.
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
unsigned hexDigitValue(char C)
Interpret the given character C as a hexadecimal digit and return its value.
bool isDigit(char C)
Checks if character C is one of the 10 decimal digits.
bool isAlnum(char C)
Checks whether character C is either a decimal digit or an uppercase or lowercase letter as classifie...
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
@ Ref
The access may reference the value stored in memory.
bool isHexDigit(char C)
Checks if character C is a hexadecimal numeric character.
A utility class that uses RAII to save and restore the value of a variable.