LLVM API Documentation
00001 //===-- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface -----*- C++ -*-===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 00010 #ifndef LLVM_MC_MCPARSER_MCASMLEXER_H 00011 #define LLVM_MC_MCPARSER_MCASMLEXER_H 00012 00013 #include "llvm/ADT/StringRef.h" 00014 #include "llvm/Support/Compiler.h" 00015 #include "llvm/Support/DataTypes.h" 00016 #include "llvm/Support/SMLoc.h" 00017 00018 namespace llvm { 00019 00020 /// AsmToken - Target independent representation for an assembler token. 00021 class AsmToken { 00022 public: 00023 enum TokenKind { 00024 // Markers 00025 Eof, Error, 00026 00027 // String values. 00028 Identifier, 00029 String, 00030 00031 // Integer values. 00032 Integer, 00033 00034 // Real values. 00035 Real, 00036 00037 // No-value. 00038 EndOfStatement, 00039 Colon, 00040 Space, 00041 Plus, Minus, Tilde, 00042 Slash, // '/' 00043 BackSlash, // '\' 00044 LParen, RParen, LBrac, RBrac, LCurly, RCurly, 00045 Star, Dot, Comma, Dollar, Equal, EqualEqual, 00046 00047 Pipe, PipePipe, Caret, 00048 Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash, 00049 Less, LessEqual, LessLess, LessGreater, 00050 Greater, GreaterEqual, GreaterGreater, At 00051 }; 00052 00053 private: 00054 TokenKind Kind; 00055 00056 /// A reference to the entire token contents; this is always a pointer into 00057 /// a memory buffer owned by the source manager. 00058 StringRef Str; 00059 00060 int64_t IntVal; 00061 00062 public: 00063 AsmToken() {} 00064 AsmToken(TokenKind _Kind, StringRef _Str, int64_t _IntVal = 0) 00065 : Kind(_Kind), Str(_Str), IntVal(_IntVal) {} 00066 00067 TokenKind getKind() const { return Kind; } 00068 bool is(TokenKind K) const { return Kind == K; } 00069 bool isNot(TokenKind K) const { return Kind != K; } 00070 00071 SMLoc getLoc() const; 00072 SMLoc getEndLoc() const; 00073 00074 /// getStringContents - Get the contents of a string token (without quotes). 00075 StringRef getStringContents() const { 00076 assert(Kind == String && "This token isn't a string!"); 00077 return Str.slice(1, Str.size() - 1); 00078 } 00079 00080 /// getIdentifier - Get the identifier string for the current token, which 00081 /// should be an identifier or a string. This gets the portion of the string 00082 /// which should be used as the identifier, e.g., it does not include the 00083 /// quotes on strings. 00084 StringRef getIdentifier() const { 00085 if (Kind == Identifier) 00086 return getString(); 00087 return getStringContents(); 00088 } 00089 00090 /// getString - Get the string for the current token, this includes all 00091 /// characters (for example, the quotes on strings) in the token. 00092 /// 00093 /// The returned StringRef points into the source manager's memory buffer, and 00094 /// is safe to store across calls to Lex(). 00095 StringRef getString() const { return Str; } 00096 00097 // FIXME: Don't compute this in advance, it makes every token larger, and is 00098 // also not generally what we want (it is nicer for recovery etc. to lex 123br 00099 // as a single token, then diagnose as an invalid number). 00100 int64_t getIntVal() const { 00101 assert(Kind == Integer && "This token isn't an integer!"); 00102 return IntVal; 00103 } 00104 }; 00105 00106 /// MCAsmLexer - Generic assembler lexer interface, for use by target specific 00107 /// assembly lexers. 00108 class MCAsmLexer { 00109 /// The current token, stored in the base class for faster access. 00110 AsmToken CurTok; 00111 00112 /// The location and description of the current error 00113 SMLoc ErrLoc; 00114 std::string Err; 00115 00116 MCAsmLexer(const MCAsmLexer &) LLVM_DELETED_FUNCTION; 00117 void operator=(const MCAsmLexer &) LLVM_DELETED_FUNCTION; 00118 protected: // Can only create subclasses. 00119 const char *TokStart; 00120 bool SkipSpace; 00121 00122 MCAsmLexer(); 00123 00124 virtual AsmToken LexToken() = 0; 00125 00126 void SetError(const SMLoc &errLoc, const std::string &err) { 00127 ErrLoc = errLoc; 00128 Err = err; 00129 } 00130 00131 public: 00132 virtual ~MCAsmLexer(); 00133 00134 /// Lex - Consume the next token from the input stream and return it. 00135 /// 00136 /// The lexer will continuosly return the end-of-file token once the end of 00137 /// the main input file has been reached. 00138 const AsmToken &Lex() { 00139 return CurTok = LexToken(); 00140 } 00141 00142 virtual StringRef LexUntilEndOfStatement() = 0; 00143 00144 /// getLoc - Get the current source location. 00145 SMLoc getLoc() const; 00146 00147 /// getTok - Get the current (last) lexed token. 00148 const AsmToken &getTok() { 00149 return CurTok; 00150 } 00151 00152 /// getErrLoc - Get the current error location 00153 const SMLoc &getErrLoc() { 00154 return ErrLoc; 00155 } 00156 00157 /// getErr - Get the current error string 00158 const std::string &getErr() { 00159 return Err; 00160 } 00161 00162 /// getKind - Get the kind of current token. 00163 AsmToken::TokenKind getKind() const { return CurTok.getKind(); } 00164 00165 /// is - Check if the current token has kind \p K. 00166 bool is(AsmToken::TokenKind K) const { return CurTok.is(K); } 00167 00168 /// isNot - Check if the current token has kind \p K. 00169 bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); } 00170 00171 /// setSkipSpace - Set whether spaces should be ignored by the lexer 00172 void setSkipSpace(bool val) { SkipSpace = val; } 00173 }; 00174 00175 } // End llvm namespace 00176 00177 #endif