LLVM API Documentation

MCAsmLexer.h
Go to the documentation of this file.
00001 //===-- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface -----*- C++ -*-===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 
00010 #ifndef LLVM_MC_MCPARSER_MCASMLEXER_H
00011 #define LLVM_MC_MCPARSER_MCASMLEXER_H
00012 
00013 #include "llvm/ADT/StringRef.h"
00014 #include "llvm/Support/Compiler.h"
00015 #include "llvm/Support/DataTypes.h"
00016 #include "llvm/Support/SMLoc.h"
00017 
00018 namespace llvm {
00019 
00020 /// AsmToken - Target independent representation for an assembler token.
00021 class AsmToken {
00022 public:
00023   enum TokenKind {
00024     // Markers
00025     Eof, Error,
00026 
00027     // String values.
00028     Identifier,
00029     String,
00030 
00031     // Integer values.
00032     Integer,
00033 
00034     // Real values.
00035     Real,
00036 
00037     // No-value.
00038     EndOfStatement,
00039     Colon,
00040     Space,
00041     Plus, Minus, Tilde,
00042     Slash,    // '/'
00043     BackSlash, // '\'
00044     LParen, RParen, LBrac, RBrac, LCurly, RCurly,
00045     Star, Dot, Comma, Dollar, Equal, EqualEqual,
00046 
00047     Pipe, PipePipe, Caret,
00048     Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash,
00049     Less, LessEqual, LessLess, LessGreater,
00050     Greater, GreaterEqual, GreaterGreater, At
00051   };
00052 
00053 private:
00054   TokenKind Kind;
00055 
00056   /// A reference to the entire token contents; this is always a pointer into
00057   /// a memory buffer owned by the source manager.
00058   StringRef Str;
00059 
00060   int64_t IntVal;
00061 
00062 public:
00063   AsmToken() {}
00064   AsmToken(TokenKind _Kind, StringRef _Str, int64_t _IntVal = 0)
00065     : Kind(_Kind), Str(_Str), IntVal(_IntVal) {}
00066 
00067   TokenKind getKind() const { return Kind; }
00068   bool is(TokenKind K) const { return Kind == K; }
00069   bool isNot(TokenKind K) const { return Kind != K; }
00070 
00071   SMLoc getLoc() const;
00072   SMLoc getEndLoc() const;
00073 
00074   /// getStringContents - Get the contents of a string token (without quotes).
00075   StringRef getStringContents() const {
00076     assert(Kind == String && "This token isn't a string!");
00077     return Str.slice(1, Str.size() - 1);
00078   }
00079 
00080   /// getIdentifier - Get the identifier string for the current token, which
00081   /// should be an identifier or a string. This gets the portion of the string
00082   /// which should be used as the identifier, e.g., it does not include the
00083   /// quotes on strings.
00084   StringRef getIdentifier() const {
00085     if (Kind == Identifier)
00086       return getString();
00087     return getStringContents();
00088   }
00089 
00090   /// getString - Get the string for the current token, this includes all
00091   /// characters (for example, the quotes on strings) in the token.
00092   ///
00093   /// The returned StringRef points into the source manager's memory buffer, and
00094   /// is safe to store across calls to Lex().
00095   StringRef getString() const { return Str; }
00096 
00097   // FIXME: Don't compute this in advance, it makes every token larger, and is
00098   // also not generally what we want (it is nicer for recovery etc. to lex 123br
00099   // as a single token, then diagnose as an invalid number).
00100   int64_t getIntVal() const {
00101     assert(Kind == Integer && "This token isn't an integer!");
00102     return IntVal;
00103   }
00104 };
00105 
00106 /// MCAsmLexer - Generic assembler lexer interface, for use by target specific
00107 /// assembly lexers.
00108 class MCAsmLexer {
00109   /// The current token, stored in the base class for faster access.
00110   AsmToken CurTok;
00111 
00112   /// The location and description of the current error
00113   SMLoc ErrLoc;
00114   std::string Err;
00115 
00116   MCAsmLexer(const MCAsmLexer &) LLVM_DELETED_FUNCTION;
00117   void operator=(const MCAsmLexer &) LLVM_DELETED_FUNCTION;
00118 protected: // Can only create subclasses.
00119   const char *TokStart;
00120   bool SkipSpace;
00121 
00122   MCAsmLexer();
00123 
00124   virtual AsmToken LexToken() = 0;
00125 
00126   void SetError(const SMLoc &errLoc, const std::string &err) {
00127     ErrLoc = errLoc;
00128     Err = err;
00129   }
00130 
00131 public:
00132   virtual ~MCAsmLexer();
00133 
00134   /// Lex - Consume the next token from the input stream and return it.
00135   ///
00136   /// The lexer will continuosly return the end-of-file token once the end of
00137   /// the main input file has been reached.
00138   const AsmToken &Lex() {
00139     return CurTok = LexToken();
00140   }
00141 
00142   virtual StringRef LexUntilEndOfStatement() = 0;
00143 
00144   /// getLoc - Get the current source location.
00145   SMLoc getLoc() const;
00146 
00147   /// getTok - Get the current (last) lexed token.
00148   const AsmToken &getTok() {
00149     return CurTok;
00150   }
00151 
00152   /// getErrLoc - Get the current error location
00153   const SMLoc &getErrLoc() {
00154     return ErrLoc;
00155   }
00156 
00157   /// getErr - Get the current error string
00158   const std::string &getErr() {
00159     return Err;
00160   }
00161 
00162   /// getKind - Get the kind of current token.
00163   AsmToken::TokenKind getKind() const { return CurTok.getKind(); }
00164 
00165   /// is - Check if the current token has kind \p K.
00166   bool is(AsmToken::TokenKind K) const { return CurTok.is(K); }
00167 
00168   /// isNot - Check if the current token has kind \p K.
00169   bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); }
00170 
00171   /// setSkipSpace - Set whether spaces should be ignored by the lexer
00172   void setSkipSpace(bool val) { SkipSpace = val; }
00173 };
00174 
00175 } // End llvm namespace
00176 
00177 #endif