LLVM  3.7.0
MCAsmLexer.h
Go to the documentation of this file.
1 //===-- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface -----*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #ifndef LLVM_MC_MCPARSER_MCASMLEXER_H
11 #define LLVM_MC_MCPARSER_MCASMLEXER_H
12 
13 #include "llvm/ADT/APInt.h"
14 #include "llvm/ADT/StringRef.h"
15 #include "llvm/Support/Compiler.h"
16 #include "llvm/Support/DataTypes.h"
17 #include "llvm/Support/SMLoc.h"
18 
19 namespace llvm {
20 
21 /// Target independent representation for an assembler token.
22 class AsmToken {
23 public:
    // NOTE(review): this listing is an extract. The enumerators on listing
    // lines 26, 29-30, 33, 37, 40-43, 46-47 and 49-52 are not shown here; the
    // methods below additionally rely on the String, Identifier and Integer
    // kinds.
24  enum TokenKind {
25  // Markers
27 
28  // String values.
31 
32  // Integer values.
34  BigNum, // larger than 64 bits
35 
36  // Real values.
38 
39  // No-value.
44  Slash, // '/'
45  BackSlash, // '\'
48 
53  };
54 
55 private:
    /// The kind of this token. It has no in-class initializer, so it is only
    /// set by the two constructors that take a TokenKind.
56  TokenKind Kind;
57 
58  /// A reference to the entire token contents; this is always a pointer into
59  /// a memory buffer owned by the source manager.
60  StringRef Str;
61 
    /// The numeric value carried by the token; read back through getIntVal()
    /// and getAPIntVal().
62  APInt IntVal;
63 
64 public:
    /// Default constructor. NOTE(review): Kind is left uninitialized here, so
    /// getKind()/is()/isNot() must not be used on a default-constructed token
    /// until it has been assigned from a fully constructed one.
65  AsmToken() {}
    /// Construct a token of kind \p Kind spanning \p Str whose value is the
    /// arbitrary-precision integer \p IntVal.
66  AsmToken(TokenKind Kind, StringRef Str, APInt IntVal)
67  : Kind(Kind), Str(Str), IntVal(IntVal) {}
    /// Construct a token of kind \p Kind spanning \p Str. \p IntVal (0 by
    /// default) is stored as a 64-bit APInt; the trailing `true` is presumably
    /// APInt's "is signed" flag -- confirm against the APInt constructor.
68  AsmToken(TokenKind Kind, StringRef Str, int64_t IntVal = 0)
69  : Kind(Kind), Str(Str), IntVal(64, IntVal, true) {}
70 
    /// Return the kind of this token.
71  TokenKind getKind() const { return Kind; }
    /// Check whether this token has kind \p K.
72  bool is(TokenKind K) const { return Kind == K; }
    /// Check whether this token does not have kind \p K.
73  bool isNot(TokenKind K) const { return Kind != K; }
74 
    // Source-location accessors; declared here only, with no inline body.
75  SMLoc getLoc() const;
76  SMLoc getEndLoc() const;
77  SMRange getLocRange() const;
78 
79  /// Get the contents of a string token (without quotes).
    ///
    /// Asserts that the token kind is String, then returns Str with its first
    /// and last character dropped (slice takes the half-open range
    /// [1, size - 1)).
    // NOTE(review): the signature on listing line 80 is missing from this
    // extract; the index at the end of the file lists it as
    // `StringRef getStringContents() const`.
81  assert(Kind == String && "This token isn't a string!");
82  return Str.slice(1, Str.size() - 1);
83  }
84 
85  /// Get the identifier string for the current token, which should be an
86  /// identifier or a string. This gets the portion of the string which should
87  /// be used as the identifier, e.g., it does not include the quotes on
88  /// strings.
    ///
    /// Identifier tokens return getString() unchanged; every other kind is
    /// forwarded to getStringContents(), which asserts the kind is String.
    // NOTE(review): the signature on listing line 89 is missing from this
    // extract; the index at the end of the file lists it as
    // `StringRef getIdentifier() const`.
90  if (Kind == Identifier)
91  return getString();
92  return getStringContents();
93  }
94 
95  /// Get the string for the current token, this includes all characters (for
96  /// example, the quotes on strings) in the token.
97  ///
98  /// The returned StringRef points into the source manager's memory buffer, and
99  /// is safe to store across calls to Lex().
100  StringRef getString() const { return Str; }
101 
102  // FIXME: Don't compute this in advance, it makes every token larger, and is
103  // also not generally what we want (it is nicer for recovery etc. to lex 123br
104  // as a single token, then diagnose as an invalid number).
    /// Get the value of an Integer token (asserted). The stored APInt is read
    /// with getZExtValue(), i.e. as a zero-extended uint64_t, and that result
    /// is converted to int64_t by the return statement.
105  int64_t getIntVal() const {
106  assert(Kind == Integer && "This token isn't an integer!");
107  return IntVal.getZExtValue();
108  }
109 
    /// Get a copy of the stored arbitrary-precision value. Asserts that the
    /// token kind is Integer or BigNum.
110  APInt getAPIntVal() const {
111  assert((Kind == Integer || Kind == BigNum) &&
112  "This token isn't an integer!");
113  return IntVal;
114  }
115 };
116 
117 /// Generic assembler lexer interface, for use by target specific assembly
118 /// lexers.
119 class MCAsmLexer {
120  /// The current token, stored in the base class for faster access.
121  AsmToken CurTok;
122 
123  /// The location and description of the current error
124  SMLoc ErrLoc;
125  std::string Err;
126 
    // Copy construction and copy assignment are deleted: lexers are
    // non-copyable.
127  MCAsmLexer(const MCAsmLexer &) = delete;
128  void operator=(const MCAsmLexer &) = delete;
129 protected: // Can only create subclasses.
130  const char *TokStart;
    /// Flag written by setSkipSpace(); see that method's description.
131  bool SkipSpace;
    // NOTE(review): listing line 132 is missing from this extract; the index
    // at the end of the file lists `bool AllowAtInIdentifier` there.
133 
134  MCAsmLexer();
135 
    /// Produce the next token. Pure virtual: every concrete lexer supplies
    /// it, and Lex() is the public entry point that calls it.
136  virtual AsmToken LexToken() = 0;
137 
    /// Record an error location and message; they are read back through
    /// getErrLoc() and getErr().
138  void SetError(const SMLoc &errLoc, const std::string &err) {
139  ErrLoc = errLoc;
140  Err = err;
141  }
142 
143 public:
144  virtual ~MCAsmLexer();
145 
146  /// Consume the next token from the input stream and return it.
147  ///
148  /// The lexer will continuously return the end-of-file token once the end of
149  /// the main input file has been reached.
    ///
    /// The result of LexToken() is stored in CurTok and a reference to that
    /// member is returned, so the reference is overwritten by the next call.
150  const AsmToken &Lex() {
151  return CurTok = LexToken();
152  }
153 
    // Pure virtual; no description is given in this header.
154  virtual StringRef LexUntilEndOfStatement() = 0;
155 
156  /// Get the current source location.
157  SMLoc getLoc() const;
158 
159  /// Get the current (last) lexed token.
160  const AsmToken &getTok() const {
161  return CurTok;
162  }
163 
164  /// Look ahead at the next token to be lexed.
    /// The token is returned by value; \p ShouldSkipSpace defaults to true.
165  virtual const AsmToken peekTok(bool ShouldSkipSpace = true) = 0;
166 
167  /// Get the current error location
168  const SMLoc &getErrLoc() {
169  return ErrLoc;
170  }
171 
172  /// Get the current error string
173  const std::string &getErr() {
174  return Err;
175  }
176 
177  /// Get the kind of current token.
178  AsmToken::TokenKind getKind() const { return CurTok.getKind(); }
179 
180  /// Check if the current token has kind \p K.
181  bool is(AsmToken::TokenKind K) const { return CurTok.is(K); }
182 
183  /// Check if the current token does not have kind \p K.
184  bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); }
185 
186  /// Set whether spaces should be ignored by the lexer
187  void setSkipSpace(bool val) { SkipSpace = val; }
188 
    // NOTE(review): listing lines 189-190 are missing from this extract; the
    // index at the end of the file lists `bool getAllowAtInIdentifier()` and
    // `void setAllowAtInIdentifier(bool v)` there.
191 };
192 
193 } // End llvm namespace
194 
195 #endif
Represents a range in source code.
Definition: SMLoc.h:47
AsmToken(TokenKind Kind, StringRef Str, APInt IntVal)
Definition: MCAsmLexer.h:66
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1327
virtual const AsmToken peekTok(bool ShouldSkipSpace=true)=0
Look ahead at the next token to be lexed.
size_t size() const
size - Get the string size.
Definition: StringRef.h:113
TokenKind getKind() const
Definition: MCAsmLexer.h:71
void setSkipSpace(bool val)
Set whether spaces should be ignored by the lexer.
Definition: MCAsmLexer.h:187
virtual AsmToken LexToken()=0
StringRef getString() const
Get the string for the current token, this includes all characters (for example, the quotes on strings) in the token.
Definition: MCAsmLexer.h:100
bool isNot(TokenKind K) const
Definition: MCAsmLexer.h:73
SMLoc getLoc() const
Get the current source location.
Definition: MCAsmLexer.cpp:22
const AsmToken & getTok() const
Get the current (last) lexed token.
Definition: MCAsmLexer.h:160
Generic assembler lexer interface, for use by target specific assembly lexers.
Definition: MCAsmLexer.h:119
AsmToken::TokenKind getKind() const
Get the kind of current token.
Definition: MCAsmLexer.h:178
Target independent representation for an assembler token.
Definition: MCAsmLexer.h:22
This file implements a class to represent arbitrary precision integral constant values and operations...
bool isNot(AsmToken::TokenKind K) const
Check if the current token does not have kind K.
Definition: MCAsmLexer.h:184
int64_t getIntVal() const
Definition: MCAsmLexer.h:105
#define true
Definition: ConvertUTF.c:66
SMLoc getLoc() const
Definition: MCAsmLexer.cpp:26
StringRef getStringContents() const
Get the contents of a string token (without quotes).
Definition: MCAsmLexer.h:80
const char * TokStart
Definition: MCAsmLexer.h:130
const std::string & getErr()
Get the current error string.
Definition: MCAsmLexer.h:173
bool getAllowAtInIdentifier()
Definition: MCAsmLexer.h:189
bool is(AsmToken::TokenKind K) const
Check if the current token has kind K.
Definition: MCAsmLexer.h:181
AsmToken(TokenKind Kind, StringRef Str, int64_t IntVal=0)
Definition: MCAsmLexer.h:68
virtual StringRef LexUntilEndOfStatement()=0
SMRange getLocRange() const
Definition: MCAsmLexer.cpp:34
const SMLoc & getErrLoc()
Get the current error location.
Definition: MCAsmLexer.h:168
bool is(TokenKind K) const
Definition: MCAsmLexer.h:72
const AsmToken & Lex()
Consume the next token from the input stream and return it.
Definition: MCAsmLexer.h:150
Class for arbitrary precision integers.
Definition: APInt.h:73
StringRef getIdentifier() const
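Get the identifier string for the current token, which should be an identifier or a string. This gets the portion of the string which should be used as the identifier, e.g., it does not include the quotes on strings.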
Definition: MCAsmLexer.h:89
APInt getAPIntVal() const
Definition: MCAsmLexer.h:110
void SetError(const SMLoc &errLoc, const std::string &err)
Definition: MCAsmLexer.h:138
virtual ~MCAsmLexer()
Definition: MCAsmLexer.cpp:19
SMLoc getEndLoc() const
Definition: MCAsmLexer.cpp:30
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:40
StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition: StringRef.h:434
Represents a location in source code.
Definition: SMLoc.h:23
bool AllowAtInIdentifier
Definition: MCAsmLexer.h:132
void setAllowAtInIdentifier(bool v)
Definition: MCAsmLexer.h:190