LLVM  4.0.0
MCAsmLexer.h
Go to the documentation of this file.
1 //===-- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface -----*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #ifndef LLVM_MC_MCPARSER_MCASMLEXER_H
11 #define LLVM_MC_MCPARSER_MCASMLEXER_H
12 
13 #include "llvm/ADT/APInt.h"
14 #include "llvm/ADT/ArrayRef.h"
15 #include "llvm/ADT/SmallVector.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/Support/Compiler.h"
18 #include "llvm/Support/DataTypes.h"
19 #include "llvm/Support/SMLoc.h"
20 #include <utility>
21 
22 namespace llvm {
23 
24 /// Target independent representation for an assembler token.
25 class AsmToken {
26 public:
27  enum TokenKind {
28  // Markers
30 
31  // String values.
34 
35  // Integer values.
37  BigNum, // larger than 64 bits
38 
39  // Real values.
41 
42  // Comments
45  // No-value.
50  Slash, // '/'
51  BackSlash, // '\'
54 
59 
60  // MIPS unary expression operators such as %neg.
67  };
68 
69 private:
70  TokenKind Kind;
71 
72  /// A reference to the entire token contents; this is always a pointer into
73  /// a memory buffer owned by the source manager.
74  StringRef Str;
75 
76  APInt IntVal;
77 
78 public:
79  AsmToken() : Kind() {} // Value-initialize Kind (enumerator value 0) so a default token never has an indeterminate kind.
80  AsmToken(TokenKind Kind, StringRef Str, APInt IntVal)
81  : Kind(Kind), Str(Str), IntVal(std::move(IntVal)) {}
82  AsmToken(TokenKind Kind, StringRef Str, int64_t IntVal = 0)
83  : Kind(Kind), Str(Str), IntVal(64, IntVal, true) {}
84 
85  TokenKind getKind() const { return Kind; }
86  bool is(TokenKind K) const { return Kind == K; }
87  bool isNot(TokenKind K) const { return Kind != K; }
88 
89  SMLoc getLoc() const;
90  SMLoc getEndLoc() const;
91  SMRange getLocRange() const;
92 
93  /// Get the contents of a string token (without quotes).
95  assert(Kind == String && "This token isn't a string!");
96  return Str.slice(1, Str.size() - 1);
97  }
98 
99  /// Get the identifier string for the current token, which should be an
100  /// identifier or a string. This gets the portion of the string which should
101  /// be used as the identifier, e.g., it does not include the quotes on
102  /// strings.
104  if (Kind == Identifier)
105  return getString();
106  return getStringContents();
107  }
108 
109  /// Get the string for the current token, this includes all characters (for
110  /// example, the quotes on strings) in the token.
111  ///
112  /// The returned StringRef points into the source manager's memory buffer, and
113  /// is safe to store across calls to Lex().
114  StringRef getString() const { return Str; }
115 
116  // FIXME: Don't compute this in advance, it makes every token larger, and is
117  // also not generally what we want (it is nicer for recovery etc. to lex 123br
118  // as a single token, then diagnose as an invalid number).
119  int64_t getIntVal() const {
120  assert(Kind == Integer && "This token isn't an integer!");
121  return IntVal.getZExtValue();
122  }
123 
124  APInt getAPIntVal() const {
125  assert((Kind == Integer || Kind == BigNum) &&
126  "This token isn't an integer!");
127  return IntVal;
128  }
129 };
130 
131 /// A callback class which is notified of each comment in an assembly file as
132 /// it is lexed.
134 public:
135  virtual ~AsmCommentConsumer() = default; // Virtual: this is an abstract callback interface.
136 
137  /// Callback function for when a comment is lexed. Loc is the start of the
138  /// comment text (excluding the comment-start marker). CommentText is the text
139  /// of the comment, excluding the comment start and end markers, and the
140  /// newline for single-line comments.
141  virtual void HandleComment(SMLoc Loc, StringRef CommentText) = 0;
142 };
143 
144 
145 /// Generic assembler lexer interface, for use by target specific assembly
146 /// lexers.
147 class MCAsmLexer {
148  /// The current token, stored in the base class for faster access.
150 
151  /// The location and description of the current error
152  SMLoc ErrLoc;
153  std::string Err;
154 
155  MCAsmLexer(const MCAsmLexer &) = delete;
156  void operator=(const MCAsmLexer &) = delete;
157 protected: // Can only create subclasses.
158  const char *TokStart;
159  bool SkipSpace;
163 
164  MCAsmLexer();
165 
166  virtual AsmToken LexToken() = 0;
167 
168  void SetError(SMLoc errLoc, const std::string &err) {
169  ErrLoc = errLoc;
170  Err = err;
171  }
172 
173 public:
174  virtual ~MCAsmLexer();
175 
176  /// Consume the next token from the input stream and return it.
177  ///
178  /// The lexer will continuously return the end-of-file token once the end of
179  /// the main input file has been reached.
180  const AsmToken &Lex() {
181  assert(!CurTok.empty());
182  // Record whether the token being consumed is an EndOfStatement.
183  IsAtStartOfStatement = CurTok.front().getKind() == AsmToken::EndOfStatement;
184  CurTok.erase(CurTok.begin());
185  // LexToken may generate multiple tokens via UnLex but will always return
186  // the first one. Place the returned value at the head of the CurTok vector.
187  if (CurTok.empty()) {
188  AsmToken T = LexToken();
189  CurTok.insert(CurTok.begin(), T);
190  }
191  return CurTok.front();
192  }
193 
194  void UnLex(AsmToken const &Token) {
195  IsAtStartOfStatement = false;
196  CurTok.insert(CurTok.begin(), Token);
197  }
198 
200 
201  virtual StringRef LexUntilEndOfStatement() = 0;
202 
203  /// Get the current source location.
204  SMLoc getLoc() const;
205 
206  /// Get the current (last) lexed token.
207  const AsmToken &getTok() const {
208  return CurTok[0];
209  }
210 
211  /// Look ahead at the next token to be lexed.
212  const AsmToken peekTok(bool ShouldSkipSpace = true) {
213  AsmToken Tok;
214 
215  MutableArrayRef<AsmToken> Buf(Tok);
216  size_t ReadCount = peekTokens(Buf, ShouldSkipSpace);
217 
218  assert(ReadCount == 1);
219  (void)ReadCount;
220 
221  return Tok;
222  }
223 
224  /// Look ahead an arbitrary number of tokens.
225  virtual size_t peekTokens(MutableArrayRef<AsmToken> Buf,
226  bool ShouldSkipSpace = true) = 0;
227 
228  /// Get the current error location
230  return ErrLoc;
231  }
232 
233  /// Get the current error string
234  const std::string &getErr() {
235  return Err;
236  }
237 
238  /// Get the kind of current token.
239  AsmToken::TokenKind getKind() const { return getTok().getKind(); }
240 
241  /// Check if the current token has kind \p K.
242  bool is(AsmToken::TokenKind K) const { return getTok().is(K); }
243 
244  /// Check if the current token does not have kind \p K.
245  bool isNot(AsmToken::TokenKind K) const { return getTok().isNot(K); }
246 
247  /// Set whether spaces should be ignored by the lexer
248  void setSkipSpace(bool val) { SkipSpace = val; }
249 
252 
254  this->CommentConsumer = CommentConsumer;
255  }
256 };
257 
258 } // End llvm namespace
259 
260 #endif
Represents a range in source code.
Definition: SMLoc.h:49
AsmToken(TokenKind Kind, StringRef Str, APInt IntVal)
Definition: MCAsmLexer.h:80
AsmCommentConsumer * CommentConsumer
Definition: MCAsmLexer.h:162
uint64_t Token
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1309
TokenKind getKind() const
Definition: MCAsmLexer.h:85
void setSkipSpace(bool val)
Set whether spaces should be ignored by the lexer.
Definition: MCAsmLexer.h:248
virtual AsmToken LexToken()=0
StringRef getString() const
Get the string for the current token, this includes all characters (for example, the quotes on string...
Definition: MCAsmLexer.h:114
bool isNot(TokenKind K) const
Definition: MCAsmLexer.h:87
SMLoc getLoc() const
Get the current source location.
Definition: MCAsmLexer.cpp:24
const AsmToken & getTok() const
Get the current (last) lexed token.
Definition: MCAsmLexer.h:207
Generic assembler lexer interface, for use by target specific assembly lexers.
Definition: MCAsmLexer.h:147
AsmToken::TokenKind getKind() const
Get the kind of current token.
Definition: MCAsmLexer.h:239
Target independent representation for an assembler token.
Definition: MCAsmLexer.h:25
This file implements a class to represent arbitrary precision integral constant values and operations...
bool isNot(AsmToken::TokenKind K) const
Check if the current token does not have kind K.
Definition: MCAsmLexer.h:245
#define T
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition: StringRef.h:699
int64_t getIntVal() const
Definition: MCAsmLexer.h:119
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE size_t size() const
size - Get the string size.
Definition: StringRef.h:135
virtual ~AsmCommentConsumer()
Definition: MCAsmLexer.h:135
void SetError(SMLoc errLoc, const std::string &err)
Definition: MCAsmLexer.h:168
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:283
SMLoc getLoc() const
Definition: MCAsmLexer.cpp:28
StringRef getStringContents() const
Get the contents of a string token (without quotes).
Definition: MCAsmLexer.h:94
const char * TokStart
Definition: MCAsmLexer.h:158
const std::string & getErr()
Get the current error string.
Definition: MCAsmLexer.h:234
const AsmToken peekTok(bool ShouldSkipSpace=true)
Look ahead at the next token to be lexed.
Definition: MCAsmLexer.h:212
bool getAllowAtInIdentifier()
Definition: MCAsmLexer.h:250
void setCommentConsumer(AsmCommentConsumer *CommentConsumer)
Definition: MCAsmLexer.h:253
bool is(AsmToken::TokenKind K) const
Check if the current token has kind K.
Definition: MCAsmLexer.h:242
AsmToken(TokenKind Kind, StringRef Str, int64_t IntVal=0)
Definition: MCAsmLexer.h:82
virtual StringRef LexUntilEndOfStatement()=0
SMRange getLocRange() const
Definition: MCAsmLexer.cpp:36
void UnLex(AsmToken const &Token)
Definition: MCAsmLexer.h:194
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:843
bool is(TokenKind K) const
Definition: MCAsmLexer.h:86
const AsmToken & Lex()
Consume the next token from the input stream and return it.
Definition: MCAsmLexer.h:180
Class for arbitrary precision integers.
Definition: APInt.h:77
bool IsAtStartOfStatement
Definition: MCAsmLexer.h:161
Basic Alias true
StringRef getIdentifier() const
Get the identifier string for the current token, which should be an identifier or a string...
Definition: MCAsmLexer.h:103
APInt getAPIntVal() const
Definition: MCAsmLexer.h:124
virtual ~MCAsmLexer()
Definition: MCAsmLexer.cpp:21
A callback class which is notified of each comment in an assembly file as it is lexed.
Definition: MCAsmLexer.h:133
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SMLoc getEndLoc() const
Definition: MCAsmLexer.cpp:32
SMLoc getErrLoc()
Get the current error location.
Definition: MCAsmLexer.h:229
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:47
virtual size_t peekTokens(MutableArrayRef< AsmToken > Buf, bool ShouldSkipSpace=true)=0
Look ahead an arbitrary number of tokens.
Represents a location in source code.
Definition: SMLoc.h:24
bool isAtStartOfStatement()
Definition: MCAsmLexer.h:199
bool AllowAtInIdentifier
Definition: MCAsmLexer.h:160
virtual void HandleComment(SMLoc Loc, StringRef CommentText)=0
Callback function for when a comment is lexed.
void setAllowAtInIdentifier(bool v)
Definition: MCAsmLexer.h:251