LLVM  6.0.0svn
MCAsmLexer.h
Go to the documentation of this file.
1 //===- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface ------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #ifndef LLVM_MC_MCPARSER_MCASMLEXER_H
11 #define LLVM_MC_MCPARSER_MCASMLEXER_H
12 
13 #include "llvm/ADT/APInt.h"
14 #include "llvm/ADT/ArrayRef.h"
15 #include "llvm/ADT/SmallVector.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/Support/SMLoc.h"
18 #include <algorithm>
19 #include <cassert>
20 #include <cstddef>
21 #include <cstdint>
22 #include <string>
23 
24 namespace llvm {
25 
26 /// Target independent representation for an assembler token.
27 class AsmToken {
28 public:
29  enum TokenKind {
30  // Markers
32 
33  // String values.
36 
37  // Integer values.
39  BigNum, // larger than 64 bits
40 
41  // Real values.
43 
44  // Comments
47  // No-value.
52  Slash, // '/'
53  BackSlash, // '\'
56 
61 
62  // MIPS unary expression operators such as %neg.
69  };
70 
71 private:
72  TokenKind Kind;
73 
74  /// A reference to the entire token contents; this is always a pointer into
75  /// a memory buffer owned by the source manager.
76  StringRef Str;
77 
78  APInt IntVal;
79 
80 public:
81  AsmToken() = default;
// NOTE(review): Kind is declared without an in-class initializer in the
// visible listing, so it looks unset after default construction — confirm.
/// Build a token of kind \p Kind spanning the text \p Str. The APInt
/// \p IntVal is taken by value and moved into the token.
82  AsmToken(TokenKind Kind, StringRef Str, APInt IntVal)
83  : Kind(Kind), Str(Str), IntVal(std::move(IntVal)) {}
/// Build a token of kind \p Kind spanning the text \p Str. \p IntVal
/// (default 0) is stored as a 64-bit APInt constructed with the signed flag.
84  AsmToken(TokenKind Kind, StringRef Str, int64_t IntVal = 0)
85  : Kind(Kind), Str(Str), IntVal(64, IntVal, true) {}
86 
87  TokenKind getKind() const { return Kind; } // Return this token's kind.
88  bool is(TokenKind K) const { return Kind == K; } // True if this token's kind equals K.
89  bool isNot(TokenKind K) const { return Kind != K; } // True if this token's kind differs from K.
90 
91  SMLoc getLoc() const;
92  SMLoc getEndLoc() const;
93  SMRange getLocRange() const;
94 
95  /// Get the contents of a string token (without quotes).
97  assert(Kind == String && "This token isn't a string!");
98  return Str.slice(1, Str.size() - 1);
99  }
100 
101  /// Get the identifier string for the current token, which should be an
102  /// identifier or a string. This gets the portion of the string which should
103  /// be used as the identifier, e.g., it does not include the quotes on
104  /// strings.
106  if (Kind == Identifier)
107  return getString();
108  return getStringContents();
109  }
110 
111  /// Get the string for the current token; this includes all characters (for
112  /// example, the quotes on strings) in the token.
113  ///
114  /// The returned StringRef points into the source manager's memory buffer, and
115  /// is safe to store across calls to Lex().
116  StringRef getString() const { return Str; }
117 
118  // FIXME: Don't compute this in advance, it makes every token larger, and is
119  // also not generally what we want (it is nicer for recovery etc. to lex 123br
120  // as a single token, then diagnose as an invalid number).
/// Get the value of an integer token as an int64_t.
///
/// Asserts that the token kind is Integer (a BigNum token fails the assert).
/// The stored APInt is read with getZExtValue() and the unsigned result is
/// converted to the signed return type.
121  int64_t getIntVal() const {
122  assert(Kind == Integer && "This token isn't an integer!");
123  return IntVal.getZExtValue();
124  }
125 
/// Get the value of an Integer or BigNum token as an APInt, returned by value.
/// Asserts on any other token kind.
126  APInt getAPIntVal() const {
127  assert((Kind == Integer || Kind == BigNum) &&
128  "This token isn't an integer!");
129  return IntVal;
130  }
131 };
132 
133 /// A callback class which is notified of each comment in an assembly file as
134 /// it is lexed.
136 public:
137  virtual ~AsmCommentConsumer() = default;
138 
139  /// Callback function for when a comment is lexed. Loc is the start of the
140  /// comment text (excluding the comment-start marker). CommentText is the text
141  /// of the comment, excluding the comment start and end markers, and the
142  /// newline for single-line comments.
143  virtual void HandleComment(SMLoc Loc, StringRef CommentText) = 0;
144 };
145 
146 
147 /// Generic assembler lexer interface, for use by target specific assembly
148 /// lexers.
149 class MCAsmLexer {
150  /// The current token, stored in the base class for faster access.
152 
153  /// The location and description of the current error
154  SMLoc ErrLoc;
155  std::string Err;
156 
157 protected: // Can only create subclasses.
158  const char *TokStart = nullptr;
159  bool SkipSpace = true;
161  bool IsAtStartOfStatement = true;
162  AsmCommentConsumer *CommentConsumer = nullptr;
163 
165  MCAsmLexer();
166 
167  virtual AsmToken LexToken() = 0;
168 
/// Record an error: stores \p errLoc in ErrLoc and copies \p err into Err,
/// replacing whatever was recorded before.
169  void SetError(SMLoc errLoc, const std::string &err) {
170  ErrLoc = errLoc;
171  Err = err;
172  }
173 
174 public:
175  MCAsmLexer(const MCAsmLexer &) = delete;
176  MCAsmLexer &operator=(const MCAsmLexer &) = delete;
177  virtual ~MCAsmLexer();
178 
180  return AltMacroMode;
181  }
182 
/// Set the AltMacroMode flag to \p AltMacroSet.
183  void SetAltMacroMode(bool AltMacroSet) {
184  AltMacroMode = AltMacroSet;
185  }
186 
187  /// Consume the next token from the input stream and return it.
188  ///
189  /// The lexer will continuously return the end-of-file token once the end of
190  /// the main input file has been reached.
///
/// The token buffer must be non-empty on entry (asserted). As a side effect,
/// IsAtStartOfStatement is set to whether the token being consumed is an
/// EndOfStatement.
191  const AsmToken &Lex() {
192  assert(!CurTok.empty());
193  // Record whether the token being consumed is an EndOfStatement.
194  IsAtStartOfStatement = CurTok.front().getKind() == AsmToken::EndOfStatement;
195  CurTok.erase(CurTok.begin());
196  // LexToken may generate multiple tokens via UnLex but will always return
197  // the first one. Place returned value at head of CurTok vector.
// LexToken() is only called when no buffered token is left after the erase.
198  if (CurTok.empty()) {
199  AsmToken T = LexToken();
200  CurTok.insert(CurTok.begin(), T);
201  }
202  return CurTok.front();
203  }
204 
/// Push \p Token back onto the front of the token buffer, so that it becomes
/// the token returned by getTok(). Clears IsAtStartOfStatement.
205  void UnLex(AsmToken const &Token) {
206  IsAtStartOfStatement = false;
207  CurTok.insert(CurTok.begin(), Token);
208  }
209 
/// Return the IsAtStartOfStatement flag. It starts out true, Lex() sets it to
/// whether the token it consumed was an EndOfStatement, and UnLex() clears it.
210  bool isAtStartOfStatement() { return IsAtStartOfStatement; }
211 
212  virtual StringRef LexUntilEndOfStatement() = 0;
213 
214  /// Get the current source location.
215  SMLoc getLoc() const;
216 
217  /// Get the current (last) lexed token, i.e. the first element of the token
/// buffer. Assumes the buffer is non-empty; no check is made here.
218  const AsmToken &getTok() const {
219  return CurTok[0];
220  }
221 
222  /// Look ahead at the next token to be lexed.
///
/// Delegates to peekTokens() with a one-element buffer and returns a copy of
/// the token that was filled in. \p ShouldSkipSpace is forwarded unchanged.
/// Asserts that exactly one token was read.
223  const AsmToken peekTok(bool ShouldSkipSpace = true) {
224  AsmToken Tok;
225 
226  MutableArrayRef<AsmToken> Buf(Tok);
227  size_t ReadCount = peekTokens(Buf, ShouldSkipSpace);
228 
229  assert(ReadCount == 1);
230  (void)ReadCount; // Keeps ReadCount referenced when the assert is compiled out.
231 
232  return Tok;
233  }
234 
235  /// Look ahead an arbitrary number of tokens.
236  virtual size_t peekTokens(MutableArrayRef<AsmToken> Buf,
237  bool ShouldSkipSpace = true) = 0;
238 
239  /// Get the current error location
241  return ErrLoc;
242  }
243 
244  /// Get the current error string, i.e. the text most recently stored by
/// SetError().
245  const std::string &getErr() {
246  return Err;
247  }
248 
249  /// Get the kind of the current token (shorthand for getTok().getKind()).
250  AsmToken::TokenKind getKind() const { return getTok().getKind(); }
251 
252  /// Check if the current token has kind \p K (shorthand for getTok().is(K)).
253  bool is(AsmToken::TokenKind K) const { return getTok().is(K); }
254 
255  /// Check if the current token does not have kind \p K.
256  bool isNot(AsmToken::TokenKind K) const { return getTok().isNot(K); }
257 
258  /// Set whether spaces should be ignored by the lexer. Stores \p val in
/// SkipSpace, which defaults to true.
259  void setSkipSpace(bool val) { SkipSpace = val; }
260 
/// Return the AllowAtInIdentifier flag.
261  bool getAllowAtInIdentifier() { return AllowAtInIdentifier; }
/// Set the AllowAtInIdentifier flag to \p v.
262  void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; }
263 
/// Store \p CommentConsumer as the lexer's comment consumer (the member is
/// nullptr by default). The pointer is stored as-is.
264  void setCommentConsumer(AsmCommentConsumer *CommentConsumer) {
265  this->CommentConsumer = CommentConsumer; // `this->` needed: the parameter shadows the member.
266  }
267 };
268 
269 } // end namespace llvm
270 
271 #endif // LLVM_MC_MCPARSER_MCASMLEXER_H
Represents a range in source code.
Definition: SMLoc.h:49
AsmToken(TokenKind Kind, StringRef Str, APInt IntVal)
Definition: MCAsmLexer.h:82
const AsmToken & getTok() const
Get the current (last) lexed token.
Definition: MCAsmLexer.h:218
StringRef getString() const
Get the string for the current token, this includes all characters (for example, the quotes on string...
Definition: MCAsmLexer.h:116
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1542
AsmToken()=default
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
void setSkipSpace(bool val)
Set whether spaces should be ignored by the lexer.
Definition: MCAsmLexer.h:259
bool is(AsmToken::TokenKind K) const
Check if the current token has kind K.
Definition: MCAsmLexer.h:253
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE size_t size() const
size - Get the string size.
Definition: StringRef.h:138
bool isNot(TokenKind K) const
Definition: MCAsmLexer.h:89
bool IsaAltMacroMode()
Definition: MCAsmLexer.h:179
SMRange getLocRange() const
Definition: MCAsmLexer.cpp:34
StringRef getIdentifier() const
Get the identifier string for the current token, which should be an identifier or a string...
Definition: MCAsmLexer.h:105
Definition: BitVector.h:920
Generic assembler lexer interface, for use by target specific assembly lexers.
Definition: MCAsmLexer.h:149
void SetAltMacroMode(bool AltMacroSet)
Definition: MCAsmLexer.h:183
Target independent representation for an assembler token.
Definition: MCAsmLexer.h:27
This file implements a class to represent arbitrary precision integral constant values and operations...
#define T
SMLoc getLoc() const
Definition: MCAsmLexer.cpp:26
void SetError(SMLoc errLoc, const std::string &err)
Definition: MCAsmLexer.h:169
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:291
SMLoc getEndLoc() const
Definition: MCAsmLexer.cpp:30
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:116
const std::string & getErr()
Get the current error string.
Definition: MCAsmLexer.h:245
int64_t getIntVal() const
Definition: MCAsmLexer.h:121
const AsmToken peekTok(bool ShouldSkipSpace=true)
Look ahead at the next token to be lexed.
Definition: MCAsmLexer.h:223
bool getAllowAtInIdentifier()
Definition: MCAsmLexer.h:261
iterator erase(const_iterator CI)
Definition: SmallVector.h:449
void setCommentConsumer(AsmCommentConsumer *CommentConsumer)
Definition: MCAsmLexer.h:264
AsmToken(TokenKind Kind, StringRef Str, int64_t IntVal=0)
Definition: MCAsmLexer.h:84
AsmToken::TokenKind getKind() const
Get the kind of current token.
Definition: MCAsmLexer.h:250
void UnLex(AsmToken const &Token)
Definition: MCAsmLexer.h:205
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:864
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition: StringRef.h:710
const AsmToken & Lex()
Consume the next token from the input stream and return it.
Definition: MCAsmLexer.h:191
bool is(TokenKind K) const
Definition: MCAsmLexer.h:88
Class for arbitrary precision integers.
Definition: APInt.h:69
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:482
Basic Alias true
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:61
APInt getAPIntVal() const
Definition: MCAsmLexer.h:126
StringRef getStringContents() const
Get the contents of a string token (without quotes).
Definition: MCAsmLexer.h:96
bool isNot(AsmToken::TokenKind K) const
Check if the current token has kind K.
Definition: MCAsmLexer.h:256
A callback class which is notified of each comment in an assembly file as it is lexed.
Definition: MCAsmLexer.h:135
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SMLoc getErrLoc()
Get the current error location.
Definition: MCAsmLexer.h:240
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
Represents a location in source code.
Definition: SMLoc.h:24
bool isAtStartOfStatement()
Definition: MCAsmLexer.h:210
bool AllowAtInIdentifier
Definition: MCAsmLexer.h:160
void setAllowAtInIdentifier(bool v)
Definition: MCAsmLexer.h:262
TokenKind getKind() const
Definition: MCAsmLexer.h:87