LLVM 22.0.0git
AsmLexer.h
Go to the documentation of this file.
1//===- AsmLexer.h - Lexer for Assembly Files --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This class declares the lexer for assembly files.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_MC_MCPARSER_ASMLEXER_H
14#define LLVM_MC_MCPARSER_ASMLEXER_H
15
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmMacro.h"
#include "llvm/Support/Compiler.h"
#include <cassert>
#include <cstddef>
#include <string>
#include <utility>
25
26namespace llvm {
27
28class MCAsmInfo;
29
30/// A callback class which is notified of each comment in an assembly file as
31/// it is lexed.
33public:
34 virtual ~AsmCommentConsumer() = default;
35
36 /// Callback function for when a comment is lexed. Loc is the start of the
37 /// comment text (excluding the comment-start marker). CommentText is the text
38 /// of the comment, excluding the comment start and end markers, and the
39 /// newline for single-line comments.
40 virtual void HandleComment(SMLoc Loc, StringRef CommentText) = 0;
41};
42
43class AsmLexer {
44 /// The current token, stored in the base class for faster access.
46
47 const char *CurPtr = nullptr;
48 StringRef CurBuf;
49
50 /// The location and description of the current error
51 SMLoc ErrLoc;
52 std::string Err;
53
54 const MCAsmInfo &MAI;
55
56 bool IsAtStartOfLine = true;
57 bool JustConsumedEOL = true;
58 bool IsPeeking = false;
59 bool EndStatementAtEOF = true;
60
61 const char *TokStart = nullptr;
62 bool SkipSpace = true;
63 bool AllowAtInIdentifier = false;
64 bool AllowHashInIdentifier = false;
65 bool IsAtStartOfStatement = true;
66 bool LexMasmHexFloats = false;
67 bool LexMasmIntegers = false;
68 bool LexMasmStrings = false;
69 bool LexMotorolaIntegers = false;
70 bool UseMasmDefaultRadix = false;
71 unsigned DefaultRadix = 10;
72 bool LexHLASMIntegers = false;
73 bool LexHLASMStrings = false;
74 AsmCommentConsumer *CommentConsumer = nullptr;
75
76 LLVM_ABI AsmToken LexToken();
77
78 void SetError(SMLoc errLoc, const std::string &err) {
79 ErrLoc = errLoc;
80 Err = err;
81 }
82
83public:
84 LLVM_ABI AsmLexer(const MCAsmInfo &MAI);
85 AsmLexer(const AsmLexer &) = delete;
86 AsmLexer &operator=(const AsmLexer &) = delete;
87
88 /// Consume the next token from the input stream and return it.
89 ///
90 /// The lexer will continuously return the end-of-file token once the end of
91 /// the main input file has been reached.
92 const AsmToken &Lex() {
93 assert(!CurTok.empty());
94 // Mark if we parsing out a EndOfStatement.
95 JustConsumedEOL = CurTok.front().getKind() == AsmToken::EndOfStatement;
96 CurTok.erase(CurTok.begin());
97 // LexToken may generate multiple tokens via UnLex but will always return
98 // the first one. Place returned value at head of CurTok vector.
99 if (CurTok.empty()) {
100 AsmToken T = LexToken();
101 CurTok.insert(CurTok.begin(), T);
102 }
103 return CurTok.front();
104 }
105
106 void UnLex(AsmToken const &Token) {
107 CurTok.insert(CurTok.begin(), Token);
108 }
109
110 bool justConsumedEOL() { return JustConsumedEOL; }
111
113
114 /// Get the current source location.
115 SMLoc getLoc() const { return SMLoc::getFromPointer(TokStart); }
116
117 /// Get the current (last) lexed token.
118 const AsmToken &getTok() const { return CurTok[0]; }
119
120 /// Look ahead at the next token to be lexed.
121 const AsmToken peekTok(bool ShouldSkipSpace = true) {
122 AsmToken Tok;
123
125 size_t ReadCount = peekTokens(Buf, ShouldSkipSpace);
126
127 assert(ReadCount == 1);
128 (void)ReadCount;
129
130 return Tok;
131 }
132
133 /// Look ahead an arbitrary number of tokens.
135 bool ShouldSkipSpace = true);
136
137 /// Get the current error location
138 SMLoc getErrLoc() { return ErrLoc; }
139
140 /// Get the current error string
141 const std::string &getErr() { return Err; }
142
143 /// Get the kind of current token.
145
146 /// Check if the current token has kind \p K.
147 bool is(AsmToken::TokenKind K) const { return getTok().is(K); }
148
149 /// Check if the current token has kind \p K.
150 bool isNot(AsmToken::TokenKind K) const { return getTok().isNot(K); }
151
152 /// Set whether spaces should be ignored by the lexer
153 void setSkipSpace(bool val) { SkipSpace = val; }
154
155 bool getAllowAtInIdentifier() { return AllowAtInIdentifier; }
156 void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; }
157
158 void setAllowHashInIdentifier(bool V) { AllowHashInIdentifier = V; }
159
160 void setCommentConsumer(AsmCommentConsumer *CommentConsumer) {
161 this->CommentConsumer = CommentConsumer;
162 }
163
164 /// Set whether to lex masm-style binary (e.g., 0b1101) and radix-specified
165 /// literals (e.g., 0ABCh [hex], 576t [decimal], 77o [octal], 1101y [binary]).
166 void setLexMasmIntegers(bool V) { LexMasmIntegers = V; }
167
168 /// Set whether to use masm-style default-radix integer literals. If disabled,
169 /// assume decimal unless prefixed (e.g., 0x2c [hex], 077 [octal]).
170 void useMasmDefaultRadix(bool V) { UseMasmDefaultRadix = V; }
171
172 unsigned getMasmDefaultRadix() const { return DefaultRadix; }
173 void setMasmDefaultRadix(unsigned Radix) { DefaultRadix = Radix; }
174
175 /// Set whether to lex masm-style hex float literals, such as 3f800000r.
176 void setLexMasmHexFloats(bool V) { LexMasmHexFloats = V; }
177
178 /// Set whether to lex masm-style string literals, such as 'Can''t find file'
179 /// and "This ""value"" not found".
180 void setLexMasmStrings(bool V) { LexMasmStrings = V; }
181
182 /// Set whether to lex Motorola-style integer literals, such as $deadbeef or
183 /// %01010110.
184 void setLexMotorolaIntegers(bool V) { LexMotorolaIntegers = V; }
185
186 /// Set whether to lex HLASM-flavour integers. For now this is only [0-9]*
187 void setLexHLASMIntegers(bool V) { LexHLASMIntegers = V; }
188
189 /// Set whether to "lex" HLASM-flavour character and string literals. For now,
190 /// setting this option to true, will disable lexing for character and string
191 /// literals.
192 void setLexHLASMStrings(bool V) { LexHLASMStrings = V; }
193
194 LLVM_ABI void setBuffer(StringRef Buf, const char *ptr = nullptr,
195 bool EndStatementAtEOF = true);
196
197 const MCAsmInfo &getMAI() const { return MAI; }
198
199private:
200 bool isAtStartOfComment(const char *Ptr);
201 bool isAtStatementSeparator(const char *Ptr);
202 [[nodiscard]] int getNextChar();
203 int peekNextChar();
204 AsmToken ReturnError(const char *Loc, const std::string &Msg);
205
206 AsmToken LexIdentifier();
207 AsmToken LexSlash();
208 AsmToken LexLineComment();
209 AsmToken LexDigit();
210 AsmToken LexSingleQuote();
211 AsmToken LexQuote();
212 AsmToken LexFloatLiteral();
213 AsmToken LexHexFloatLiteral(bool NoIntDigits);
214
215 StringRef LexUntilEndOfLine();
216};
217
218} // end namespace llvm
219
220#endif // LLVM_MC_MCPARSER_ASMLEXER_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define LLVM_ABI
Definition: Compiler.h:213
This file defines the SmallVector class.
A callback class which is notified of each comment in an assembly file as it is lexed.
Definition: AsmLexer.h:32
virtual ~AsmCommentConsumer()=default
virtual void HandleComment(SMLoc Loc, StringRef CommentText)=0
Callback function for when a comment is lexed.
void setLexHLASMStrings(bool V)
Set whether to "lex" HLASM-flavour character and string literals.
Definition: AsmLexer.h:192
void setLexMasmIntegers(bool V)
Set whether to lex masm-style binary (e.g., 0b1101) and radix-specified literals (e.g., 0ABCh [hex], 576t [decimal], 77o [octal], 1101y [binary]).
Definition: AsmLexer.h:166
SMLoc getLoc() const
Get the current source location.
Definition: AsmLexer.h:115
void setLexMasmStrings(bool V)
Set whether to lex masm-style string literals, such as 'Can''t find file' and "This ""value"" not found".
Definition: AsmLexer.h:180
const AsmToken peekTok(bool ShouldSkipSpace=true)
Look ahead at the next token to be lexed.
Definition: AsmLexer.h:121
bool getAllowAtInIdentifier()
Definition: AsmLexer.h:155
void UnLex(AsmToken const &Token)
Definition: AsmLexer.h:106
void setMasmDefaultRadix(unsigned Radix)
Definition: AsmLexer.h:173
AsmToken::TokenKind getKind() const
Get the kind of current token.
Definition: AsmLexer.h:144
void setLexMasmHexFloats(bool V)
Set whether to lex masm-style hex float literals, such as 3f800000r.
Definition: AsmLexer.h:176
const MCAsmInfo & getMAI() const
Definition: AsmLexer.h:197
const AsmToken & getTok() const
Get the current (last) lexed token.
Definition: AsmLexer.h:118
bool is(AsmToken::TokenKind K) const
Check if the current token has kind K.
Definition: AsmLexer.h:147
void setLexMotorolaIntegers(bool V)
Set whether to lex Motorola-style integer literals, such as $deadbeef or %01010110.
Definition: AsmLexer.h:184
SMLoc getErrLoc()
Get the current error location.
Definition: AsmLexer.h:138
bool justConsumedEOL()
Definition: AsmLexer.h:110
AsmLexer(const AsmLexer &)=delete
const std::string & getErr()
Get the current error string.
Definition: AsmLexer.h:141
const AsmToken & Lex()
Consume the next token from the input stream and return it.
Definition: AsmLexer.h:92
void setSkipSpace(bool val)
Set whether spaces should be ignored by the lexer.
Definition: AsmLexer.h:153
void setAllowAtInIdentifier(bool v)
Definition: AsmLexer.h:156
LLVM_ABI StringRef LexUntilEndOfStatement()
Definition: AsmLexer.cpp:744
AsmLexer & operator=(const AsmLexer &)=delete
LLVM_ABI void setBuffer(StringRef Buf, const char *ptr=nullptr, bool EndStatementAtEOF=true)
Definition: AsmLexer.cpp:121
unsigned getMasmDefaultRadix() const
Definition: AsmLexer.h:172
void useMasmDefaultRadix(bool V)
Set whether to use masm-style default-radix integer literals.
Definition: AsmLexer.h:170
void setLexHLASMIntegers(bool V)
Set whether to lex HLASM-flavour integers. For now this is only [0-9]*.
Definition: AsmLexer.h:187
bool isNot(AsmToken::TokenKind K) const
Check if the current token does not have kind K.
Definition: AsmLexer.h:150
LLVM_ABI size_t peekTokens(MutableArrayRef< AsmToken > Buf, bool ShouldSkipSpace=true)
Look ahead an arbitrary number of tokens.
Definition: AsmLexer.cpp:764
void setCommentConsumer(AsmCommentConsumer *CommentConsumer)
Definition: AsmLexer.h:160
void setAllowHashInIdentifier(bool V)
Definition: AsmLexer.h:158
Target independent representation for an assembler token.
Definition: MCAsmMacro.h:22
bool isNot(TokenKind K) const
Definition: MCAsmMacro.h:76
bool is(TokenKind K) const
Definition: MCAsmMacro.h:75
TokenKind getKind() const
Definition: MCAsmMacro.h:74
This class is intended to be used as a base class for asm properties and features specific to the target.
Definition: MCAsmInfo.h:64
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:303
Represents a location in source code.
Definition: SMLoc.h:23
static SMLoc getFromPointer(const char *Ptr)
Definition: SMLoc.h:36
bool empty() const
Definition: SmallVector.h:82
iterator erase(const_iterator CI)
Definition: SmallVector.h:738
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:806
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18