LLVM  14.0.0git
WasmAsmParser.cpp
Go to the documentation of this file.
1 //===- WasmAsmParser.cpp - Wasm Assembly Parser -----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 // --
8 //
9 // Note, this is for wasm, the binary format (analogous to ELF), not wasm,
10 // the instruction set (analogous to x86), for which parsing code lives in
11 // WebAssemblyAsmParser.
12 //
13 // This file contains processing for generic directives implemented using
14 // MCTargetStreamer, the ones that depend on WebAssemblyTargetStreamer are in
15 // WebAssemblyAsmParser.
16 //
17 //===----------------------------------------------------------------------===//
18 
#include "llvm/BinaryFormat/Wasm.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCSectionWasm.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolWasm.h"
#include "llvm/Support/MachineValueType.h"
29 
30 using namespace llvm;
31 
32 namespace {
33 
34 class WasmAsmParser : public MCAsmParserExtension {
35  MCAsmParser *Parser = nullptr;
36  MCAsmLexer *Lexer = nullptr;
37 
38  template<bool (WasmAsmParser::*HandlerMethod)(StringRef, SMLoc)>
39  void addDirectiveHandler(StringRef Directive) {
40  MCAsmParser::ExtensionDirectiveHandler Handler = std::make_pair(
41  this, HandleDirective<WasmAsmParser, HandlerMethod>);
42 
43  getParser().addDirectiveHandler(Directive, Handler);
44  }
45 
46 public:
47  WasmAsmParser() { BracketExpressionsSupported = true; }
48 
49  void Initialize(MCAsmParser &P) override {
50  Parser = &P;
51  Lexer = &Parser->getLexer();
52  // Call the base implementation.
53  this->MCAsmParserExtension::Initialize(*Parser);
54 
55  addDirectiveHandler<&WasmAsmParser::parseSectionDirectiveText>(".text");
56  addDirectiveHandler<&WasmAsmParser::parseSectionDirective>(".section");
57  addDirectiveHandler<&WasmAsmParser::parseDirectiveSize>(".size");
58  addDirectiveHandler<&WasmAsmParser::parseDirectiveType>(".type");
59  addDirectiveHandler<&WasmAsmParser::ParseDirectiveIdent>(".ident");
60  addDirectiveHandler<
61  &WasmAsmParser::ParseDirectiveSymbolAttribute>(".weak");
62  addDirectiveHandler<
63  &WasmAsmParser::ParseDirectiveSymbolAttribute>(".local");
64  addDirectiveHandler<
65  &WasmAsmParser::ParseDirectiveSymbolAttribute>(".internal");
66  addDirectiveHandler<
67  &WasmAsmParser::ParseDirectiveSymbolAttribute>(".hidden");
68  }
69 
70  bool error(const StringRef &Msg, const AsmToken &Tok) {
71  return Parser->Error(Tok.getLoc(), Msg + Tok.getString());
72  }
73 
74  bool isNext(AsmToken::TokenKind Kind) {
75  auto Ok = Lexer->is(Kind);
76  if (Ok)
77  Lex();
78  return Ok;
79  }
80 
81  bool expect(AsmToken::TokenKind Kind, const char *KindName) {
82  if (!isNext(Kind))
83  return error(std::string("Expected ") + KindName + ", instead got: ",
84  Lexer->getTok());
85  return false;
86  }
87 
88  bool parseSectionDirectiveText(StringRef, SMLoc) {
89  // FIXME: .text currently no-op.
90  return false;
91  }
92 
93  uint32_t parseSectionFlags(StringRef FlagStr, bool &Passive, bool &Group) {
94  uint32_t flags = 0;
95  for (char C : FlagStr) {
96  switch (C) {
97  case 'p':
98  Passive = true;
99  break;
100  case 'G':
101  Group = true;
102  break;
103  case 'T':
104  flags |= wasm::WASM_SEG_FLAG_TLS;
105  break;
106  case 'S':
108  break;
109  default:
110  return -1U;
111  }
112  }
113  return flags;
114  }
115 
116  bool parseGroup(StringRef &GroupName) {
117  if (Lexer->isNot(AsmToken::Comma))
118  return TokError("expected group name");
119  Lex();
120  if (Lexer->is(AsmToken::Integer)) {
121  GroupName = getTok().getString();
122  Lex();
123  } else if (Parser->parseIdentifier(GroupName)) {
124  return TokError("invalid group name");
125  }
126  if (Lexer->is(AsmToken::Comma)) {
127  Lex();
129  if (Parser->parseIdentifier(Linkage))
130  return TokError("invalid linkage");
131  if (Linkage != "comdat")
132  return TokError("Linkage must be 'comdat'");
133  }
134  return false;
135  }
136 
137  bool parseSectionDirective(StringRef, SMLoc loc) {
138  StringRef Name;
139  if (Parser->parseIdentifier(Name))
140  return TokError("expected identifier in directive");
141 
142  if (expect(AsmToken::Comma, ","))
143  return true;
144 
145  if (Lexer->isNot(AsmToken::String))
146  return error("expected string in directive, instead got: ", Lexer->getTok());
147 
149  .StartsWith(".data", SectionKind::getData())
150  .StartsWith(".tdata", SectionKind::getThreadData())
151  .StartsWith(".tbss", SectionKind::getThreadBSS())
152  .StartsWith(".rodata", SectionKind::getReadOnly())
153  .StartsWith(".text", SectionKind::getText())
154  .StartsWith(".custom_section", SectionKind::getMetadata())
155  .StartsWith(".bss", SectionKind::getBSS())
156  // See use of .init_array in WasmObjectWriter and
157  // TargetLoweringObjectFileWasm
158  .StartsWith(".init_array", SectionKind::getData())
159  .StartsWith(".debug_", SectionKind::getMetadata())
161 
162  // Update section flags if present in this .section directive
163  bool Passive = false;
164  bool Group = false;
165  uint32_t Flags =
166  parseSectionFlags(getTok().getStringContents(), Passive, Group);
167  if (Flags == -1U)
168  return TokError("unknown flag");
169 
170  Lex();
171 
172  if (expect(AsmToken::Comma, ",") || expect(AsmToken::At, "@"))
173  return true;
174 
175  StringRef GroupName;
176  if (Group && parseGroup(GroupName))
177  return true;
178 
179  if (expect(AsmToken::EndOfStatement, "eol"))
180  return true;
181 
182  // TODO: Parse UniqueID
183  MCSectionWasm *WS = getContext().getWasmSection(
184  Name, Kind.getValue(), Flags, GroupName, MCContext::GenericSectionID);
185 
186  if (WS->getSegmentFlags() != Flags)
187  Parser->Error(loc, "changed section flags for " + Name +
188  ", expected: 0x" +
189  utohexstr(WS->getSegmentFlags()));
190 
191  if (Passive) {
192  if (!WS->isWasmData())
193  return Parser->Error(loc, "Only data sections can be passive");
194  WS->setPassive();
195  }
196 
197  getStreamer().SwitchSection(WS);
198  return false;
199  }
200 
201  // TODO: This function is almost the same as ELFAsmParser::ParseDirectiveSize
202  // so maybe could be shared somehow.
203  bool parseDirectiveSize(StringRef, SMLoc) {
204  StringRef Name;
205  if (Parser->parseIdentifier(Name))
206  return TokError("expected identifier in directive");
207  auto Sym = getContext().getOrCreateSymbol(Name);
208  if (expect(AsmToken::Comma, ","))
209  return true;
210  const MCExpr *Expr;
211  if (Parser->parseExpression(Expr))
212  return true;
213  if (expect(AsmToken::EndOfStatement, "eol"))
214  return true;
215  // This is done automatically by the assembler for functions currently,
216  // so this is only currently needed for data sections:
217  getStreamer().emitELFSize(Sym, Expr);
218  return false;
219  }
220 
221  bool parseDirectiveType(StringRef, SMLoc) {
222  // This could be the start of a function, check if followed by
223  // "label,@function"
224  if (!Lexer->is(AsmToken::Identifier))
225  return error("Expected label after .type directive, got: ",
226  Lexer->getTok());
227  auto WasmSym = cast<MCSymbolWasm>(
228  getStreamer().getContext().getOrCreateSymbol(
229  Lexer->getTok().getString()));
230  Lex();
231  if (!(isNext(AsmToken::Comma) && isNext(AsmToken::At) &&
232  Lexer->is(AsmToken::Identifier)))
233  return error("Expected label,@type declaration, got: ", Lexer->getTok());
234  auto TypeName = Lexer->getTok().getString();
235  if (TypeName == "function") {
236  WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
237  auto *Current =
238  cast<MCSectionWasm>(getStreamer().getCurrentSection().first);
239  if (Current->getGroup())
240  WasmSym->setComdat(true);
241  } else if (TypeName == "global")
242  WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
243  else if (TypeName == "object")
244  WasmSym->setType(wasm::WASM_SYMBOL_TYPE_DATA);
245  else
246  return error("Unknown WASM symbol type: ", Lexer->getTok());
247  Lex();
248  return expect(AsmToken::EndOfStatement, "EOL");
249  }
250 
251  // FIXME: Shared with ELF.
252  /// ParseDirectiveIdent
253  /// ::= .ident string
254  bool ParseDirectiveIdent(StringRef, SMLoc) {
255  if (getLexer().isNot(AsmToken::String))
256  return TokError("unexpected token in '.ident' directive");
257  StringRef Data = getTok().getIdentifier();
258  Lex();
259  if (getLexer().isNot(AsmToken::EndOfStatement))
260  return TokError("unexpected token in '.ident' directive");
261  Lex();
262  getStreamer().emitIdent(Data);
263  return false;
264  }
265 
266  // FIXME: Shared with ELF.
267  /// ParseDirectiveSymbolAttribute
268  /// ::= { ".local", ".weak", ... } [ identifier ( , identifier )* ]
269  bool ParseDirectiveSymbolAttribute(StringRef Directive, SMLoc) {
271  .Case(".weak", MCSA_Weak)
272  .Case(".local", MCSA_Local)
273  .Case(".hidden", MCSA_Hidden)
274  .Case(".internal", MCSA_Internal)
275  .Case(".protected", MCSA_Protected)
277  assert(Attr != MCSA_Invalid && "unexpected symbol attribute directive!");
278  if (getLexer().isNot(AsmToken::EndOfStatement)) {
279  while (true) {
280  StringRef Name;
281  if (getParser().parseIdentifier(Name))
282  return TokError("expected identifier in directive");
283  MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
284  getStreamer().emitSymbolAttribute(Sym, Attr);
285  if (getLexer().is(AsmToken::EndOfStatement))
286  break;
287  if (getLexer().isNot(AsmToken::Comma))
288  return TokError("unexpected token in directive");
289  Lex();
290  }
291  }
292  Lex();
293  return false;
294  }
295 };
296 
297 } // end anonymous namespace
298 
299 namespace llvm {
300 
302  return new WasmAsmParser;
303 }
304 
305 } // end namespace llvm
llvm::StringSwitch::Case
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:67
llvm::MCAsmParser
Generic assembler parser interface, for use by target specific assembly parsers.
Definition: MCAsmParser.h:124
is
should just be implemented with a CLZ instruction Since there are other e that share this it would be best to implement this in a target independent as zero is the default value for the binary encoder e add r0 add r5 Register operands should be distinct That is
Definition: README.txt:725
llvm::MCAsmParser::Error
bool Error(SMLoc L, const Twine &Msg, SMRange Range=None)
Return an error at the location L, with the message Msg.
Definition: MCAsmParser.cpp:99
llvm
This file implements support for optimizing divisions by a constant.
Definition: AllocatorList.h:23
llvm::MCSymbol
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
llvm::SectionKind::getMetadata
static SectionKind getMetadata()
Definition: SectionKind.h:182
llvm::MCAsmLexer
Generic assembler lexer interface, for use by target specific assembly lexers.
Definition: MCAsmLexer.h:39
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::StringSwitch::Default
LLVM_NODISCARD R Default(T Value)
Definition: StringSwitch.h:181
llvm::AsmToken::EndOfStatement
@ EndOfStatement
Definition: MCAsmMacro.h:42
llvm::MCSA_Invalid
@ MCSA_Invalid
Not a valid directive.
Definition: MCDirectives.h:19
llvm::MCContext::GenericSectionID
@ GenericSectionID
Pass this value as the UniqueID during section creation to get the generic section with the given nam...
Definition: MCContext.h:523
llvm::MCAsmParser::parseIdentifier
virtual bool parseIdentifier(StringRef &Res)=0
Parse an identifier or string (as a quoted identifier) and set Res to the identifier contents.
llvm::wasm::WASM_SYMBOL_TYPE_GLOBAL
@ WASM_SYMBOL_TYPE_GLOBAL
Definition: Wasm.h:371
llvm::MCAsmLexer::getTok
const AsmToken & getTok() const
Get the current (last) lexed token.
Definition: MCAsmLexer.h:108
llvm::AsmToken::Integer
@ Integer
Definition: MCAsmMacro.h:32
error
#define error(X)
Definition: SymbolRecordMapping.cpp:14
llvm::MCSectionWasm::getSegmentFlags
unsigned getSegmentFlags() const
Definition: MCSectionWasm.h:59
llvm::MCAsmLexer::is
bool is(AsmToken::TokenKind K) const
Check if the current token has kind K.
Definition: MCAsmLexer.h:143
MCAsmParser.h
llvm::Data
@ Data
Definition: SIMachineScheduler.h:55
llvm::AsmToken
Target independent representation for an assembler token.
Definition: MCAsmMacro.h:21
llvm::MCSymbolAttr
MCSymbolAttr
Definition: MCDirectives.h:18
llvm::MCSA_Local
@ MCSA_Local
.local (ELF)
Definition: MCDirectives.h:37
MachineValueType.h
llvm::SectionKind::getReadOnly
static SectionKind getReadOnly()
Definition: SectionKind.h:185
llvm::MCAsmParser::parseExpression
virtual bool parseExpression(const MCExpr *&Res, SMLoc &EndLoc)=0
Parse an arbitrary expression.
llvm::SectionKind::getThreadBSS
static SectionKind getThreadBSS()
Definition: SectionKind.h:199
llvm::SMLoc
Represents a location in source code.
Definition: SMLoc.h:23
llvm::MCSectionWasm
This represents a section on wasm.
Definition: MCSectionWasm.h:26
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
MCSymbolWasm.h
llvm::MCSA_Protected
@ MCSA_Protected
.protected (ELF)
Definition: MCDirectives.h:42
MCContext.h
MCSymbol.h
llvm::createWasmAsmParser
MCAsmParserExtension * createWasmAsmParser()
Definition: WasmAsmParser.cpp:301
llvm::SectionKind::getBSS
static SectionKind getBSS()
Definition: SectionKind.h:202
llvm::MCAsmParser::ExtensionDirectiveHandler
std::pair< MCAsmParserExtension *, DirectiveHandler > ExtensionDirectiveHandler
Definition: MCAsmParser.h:128
llvm::MCSectionWasm::isWasmData
bool isWasmData() const
Definition: MCSectionWasm.h:67
llvm::wasm::WASM_SYMBOL_TYPE_FUNCTION
@ WASM_SYMBOL_TYPE_FUNCTION
Definition: Wasm.h:369
llvm::MCSA_Hidden
@ MCSA_Hidden
.hidden (ELF)
Definition: MCDirectives.h:33
llvm::SectionKind::getText
static SectionKind getText()
Definition: SectionKind.h:183
isNot
static bool isNot(const MachineRegisterInfo &MRI, const MachineInstr &MI)
Definition: AMDGPULegalizerInfo.cpp:2804
llvm::lltok::Kind
Kind
Definition: LLToken.h:18
llvm::wasm::WASM_SEG_FLAG_TLS
@ WASM_SEG_FLAG_TLS
Definition: Wasm.h:379
parseSectionFlags
static unsigned parseSectionFlags(StringRef flagsStr, bool *UseLastGroup)
Definition: ELFAsmParser.cpp:287
MCAsmLexer.h
llvm::AsmToken::At
@ At
Definition: MCAsmMacro.h:54
llvm::MCAsmParserExtension::Initialize
virtual void Initialize(MCAsmParser &Parser)
Initialize the extension for parsing using the given Parser.
Definition: MCAsmParserExtension.cpp:19
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::SectionKind::getThreadData
static SectionKind getThreadData()
Definition: SectionKind.h:200
llvm::AsmToken::Comma
@ Comma
Definition: MCAsmMacro.h:49
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::MCAsmParserExtension
Generic interface for extending the MCAsmParser, which is implemented by target and object file assem...
Definition: MCAsmParserExtension.h:25
uint32_t
llvm::MCSectionWasm::setPassive
void setPassive(bool V=true)
Definition: MCSectionWasm.h:85
llvm::AsmToken::getString
StringRef getString() const
Get the string for the current token, this includes all characters (for example, the quotes on string...
Definition: MCAsmMacro.h:110
llvm::MCSA_Internal
@ MCSA_Internal
.internal (ELF)
Definition: MCDirectives.h:35
llvm::SectionKind::getData
static SectionKind getData()
Definition: SectionKind.h:206
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:52
llvm::AsmToken::Identifier
@ Identifier
Definition: MCAsmMacro.h:28
MCAsmParserExtension.h
llvm::AMDGPU::HSAMD::Kernel::Arg::Key::TypeName
constexpr char TypeName[]
Key for Kernel::Arg::Metadata::mTypeName.
Definition: AMDGPUMetadata.h:170
llvm::MCAsmLexer::isNot
bool isNot(AsmToken::TokenKind K) const
Check if the current token has kind K.
Definition: MCAsmLexer.h:146
llvm::TargetStackID::Default
@ Default
Definition: TargetFrameLowering.h:28
Wasm.h
llvm::MCSA_Weak
@ MCSA_Weak
.weak
Definition: MCDirectives.h:44
llvm::AsmToken::TokenKind
TokenKind
Definition: MCAsmMacro.h:23
llvm::MCAsmParser::getLexer
virtual MCAsmLexer & getLexer()=0
MCStreamer.h
llvm::StringSwitch
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:42
llvm::AsmToken::String
@ String
Definition: MCAsmMacro.h:29
llvm::AsmToken::getLoc
SMLoc getLoc() const
Definition: MCAsmLexer.cpp:27
llvm::wasm::WASM_SEG_FLAG_STRINGS
@ WASM_SEG_FLAG_STRINGS
Definition: Wasm.h:378
llvm::Directive
Definition: DirectiveEmitter.h:100
llvm::MCExpr
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
llvm::wasm::WASM_SYMBOL_TYPE_DATA
@ WASM_SYMBOL_TYPE_DATA
Definition: Wasm.h:370
MCSectionWasm.h