LLVM 20.0.0git
BPFAsmParser.cpp
Go to the documentation of this file.
1//===-- BPFAsmParser.cpp - Parse BPF assembly to MCInst instructions --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
12#include "llvm/MC/MCContext.h"
13#include "llvm/MC/MCExpr.h"
14#include "llvm/MC/MCInst.h"
15#include "llvm/MC/MCInstrInfo.h"
19#include "llvm/MC/MCStreamer.h"
23
24using namespace llvm;
25
26namespace {
27struct BPFOperand;
28
29class BPFAsmParser : public MCTargetAsmParser {
30
31 SMLoc getLoc() const { return getParser().getTok().getLoc(); }
32
33 bool PreMatchCheck(OperandVector &Operands);
34
35 bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
38 bool MatchingInlineAsm) override;
39
40 bool parseRegister(MCRegister &Reo, SMLoc &StartLoc, SMLoc &EndLoc) override;
42 SMLoc &EndLoc) override;
43
45 SMLoc NameLoc, OperandVector &Operands) override;
46
47 // "=" is used as assignment operator for assembly statment, so can't be used
48 // for symbol assignment.
49 bool equalIsAsmAssignment() override { return false; }
50 // "*" is used for dereferencing memory that it will be the start of
51 // statement.
52 bool starIsStartOfStatement() override { return true; }
53
54#define GET_ASSEMBLER_HEADER
55#include "BPFGenAsmMatcher.inc"
56
59 ParseStatus parseOperandAsOperator(OperandVector &Operands);
60
61public:
62 enum BPFMatchResultTy {
63 Match_Dummy = FIRST_TARGET_MATCH_RESULT_TY,
64#define GET_OPERAND_DIAGNOSTIC_TYPES
65#include "BPFGenAsmMatcher.inc"
66#undef GET_OPERAND_DIAGNOSTIC_TYPES
67 };
68
69 BPFAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
70 const MCInstrInfo &MII, const MCTargetOptions &Options)
71 : MCTargetAsmParser(Options, STI, MII) {
72 setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
73 }
74};
75
76/// BPFOperand - Instances of this class represent a parsed machine
77/// instruction
78struct BPFOperand : public MCParsedAsmOperand {
79
80 enum KindTy {
81 Token,
83 Immediate,
84 } Kind;
85
86 struct RegOp {
87 MCRegister RegNum;
88 };
89
90 struct ImmOp {
91 const MCExpr *Val;
92 };
93
94 SMLoc StartLoc, EndLoc;
95 union {
96 StringRef Tok;
97 RegOp Reg;
98 ImmOp Imm;
99 };
100
101 BPFOperand(KindTy K) : Kind(K) {}
102
103public:
104 BPFOperand(const BPFOperand &o) : MCParsedAsmOperand() {
105 Kind = o.Kind;
106 StartLoc = o.StartLoc;
107 EndLoc = o.EndLoc;
108
109 switch (Kind) {
110 case Register:
111 Reg = o.Reg;
112 break;
113 case Immediate:
114 Imm = o.Imm;
115 break;
116 case Token:
117 Tok = o.Tok;
118 break;
119 }
120 }
121
122 bool isToken() const override { return Kind == Token; }
123 bool isReg() const override { return Kind == Register; }
124 bool isImm() const override { return Kind == Immediate; }
125 bool isMem() const override { return false; }
126
127 bool isConstantImm() const {
128 return isImm() && isa<MCConstantExpr>(getImm());
129 }
130
131 int64_t getConstantImm() const {
132 const MCExpr *Val = getImm();
133 return static_cast<const MCConstantExpr *>(Val)->getValue();
134 }
135
136 bool isSImm16() const {
137 return (isConstantImm() && isInt<16>(getConstantImm()));
138 }
139
140 bool isSymbolRef() const { return isImm() && isa<MCSymbolRefExpr>(getImm()); }
141
142 bool isBrTarget() const { return isSymbolRef() || isSImm16(); }
143
144 /// getStartLoc - Gets location of the first token of this operand
145 SMLoc getStartLoc() const override { return StartLoc; }
146 /// getEndLoc - Gets location of the last token of this operand
147 SMLoc getEndLoc() const override { return EndLoc; }
148
149 MCRegister getReg() const override {
150 assert(Kind == Register && "Invalid type access!");
151 return Reg.RegNum;
152 }
153
154 const MCExpr *getImm() const {
155 assert(Kind == Immediate && "Invalid type access!");
156 return Imm.Val;
157 }
158
159 StringRef getToken() const {
160 assert(Kind == Token && "Invalid type access!");
161 return Tok;
162 }
163
164 void print(raw_ostream &OS) const override {
165 switch (Kind) {
166 case Immediate:
167 OS << *getImm();
168 break;
169 case Register:
170 OS << "<register x";
171 OS << getReg() << ">";
172 break;
173 case Token:
174 OS << "'" << getToken() << "'";
175 break;
176 }
177 }
178
179 void addExpr(MCInst &Inst, const MCExpr *Expr) const {
180 assert(Expr && "Expr shouldn't be null!");
181
182 if (auto *CE = dyn_cast<MCConstantExpr>(Expr))
183 Inst.addOperand(MCOperand::createImm(CE->getValue()));
184 else
186 }
187
188 // Used by the TableGen Code
189 void addRegOperands(MCInst &Inst, unsigned N) const {
190 assert(N == 1 && "Invalid number of operands!");
192 }
193
194 void addImmOperands(MCInst &Inst, unsigned N) const {
195 assert(N == 1 && "Invalid number of operands!");
196 addExpr(Inst, getImm());
197 }
198
199 static std::unique_ptr<BPFOperand> createToken(StringRef Str, SMLoc S) {
200 auto Op = std::make_unique<BPFOperand>(Token);
201 Op->Tok = Str;
202 Op->StartLoc = S;
203 Op->EndLoc = S;
204 return Op;
205 }
206
207 static std::unique_ptr<BPFOperand> createReg(MCRegister Reg, SMLoc S,
208 SMLoc E) {
209 auto Op = std::make_unique<BPFOperand>(Register);
210 Op->Reg.RegNum = Reg;
211 Op->StartLoc = S;
212 Op->EndLoc = E;
213 return Op;
214 }
215
216 static std::unique_ptr<BPFOperand> createImm(const MCExpr *Val, SMLoc S,
217 SMLoc E) {
218 auto Op = std::make_unique<BPFOperand>(Immediate);
219 Op->Imm.Val = Val;
220 Op->StartLoc = S;
221 Op->EndLoc = E;
222 return Op;
223 }
224
225 // Identifiers that can be used at the start of a statment.
226 static bool isValidIdAtStart(StringRef Name) {
227 return StringSwitch<bool>(Name.lower())
228 .Case("if", true)
229 .Case("call", true)
230 .Case("callx", true)
231 .Case("goto", true)
232 .Case("gotol", true)
233 .Case("may_goto", true)
234 .Case("*", true)
235 .Case("exit", true)
236 .Case("lock", true)
237 .Case("ld_pseudo", true)
238 .Default(false);
239 }
240
241 // Identifiers that can be used in the middle of a statment.
242 static bool isValidIdInMiddle(StringRef Name) {
243 return StringSwitch<bool>(Name.lower())
244 .Case("u64", true)
245 .Case("u32", true)
246 .Case("u16", true)
247 .Case("u8", true)
248 .Case("s32", true)
249 .Case("s16", true)
250 .Case("s8", true)
251 .Case("be64", true)
252 .Case("be32", true)
253 .Case("be16", true)
254 .Case("le64", true)
255 .Case("le32", true)
256 .Case("le16", true)
257 .Case("bswap16", true)
258 .Case("bswap32", true)
259 .Case("bswap64", true)
260 .Case("goto", true)
261 .Case("gotol", true)
262 .Case("ll", true)
263 .Case("skb", true)
264 .Case("s", true)
265 .Case("atomic_fetch_add", true)
266 .Case("atomic_fetch_and", true)
267 .Case("atomic_fetch_or", true)
268 .Case("atomic_fetch_xor", true)
269 .Case("xchg_64", true)
270 .Case("xchg32_32", true)
271 .Case("cmpxchg_64", true)
272 .Case("cmpxchg32_32", true)
273 .Case("addr_space_cast", true)
274 .Default(false);
275 }
276};
277} // end anonymous namespace.
278
279#define GET_REGISTER_MATCHER
280#define GET_MATCHER_IMPLEMENTATION
281#include "BPFGenAsmMatcher.inc"
282
283bool BPFAsmParser::PreMatchCheck(OperandVector &Operands) {
284
285 if (Operands.size() == 4) {
286 // check "reg1 = -reg2" and "reg1 = be16/be32/be64/le16/le32/le64 reg2",
287 // reg1 must be the same as reg2
288 BPFOperand &Op0 = (BPFOperand &)*Operands[0];
289 BPFOperand &Op1 = (BPFOperand &)*Operands[1];
290 BPFOperand &Op2 = (BPFOperand &)*Operands[2];
291 BPFOperand &Op3 = (BPFOperand &)*Operands[3];
292 if (Op0.isReg() && Op1.isToken() && Op2.isToken() && Op3.isReg()
293 && Op1.getToken() == "="
294 && (Op2.getToken() == "-" || Op2.getToken() == "be16"
295 || Op2.getToken() == "be32" || Op2.getToken() == "be64"
296 || Op2.getToken() == "le16" || Op2.getToken() == "le32"
297 || Op2.getToken() == "le64")
298 && Op0.getReg() != Op3.getReg())
299 return true;
300 }
301
302 return false;
303}
304
305bool BPFAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
308 bool MatchingInlineAsm) {
309 MCInst Inst;
310 SMLoc ErrorLoc;
311
312 if (PreMatchCheck(Operands))
313 return Error(IDLoc, "additional inst constraint not met");
314
315 switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
316 default:
317 break;
318 case Match_Success:
319 Inst.setLoc(IDLoc);
320 Out.emitInstruction(Inst, getSTI());
321 return false;
322 case Match_MissingFeature:
323 return Error(IDLoc, "instruction use requires an option to be enabled");
324 case Match_MnemonicFail:
325 return Error(IDLoc, "unrecognized instruction mnemonic");
326 case Match_InvalidOperand:
327 ErrorLoc = IDLoc;
328
329 if (ErrorInfo != ~0U) {
330 if (ErrorInfo >= Operands.size())
331 return Error(ErrorLoc, "too few operands for instruction");
332
333 ErrorLoc = ((BPFOperand &)*Operands[ErrorInfo]).getStartLoc();
334
335 if (ErrorLoc == SMLoc())
336 ErrorLoc = IDLoc;
337 }
338
339 return Error(ErrorLoc, "invalid operand for instruction");
340 case Match_InvalidBrTarget:
341 return Error(Operands[ErrorInfo]->getStartLoc(),
342 "operand is not an identifier or 16-bit signed integer");
343 case Match_InvalidSImm16:
344 return Error(Operands[ErrorInfo]->getStartLoc(),
345 "operand is not a 16-bit signed integer");
346 }
347
348 llvm_unreachable("Unknown match type detected!");
349}
350
351bool BPFAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
352 SMLoc &EndLoc) {
353 if (!tryParseRegister(Reg, StartLoc, EndLoc).isSuccess())
354 return Error(StartLoc, "invalid register name");
355 return false;
356}
357
// Non-reporting register parse hook (override of the MCTargetAsmParser
// interface).
//
// Sets StartLoc/EndLoc to the extent of the current token, resets Reg to
// BPF::NoRegister, and looks the current identifier up with
// MatchRegisterName().
//
// NOTE(review): this listing omits source lines 368 and 371, which presumably
// hold the two return statements, so the ParseStatus produced on each path
// cannot be confirmed from here.
358ParseStatus BPFAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
359 SMLoc &EndLoc) {
360 const AsmToken &Tok = getParser().getTok();
361 StartLoc = Tok.getLoc();
362 EndLoc = Tok.getEndLoc();
363 Reg = BPF::NoRegister;
364 StringRef Name = getLexer().getTok().getIdentifier();
365
// NOTE(review): the token is consumed when MatchRegisterName() yields *no*
// register, and Reg is never assigned the lookup result in the visible code.
// This looks inverted - confirm against the omitted return statements.
366 if (!MatchRegisterName(Name)) {
367 getParser().Lex(); // Eat identifier token.
369 }
370
372}
373
// Tries to parse the current token as an operator/keyword and, when it is
// one, appends it to Operands as a BPFOperand token located at the current
// position.
//
// NOTE(review): this listing omits several source lines of this function
// (383, 386, 393, 398, 408, 414, 417-421, 429, 436). They presumably hold the
// return statements and additional case labels, so the exact ParseStatus on
// each path and the full set of accepted punctuation cannot be confirmed from
// here.
374ParseStatus BPFAsmParser::parseOperandAsOperator(OperandVector &Operands) {
375 SMLoc S = getLoc();
376
// Keyword path: an identifier is accepted only if it is one of the names
// recognised by BPFOperand::isValidIdInMiddle().
377 if (getLexer().getKind() == AsmToken::Identifier) {
378 StringRef Name = getLexer().getTok().getIdentifier();
379
380 if (BPFOperand::isValidIdInMiddle(Name)) {
381 getLexer().Lex();
382 Operands.push_back(BPFOperand::createToken(Name, S));
384 }
385
387 }
388
// Punctuation path.
389 switch (getLexer().getKind()) {
390 case AsmToken::Minus:
391 case AsmToken::Plus: {
// A sign directly followed by an integer gets separate handling on the
// omitted line 393 (presumably so that it is left for the immediate parser -
// TODO confirm); otherwise the sign is treated like the operators below.
392 if (getLexer().peekTok().is(AsmToken::Integer))
394 [[fallthrough]];
395 }
396
// Single-token operators: the token's own spelling becomes the operand.
397 case AsmToken::Equal:
399 case AsmToken::Less:
400 case AsmToken::Pipe:
401 case AsmToken::Star:
402 case AsmToken::LParen:
403 case AsmToken::RParen:
404 case AsmToken::LBrac:
405 case AsmToken::RBrac:
406 case AsmToken::Slash:
407 case AsmToken::Amp:
409 case AsmToken::Caret: {
410 StringRef Name = getLexer().getTok().getString();
411 getLexer().Lex();
412 Operands.push_back(BPFOperand::createToken(Name, S));
413
415 }
416
// Two-character operators are split into two one-character token operands
// (first character, then second character), both located at S, before the
// lexer token is consumed.
422 case AsmToken::LessLess: {
423 Operands.push_back(BPFOperand::createToken(
424 getLexer().getTok().getString().substr(0, 1), S));
425 Operands.push_back(BPFOperand::createToken(
426 getLexer().getTok().getString().substr(1, 1), S));
427 getLexer().Lex();
428
430 }
431
432 default:
433 break;
434 }
435
437}
438
// Tries to parse the current token as a register operand and, when a register
// is found, consumes the token and appends a register operand to Operands.
//
// NOTE(review): this listing omits source lines 441, 445-446, 448, 451 and
// 456. The declarations of `E` and `Reg`, the case label that opens the block
// closed on line 455, and every return statement are therefore not visible;
// how `Reg` is derived from `Name` cannot be confirmed from here.
439ParseStatus BPFAsmParser::parseRegister(OperandVector &Operands) {
440 SMLoc S = getLoc();
442
443 switch (getLexer().getKind()) {
444 default:
447 StringRef Name = getLexer().getTok().getIdentifier();
449
// A false `Reg` takes the early exit on the omitted line 451.
450 if (!Reg)
452
// Register recognised: consume the token and record the operand with its
// source range S..E.
453 getLexer().Lex();
454 Operands.push_back(BPFOperand::createReg(Reg, S, E));
455 }
457}
458
// Tries to parse an immediate operand: an expression is parsed with the
// generic expression parser and appended to Operands as an immediate.
//
// NOTE(review): this listing omits source lines 462, 466, 468, 476, 478 and
// 481. The statement under `default:`, some case labels, the declaration of
// `E`, and every return statement are therefore not visible.
459ParseStatus BPFAsmParser::parseImmediate(OperandVector &Operands) {
// Token kinds listed here break out of the switch and continue to the
// expression parse below; the handling of all other kinds is on the omitted
// line 462 (presumably an early return - TODO confirm).
460 switch (getLexer().getKind()) {
461 default:
463 case AsmToken::LParen:
464 case AsmToken::Minus:
465 case AsmToken::Plus:
467 case AsmToken::String:
469 break;
470 }
471
472 const MCExpr *IdVal;
473 SMLoc S = getLoc();
474
// The condition is true when parseExpression() reports failure; what is
// returned in that case is on the omitted line 476.
475 if (getParser().parseExpression(IdVal))
477
479 Operands.push_back(BPFOperand::createImm(IdVal, S, E));
480
482}
483
484/// Parse a BPF instruction which is in BPF verifier format.
///
/// The statement's leading identifier (\p Name at \p NameLoc) becomes the
/// first operand; the rest of the statement is then consumed token by token
/// until the end of the statement, appending one operand per recognised item.
///
/// \returns false when the whole statement was parsed, otherwise the result
/// of Error().
///
/// NOTE(review): this listing omits source line 488, which presumably
/// declares `Reg` by looking \p Name up as a register name - TODO confirm.
485bool BPFAsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name,
486 SMLoc NameLoc, OperandVector &Operands) {
487 // The first operand could be either register or actually an operator.
489
490 if (Reg) {
// NOTE(review): the end location is computed as one character *before*
// NameLoc, i.e. it precedes the start location - confirm this is intended.
491 SMLoc E = SMLoc::getFromPointer(NameLoc.getPointer() - 1);
492 Operands.push_back(BPFOperand::createReg(Reg, NameLoc, E));
493 } else if (BPFOperand::isValidIdAtStart(Name))
494 Operands.push_back(BPFOperand::createToken(Name, NameLoc));
495 else
496 return Error(NameLoc, "invalid register/token name");
497
// Each iteration tries, in order: operator/keyword, register, comma (skipped
// without producing an operand), immediate. Anything else is an error.
498 while (!getLexer().is(AsmToken::EndOfStatement)) {
499 // Attempt to parse token as operator
500 if (parseOperandAsOperator(Operands).isSuccess())
501 continue;
502
503 // Attempt to parse token as register
504 if (parseRegister(Operands).isSuccess())
505 continue;
506
507 if (getLexer().is(AsmToken::Comma)) {
508 getLexer().Lex();
509 continue;
510 }
511
512 // Attempt to parse token as an immediate
513 if (!parseImmediate(Operands).isSuccess()) {
514 SMLoc Loc = getLexer().getLoc();
515 return Error(Loc, "unexpected token");
516 }
517 }
518
// NOTE(review): the loop above is left only by `return` or when the lexer is
// at EndOfStatement (there is no `break`), so this check looks unreachable.
519 if (getLexer().isNot(AsmToken::EndOfStatement)) {
520 SMLoc Loc = getLexer().getLoc();
521
522 getParser().eatToEndOfStatement();
523
524 return Error(Loc, "unexpected token");
525 }
526
527 // Consume the EndOfStatement.
528 getParser().Lex();
529 return false;
530}
531
536}
static MCRegister MatchRegisterName(StringRef Name)
static bool isNot(const MachineRegisterInfo &MRI, const MachineInstr &MI)
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeBPFAsmParser()
#define LLVM_EXTERNAL_VISIBILITY
Definition: Compiler.h:128
std::string Name
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static LVOptions Options
Definition: LVOptions.cpp:25
mir Rename Register Operands
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static bool isReg(const MCInst &MI, unsigned OpNo)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isImm(const MachineOperand &MO, MachineRegisterInfo *MRI)
raw_pwrite_stream & OS
static StringRef substr(StringRef Str, uint64_t Len)
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef< uint8_t > Bytes)
Target independent representation for an assembler token.
Definition: MCAsmMacro.h:21
SMLoc getLoc() const
Definition: MCAsmLexer.cpp:26
SMLoc getEndLoc() const
Definition: MCAsmLexer.cpp:30
This class represents an Operation in the Expression.
Base class for user error types.
Definition: Error.h:355
Lightweight error class with error context and mandatory checking.
Definition: Error.h:160
Generic assembler parser interface, for use by target specific assembly parsers.
Definition: MCAsmParser.h:123
const AsmToken & getTok() const
Get the current AsmToken from the stream.
Definition: MCAsmParser.cpp:40
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:34
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:185
void setLoc(SMLoc loc)
Definition: MCInst.h:204
void addOperand(const MCOperand Op)
Definition: MCInst.h:211
Interface to description of machine instruction set.
Definition: MCInstrInfo.h:26
static MCOperand createExpr(const MCExpr *Val)
Definition: MCInst.h:163
static MCOperand createReg(MCRegister Reg)
Definition: MCInst.h:135
static MCOperand createImm(int64_t Val)
Definition: MCInst.h:142
MCParsedAsmOperand - This abstract class represents a source-level assembly instruction operand.
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
Streaming machine code generation interface.
Definition: MCStreamer.h:213
virtual void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI)
Emit the given Instruction into the current section.
Generic base class for all target subtargets.
const FeatureBitset & getFeatureBits() const
MCTargetAsmParser - Generic interface to target specific assembly parsers.
virtual bool parseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands)=0
Parse one assembly instruction.
virtual bool equalIsAsmAssignment()
virtual bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc)=0
virtual ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc)=0
tryParseRegister - parse one register if possible
virtual bool starIsStartOfStatement()
virtual bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm)=0
Recognize a series of operands of a parsed instruction as an actual MCInst and emit it to the specifi...
void setAvailableFeatures(const FeatureBitset &Value)
Ternary parse status returned by various parse* methods.
static constexpr StatusTy Failure
static constexpr StatusTy Success
static constexpr StatusTy NoMatch
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Represents a location in source code.
Definition: SMLoc.h:23
static SMLoc getFromPointer(const char *Ptr)
Definition: SMLoc.h:36
constexpr const char * getPointer() const
Definition: SMLoc.h:34
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CE
Windows NT (Windows on ARM)
Reg
All possible values of the reg field in the ModR/M byte.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
static bool isMem(const MachineInstr &MI, unsigned Op)
Definition: X86InstrInfo.h:170
Target & getTheBPFleTarget()
Target & getTheBPFbeTarget()
Target & getTheBPFTarget()
DWARFExpression::Operation Op
#define N
RegisterMCAsmParser - Helper template for registering a target specific assembly parser,...