LLVM 19.0.0git
BPFAsmParser.cpp
Go to the documentation of this file.
1//===-- BPFAsmParser.cpp - Parse BPF assembly to MCInst instructions --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
11#include "llvm/ADT/STLExtras.h"
13#include "llvm/MC/MCContext.h"
14#include "llvm/MC/MCExpr.h"
15#include "llvm/MC/MCInst.h"
16#include "llvm/MC/MCInstrInfo.h"
21#include "llvm/MC/MCStreamer.h"
25
26using namespace llvm;
27
28namespace {
29struct BPFOperand;
30
31class BPFAsmParser : public MCTargetAsmParser {
32
33 SMLoc getLoc() const { return getParser().getTok().getLoc(); }
34
35 bool PreMatchCheck(OperandVector &Operands);
36
37 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
40 bool MatchingInlineAsm) override;
41
42 bool parseRegister(MCRegister &Reo, SMLoc &StartLoc, SMLoc &EndLoc) override;
44 SMLoc &EndLoc) override;
45
47 SMLoc NameLoc, OperandVector &Operands) override;
48
49 // "=" is used as the assignment operator in assembly statements, so it can't be used
50 // for symbol assignment.
51 bool equalIsAsmAssignment() override { return false; }
52 // "*" is used for dereferencing memory, so it can be the start of a
53 // statement.
54 bool starIsStartOfStatement() override { return true; }
55
56#define GET_ASSEMBLER_HEADER
57#include "BPFGenAsmMatcher.inc"
58
61 ParseStatus parseOperandAsOperator(OperandVector &Operands);
62
63public:
64 enum BPFMatchResultTy {
65 Match_Dummy = FIRST_TARGET_MATCH_RESULT_TY,
66#define GET_OPERAND_DIAGNOSTIC_TYPES
67#include "BPFGenAsmMatcher.inc"
68#undef GET_OPERAND_DIAGNOSTIC_TYPES
69 };
70
71 BPFAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
72 const MCInstrInfo &MII, const MCTargetOptions &Options)
73 : MCTargetAsmParser(Options, STI, MII) {
74 setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
75 }
76};
77
78/// BPFOperand - Instances of this class represent a parsed operand of a
79/// machine instruction.
80struct BPFOperand : public MCParsedAsmOperand {
81
82 enum KindTy {
83 Token,
85 Immediate,
86 } Kind;
87
88 struct RegOp {
89 unsigned RegNum;
90 };
91
92 struct ImmOp {
93 const MCExpr *Val;
94 };
95
96 SMLoc StartLoc, EndLoc;
97 union {
98 StringRef Tok;
99 RegOp Reg;
100 ImmOp Imm;
101 };
102
103 BPFOperand(KindTy K) : Kind(K) {}
104
105public:
106 BPFOperand(const BPFOperand &o) : MCParsedAsmOperand() {
107 Kind = o.Kind;
108 StartLoc = o.StartLoc;
109 EndLoc = o.EndLoc;
110
111 switch (Kind) {
112 case Register:
113 Reg = o.Reg;
114 break;
115 case Immediate:
116 Imm = o.Imm;
117 break;
118 case Token:
119 Tok = o.Tok;
120 break;
121 }
122 }
123
124 bool isToken() const override { return Kind == Token; }
125 bool isReg() const override { return Kind == Register; }
126 bool isImm() const override { return Kind == Immediate; }
127 bool isMem() const override { return false; }
128
129 bool isConstantImm() const {
130 return isImm() && isa<MCConstantExpr>(getImm());
131 }
132
133 int64_t getConstantImm() const {
134 const MCExpr *Val = getImm();
135 return static_cast<const MCConstantExpr *>(Val)->getValue();
136 }
137
138 bool isSImm16() const {
139 return (isConstantImm() && isInt<16>(getConstantImm()));
140 }
141
142 bool isSymbolRef() const { return isImm() && isa<MCSymbolRefExpr>(getImm()); }
143
144 bool isBrTarget() const { return isSymbolRef() || isSImm16(); }
145
146 /// getStartLoc - Gets location of the first token of this operand
147 SMLoc getStartLoc() const override { return StartLoc; }
148 /// getEndLoc - Gets location of the last token of this operand
149 SMLoc getEndLoc() const override { return EndLoc; }
150
151 unsigned getReg() const override {
152 assert(Kind == Register && "Invalid type access!");
153 return Reg.RegNum;
154 }
155
156 const MCExpr *getImm() const {
157 assert(Kind == Immediate && "Invalid type access!");
158 return Imm.Val;
159 }
160
161 StringRef getToken() const {
162 assert(Kind == Token && "Invalid type access!");
163 return Tok;
164 }
165
166 void print(raw_ostream &OS) const override {
167 switch (Kind) {
168 case Immediate:
169 OS << *getImm();
170 break;
171 case Register:
172 OS << "<register x";
173 OS << getReg() << ">";
174 break;
175 case Token:
176 OS << "'" << getToken() << "'";
177 break;
178 }
179 }
180
181 void addExpr(MCInst &Inst, const MCExpr *Expr) const {
182 assert(Expr && "Expr shouldn't be null!");
183
184 if (auto *CE = dyn_cast<MCConstantExpr>(Expr))
185 Inst.addOperand(MCOperand::createImm(CE->getValue()));
186 else
188 }
189
190 // Used by the TableGen Code
191 void addRegOperands(MCInst &Inst, unsigned N) const {
192 assert(N == 1 && "Invalid number of operands!");
194 }
195
196 void addImmOperands(MCInst &Inst, unsigned N) const {
197 assert(N == 1 && "Invalid number of operands!");
198 addExpr(Inst, getImm());
199 }
200
201 static std::unique_ptr<BPFOperand> createToken(StringRef Str, SMLoc S) {
202 auto Op = std::make_unique<BPFOperand>(Token);
203 Op->Tok = Str;
204 Op->StartLoc = S;
205 Op->EndLoc = S;
206 return Op;
207 }
208
209 static std::unique_ptr<BPFOperand> createReg(unsigned RegNo, SMLoc S,
210 SMLoc E) {
211 auto Op = std::make_unique<BPFOperand>(Register);
212 Op->Reg.RegNum = RegNo;
213 Op->StartLoc = S;
214 Op->EndLoc = E;
215 return Op;
216 }
217
218 static std::unique_ptr<BPFOperand> createImm(const MCExpr *Val, SMLoc S,
219 SMLoc E) {
220 auto Op = std::make_unique<BPFOperand>(Immediate);
221 Op->Imm.Val = Val;
222 Op->StartLoc = S;
223 Op->EndLoc = E;
224 return Op;
225 }
226
227 // Identifiers that can be used at the start of a statment.
228 static bool isValidIdAtStart(StringRef Name) {
229 return StringSwitch<bool>(Name.lower())
230 .Case("if", true)
231 .Case("call", true)
232 .Case("callx", true)
233 .Case("goto", true)
234 .Case("gotol", true)
235 .Case("may_goto", true)
236 .Case("*", true)
237 .Case("exit", true)
238 .Case("lock", true)
239 .Case("ld_pseudo", true)
240 .Default(false);
241 }
242
243 // Identifiers that can be used in the middle of a statment.
244 static bool isValidIdInMiddle(StringRef Name) {
245 return StringSwitch<bool>(Name.lower())
246 .Case("u64", true)
247 .Case("u32", true)
248 .Case("u16", true)
249 .Case("u8", true)
250 .Case("s32", true)
251 .Case("s16", true)
252 .Case("s8", true)
253 .Case("be64", true)
254 .Case("be32", true)
255 .Case("be16", true)
256 .Case("le64", true)
257 .Case("le32", true)
258 .Case("le16", true)
259 .Case("bswap16", true)
260 .Case("bswap32", true)
261 .Case("bswap64", true)
262 .Case("goto", true)
263 .Case("gotol", true)
264 .Case("ll", true)
265 .Case("skb", true)
266 .Case("s", true)
267 .Case("atomic_fetch_add", true)
268 .Case("atomic_fetch_and", true)
269 .Case("atomic_fetch_or", true)
270 .Case("atomic_fetch_xor", true)
271 .Case("xchg_64", true)
272 .Case("xchg32_32", true)
273 .Case("cmpxchg_64", true)
274 .Case("cmpxchg32_32", true)
275 .Case("addr_space_cast", true)
276 .Default(false);
277 }
278};
279} // end anonymous namespace.
280
281#define GET_REGISTER_MATCHER
282#define GET_MATCHER_IMPLEMENTATION
283#include "BPFGenAsmMatcher.inc"
284
285bool BPFAsmParser::PreMatchCheck(OperandVector &Operands) {
286
287 if (Operands.size() == 4) {
288 // check "reg1 = -reg2" and "reg1 = be16/be32/be64/le16/le32/le64 reg2",
289 // reg1 must be the same as reg2
290 BPFOperand &Op0 = (BPFOperand &)*Operands[0];
291 BPFOperand &Op1 = (BPFOperand &)*Operands[1];
292 BPFOperand &Op2 = (BPFOperand &)*Operands[2];
293 BPFOperand &Op3 = (BPFOperand &)*Operands[3];
294 if (Op0.isReg() && Op1.isToken() && Op2.isToken() && Op3.isReg()
295 && Op1.getToken() == "="
296 && (Op2.getToken() == "-" || Op2.getToken() == "be16"
297 || Op2.getToken() == "be32" || Op2.getToken() == "be64"
298 || Op2.getToken() == "le16" || Op2.getToken() == "le32"
299 || Op2.getToken() == "le64")
300 && Op0.getReg() != Op3.getReg())
301 return true;
302 }
303
304 return false;
305}
306
// Match the parsed operands against the generated matcher and emit the
// resulting instruction.
//
// Flow: first apply PreMatchCheck(); then call MatchInstructionImpl() and, on
// Match_Success, set the instruction location to IDLoc and emit it through
// Out. Every other handled match result is turned into a diagnostic via
// Error(). Returns false on success; on failure returns whatever Error()
// returns.
//
// NOTE(review): Operands, Out and ErrorInfo are used in the body but are not
// declared in the signature lines visible in this listing — presumably the
// parameter lines between the two shown were lost; confirm against the
// original file.
307bool BPFAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
310 bool MatchingInlineAsm) {
311 MCInst Inst;
312 SMLoc ErrorLoc;
313
// Reject statements that violate the same-register constraint up front.
314 if (PreMatchCheck(Operands))
315 return Error(IDLoc, "additional inst constraint not met");
316
317 switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
// Any match result without a dedicated case falls out of the switch and hits
// the llvm_unreachable at the end of the function.
318 default:
319 break;
320 case Match_Success:
321 Inst.setLoc(IDLoc);
322 Out.emitInstruction(Inst, getSTI());
323 return false;
324 case Match_MissingFeature:
325 return Error(IDLoc, "instruction use requires an option to be enabled");
326 case Match_MnemonicFail:
327 return Error(IDLoc, "unrecognized instruction mnemonic");
328 case Match_InvalidOperand:
// Default to the statement location; refine it to the offending operand when
// ErrorInfo carries an index (~0U is treated here as "no index").
329 ErrorLoc = IDLoc;
330
331 if (ErrorInfo != ~0U) {
// An index past the end of Operands is reported as a missing operand.
332 if (ErrorInfo >= Operands.size())
333 return Error(ErrorLoc, "too few operands for instruction");
334
335 ErrorLoc = ((BPFOperand &)*Operands[ErrorInfo]).getStartLoc();
336
// Fall back to the statement location if the operand has no location set.
337 if (ErrorLoc == SMLoc())
338 ErrorLoc = IDLoc;
339 }
340
341 return Error(ErrorLoc, "invalid operand for instruction");
// NOTE(review): the two cases below index Operands with ErrorInfo without the
// ~0U / bounds checks used for Match_InvalidOperand — confirm the matcher
// always provides a valid operand index for these results.
342 case Match_InvalidBrTarget:
343 return Error(Operands[ErrorInfo]->getStartLoc(),
344 "operand is not an identifier or 16-bit signed integer");
345 case Match_InvalidSImm16:
346 return Error(Operands[ErrorInfo]->getStartLoc(),
347 "operand is not a 16-bit signed integer");
348 }
349
350 llvm_unreachable("Unknown match type detected!");
351}
352
353bool BPFAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
354 SMLoc &EndLoc) {
355 if (!tryParseRegister(Reg, StartLoc, EndLoc).isSuccess())
356 return Error(StartLoc, "invalid register name");
357 return false;
358}
359
360ParseStatus BPFAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
361 SMLoc &EndLoc) {
362 const AsmToken &Tok = getParser().getTok();
363 StartLoc = Tok.getLoc();
364 EndLoc = Tok.getEndLoc();
365 Reg = BPF::NoRegister;
366 StringRef Name = getLexer().getTok().getIdentifier();
367
368 if (!MatchRegisterName(Name)) {
369 getParser().Lex(); // Eat identifier token.
371 }
372
374}
375
376ParseStatus BPFAsmParser::parseOperandAsOperator(OperandVector &Operands) {
377 SMLoc S = getLoc();
378
379 if (getLexer().getKind() == AsmToken::Identifier) {
380 StringRef Name = getLexer().getTok().getIdentifier();
381
382 if (BPFOperand::isValidIdInMiddle(Name)) {
383 getLexer().Lex();
384 Operands.push_back(BPFOperand::createToken(Name, S));
386 }
387
389 }
390
391 switch (getLexer().getKind()) {
392 case AsmToken::Minus:
393 case AsmToken::Plus: {
394 if (getLexer().peekTok().is(AsmToken::Integer))
396 [[fallthrough]];
397 }
398
399 case AsmToken::Equal:
401 case AsmToken::Less:
402 case AsmToken::Pipe:
403 case AsmToken::Star:
404 case AsmToken::LParen:
405 case AsmToken::RParen:
406 case AsmToken::LBrac:
407 case AsmToken::RBrac:
408 case AsmToken::Slash:
409 case AsmToken::Amp:
411 case AsmToken::Caret: {
412 StringRef Name = getLexer().getTok().getString();
413 getLexer().Lex();
414 Operands.push_back(BPFOperand::createToken(Name, S));
415
417 }
418
424 case AsmToken::LessLess: {
425 Operands.push_back(BPFOperand::createToken(
426 getLexer().getTok().getString().substr(0, 1), S));
427 Operands.push_back(BPFOperand::createToken(
428 getLexer().getTok().getString().substr(1, 1), S));
429 getLexer().Lex();
430
432 }
433
434 default:
435 break;
436 }
437
439}
440
441ParseStatus BPFAsmParser::parseRegister(OperandVector &Operands) {
442 SMLoc S = getLoc();
444
445 switch (getLexer().getKind()) {
446 default:
449 StringRef Name = getLexer().getTok().getIdentifier();
450 unsigned RegNo = MatchRegisterName(Name);
451
452 if (RegNo == 0)
454
455 getLexer().Lex();
456 Operands.push_back(BPFOperand::createReg(RegNo, S, E));
457 }
459}
460
461ParseStatus BPFAsmParser::parseImmediate(OperandVector &Operands) {
462 switch (getLexer().getKind()) {
463 default:
465 case AsmToken::LParen:
466 case AsmToken::Minus:
467 case AsmToken::Plus:
469 case AsmToken::String:
471 break;
472 }
473
474 const MCExpr *IdVal;
475 SMLoc S = getLoc();
476
477 if (getParser().parseExpression(IdVal))
479
481 Operands.push_back(BPFOperand::createImm(IdVal, S, E));
482
484}
485
486/// ParseInstruction - Parse an BPF instruction which is in BPF verifier
487/// format.
488bool BPFAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
489 SMLoc NameLoc, OperandVector &Operands) {
490 // The first operand could be either register or actually an operator.
491 unsigned RegNo = MatchRegisterName(Name);
492
493 if (RegNo != 0) {
494 SMLoc E = SMLoc::getFromPointer(NameLoc.getPointer() - 1);
495 Operands.push_back(BPFOperand::createReg(RegNo, NameLoc, E));
496 } else if (BPFOperand::isValidIdAtStart (Name))
497 Operands.push_back(BPFOperand::createToken(Name, NameLoc));
498 else
499 return Error(NameLoc, "invalid register/token name");
500
501 while (!getLexer().is(AsmToken::EndOfStatement)) {
502 // Attempt to parse token as operator
503 if (parseOperandAsOperator(Operands).isSuccess())
504 continue;
505
506 // Attempt to parse token as register
507 if (parseRegister(Operands).isSuccess())
508 continue;
509
510 if (getLexer().is(AsmToken::Comma)) {
511 getLexer().Lex();
512 continue;
513 }
514
515 // Attempt to parse token as an immediate
516 if (!parseImmediate(Operands).isSuccess()) {
517 SMLoc Loc = getLexer().getLoc();
518 return Error(Loc, "unexpected token");
519 }
520 }
521
522 if (getLexer().isNot(AsmToken::EndOfStatement)) {
523 SMLoc Loc = getLexer().getLoc();
524
525 getParser().eatToEndOfStatement();
526
527 return Error(Loc, "unexpected token");
528 }
529
530 // Consume the EndOfStatement.
531 getParser().Lex();
532 return false;
533}
534
539}
static MCRegister MatchRegisterName(StringRef Name)
static bool isNot(const MachineRegisterInfo &MRI, const MachineInstr &MI)
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeBPFAsmParser()
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_EXTERNAL_VISIBILITY
Definition: Compiler.h:135
std::string Name
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static LVOptions Options
Definition: LVOptions.cpp:25
mir Rename Register Operands
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static bool isReg(const MCInst &MI, unsigned OpNo)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isImm(const MachineOperand &MO, MachineRegisterInfo *MRI)
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
static StringRef substr(StringRef Str, uint64_t Len)
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef< uint8_t > Bytes)
Target independent representation for an assembler token.
Definition: MCAsmMacro.h:21
SMLoc getLoc() const
Definition: MCAsmLexer.cpp:26
SMLoc getEndLoc() const
Definition: MCAsmLexer.cpp:30
This class represents an Operation in the Expression.
Base class for user error types.
Definition: Error.h:352
Lightweight error class with error context and mandatory checking.
Definition: Error.h:160
Generic assembler parser interface, for use by target specific assembly parsers.
Definition: MCAsmParser.h:123
const AsmToken & getTok() const
Get the current AsmToken from the stream.
Definition: MCAsmParser.cpp:40
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:184
void setLoc(SMLoc loc)
Definition: MCInst.h:203
void addOperand(const MCOperand Op)
Definition: MCInst.h:210
Interface to description of machine instruction set.
Definition: MCInstrInfo.h:26
static MCOperand createReg(unsigned Reg)
Definition: MCInst.h:134
static MCOperand createExpr(const MCExpr *Val)
Definition: MCInst.h:162
static MCOperand createImm(int64_t Val)
Definition: MCInst.h:141
MCParsedAsmOperand - This abstract class represents a source-level assembly instruction operand.
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
Streaming machine code generation interface.
Definition: MCStreamer.h:212
virtual void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI)
Emit the given Instruction into the current section.
Generic base class for all target subtargets.
const FeatureBitset & getFeatureBits() const
MCTargetAsmParser - Generic interface to target specific assembly parsers.
virtual bool equalIsAsmAssignment()
virtual bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc)=0
virtual ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc)=0
tryParseRegister - parse one register if possible
virtual bool starIsStartOfStatement()
void setAvailableFeatures(const FeatureBitset &Value)
virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands)=0
ParseInstruction - Parse one assembly instruction.
virtual bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm)=0
MatchAndEmitInstruction - Recognize a series of operands of a parsed instruction as an actual MCInst ...
Ternary parse status returned by various parse* methods.
static constexpr StatusTy Failure
static constexpr StatusTy Success
static constexpr StatusTy NoMatch
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Represents a location in source code.
Definition: SMLoc.h:23
static SMLoc getFromPointer(const char *Ptr)
Definition: SMLoc.h:36
constexpr const char * getPointer() const
Definition: SMLoc.h:34
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CE
Windows NT (Windows on ARM)
Reg
All possible values of the reg field in the ModR/M byte.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
static bool isMem(const MachineInstr &MI, unsigned Op)
Definition: X86InstrInfo.h:152
Target & getTheBPFleTarget()
Target & getTheBPFbeTarget()
Target & getTheBPFTarget()
DWARFExpression::Operation Op
#define N
RegisterMCAsmParser - Helper template for registering a target specific assembly parser,...