LLVM  9.0.0svn
BPFAsmParser.cpp
Go to the documentation of this file.
1 //===-- BPFAsmParser.cpp - Parse BPF assembly to MCInst instructions --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
#include "MCTargetDesc/BPFMCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/TargetRegistry.h"
23 
24 using namespace llvm;
25 
26 namespace {
27 struct BPFOperand;
28 
29 class BPFAsmParser : public MCTargetAsmParser {
30 
31  SMLoc getLoc() const { return getParser().getTok().getLoc(); }
32 
33  bool PreMatchCheck(OperandVector &Operands);
34 
35  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
36  OperandVector &Operands, MCStreamer &Out,
37  uint64_t &ErrorInfo,
38  bool MatchingInlineAsm) override;
39 
40  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
41 
42  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
43  SMLoc NameLoc, OperandVector &Operands) override;
44 
45  bool ParseDirective(AsmToken DirectiveID) override;
46 
47  // "=" is used as assignment operator for assembly statment, so can't be used
48  // for symbol assignment.
49  bool equalIsAsmAssignment() override { return false; }
50  // "*" is used for dereferencing memory that it will be the start of
51  // statement.
52  bool starIsStartOfStatement() override { return true; }
53 
54 #define GET_ASSEMBLER_HEADER
55 #include "BPFGenAsmMatcher.inc"
56 
58  OperandMatchResultTy parseRegister(OperandVector &Operands);
59  OperandMatchResultTy parseOperandAsOperator(OperandVector &Operands);
60 
61 public:
62  enum BPFMatchResultTy {
63  Match_Dummy = FIRST_TARGET_MATCH_RESULT_TY,
64 #define GET_OPERAND_DIAGNOSTIC_TYPES
65 #include "BPFGenAsmMatcher.inc"
66 #undef GET_OPERAND_DIAGNOSTIC_TYPES
67  };
68 
69  BPFAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
70  const MCInstrInfo &MII, const MCTargetOptions &Options)
71  : MCTargetAsmParser(Options, STI, MII) {
72  setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
73  }
74 };
75 
76 /// BPFOperand - Instances of this class represent a parsed machine
77 /// instruction
78 struct BPFOperand : public MCParsedAsmOperand {
79 
80  enum KindTy {
81  Token,
82  Register,
83  Immediate,
84  } Kind;
85 
86  struct RegOp {
87  unsigned RegNum;
88  };
89 
90  struct ImmOp {
91  const MCExpr *Val;
92  };
93 
94  SMLoc StartLoc, EndLoc;
95  union {
96  StringRef Tok;
97  RegOp Reg;
98  ImmOp Imm;
99  };
100 
101  BPFOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
102 
103 public:
104  BPFOperand(const BPFOperand &o) : MCParsedAsmOperand() {
105  Kind = o.Kind;
106  StartLoc = o.StartLoc;
107  EndLoc = o.EndLoc;
108 
109  switch (Kind) {
110  case Register:
111  Reg = o.Reg;
112  break;
113  case Immediate:
114  Imm = o.Imm;
115  break;
116  case Token:
117  Tok = o.Tok;
118  break;
119  }
120  }
121 
122  bool isToken() const override { return Kind == Token; }
123  bool isReg() const override { return Kind == Register; }
124  bool isImm() const override { return Kind == Immediate; }
125  bool isMem() const override { return false; }
126 
127  bool isConstantImm() const {
128  return isImm() && dyn_cast<MCConstantExpr>(getImm());
129  }
130 
131  int64_t getConstantImm() const {
132  const MCExpr *Val = getImm();
133  return static_cast<const MCConstantExpr *>(Val)->getValue();
134  }
135 
136  bool isSImm12() const {
137  return (isConstantImm() && isInt<12>(getConstantImm()));
138  }
139 
140  /// getStartLoc - Gets location of the first token of this operand
141  SMLoc getStartLoc() const override { return StartLoc; }
142  /// getEndLoc - Gets location of the last token of this operand
143  SMLoc getEndLoc() const override { return EndLoc; }
144 
145  unsigned getReg() const override {
146  assert(Kind == Register && "Invalid type access!");
147  return Reg.RegNum;
148  }
149 
150  const MCExpr *getImm() const {
151  assert(Kind == Immediate && "Invalid type access!");
152  return Imm.Val;
153  }
154 
155  StringRef getToken() const {
156  assert(Kind == Token && "Invalid type access!");
157  return Tok;
158  }
159 
160  void print(raw_ostream &OS) const override {
161  switch (Kind) {
162  case Immediate:
163  OS << *getImm();
164  break;
165  case Register:
166  OS << "<register x";
167  OS << getReg() << ">";
168  break;
169  case Token:
170  OS << "'" << getToken() << "'";
171  break;
172  }
173  }
174 
175  void addExpr(MCInst &Inst, const MCExpr *Expr) const {
176  assert(Expr && "Expr shouldn't be null!");
177 
178  if (auto *CE = dyn_cast<MCConstantExpr>(Expr))
179  Inst.addOperand(MCOperand::createImm(CE->getValue()));
180  else
181  Inst.addOperand(MCOperand::createExpr(Expr));
182  }
183 
184  // Used by the TableGen Code
185  void addRegOperands(MCInst &Inst, unsigned N) const {
186  assert(N == 1 && "Invalid number of operands!");
188  }
189 
190  void addImmOperands(MCInst &Inst, unsigned N) const {
191  assert(N == 1 && "Invalid number of operands!");
192  addExpr(Inst, getImm());
193  }
194 
195  static std::unique_ptr<BPFOperand> createToken(StringRef Str, SMLoc S) {
196  auto Op = make_unique<BPFOperand>(Token);
197  Op->Tok = Str;
198  Op->StartLoc = S;
199  Op->EndLoc = S;
200  return Op;
201  }
202 
203  static std::unique_ptr<BPFOperand> createReg(unsigned RegNo, SMLoc S,
204  SMLoc E) {
205  auto Op = make_unique<BPFOperand>(Register);
206  Op->Reg.RegNum = RegNo;
207  Op->StartLoc = S;
208  Op->EndLoc = E;
209  return Op;
210  }
211 
212  static std::unique_ptr<BPFOperand> createImm(const MCExpr *Val, SMLoc S,
213  SMLoc E) {
214  auto Op = make_unique<BPFOperand>(Immediate);
215  Op->Imm.Val = Val;
216  Op->StartLoc = S;
217  Op->EndLoc = E;
218  return Op;
219  }
220 
221  // Identifiers that can be used at the start of a statment.
222  static bool isValidIdAtStart(StringRef Name) {
223  return StringSwitch<bool>(Name.lower())
224  .Case("if", true)
225  .Case("call", true)
226  .Case("goto", true)
227  .Case("*", true)
228  .Case("exit", true)
229  .Case("lock", true)
230  .Case("ld_pseudo", true)
231  .Default(false);
232  }
233 
234  // Identifiers that can be used in the middle of a statment.
235  static bool isValidIdInMiddle(StringRef Name) {
236  return StringSwitch<bool>(Name.lower())
237  .Case("u64", true)
238  .Case("u32", true)
239  .Case("u16", true)
240  .Case("u8", true)
241  .Case("be64", true)
242  .Case("be32", true)
243  .Case("be16", true)
244  .Case("le64", true)
245  .Case("le32", true)
246  .Case("le16", true)
247  .Case("goto", true)
248  .Case("ll", true)
249  .Case("skb", true)
250  .Case("s", true)
251  .Default(false);
252  }
253 };
254 } // end anonymous namespace.
255 
256 #define GET_REGISTER_MATCHER
257 #define GET_MATCHER_IMPLEMENTATION
258 #include "BPFGenAsmMatcher.inc"
259 
260 bool BPFAsmParser::PreMatchCheck(OperandVector &Operands) {
261 
262  if (Operands.size() == 4) {
263  // check "reg1 = -reg2" and "reg1 = be16/be32/be64/le16/le32/le64 reg2",
264  // reg1 must be the same as reg2
265  BPFOperand &Op0 = (BPFOperand &)*Operands[0];
266  BPFOperand &Op1 = (BPFOperand &)*Operands[1];
267  BPFOperand &Op2 = (BPFOperand &)*Operands[2];
268  BPFOperand &Op3 = (BPFOperand &)*Operands[3];
269  if (Op0.isReg() && Op1.isToken() && Op2.isToken() && Op3.isReg()
270  && Op1.getToken() == "="
271  && (Op2.getToken() == "-" || Op2.getToken() == "be16"
272  || Op2.getToken() == "be32" || Op2.getToken() == "be64"
273  || Op2.getToken() == "le16" || Op2.getToken() == "le32"
274  || Op2.getToken() == "le64")
275  && Op0.getReg() != Op3.getReg())
276  return true;
277  }
278 
279  return false;
280 }
281 
282 bool BPFAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
283  OperandVector &Operands,
284  MCStreamer &Out, uint64_t &ErrorInfo,
285  bool MatchingInlineAsm) {
286  MCInst Inst;
287  SMLoc ErrorLoc;
288 
289  if (PreMatchCheck(Operands))
290  return Error(IDLoc, "additional inst constraint not met");
291 
292  switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
293  default:
294  break;
295  case Match_Success:
296  Inst.setLoc(IDLoc);
297  Out.EmitInstruction(Inst, getSTI());
298  return false;
299  case Match_MissingFeature:
300  return Error(IDLoc, "instruction use requires an option to be enabled");
301  case Match_MnemonicFail:
302  return Error(IDLoc, "unrecognized instruction mnemonic");
303  case Match_InvalidOperand:
304  ErrorLoc = IDLoc;
305 
306  if (ErrorInfo != ~0U) {
307  if (ErrorInfo >= Operands.size())
308  return Error(ErrorLoc, "too few operands for instruction");
309 
310  ErrorLoc = ((BPFOperand &)*Operands[ErrorInfo]).getStartLoc();
311 
312  if (ErrorLoc == SMLoc())
313  ErrorLoc = IDLoc;
314  }
315 
316  return Error(ErrorLoc, "invalid operand for instruction");
317  }
318 
319  llvm_unreachable("Unknown match type detected!");
320 }
321 
322 bool BPFAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
323  SMLoc &EndLoc) {
324  const AsmToken &Tok = getParser().getTok();
325  StartLoc = Tok.getLoc();
326  EndLoc = Tok.getEndLoc();
327  RegNo = 0;
328  StringRef Name = getLexer().getTok().getIdentifier();
329 
330  if (!MatchRegisterName(Name)) {
331  getParser().Lex(); // Eat identifier token.
332  return false;
333  }
334 
335  return Error(StartLoc, "invalid register name");
336 }
337 
339 BPFAsmParser::parseOperandAsOperator(OperandVector &Operands) {
340  SMLoc S = getLoc();
341 
342  if (getLexer().getKind() == AsmToken::Identifier) {
343  StringRef Name = getLexer().getTok().getIdentifier();
344 
345  if (BPFOperand::isValidIdInMiddle(Name)) {
346  getLexer().Lex();
347  Operands.push_back(BPFOperand::createToken(Name, S));
348  return MatchOperand_Success;
349  }
350 
351  return MatchOperand_NoMatch;
352  }
353 
354  switch (getLexer().getKind()) {
355  case AsmToken::Minus:
356  case AsmToken::Plus: {
357  if (getLexer().peekTok().is(AsmToken::Integer))
358  return MatchOperand_NoMatch;
360  }
361 
362  case AsmToken::Equal:
363  case AsmToken::Greater:
364  case AsmToken::Less:
365  case AsmToken::Pipe:
366  case AsmToken::Star:
367  case AsmToken::LParen:
368  case AsmToken::RParen:
369  case AsmToken::LBrac:
370  case AsmToken::RBrac:
371  case AsmToken::Slash:
372  case AsmToken::Amp:
373  case AsmToken::Percent:
374  case AsmToken::Caret: {
375  StringRef Name = getLexer().getTok().getString();
376  getLexer().Lex();
377  Operands.push_back(BPFOperand::createToken(Name, S));
378 
379  return MatchOperand_Success;
380  }
381 
386  case AsmToken::LessEqual:
387  case AsmToken::LessLess: {
388  Operands.push_back(BPFOperand::createToken(
389  getLexer().getTok().getString().substr(0, 1), S));
390  Operands.push_back(BPFOperand::createToken(
391  getLexer().getTok().getString().substr(1, 1), S));
392  getLexer().Lex();
393 
394  return MatchOperand_Success;
395  }
396 
397  default:
398  break;
399  }
400 
401  return MatchOperand_NoMatch;
402 }
403 
404 OperandMatchResultTy BPFAsmParser::parseRegister(OperandVector &Operands) {
405  SMLoc S = getLoc();
407 
408  switch (getLexer().getKind()) {
409  default:
410  return MatchOperand_NoMatch;
412  StringRef Name = getLexer().getTok().getIdentifier();
413  unsigned RegNo = MatchRegisterName(Name);
414 
415  if (RegNo == 0)
416  return MatchOperand_NoMatch;
417 
418  getLexer().Lex();
419  Operands.push_back(BPFOperand::createReg(RegNo, S, E));
420  }
421  return MatchOperand_Success;
422 }
423 
425  switch (getLexer().getKind()) {
426  default:
427  return MatchOperand_NoMatch;
428  case AsmToken::LParen:
429  case AsmToken::Minus:
430  case AsmToken::Plus:
431  case AsmToken::Integer:
432  case AsmToken::String:
434  break;
435  }
436 
437  const MCExpr *IdVal;
438  SMLoc S = getLoc();
439 
440  if (getParser().parseExpression(IdVal))
441  return MatchOperand_ParseFail;
442 
444  Operands.push_back(BPFOperand::createImm(IdVal, S, E));
445 
446  return MatchOperand_Success;
447 }
448 
449 /// ParseInstruction - Parse an BPF instruction which is in BPF verifier
450 /// format.
451 bool BPFAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
452  SMLoc NameLoc, OperandVector &Operands) {
453  // The first operand could be either register or actually an operator.
454  unsigned RegNo = MatchRegisterName(Name);
455 
456  if (RegNo != 0) {
457  SMLoc E = SMLoc::getFromPointer(NameLoc.getPointer() - 1);
458  Operands.push_back(BPFOperand::createReg(RegNo, NameLoc, E));
459  } else if (BPFOperand::isValidIdAtStart (Name))
460  Operands.push_back(BPFOperand::createToken(Name, NameLoc));
461  else
462  return Error(NameLoc, "invalid register/token name");
463 
464  while (!getLexer().is(AsmToken::EndOfStatement)) {
465  // Attempt to parse token as operator
466  if (parseOperandAsOperator(Operands) == MatchOperand_Success)
467  continue;
468 
469  // Attempt to parse token as register
470  if (parseRegister(Operands) == MatchOperand_Success)
471  continue;
472 
473  // Attempt to parse token as an immediate
474  if (parseImmediate(Operands) != MatchOperand_Success) {
475  SMLoc Loc = getLexer().getLoc();
476  return Error(Loc, "unexpected token");
477  }
478  }
479 
480  if (getLexer().isNot(AsmToken::EndOfStatement)) {
481  SMLoc Loc = getLexer().getLoc();
482 
483  getParser().eatToEndOfStatement();
484 
485  return Error(Loc, "unexpected token");
486  }
487 
488  // Consume the EndOfStatement.
489  getParser().Lex();
490  return false;
491 }
492 
493 bool BPFAsmParser::ParseDirective(AsmToken DirectiveID) { return true; }
494 
495 extern "C" void LLVMInitializeBPFAsmParser() {
499 }
static bool isReg(const MCInst &MI, unsigned OpNo)
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
This class represents lattice values for constants.
Definition: AllocatorList.h:23
static unsigned MatchRegisterName(StringRef Name)
Generic assembler parser interface, for use by target specific assembly parsers.
Definition: MCAsmParser.h:109
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
static MCOperand createExpr(const MCExpr *Val)
Definition: MCInst.h:136
MCTargetAsmParser - Generic interface to target specific assembly parsers.
void push_back(const T &Elt)
Definition: SmallVector.h:211
unsigned Reg
virtual void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI)
Emit the given Instruction into the current section.
Definition: MCStreamer.cpp:955
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:67
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static MCOperand createReg(unsigned Reg)
Definition: MCInst.h:115
const FeatureBitset & getFeatureBits() const
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:41
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
LLVM_NODISCARD R Default(T Value)
Definition: StringSwitch.h:181
Target independent representation for an assembler token.
Definition: MCAsmMacro.h:21
static bool isMem(const MachineInstr &MI, unsigned Op)
Definition: X86InstrInfo.h:160
zlib-gnu style compression
MCParsedAsmOperand - This abstract class represents a source-level assembly instruction operand...
std::pair< StringRef, StringRef > getToken(StringRef Source, StringRef Delimiters=" \\\)
getToken - This function extracts one token from source, ignoring any leading characters that appear ...
RegisterMCAsmParser - Helper template for registering a target specific assembly parser, for use in the target machine initialization function.
SMLoc getLoc() const
Definition: MCAsmLexer.cpp:27
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:158
Analysis containing CSE Info
Definition: CSEInfo.cpp:20
const char * getPointer() const
Definition: SMLoc.h:34
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:42
Streaming machine code generation interface.
Definition: MCStreamer.h:188
SMLoc getEndLoc() const
Definition: MCAsmLexer.cpp:31
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Interface to description of machine instruction set.
Definition: MCInstrInfo.h:23
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
size_t size() const
Definition: SmallVector.h:52
void setLoc(SMLoc loc)
Definition: MCInst.h:176
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void LLVMInitializeBPFAsmParser()
Promote Memory to Register
Definition: Mem2Reg.cpp:109
static unsigned getReg(const void *D, unsigned RC, unsigned RegNo)
Base class for user error types.
Definition: Error.h:344
static SMLoc getFromPointer(const char *Ptr)
Definition: SMLoc.h:36
#define N
Generic base class for all target subtargets.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:322
const unsigned Kind
LLVM_NODISCARD std::string lower() const
Definition: StringRef.cpp:107
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Target & getTheBPFleTarget()
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:250
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:45
void addOperand(const MCOperand &Op)
Definition: MCInst.h:183
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
Target & getTheBPFbeTarget()
Represents a location in source code.
Definition: SMLoc.h:23
static MCOperand createImm(int64_t Val)
Definition: MCInst.h:122
Target & getTheBPFTarget()
bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef< uint8_t > Bytes)