LLVM  9.0.0svn
MCTargetAsmParser.h
Go to the documentation of this file.
1 //===- llvm/MC/MCTargetAsmParser.h - Target Assembly Parser -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
10 #define LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
11 
12 #include "llvm/ADT/StringRef.h"
13 #include "llvm/MC/MCExpr.h"
14 #include "llvm/MC/MCInstrInfo.h"
19 #include "llvm/Support/SMLoc.h"
20 #include <cstdint>
21 #include <memory>
22 
23 namespace llvm {
24 
25 class MCInst;
26 class MCParsedAsmOperand;
27 class MCStreamer;
28 class MCSubtargetInfo;
29 template <typename T> class SmallVectorImpl;
30 
32 
/// The kinds of rewrite that can be recorded as an AsmRewrite.
///
/// AsmRewritePrecedence holds one entry per enumerator, listed in this same
/// order, so the two must be kept in sync when enumerators are added or
/// reordered.
enum AsmRewriteKind {
  AOK_Align,          // Rewrite align as .align.
  AOK_EVEN,           // Rewrite even as .even.
  AOK_Emit,           // Rewrite _emit as .byte.
  AOK_Input,          // Rewrite in terms of $N.
  AOK_Output,         // Rewrite in terms of $N.
  AOK_SizeDirective,  // Add a sizing directive (e.g., dword ptr).
  AOK_Label,          // Rewrite local labels.
  AOK_EndOfStatement, // Add EndOfStatement (e.g., "\n\t").
  AOK_Skip,           // Skip emission (e.g., offset/type operators).
  AOK_IntelExpr       // SizeDirective SymDisp [BaseReg + IndexReg * Scale + ImmDisp]
};
45 
/// Precedence value for each rewrite kind.
///
/// There is one entry per AsmRewriteKind enumerator, in declaration order;
/// the trailing comment on each entry names the enumerator it belongs to.
const char AsmRewritePrecedence[] = {
    2, // AOK_Align
    2, // AOK_EVEN
    2, // AOK_Emit
    3, // AOK_Input
    3, // AOK_Output
    5, // AOK_SizeDirective
    1, // AOK_Label
    5, // AOK_EndOfStatement
    2, // AOK_Skip
    2  // AOK_IntelExpr
};
58 
59 // Represnt the various parts which makes up an intel expression,
60 // used for emitting compound intel expressions
61 struct IntelExpr {
62  bool NeedBracs;
63  int64_t Imm;
66  unsigned Scale;
67 
68  IntelExpr(bool needBracs = false) : NeedBracs(needBracs), Imm(0),
69  BaseReg(StringRef()), IndexReg(StringRef()),
70  Scale(1) {}
71  // Compund immediate expression
72  IntelExpr(int64_t imm, bool needBracs) : IntelExpr(needBracs) {
73  Imm = imm;
74  }
75  // [Reg + ImmediateExpression]
76  // We don't bother to emit an immediate expression evaluated to zero
77  IntelExpr(StringRef reg, int64_t imm = 0, unsigned scale = 0,
78  bool needBracs = true) :
79  IntelExpr(imm, needBracs) {
80  IndexReg = reg;
81  if (scale)
82  Scale = scale;
83  }
84  // [BaseReg + IndexReg * ScaleExpression + ImmediateExpression]
85  IntelExpr(StringRef baseReg, StringRef indexReg, unsigned scale = 0,
86  int64_t imm = 0, bool needBracs = true) :
87  IntelExpr(indexReg, imm, scale, needBracs) {
88  BaseReg = baseReg;
89  }
90  bool hasBaseReg() const {
91  return BaseReg.size();
92  }
93  bool hasIndexReg() const {
94  return IndexReg.size();
95  }
96  bool hasRegs() const {
97  return hasBaseReg() || hasIndexReg();
98  }
99  bool isValid() const {
100  return (Scale == 1) ||
101  (hasIndexReg() && (Scale == 2 || Scale == 4 || Scale == 8));
102  }
103 };
104 
105 struct AsmRewrite {
108  unsigned Len;
109  int64_t Val;
112 
113 public:
114  AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len = 0, int64_t val = 0)
115  : Kind(kind), Loc(loc), Len(len), Val(val) {}
116  AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len, StringRef label)
117  : AsmRewrite(kind, loc, len) { Label = label; }
118  AsmRewrite(SMLoc loc, unsigned len, IntelExpr exp)
119  : AsmRewrite(AOK_IntelExpr, loc, len) { IntelExp = exp; }
120 };
121 
123  SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr;
124 
125  ParseInstructionInfo() = default;
127  : AsmRewrites(rewrites) {}
128 };
129 
131  MatchOperand_Success, // operand matched successfully
132  MatchOperand_NoMatch, // operand did not match
133  MatchOperand_ParseFail // operand matched but had errors
134 };
135 
/// Result categories reported by a DiagnosticPredicate: a match, a near
/// match, or no match at all.
enum class DiagnosticPredicateTy {
  Match,
  NearMatch,
  NoMatch,
};
141 
142 // When an operand is parsed, the assembler will try to iterate through a set of
143 // possible operand classes that the operand might match and call the
144 // corresponding PredicateMethod to determine that.
145 //
146 // If there are two AsmOperands that would give a specific diagnostic if there
147 // is no match, there is currently no mechanism to distinguish which operand is
148 // a closer match. The DiagnosticPredicate distinguishes between 'completely
149 // no match' and 'near match', so the assembler can decide whether to give a
150 // specific diagnostic, or use 'InvalidOperand' and continue to find a
151 // 'better matching' diagnostic.
152 //
153 // For example:
154 // opcode opnd0, opnd1, opnd2
155 //
156 // where:
157 // opnd2 could be an 'immediate of range [-8, 7]'
158 // opnd2 could be a 'register + shift/extend'.
159 //
160 // If opnd2 is a valid register, but with a wrong shift/extend suffix, it makes
161 // little sense to give a diagnostic that the operand should be an immediate
162 // in range [-8, 7].
163 //
164 // This is a light-weight alternative to the 'NearMissInfo' approach
165 // below which collects *all* possible diagnostics. This alternative
166 // is optional and fully backward compatible with existing
167 // PredicateMethods that return a 'bool' (match or no match).
170 
171  explicit DiagnosticPredicate(bool Match)
172  : Type(Match ? DiagnosticPredicateTy::Match
175  DiagnosticPredicate(const DiagnosticPredicate &) = default;
176 
177  operator bool() const { return Type == DiagnosticPredicateTy::Match; }
178  bool isMatch() const { return Type == DiagnosticPredicateTy::Match; }
179  bool isNearMatch() const { return Type == DiagnosticPredicateTy::NearMatch; }
180  bool isNoMatch() const { return Type == DiagnosticPredicateTy::NoMatch; }
181 };
182 
183 // When matching of an assembly instruction fails, there may be multiple
184 // encodings that are close to being a match. It's often ambiguous which one
185 // the programmer intended to use, so we want to report an error which mentions
186 // each of these "near-miss" encodings. This struct contains information about
187 // one such encoding, and why it did not match the parsed instruction.
189 public:
196  };
197 
198  // The encoding is valid for the parsed assembly string. This is only used
199  // internally to the table-generated assembly matcher.
200  static NearMissInfo getSuccess() { return NearMissInfo(); }
201 
202  // The instruction encoding is not valid because it requires some target
203  // features that are not currently enabled. MissingFeatures has a bit set for
204  // each feature that the encoding needs but which is not enabled.
205  static NearMissInfo getMissedFeature(uint64_t MissingFeatures) {
206  NearMissInfo Result;
207  Result.Kind = NearMissFeature;
208  Result.Features = MissingFeatures;
209  return Result;
210  }
211 
212  // The instruction encoding is not valid because the target-specific
213  // predicate function returned an error code. FailureCode is the
214  // target-specific error code returned by the predicate.
215  static NearMissInfo getMissedPredicate(unsigned FailureCode) {
216  NearMissInfo Result;
217  Result.Kind = NearMissPredicate;
218  Result.PredicateError = FailureCode;
219  return Result;
220  }
221 
222  // The instruction encoding is not valid because one (and only one) parsed
223  // operand is not of the correct type. OperandError is the error code
224  // relating to the operand class expected by the encoding. OperandClass is
225  // the type of the expected operand. Opcode is the opcode of the encoding.
226  // OperandIndex is the index into the parsed operand list.
227  static NearMissInfo getMissedOperand(unsigned OperandError,
228  unsigned OperandClass, unsigned Opcode,
229  unsigned OperandIndex) {
230  NearMissInfo Result;
231  Result.Kind = NearMissOperand;
232  Result.MissedOperand.Error = OperandError;
233  Result.MissedOperand.Class = OperandClass;
234  Result.MissedOperand.Opcode = Opcode;
235  Result.MissedOperand.Index = OperandIndex;
236  return Result;
237  }
238 
239  // The instruction encoding is not valid because it expects more operands
240  // than were parsed. OperandClass is the class of the expected operand that
241  // was not provided. Opcode is the instruction encoding.
242  static NearMissInfo getTooFewOperands(unsigned OperandClass,
243  unsigned Opcode) {
244  NearMissInfo Result;
245  Result.Kind = NearMissTooFewOperands;
246  Result.TooFewOperands.Class = OperandClass;
247  Result.TooFewOperands.Opcode = Opcode;
248  return Result;
249  }
250 
251  operator bool() const { return Kind != NoNearMiss; }
252 
253  NearMissKind getKind() const { return Kind; }
254 
255  // Feature flags required by the instruction, that the current target does
256  // not have.
257  uint64_t getFeatures() const {
258  assert(Kind == NearMissFeature);
259  return Features;
260  }
261  // Error code returned by the target predicate when validating this
262  // instruction encoding.
263  unsigned getPredicateError() const {
264  assert(Kind == NearMissPredicate);
265  return PredicateError;
266  }
267  // MatchClassKind of the operand that we expected to see.
268  unsigned getOperandClass() const {
269  assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands);
270  return MissedOperand.Class;
271  }
272  // Opcode of the encoding we were trying to match.
273  unsigned getOpcode() const {
274  assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands);
275  return MissedOperand.Opcode;
276  }
277  // Error code returned when validating the operand.
278  unsigned getOperandError() const {
279  assert(Kind == NearMissOperand);
280  return MissedOperand.Error;
281  }
282  // Index of the actual operand we were trying to match in the list of parsed
283  // operands.
284  unsigned getOperandIndex() const {
285  assert(Kind == NearMissOperand);
286  return MissedOperand.Index;
287  }
288 
289 private:
291 
292  // These two structs share a common prefix, so we can safely rely on the fact
293  // that they overlap in the union.
294  struct MissedOpInfo {
295  unsigned Class;
296  unsigned Opcode;
297  unsigned Error;
298  unsigned Index;
299  };
300 
301  struct TooFewOperandsInfo {
302  unsigned Class;
303  unsigned Opcode;
304  };
305 
306  union {
307  uint64_t Features;
308  unsigned PredicateError;
309  MissedOpInfo MissedOperand;
310  TooFewOperandsInfo TooFewOperands;
311  };
312 
313  NearMissInfo() : Kind(NoNearMiss) {}
314 };
315 
316 /// MCTargetAsmParser - Generic interface to target specific assembly parsers.
318 public:
326  FIRST_TARGET_MATCH_RESULT_TY
327  };
328 
329 protected: // Can only create subclasses.
331  const MCInstrInfo &MII);
332 
333  /// Create a copy of STI and return a non-const reference to it.
334  MCSubtargetInfo &copySTI();
335 
336  /// AvailableFeatures - The current set of available features.
337  uint64_t AvailableFeatures = 0;
338 
339  /// ParsingInlineAsm - Are we parsing ms-style inline assembly?
340  bool ParsingInlineAsm = false;
341 
342  /// SemaCallback - The Sema callback implementation. Must be set when parsing
343  /// ms-style inline assembly.
345 
346  /// Set of options which affects instrumentation of inline assembly.
348 
349  /// Current STI.
351 
352  const MCInstrInfo &MII;
353 
354 public:
355  MCTargetAsmParser(const MCTargetAsmParser &) = delete;
356  MCTargetAsmParser &operator=(const MCTargetAsmParser &) = delete;
357 
358  ~MCTargetAsmParser() override;
359 
360  const MCSubtargetInfo &getSTI() const;
361 
362  uint64_t getAvailableFeatures() const { return AvailableFeatures; }
363  void setAvailableFeatures(uint64_t Value) { AvailableFeatures = Value; }
364 
365  bool isParsingInlineAsm () { return ParsingInlineAsm; }
366  void setParsingInlineAsm (bool Value) { ParsingInlineAsm = Value; }
367 
368  MCTargetOptions getTargetOptions() const { return MCOptions; }
369 
371  SemaCallback = Callback;
372  }
373 
374  // Target-specific parsing of expression.
375  virtual bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
376  return getParser().parsePrimaryExpr(Res, EndLoc);
377  }
378 
379  virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
380  SMLoc &EndLoc) = 0;
381 
382  /// Sets frame register corresponding to the current MachineFunction.
383  virtual void SetFrameRegister(unsigned RegNo) {}
384 
385  /// ParseInstruction - Parse one assembly instruction.
386  ///
387  /// The parser is positioned following the instruction name. The target
388  /// specific instruction parser should parse the entire instruction and
389  /// construct the appropriate MCInst, or emit an error. On success, the entire
390  /// line should be parsed up to and including the end-of-statement token. On
391  /// failure, the parser is not required to read to the end of the line.
392  //
393  /// \param Name - The instruction name.
394  /// \param NameLoc - The source location of the name.
395  /// \param Operands [out] - The list of parsed operands, this returns
396  /// ownership of them to the caller.
397  /// \return True on failure.
398  virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
399  SMLoc NameLoc, OperandVector &Operands) = 0;
401  AsmToken Token, OperandVector &Operands) {
402  return ParseInstruction(Info, Name, Token.getLoc(), Operands);
403  }
404 
405  /// ParseDirective - Parse a target specific assembler directive
406  ///
407  /// The parser is positioned following the directive name. The target
408  /// specific directive parser should parse the entire directive doing or
409  /// recording any target specific work, or return true and do nothing if the
410  /// directive is not target specific. If the directive is specific for
411  /// the target, the entire line is parsed up to and including the
412  /// end-of-statement token and false is returned.
413  ///
414  /// \param DirectiveID - the identifier token of the directive.
415  virtual bool ParseDirective(AsmToken DirectiveID) = 0;
416 
417  /// MatchAndEmitInstruction - Recognize a series of operands of a parsed
418  /// instruction as an actual MCInst and emit it to the specified MCStreamer.
419  /// This returns false on success and returns true on failure to match.
420  ///
421  /// On failure, the target parser is responsible for emitting a diagnostic
422  /// explaining the match failure.
423  virtual bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
424  OperandVector &Operands, MCStreamer &Out,
425  uint64_t &ErrorInfo,
426  bool MatchingInlineAsm) = 0;
427 
428  /// Allows targets to let registers opt out of clobber lists.
429  virtual bool OmitRegisterFromClobberLists(unsigned RegNo) { return false; }
430 
431  /// Allow a target to add special case operand matching for things that
432  /// tblgen doesn't/can't handle effectively. For example, literal
433  /// immediates on ARM. TableGen expects a token operand, but the parser
434  /// will recognize them as immediates.
436  unsigned Kind) {
437  return Match_InvalidOperand;
438  }
439 
440  /// Validate the instruction match against any complex target predicates
441  /// before rendering any operands to it.
442  virtual unsigned
444  return Match_Success;
445  }
446 
447  /// checkTargetMatchPredicate - Validate the instruction match against
448  /// any complex target predicates not expressible via match classes.
449  virtual unsigned checkTargetMatchPredicate(MCInst &Inst) {
450  return Match_Success;
451  }
452 
453  virtual void convertToMapAndConstraints(unsigned Kind,
454  const OperandVector &Operands) = 0;
455 
456  /// Returns whether two registers are equal and is used by the tied-operands
457  /// checks in the AsmMatcher. This method can be overridden to allow e.g. a
458  /// sub- or super-register as the tied operand.
459  virtual bool regsEqual(const MCParsedAsmOperand &Op1,
460  const MCParsedAsmOperand &Op2) const {
461  assert(Op1.isReg() && Op2.isReg() && "Operands not all regs");
462  return Op1.getReg() == Op2.getReg();
463  }
464 
465  // Return whether this parser uses assignment statements with equals tokens
466  virtual bool equalIsAsmAssignment() { return true; };
467  // Return whether this start of statement identifier is a label
468  virtual bool isLabel(AsmToken &Token) { return true; };
469  // Return whether this parser accept star as start of statement
470  virtual bool starIsStartOfStatement() { return false; };
471 
472  virtual const MCExpr *applyModifierToExpr(const MCExpr *E,
474  MCContext &Ctx) {
475  return nullptr;
476  }
477 
478  // For actions that have to be performed before a label is emitted
480 
481  virtual void onLabelParsed(MCSymbol *Symbol) {}
482 
483  /// Ensure that all previously parsed instructions have been emitted to the
484  /// output streamer, if the target does not emit them immediately.
485  virtual void flushPendingInstructions(MCStreamer &Out) {}
486 
487  virtual const MCExpr *createTargetUnaryExpr(const MCExpr *E,
488  AsmToken::TokenKind OperatorToken,
489  MCContext &Ctx) {
490  return nullptr;
491  }
492 
493  // For any checks or cleanups at the end of parsing.
494  virtual void onEndOfFile() {}
495 };
496 
497 } // end namespace llvm
498 
499 #endif // LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
virtual bool isLabel(AsmToken &Token)
bool hasIndexReg() const
MCAsmParserSemaCallback * SemaCallback
SemaCallback - The Sema callback implementation.
This class represents lattice values for constants.
Definition: AllocatorList.h:23
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
virtual unsigned checkEarlyTargetMatchPredicate(MCInst &Inst, const OperandVector &Operands)
Validate the instruction match against any complex target predicates before rendering any operands to...
IntelExpr(StringRef reg, int64_t imm=0, unsigned scale=0, bool needBracs=true)
virtual bool regsEqual(const MCParsedAsmOperand &Op1, const MCParsedAsmOperand &Op2) const
Returns whether two registers are equal and is used by the tied-operands checks in the AsmMatcher...
MCTargetAsmParser - Generic interface to target specific assembly parsers.
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE size_t size() const
size - Get the string size.
Definition: StringRef.h:137
const MCSubtargetInfo * STI
Current STI.
const FeatureBitset Features
uint64_t getAvailableFeatures() const
static NearMissInfo getMissedOperand(unsigned OperandError, unsigned OperandClass, unsigned Opcode, unsigned OperandIndex)
ParseInstructionInfo(SmallVectorImpl< AsmRewrite > *rewrites)
virtual void onLabelParsed(MCSymbol *Symbol)
unsigned getOperandClass() const
AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len, StringRef label)
MCTargetOptions MCOptions
Set of options which affects instrumentation of inline assembly.
bool hasRegs() const
amdgpu Simplify well known AMD library false Value Value const Twine & Name
unsigned getOpcode() const
IntelExpr(StringRef baseReg, StringRef indexReg, unsigned scale=0, int64_t imm=0, bool needBracs=true)
virtual unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, unsigned Kind)
Allow a target to add special case operand matching for things that tblgen doesn't/can't handle effec...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:41
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
DiagnosticPredicate(DiagnosticPredicateTy T)
Target independent representation for an assembler token.
Definition: MCAsmMacro.h:21
IntelExpr(bool needBracs=false)
MCParsedAsmOperand - This abstract class represents a source-level assembly instruction operand...
static NearMissInfo getTooFewOperands(unsigned OperandClass, unsigned Opcode)
Context object for machine code objects.
Definition: MCContext.h:62
unsigned getPredicateError() const
SMLoc getLoc() const
Definition: MCAsmLexer.cpp:27
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:158
Analysis containing CSE Info
Definition: CSEInfo.cpp:20
virtual void SetFrameRegister(unsigned RegNo)
Sets frame register corresponding to the current MachineFunction.
uint64_t getFeatures() const
Streaming machine code generation interface.
Definition: MCStreamer.h:188
virtual bool OmitRegisterFromClobberLists(unsigned RegNo)
Allows targets to let registers opt out of clobber lists.
MissedOpInfo MissedOperand
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static NearMissInfo getMissedPredicate(unsigned FailureCode)
void setSemaCallback(MCAsmParserSemaCallback *Callback)
Interface to description of machine instruction set.
Definition: MCInstrInfo.h:23
bool hasBaseReg() const
virtual unsigned checkTargetMatchPredicate(MCInst &Inst)
checkTargetMatchPredicate - Validate the instruction match against any complex target predicates not ...
void setParsingInlineAsm(bool Value)
static uint64_t scale(uint64_t Num, uint32_t N, uint32_t D)
Generic Sema callback for assembly parser.
Definition: MCAsmParser.h:94
static NearMissInfo getMissedFeature(uint64_t MissingFeatures)
virtual bool starIsStartOfStatement()
virtual unsigned getReg() const =0
virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, AsmToken Token, OperandVector &Operands)
virtual bool equalIsAsmAssignment()
IntelExpr(int64_t imm, bool needBracs)
virtual const MCExpr * createTargetUnaryExpr(const MCExpr *E, AsmToken::TokenKind OperatorToken, MCContext &Ctx)
const MCInstrInfo & MII
DiagnosticPredicateTy Type
static NearMissInfo getSuccess()
AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len=0, int64_t val=0)
Base class for user error types.
Definition: Error.h:344
virtual bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc)
void setAvailableFeatures(uint64_t Value)
bool isValid() const
Generic base class for all target subtargets.
unsigned getOperandError() const
virtual const MCExpr * applyModifierToExpr(const MCExpr *E, MCSymbolRefExpr::VariantKind, MCContext &Ctx)
const unsigned Kind
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
NearMissKind getKind() const
unsigned getOperandIndex() const
virtual void flushPendingInstructions(MCStreamer &Out)
Ensure that all previously parsed instructions have been emitted to the output streamer, if the target does not emit them immediately.
Generic interface for extending the MCAsmParser, which is implemented by target and object file assem...
const char AsmRewritePrecedence[]
TooFewOperandsInfo TooFewOperands
LLVM Value Representation.
Definition: Value.h:72
virtual void doBeforeLabelEmit(MCSymbol *Symbol)
AsmRewriteKind Kind
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
AsmRewrite(SMLoc loc, unsigned len, IntelExpr exp)
Represents a location in source code.
Definition: SMLoc.h:23
MCTargetOptions getTargetOptions() const
virtual bool isReg() const =0
isReg - Is this a register operand?