Line data Source code
1 : //===- llvm/MC/MCTargetAsmParser.h - Target Assembly Parser -----*- C++ -*-===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 :
10 : #ifndef LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
11 : #define LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
12 :
13 : #include "llvm/ADT/StringRef.h"
14 : #include "llvm/MC/MCExpr.h"
15 : #include "llvm/MC/MCInstrInfo.h"
16 : #include "llvm/MC/MCParser/MCAsmLexer.h"
17 : #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
18 : #include "llvm/MC/MCParser/MCAsmParserExtension.h"
19 : #include "llvm/MC/MCTargetOptions.h"
20 : #include "llvm/Support/SMLoc.h"
21 : #include <cstdint>
22 : #include <memory>
23 :
24 : namespace llvm {
25 :
26 : class MCInst;
27 : class MCParsedAsmOperand;
28 : class MCStreamer;
29 : class MCSubtargetInfo;
30 : template <typename T> class SmallVectorImpl;
31 :
32 : using OperandVector = SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand>>;
33 :
34 : enum AsmRewriteKind { // Kind tag stored in AsmRewrite::Kind; AsmRewritePrecedence lists one entry per enumerator in this same order.
35 : AOK_Align, // Rewrite align as .align.
36 : AOK_EVEN, // Rewrite even as .even.
37 : AOK_Emit, // Rewrite _emit as .byte.
38 : AOK_Input, // Rewrite in terms of $N.
39 : AOK_Output, // Rewrite in terms of $N.
40 : AOK_SizeDirective, // Add a sizing directive (e.g., dword ptr).
41 : AOK_Label, // Rewrite local labels.
42 : AOK_EndOfStatement, // Add EndOfStatement (e.g., "\n\t").
43 : AOK_Skip, // Skip emission (e.g., offset/type operators).
44 : AOK_IntelExpr // Rewrite as: SizeDirective SymDisp [BaseReg + IndexReg * Scale + ImmDisp]; the kind forced by the AsmRewrite constructor that takes an IntelExpr.
45 : };
46 :
47 : const char AsmRewritePrecedence [] = { // One precedence value per AsmRewriteKind, listed in enum order. NOTE(review): how the values are compared is up to the consumer and is not visible in this header.
48 : 2, // AOK_Align
49 : 2, // AOK_EVEN
50 : 2, // AOK_Emit
51 : 3, // AOK_Input
52 : 3, // AOK_Output
53 : 5, // AOK_SizeDirective
54 : 1, // AOK_Label
55 : 5, // AOK_EndOfStatement
56 : 2, // AOK_Skip
57 : 2 // AOK_IntelExpr
58 : }; // NOTE(review): a namespace-scope const array defined in a header has internal linkage, so each including translation unit gets its own copy.
59 :
60 : // Represents the various parts which make up an Intel expression,
61 : // used for emitting compound Intel expressions.
62 : struct IntelExpr {
63 : bool NeedBracs; // Bracket flag supplied by the caller; presumably tells the emitter to wrap the expression in brackets (consumer not visible here).
64 : int64_t Imm; // Immediate part; 0 unless a constructor was given an immediate.
65 : StringRef BaseReg; // Base register name; empty when absent (see hasBaseReg).
66 : StringRef IndexReg; // Index register name; empty when absent (see hasIndexReg).
67 : unsigned Scale; // Scale paired with IndexReg; 1 unless a non-zero scale was passed.
68 :
69 541 : IntelExpr(bool needBracs = false) : NeedBracs(needBracs), Imm(0), // Base constructor every other constructor delegates to: no registers, Imm = 0, Scale = 1.
70 : BaseReg(StringRef()), IndexReg(StringRef()),
71 339 : Scale(1) {}
72 : // Compound immediate expression
73 : IntelExpr(int64_t imm, bool needBracs) : IntelExpr(needBracs) {
74 0 : Imm = imm;
75 : }
76 : // [Reg + ImmediateExpression]; reg is stored as IndexReg, and a zero scale keeps Scale at its default of 1.
77 : // We don't bother to emit an immediate expression evaluated to zero
78 : IntelExpr(StringRef reg, int64_t imm = 0, unsigned scale = 0,
79 : bool needBracs = true) :
80 : IntelExpr(imm, needBracs) {
81 0 : IndexReg = reg;
82 0 : if (scale)
83 0 : Scale = scale;
84 : }
85 : // [BaseReg + IndexReg * ScaleExpression + ImmediateExpression]
86 : IntelExpr(StringRef baseReg, StringRef indexReg, unsigned scale = 0,
87 : int64_t imm = 0, bool needBracs = true) :
88 : IntelExpr(indexReg, imm, scale, needBracs) {
89 0 : BaseReg = baseReg;
90 : }
91 : bool hasBaseReg() const { // True when BaseReg is non-empty (its size is converted to bool).
92 : return BaseReg.size();
93 : }
94 : bool hasIndexReg() const { // True when IndexReg is non-empty (its size is converted to bool).
95 : return IndexReg.size();
96 : }
97 : bool hasRegs() const { // True when at least one of the two registers is present.
98 201 : return hasBaseReg() || hasIndexReg();
99 : }
100 : bool isValid() const { // Valid when Scale is 1, or when an index register is present and Scale is 2, 4 or 8.
101 : return (Scale == 1) ||
102 : (hasIndexReg() && (Scale == 2 || Scale == 4 || Scale == 8));
103 : }
104 : };
105 :
106 : struct AsmRewrite { // One rewrite record: a kind, a source location and a length, plus an optional payload (Val, Label or IntelExp).
107 : AsmRewriteKind Kind; // Which rewrite this record describes.
108 : SMLoc Loc; // Source location the rewrite refers to.
109 : unsigned Len; // Length paired with Loc; presumably the number of source characters covered (confirm against the consumer).
110 : int64_t Val; // Integer payload; 0 unless passed to the first constructor.
111 : StringRef Label; // Assigned only by the constructor that takes a label.
112 : IntelExpr IntelExp; // Assigned only by the constructor that takes an IntelExpr; default-constructed otherwise.
113 :
114 : public: // Redundant in a struct; members are already public.
115 : AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len = 0, int64_t val = 0) // General constructor; Label and IntelExp keep their default-constructed values.
116 541 : : Kind(kind), Loc(loc), Len(len), Val(val) {}
117 : AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len, StringRef label) // Label-carrying rewrite; Val stays 0.
118 43 : : AsmRewrite(kind, loc, len) { Label = label; }
119 : AsmRewrite(SMLoc loc, unsigned len, IntelExpr exp) // Intel-expression rewrite; Kind is always AOK_IntelExpr.
120 202 : : AsmRewrite(AOK_IntelExpr, loc, len) { IntelExp = exp; }
121 : };
122 :
123 : struct ParseInstructionInfo { // Extra state handed by reference to MCTargetAsmParser::ParseInstruction.
124 : SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr; // Optional rewrite list; null by default. Raw pointer that this struct never frees.
125 :
126 : ParseInstructionInfo() = default; // Leaves AsmRewrites null.
127 : ParseInstructionInfo(SmallVectorImpl<AsmRewrite> *rewrites) // Non-explicit, so a rewrite-list pointer converts implicitly to this type.
128 455867 : : AsmRewrites(rewrites) {}
129 : };
130 :
131 : enum OperandMatchResultTy { // Outcome of an attempt to match a single operand.
132 : MatchOperand_Success, // Operand matched successfully.
133 : MatchOperand_NoMatch, // Operand did not match.
134 : MatchOperand_ParseFail // Operand matched but had errors.
135 : };
136 :
137 : enum class DiagnosticPredicateTy { // Three-way result wrapped by DiagnosticPredicate.
138 : Match, // Reported by DiagnosticPredicate::isMatch and by its operator bool.
139 : NearMatch, // Produced by DiagnosticPredicate(false); reported by isNearMatch.
140 : NoMatch, // Reported by isNoMatch; only reachable through the DiagnosticPredicateTy constructor.
141 : };
142 :
143 : // When an operand is parsed, the assembler will try to iterate through a set of
144 : // possible operand classes that the operand might match and call the
145 : // corresponding PredicateMethod to determine that.
146 : //
147 : // If there are two AsmOperands that would give a specific diagnostic if there
148 : // is no match, there is currently no mechanism to distinguish which operand is
149 : // a closer match. The DiagnosticPredicate distinguishes between 'completely
150 : // no match' and 'near match', so the assembler can decide whether to give a
151 : // specific diagnostic, or use 'InvalidOperand' and continue to find a
152 : // 'better matching' diagnostic.
153 : //
154 : // For example:
155 : // opcode opnd0, opnd1, opnd2
156 : //
157 : // where:
158 : // opnd2 could be an 'immediate of range [-8, 7]'
159 : // opnd2 could be a 'register + shift/extend'.
160 : //
161 : // If opnd2 is a valid register, but with a wrong shift/extend suffix, it makes
162 : // little sense to give a diagnostic that the operand should be an immediate
163 : // in range [-8, 7].
164 : //
165 : // This is a light-weight alternative to the 'NearMissInfo' approach
166 : // below which collects *all* possible diagnostics. This alternative
167 : // is optional and fully backward compatible with existing
168 : // PredicateMethods that return a 'bool' (match or no match).
169 : struct DiagnosticPredicate {
170 : DiagnosticPredicateTy Type; // The wrapped three-way result.
171 :
172 : explicit DiagnosticPredicate(bool Match) // Wraps a plain bool result: true becomes Match, false becomes NearMatch (never NoMatch).
173 1053220 : : Type(Match ? DiagnosticPredicateTy::Match
174 : : DiagnosticPredicateTy::NearMatch) {}
175 : DiagnosticPredicate(DiagnosticPredicateTy T) : Type(T) {} // Implicit conversion from the enum; the only way to obtain NoMatch.
176 : DiagnosticPredicate(const DiagnosticPredicate &) = default;
177 :
178 0 : operator bool() const { return Type == DiagnosticPredicateTy::Match; } // Implicit; true only for Match, so NearMatch and NoMatch both read as false.
179 0 : bool isMatch() const { return Type == DiagnosticPredicateTy::Match; }
180 0 : bool isNearMatch() const { return Type == DiagnosticPredicateTy::NearMatch; }
181 : bool isNoMatch() const { return Type == DiagnosticPredicateTy::NoMatch; }
182 : };
183 :
184 : // When matching of an assembly instruction fails, there may be multiple
185 : // encodings that are close to being a match. It's often ambiguous which one
186 : // the programmer intended to use, so we want to report an error which mentions
187 : // each of these "near-miss" encodings. This struct contains information about
188 : // one such encoding, and why it did not match the parsed instruction.
189 : class NearMissInfo {
190 : public:
191 : enum NearMissKind {
192 : NoNearMiss, // Produced by getSuccess(); the only kind for which operator bool is false.
193 : NearMissOperand, // Produced by getMissedOperand().
194 : NearMissFeature, // Produced by getMissedFeature().
195 : NearMissPredicate, // Produced by getMissedPredicate().
196 : NearMissTooFewOperands, // Produced by getTooFewOperands().
197 : };
198 :
199 : // The encoding is valid for the parsed assembly string. This is only used
200 : // internally to the table-generated assembly matcher.
201 : static NearMissInfo getSuccess() { return NearMissInfo(); }
202 :
203 : // The instruction encoding is not valid because it requires some target
204 : // features that are not currently enabled. MissingFeatures has a bit set for
205 : // each feature that the encoding needs but which is not enabled.
206 : static NearMissInfo getMissedFeature(uint64_t MissingFeatures) {
207 : NearMissInfo Result;
208 : Result.Kind = NearMissFeature;
209 : Result.Features = MissingFeatures;
210 : return Result;
211 : }
212 :
213 : // The instruction encoding is not valid because the target-specific
214 : // predicate function returned an error code. FailureCode is the
215 : // target-specific error code returned by the predicate.
216 : static NearMissInfo getMissedPredicate(unsigned FailureCode) {
217 : NearMissInfo Result;
218 : Result.Kind = NearMissPredicate;
219 : Result.PredicateError = FailureCode;
220 : return Result;
221 : }
222 :
223 : // The instruction encoding is not valid because one (and only one) parsed
224 : // operand is not of the correct type. OperandError is the error code
225 : // relating to the operand class expected by the encoding. OperandClass is
226 : // the type of the expected operand. Opcode is the opcode of the encoding.
227 : // OperandIndex is the index into the parsed operand list.
228 : static NearMissInfo getMissedOperand(unsigned OperandError,
229 : unsigned OperandClass, unsigned Opcode,
230 : unsigned OperandIndex) {
231 : NearMissInfo Result;
232 : Result.Kind = NearMissOperand;
233 : Result.MissedOperand.Error = OperandError;
234 : Result.MissedOperand.Class = OperandClass;
235 : Result.MissedOperand.Opcode = Opcode;
236 : Result.MissedOperand.Index = OperandIndex;
237 : return Result;
238 : }
239 :
240 : // The instruction encoding is not valid because it expects more operands
241 : // than were parsed. OperandClass is the class of the expected operand that
242 : // was not provided. Opcode is the instruction encoding.
243 : static NearMissInfo getTooFewOperands(unsigned OperandClass,
244 : unsigned Opcode) {
245 : NearMissInfo Result;
246 : Result.Kind = NearMissTooFewOperands;
247 : Result.TooFewOperands.Class = OperandClass;
248 : Result.TooFewOperands.Opcode = Opcode;
249 : return Result;
250 : }
251 :
252 124108 : operator bool() const { return Kind != NoNearMiss; } // Implicit; true for every kind except NoNearMiss.
253 :
254 0 : NearMissKind getKind() const { return Kind; }
255 :
256 : // Feature flags required by the instruction, that the current target does
257 : // not have.
258 0 : uint64_t getFeatures() const {
259 : assert(Kind == NearMissFeature); // NOTE(review): assert is used throughout this class but <cassert> is not among the includes at the top of this header; presumably it arrives transitively, confirm.
260 0 : return Features;
261 : }
262 : // Error code returned by the target predicate when validating this
263 : // instruction encoding.
264 0 : unsigned getPredicateError() const {
265 : assert(Kind == NearMissPredicate);
266 0 : return PredicateError;
267 : }
268 : // MatchClassKind of the operand that we expected to see.
269 0 : unsigned getOperandClass() const {
270 : assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands);
271 0 : return MissedOperand.Class; // Also read for NearMissTooFewOperands, where TooFewOperands was the member written; relies on the shared Class/Opcode prefix of the two structs.
272 : }
273 : // Opcode of the encoding we were trying to match.
274 : unsigned getOpcode() const {
275 : assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands);
276 : return MissedOperand.Opcode; // Same shared-prefix access as in getOperandClass.
277 : }
278 : // Error code returned when validating the operand.
279 0 : unsigned getOperandError() const {
280 : assert(Kind == NearMissOperand);
281 0 : return MissedOperand.Error;
282 : }
283 : // Index of the actual operand we were trying to match in the list of parsed
284 : // operands.
285 0 : unsigned getOperandIndex() const {
286 : assert(Kind == NearMissOperand);
287 0 : return MissedOperand.Index;
288 : }
289 :
290 : private:
291 : NearMissKind Kind; // Selects which union member below is meaningful.
292 :
293 : // These two structs share a common prefix, so we can safely rely on the fact
294 : // that they overlap in the union.
295 : struct MissedOpInfo {
296 : unsigned Class;
297 : unsigned Opcode;
298 : unsigned Error;
299 : unsigned Index;
300 : };
301 :
302 : struct TooFewOperandsInfo {
303 : unsigned Class;
304 : unsigned Opcode;
305 : };
306 :
307 : union {
308 : uint64_t Features; // Written by getMissedFeature().
309 : unsigned PredicateError; // Written by getMissedPredicate().
310 : MissedOpInfo MissedOperand; // Written by getMissedOperand().
311 : TooFewOperandsInfo TooFewOperands; // Written by getTooFewOperands().
312 : };
313 :
314 337545 : NearMissInfo() : Kind(NoNearMiss) {} // Private: instances come from the static factories. Only Kind is initialised; the union stays unset until a factory fills it.
315 : };
316 :
317 : /// MCTargetAsmParser - Generic interface to target specific assembly parsers.
318 18514 : class MCTargetAsmParser : public MCAsmParserExtension {
319 : public:
320 : enum MatchResultTy { // Generic match results; target-specific codes start at FIRST_TARGET_MATCH_RESULT_TY.
321 : Match_InvalidOperand,
322 : Match_InvalidTiedOperand,
323 : Match_MissingFeature,
324 : Match_MnemonicFail,
325 : Match_Success,
326 : Match_NearMisses,
327 : FIRST_TARGET_MATCH_RESULT_TY
328 : };
329 :
330 : protected: // Can only create subclasses.
331 : MCTargetAsmParser(MCTargetOptions const &, const MCSubtargetInfo &STI,
332 : const MCInstrInfo &MII);
333 :
334 : /// Create a copy of STI and return a non-const reference to it.
335 : MCSubtargetInfo &copySTI();
336 :
337 : /// AvailableFeatures - The current set of available features.
338 : uint64_t AvailableFeatures = 0;
339 :
340 : /// ParsingInlineAsm - Are we parsing ms-style inline assembly?
341 : bool ParsingInlineAsm = false;
342 :
343 : /// SemaCallback - The Sema callback implementation. Must be set when parsing
344 : /// ms-style inline assembly. Null until setSemaCallback() is called.
345 : MCAsmParserSemaCallback *SemaCallback = nullptr;
346 :
347 : /// Set of options which affects instrumentation of inline assembly.
348 : MCTargetOptions MCOptions;
349 :
350 : /// Current STI.
351 : const MCSubtargetInfo *STI;
352 :
353 : const MCInstrInfo &MII;
354 :
355 : public:
356 : MCTargetAsmParser(const MCTargetAsmParser &) = delete;
357 : MCTargetAsmParser &operator=(const MCTargetAsmParser &) = delete;
358 :
359 : ~MCTargetAsmParser() override;
360 :
361 : const MCSubtargetInfo &getSTI() const;
362 :
363 0 : uint64_t getAvailableFeatures() const { return AvailableFeatures; }
364 88129 : void setAvailableFeatures(uint64_t Value) { AvailableFeatures = Value; }
365 :
366 0 : bool isParsingInlineAsm () const { return ParsingInlineAsm; }
367 225 : void setParsingInlineAsm (bool Value) { ParsingInlineAsm = Value; }
368 :
369 35093 : MCTargetOptions getTargetOptions() const { return MCOptions; } // Returns a copy of the stored options.
370 :
371 0 : void setSemaCallback(MCAsmParserSemaCallback *Callback) {
372 225 : SemaCallback = Callback;
373 0 : }
374 :
375 : // Target-specific parsing of expression. The default forwards to the generic parser.
376 451508 : virtual bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
377 451508 : return getParser().parsePrimaryExpr(Res, EndLoc);
378 : }
379 :
380 : virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
381 : SMLoc &EndLoc) = 0;
382 :
383 : /// Sets frame register corresponding to the current MachineFunction. The default does nothing.
384 5039 : virtual void SetFrameRegister(unsigned RegNo) {}
385 :
386 : /// ParseInstruction - Parse one assembly instruction.
387 : ///
388 : /// The parser is positioned following the instruction name. The target
389 : /// specific instruction parser should parse the entire instruction and
390 : /// construct the appropriate MCInst, or emit an error. On success, the entire
391 : /// line should be parsed up to and including the end-of-statement token. On
392 : /// failure, the parser is not required to read to the end of the line.
393 : ///
394 : /// \param Name - The instruction name.
395 : /// \param NameLoc - The source location of the name.
396 : /// \param Operands [out] - The list of parsed operands, this returns
397 : /// ownership of them to the caller.
398 : /// \return True on failure.
399 : virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
400 : SMLoc NameLoc, OperandVector &Operands) = 0;
401 451457 : virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, // Token overload; the default forwards to the SMLoc overload using the location of Token.
402 : AsmToken Token, OperandVector &Operands) {
403 451457 : return ParseInstruction(Info, Name, Token.getLoc(), Operands);
404 : }
405 :
406 : /// ParseDirective - Parse a target specific assembler directive
407 : ///
408 : /// The parser is positioned following the directive name. The target
409 : /// specific directive parser should parse the entire directive doing or
410 : /// recording any target specific work, or return true and do nothing if the
411 : /// directive is not target specific. If the directive is specific for
412 : /// the target, the entire line is parsed up to and including the
413 : /// end-of-statement token and false is returned.
414 : ///
415 : /// \param DirectiveID - the identifier token of the directive.
416 : virtual bool ParseDirective(AsmToken DirectiveID) = 0;
417 :
418 : /// MatchAndEmitInstruction - Recognize a series of operands of a parsed
419 : /// instruction as an actual MCInst and emit it to the specified MCStreamer.
420 : /// This returns false on success and returns true on failure to match.
421 : ///
422 : /// On failure, the target parser is responsible for emitting a diagnostic
423 : /// explaining the match failure.
424 : virtual bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
425 : OperandVector &Operands, MCStreamer &Out,
426 : uint64_t &ErrorInfo,
427 : bool MatchingInlineAsm) = 0;
428 :
429 : /// Allows targets to let registers opt out of clobber lists. The default omits nothing.
430 0 : virtual bool OmitRegisterFromClobberLists(unsigned RegNo) { return false; }
431 :
432 : /// Allow a target to add special case operand matching for things that
433 : /// tblgen doesn't/can't handle effectively. For example, literal
434 : /// immediates on ARM. TableGen expects a token operand, but the parser
435 : /// will recognize them as immediates. The default reports Match_InvalidOperand.
436 1000877 : virtual unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
437 : unsigned Kind) {
438 1000877 : return Match_InvalidOperand;
439 : }
440 :
441 : /// Validate the instruction match against any complex target predicates
442 : /// before rendering any operands to it. The default accepts every match.
443 : virtual unsigned
444 442458 : checkEarlyTargetMatchPredicate(MCInst &Inst, const OperandVector &Operands) {
445 442458 : return Match_Success;
446 : }
447 :
448 : /// checkTargetMatchPredicate - Validate the instruction match against
449 : /// any complex target predicates not expressible via match classes. The default accepts every match.
450 197526 : virtual unsigned checkTargetMatchPredicate(MCInst &Inst) {
451 197526 : return Match_Success;
452 : }
453 :
454 : virtual void convertToMapAndConstraints(unsigned Kind,
455 : const OperandVector &Operands) = 0;
456 :
457 : /// Returns whether two registers are equal and is used by the tied-operands
458 : /// checks in the AsmMatcher. This method can be overridden to allow e.g. a
459 : /// sub- or super-register as the tied operand.
460 19 : virtual bool regsEqual(const MCParsedAsmOperand &Op1,
461 : const MCParsedAsmOperand &Op2) const {
462 : assert(Op1.isReg() && Op2.isReg() && "Operands not all regs");
463 2071 : return Op1.getReg() == Op2.getReg();
464 : }
465 :
466 : // Return whether this parser uses assignment statements with equals tokens
467 590 : virtual bool equalIsAsmAssignment() { return true; }
468 : // Return whether this start of statement identifier is a label
469 21535 : virtual bool isLabel(AsmToken &Token) { return true; }
470 : // Return whether this parser accepts star as start of statement
471 0 : virtual bool starIsStartOfStatement() { return false; }
472 :
473 26 : virtual const MCExpr *applyModifierToExpr(const MCExpr *E, // Target hook; the default returns nullptr.
474 : MCSymbolRefExpr::VariantKind,
475 : MCContext &Ctx) {
476 26 : return nullptr;
477 : }
478 :
479 : // For actions that have to be performed before a label is emitted
480 19699 : virtual void doBeforeLabelEmit(MCSymbol *Symbol) {}
481 :
482 19696 : virtual void onLabelParsed(MCSymbol *Symbol) {} // Target hook; the default does nothing.
483 :
484 : /// Ensure that all previously parsed instructions have been emitted to the
485 : /// output streamer, if the target does not emit them immediately.
486 675305 : virtual void flushPendingInstructions(MCStreamer &Out) {}
487 :
488 0 : virtual const MCExpr *createTargetUnaryExpr(const MCExpr *E, // Target hook; the default returns nullptr.
489 : AsmToken::TokenKind OperatorToken,
490 : MCContext &Ctx) {
491 0 : return nullptr;
492 : }
493 : };
494 :
495 : } // end namespace llvm
496 :
497 : #endif // LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
|