LLVM  3.7.0
X86AsmParser.cpp
Go to the documentation of this file.
1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
11 #include "X86AsmInstrumentation.h"
12 #include "X86AsmParserCommon.h"
13 #include "X86Operand.h"
14 #include "X86ISelLowering.h"
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/SmallString.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/ADT/Twine.h"
21 #include "llvm/MC/MCContext.h"
22 #include "llvm/MC/MCExpr.h"
23 #include "llvm/MC/MCInst.h"
24 #include "llvm/MC/MCInstrInfo.h"
28 #include "llvm/MC/MCRegisterInfo.h"
29 #include "llvm/MC/MCStreamer.h"
31 #include "llvm/MC/MCSymbol.h"
33 #include "llvm/Support/SourceMgr.h"
36 #include <algorithm>
37 #include <memory>
38 
39 using namespace llvm;
40 
41 namespace {
42 
// Precedence of each infix-calculator token. The table is indexed by the
// InfixCalculatorTok enumerator named in the trailing comment of each row,
// so the row order must match the enumerator order. A larger value means
// the operator is pushed on top of the one currently on the operator stack
// instead of flushing it (see InfixCalculator::pushOperator).
43 static const char OpPrecedence[] = {
44  0, // IC_OR
45  1, // IC_XOR
46  2, // IC_AND
47  3, // IC_LSHIFT
48  3, // IC_RSHIFT
49  4, // IC_PLUS
50  4, // IC_MINUS
51  5, // IC_MULTIPLY
52  5, // IC_DIVIDE
53  6, // IC_RPAREN
54  7, // IC_LPAREN
55  0, // IC_IMM
56  0 // IC_REGISTER
57 };
58 
59 class X86AsmParser : public MCTargetAsmParser {
60  MCSubtargetInfo &STI;
61  const MCInstrInfo &MII;
62  ParseInstructionInfo *InstInfo;
63  std::unique_ptr<X86AsmInstrumentation> Instrumentation;
64 private:
65  SMLoc consumeToken() {
66  MCAsmParser &Parser = getParser();
67  SMLoc Result = Parser.getTok().getLoc();
68  Parser.Lex();
69  return Result;
70  }
71 
// Token kinds handled by InfixCalculator. The enumerator values are used as
// indices into OpPrecedence, so the order here has to stay in step with the
// rows of that table. IC_IMM and IC_REGISTER are the only kinds accepted as
// operands (InfixCalculator::pushOperand asserts this); the rest are
// operators or parentheses.
72  enum InfixCalculatorTok {
73  IC_OR = 0,
74  IC_XOR,
75  IC_AND,
76  IC_LSHIFT,
77  IC_RSHIFT,
78  IC_PLUS,
79  IC_MINUS,
80  IC_MULTIPLY,
81  IC_DIVIDE,
82  IC_RPAREN,
83  IC_LPAREN,
84  IC_IMM,
85  IC_REGISTER
86  };
87 
88  class InfixCalculator {
89  typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
90  SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
91  SmallVector<ICToken, 4> PostfixStack;
92 
93  public:
94  int64_t popOperand() {
95  assert (!PostfixStack.empty() && "Poped an empty stack!");
96  ICToken Op = PostfixStack.pop_back_val();
97  assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
98  && "Expected and immediate or register!");
99  return Op.second;
100  }
101  void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
102  assert ((Op == IC_IMM || Op == IC_REGISTER) &&
103  "Unexpected operand!");
104  PostfixStack.push_back(std::make_pair(Op, Val));
105  }
106 
107  void popOperator() { InfixOperatorStack.pop_back(); }
108  void pushOperator(InfixCalculatorTok Op) {
109  // Push the new operator if the stack is empty.
110  if (InfixOperatorStack.empty()) {
111  InfixOperatorStack.push_back(Op);
112  return;
113  }
114 
115  // Push the new operator if it has a higher precedence than the operator
116  // on the top of the stack or the operator on the top of the stack is a
117  // left parentheses.
118  unsigned Idx = InfixOperatorStack.size() - 1;
119  InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
120  if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
121  InfixOperatorStack.push_back(Op);
122  return;
123  }
124 
125  // The operator on the top of the stack has higher precedence than the
126  // new operator.
127  unsigned ParenCount = 0;
128  while (1) {
129  // Nothing to process.
130  if (InfixOperatorStack.empty())
131  break;
132 
133  Idx = InfixOperatorStack.size() - 1;
134  StackOp = InfixOperatorStack[Idx];
135  if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
136  break;
137 
138  // If we have an even parentheses count and we see a left parentheses,
139  // then stop processing.
140  if (!ParenCount && StackOp == IC_LPAREN)
141  break;
142 
143  if (StackOp == IC_RPAREN) {
144  ++ParenCount;
145  InfixOperatorStack.pop_back();
146  } else if (StackOp == IC_LPAREN) {
147  --ParenCount;
148  InfixOperatorStack.pop_back();
149  } else {
150  InfixOperatorStack.pop_back();
151  PostfixStack.push_back(std::make_pair(StackOp, 0));
152  }
153  }
154  // Push the new operator.
155  InfixOperatorStack.push_back(Op);
156  }
157  int64_t execute() {
158  // Push any remaining operators onto the postfix stack.
159  while (!InfixOperatorStack.empty()) {
160  InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
161  if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
162  PostfixStack.push_back(std::make_pair(StackOp, 0));
163  }
164 
165  if (PostfixStack.empty())
166  return 0;
167 
168  SmallVector<ICToken, 16> OperandStack;
169  for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
170  ICToken Op = PostfixStack[i];
171  if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
172  OperandStack.push_back(Op);
173  } else {
174  assert (OperandStack.size() > 1 && "Too few operands.");
175  int64_t Val;
176  ICToken Op2 = OperandStack.pop_back_val();
177  ICToken Op1 = OperandStack.pop_back_val();
178  switch (Op.first) {
179  default:
180  report_fatal_error("Unexpected operator!");
181  break;
182  case IC_PLUS:
183  Val = Op1.second + Op2.second;
184  OperandStack.push_back(std::make_pair(IC_IMM, Val));
185  break;
186  case IC_MINUS:
187  Val = Op1.second - Op2.second;
188  OperandStack.push_back(std::make_pair(IC_IMM, Val));
189  break;
190  case IC_MULTIPLY:
191  assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
192  "Multiply operation with an immediate and a register!");
193  Val = Op1.second * Op2.second;
194  OperandStack.push_back(std::make_pair(IC_IMM, Val));
195  break;
196  case IC_DIVIDE:
197  assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
198  "Divide operation with an immediate and a register!");
199  assert (Op2.second != 0 && "Division by zero!");
200  Val = Op1.second / Op2.second;
201  OperandStack.push_back(std::make_pair(IC_IMM, Val));
202  break;
203  case IC_OR:
204  assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
205  "Or operation with an immediate and a register!");
206  Val = Op1.second | Op2.second;
207  OperandStack.push_back(std::make_pair(IC_IMM, Val));
208  break;
209  case IC_XOR:
210  assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
211  "Xor operation with an immediate and a register!");
212  Val = Op1.second ^ Op2.second;
213  OperandStack.push_back(std::make_pair(IC_IMM, Val));
214  break;
215  case IC_AND:
216  assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
217  "And operation with an immediate and a register!");
218  Val = Op1.second & Op2.second;
219  OperandStack.push_back(std::make_pair(IC_IMM, Val));
220  break;
221  case IC_LSHIFT:
222  assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
223  "Left shift operation with an immediate and a register!");
224  Val = Op1.second << Op2.second;
225  OperandStack.push_back(std::make_pair(IC_IMM, Val));
226  break;
227  case IC_RSHIFT:
228  assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
229  "Right shift operation with an immediate and a register!");
230  Val = Op1.second >> Op2.second;
231  OperandStack.push_back(std::make_pair(IC_IMM, Val));
232  break;
233  }
234  }
235  }
236  assert (OperandStack.size() == 1 && "Expected a single result.");
237  return OperandStack.pop_back_val().second;
238  }
239  };
240 
// States of IntelExprStateMachine. Each on*() handler of that class moves
// the machine into the state named after the token it has just accepted;
// IES_ERROR is entered when a token is not acceptable in the current state
// (IntelExprStateMachine::hadError() tests for it).
241  enum IntelExprState {
242  IES_OR,
243  IES_XOR,
244  IES_AND,
245  IES_LSHIFT,
246  IES_RSHIFT,
247  IES_PLUS,
248  IES_MINUS,
249  IES_NOT,
250  IES_MULTIPLY,
251  IES_DIVIDE,
252  IES_LBRAC,
253  IES_RBRAC,
254  IES_LPAREN,
255  IES_RPAREN,
256  IES_REGISTER,
257  IES_INTEGER,
258  IES_IDENTIFIER,
259  IES_ERROR
260  };
261 
262  class IntelExprStateMachine {
263  IntelExprState State, PrevState;
264  unsigned BaseReg, IndexReg, TmpReg, Scale;
265  int64_t Imm;
266  const MCExpr *Sym;
267  StringRef SymName;
268  bool StopOnLBrac, AddImmPrefix;
269  InfixCalculator IC;
271  public:
272  IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) :
273  State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
274  Scale(1), Imm(imm), Sym(nullptr), StopOnLBrac(stoponlbrac),
275  AddImmPrefix(addimmprefix) { Info.clear(); }
276 
277  unsigned getBaseReg() { return BaseReg; }
278  unsigned getIndexReg() { return IndexReg; }
279  unsigned getScale() { return Scale; }
280  const MCExpr *getSym() { return Sym; }
281  StringRef getSymName() { return SymName; }
282  int64_t getImm() { return Imm + IC.execute(); }
283  bool isValidEndState() {
284  return State == IES_RBRAC || State == IES_INTEGER;
285  }
286  bool getStopOnLBrac() { return StopOnLBrac; }
287  bool getAddImmPrefix() { return AddImmPrefix; }
288  bool hadError() { return State == IES_ERROR; }
289 
290  InlineAsmIdentifierInfo &getIdentifierInfo() {
291  return Info;
292  }
293 
294  void onOr() {
295  IntelExprState CurrState = State;
296  switch (State) {
297  default:
298  State = IES_ERROR;
299  break;
300  case IES_INTEGER:
301  case IES_RPAREN:
302  case IES_REGISTER:
303  State = IES_OR;
304  IC.pushOperator(IC_OR);
305  break;
306  }
307  PrevState = CurrState;
308  }
309  void onXor() {
310  IntelExprState CurrState = State;
311  switch (State) {
312  default:
313  State = IES_ERROR;
314  break;
315  case IES_INTEGER:
316  case IES_RPAREN:
317  case IES_REGISTER:
318  State = IES_XOR;
319  IC.pushOperator(IC_XOR);
320  break;
321  }
322  PrevState = CurrState;
323  }
324  void onAnd() {
325  IntelExprState CurrState = State;
326  switch (State) {
327  default:
328  State = IES_ERROR;
329  break;
330  case IES_INTEGER:
331  case IES_RPAREN:
332  case IES_REGISTER:
333  State = IES_AND;
334  IC.pushOperator(IC_AND);
335  break;
336  }
337  PrevState = CurrState;
338  }
339  void onLShift() {
340  IntelExprState CurrState = State;
341  switch (State) {
342  default:
343  State = IES_ERROR;
344  break;
345  case IES_INTEGER:
346  case IES_RPAREN:
347  case IES_REGISTER:
348  State = IES_LSHIFT;
349  IC.pushOperator(IC_LSHIFT);
350  break;
351  }
352  PrevState = CurrState;
353  }
354  void onRShift() {
355  IntelExprState CurrState = State;
356  switch (State) {
357  default:
358  State = IES_ERROR;
359  break;
360  case IES_INTEGER:
361  case IES_RPAREN:
362  case IES_REGISTER:
363  State = IES_RSHIFT;
364  IC.pushOperator(IC_RSHIFT);
365  break;
366  }
367  PrevState = CurrState;
368  }
369  void onPlus() {
370  IntelExprState CurrState = State;
371  switch (State) {
372  default:
373  State = IES_ERROR;
374  break;
375  case IES_INTEGER:
376  case IES_RPAREN:
377  case IES_REGISTER:
378  State = IES_PLUS;
379  IC.pushOperator(IC_PLUS);
380  if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
381  // If we already have a BaseReg, then assume this is the IndexReg with
382  // a scale of 1.
383  if (!BaseReg) {
384  BaseReg = TmpReg;
385  } else {
386  assert (!IndexReg && "BaseReg/IndexReg already set!");
387  IndexReg = TmpReg;
388  Scale = 1;
389  }
390  }
391  break;
392  }
393  PrevState = CurrState;
394  }
395  void onMinus() {
396  IntelExprState CurrState = State;
397  switch (State) {
398  default:
399  State = IES_ERROR;
400  break;
401  case IES_PLUS:
402  case IES_NOT:
403  case IES_MULTIPLY:
404  case IES_DIVIDE:
405  case IES_LPAREN:
406  case IES_RPAREN:
407  case IES_LBRAC:
408  case IES_RBRAC:
409  case IES_INTEGER:
410  case IES_REGISTER:
411  State = IES_MINUS;
412  // Only push the minus operator if it is not a unary operator.
413  if (!(CurrState == IES_PLUS || CurrState == IES_MINUS ||
414  CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE ||
415  CurrState == IES_LPAREN || CurrState == IES_LBRAC))
416  IC.pushOperator(IC_MINUS);
417  if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
418  // If we already have a BaseReg, then assume this is the IndexReg with
419  // a scale of 1.
420  if (!BaseReg) {
421  BaseReg = TmpReg;
422  } else {
423  assert (!IndexReg && "BaseReg/IndexReg already set!");
424  IndexReg = TmpReg;
425  Scale = 1;
426  }
427  }
428  break;
429  }
430  PrevState = CurrState;
431  }
432  void onNot() {
433  IntelExprState CurrState = State;
434  switch (State) {
435  default:
436  State = IES_ERROR;
437  break;
438  case IES_PLUS:
439  case IES_NOT:
440  State = IES_NOT;
441  break;
442  }
443  PrevState = CurrState;
444  }
445  void onRegister(unsigned Reg) {
446  IntelExprState CurrState = State;
447  switch (State) {
448  default:
449  State = IES_ERROR;
450  break;
451  case IES_PLUS:
452  case IES_LPAREN:
453  State = IES_REGISTER;
454  TmpReg = Reg;
455  IC.pushOperand(IC_REGISTER);
456  break;
457  case IES_MULTIPLY:
458  // Index Register - Scale * Register
459  if (PrevState == IES_INTEGER) {
460  assert (!IndexReg && "IndexReg already set!");
461  State = IES_REGISTER;
462  IndexReg = Reg;
463  // Get the scale and replace the 'Scale * Register' with '0'.
464  Scale = IC.popOperand();
465  IC.pushOperand(IC_IMM);
466  IC.popOperator();
467  } else {
468  State = IES_ERROR;
469  }
470  break;
471  }
472  PrevState = CurrState;
473  }
474  void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) {
475  PrevState = State;
476  switch (State) {
477  default:
478  State = IES_ERROR;
479  break;
480  case IES_PLUS:
481  case IES_MINUS:
482  case IES_NOT:
483  State = IES_INTEGER;
484  Sym = SymRef;
485  SymName = SymRefName;
486  IC.pushOperand(IC_IMM);
487  break;
488  }
489  }
490  bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
491  IntelExprState CurrState = State;
492  switch (State) {
493  default:
494  State = IES_ERROR;
495  break;
496  case IES_PLUS:
497  case IES_MINUS:
498  case IES_NOT:
499  case IES_OR:
500  case IES_XOR:
501  case IES_AND:
502  case IES_LSHIFT:
503  case IES_RSHIFT:
504  case IES_DIVIDE:
505  case IES_MULTIPLY:
506  case IES_LPAREN:
507  State = IES_INTEGER;
508  if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
509  // Index Register - Register * Scale
510  assert (!IndexReg && "IndexReg already set!");
511  IndexReg = TmpReg;
512  Scale = TmpInt;
513  if(Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
514  ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
515  return true;
516  }
517  // Get the scale and replace the 'Register * Scale' with '0'.
518  IC.popOperator();
519  } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
520  PrevState == IES_OR || PrevState == IES_AND ||
521  PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
522  PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
523  PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
524  PrevState == IES_NOT || PrevState == IES_XOR) &&
525  CurrState == IES_MINUS) {
526  // Unary minus. No need to pop the minus operand because it was never
527  // pushed.
528  IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm.
529  } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
530  PrevState == IES_OR || PrevState == IES_AND ||
531  PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
532  PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
533  PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
534  PrevState == IES_NOT || PrevState == IES_XOR) &&
535  CurrState == IES_NOT) {
536  // Unary not. No need to pop the not operand because it was never
537  // pushed.
538  IC.pushOperand(IC_IMM, ~TmpInt); // Push ~Imm.
539  } else {
540  IC.pushOperand(IC_IMM, TmpInt);
541  }
542  break;
543  }
544  PrevState = CurrState;
545  return false;
546  }
547  void onStar() {
548  PrevState = State;
549  switch (State) {
550  default:
551  State = IES_ERROR;
552  break;
553  case IES_INTEGER:
554  case IES_REGISTER:
555  case IES_RPAREN:
556  State = IES_MULTIPLY;
557  IC.pushOperator(IC_MULTIPLY);
558  break;
559  }
560  }
561  void onDivide() {
562  PrevState = State;
563  switch (State) {
564  default:
565  State = IES_ERROR;
566  break;
567  case IES_INTEGER:
568  case IES_RPAREN:
569  State = IES_DIVIDE;
570  IC.pushOperator(IC_DIVIDE);
571  break;
572  }
573  }
574  void onLBrac() {
575  PrevState = State;
576  switch (State) {
577  default:
578  State = IES_ERROR;
579  break;
580  case IES_RBRAC:
581  State = IES_PLUS;
582  IC.pushOperator(IC_PLUS);
583  break;
584  }
585  }
586  void onRBrac() {
587  IntelExprState CurrState = State;
588  switch (State) {
589  default:
590  State = IES_ERROR;
591  break;
592  case IES_INTEGER:
593  case IES_REGISTER:
594  case IES_RPAREN:
595  State = IES_RBRAC;
596  if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
597  // If we already have a BaseReg, then assume this is the IndexReg with
598  // a scale of 1.
599  if (!BaseReg) {
600  BaseReg = TmpReg;
601  } else {
602  assert (!IndexReg && "BaseReg/IndexReg already set!");
603  IndexReg = TmpReg;
604  Scale = 1;
605  }
606  }
607  break;
608  }
609  PrevState = CurrState;
610  }
611  void onLParen() {
612  IntelExprState CurrState = State;
613  switch (State) {
614  default:
615  State = IES_ERROR;
616  break;
617  case IES_PLUS:
618  case IES_MINUS:
619  case IES_NOT:
620  case IES_OR:
621  case IES_XOR:
622  case IES_AND:
623  case IES_LSHIFT:
624  case IES_RSHIFT:
625  case IES_MULTIPLY:
626  case IES_DIVIDE:
627  case IES_LPAREN:
628  // FIXME: We don't handle this type of unary minus or not, yet.
629  if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
630  PrevState == IES_OR || PrevState == IES_AND ||
631  PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
632  PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
633  PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
634  PrevState == IES_NOT || PrevState == IES_XOR) &&
635  (CurrState == IES_MINUS || CurrState == IES_NOT)) {
636  State = IES_ERROR;
637  break;
638  }
639  State = IES_LPAREN;
640  IC.pushOperator(IC_LPAREN);
641  break;
642  }
643  PrevState = CurrState;
644  }
645  void onRParen() {
646  PrevState = State;
647  switch (State) {
648  default:
649  State = IES_ERROR;
650  break;
651  case IES_INTEGER:
652  case IES_REGISTER:
653  case IES_RPAREN:
654  State = IES_RPAREN;
655  IC.pushOperator(IC_RPAREN);
656  break;
657  }
658  }
659  };
660 
661  bool Error(SMLoc L, const Twine &Msg,
663  bool MatchingInlineAsm = false) {
664  MCAsmParser &Parser = getParser();
665  if (MatchingInlineAsm) return true;
666  return Parser.Error(L, Msg, Ranges);
667  }
668 
669  bool ErrorAndEatStatement(SMLoc L, const Twine &Msg,
671  bool MatchingInlineAsm = false) {
672  MCAsmParser &Parser = getParser();
673  Parser.eatToEndOfStatement();
674  return Error(L, Msg, Ranges, MatchingInlineAsm);
675  }
676 
677  std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg) {
678  Error(Loc, Msg);
679  return nullptr;
680  }
681 
// Build a zero-displacement memory operand based on the SI / DI register
// of the current mode (RSI/ESI/SI and RDI/EDI/DI); defined out of line
// further down in this file.
682  std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
683  std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
// Append Src and Dst to Operands. The out-of-line definition pushes Dst
// first when parsing Intel syntax and Src first otherwise.
684  void AddDefaultSrcDestOperands(
685  OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
686  std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst);
// Operand parsing entry point: the out-of-line definition forwards to
// ParseIntelOperand() or ParseATTOperand() depending on
// isParsingIntelSyntax().
687  std::unique_ptr<X86Operand> ParseOperand();
688  std::unique_ptr<X86Operand> ParseATTOperand();
689  std::unique_ptr<X86Operand> ParseIntelOperand();
// Intel-syntax helpers. Their definitions are not in this part of the file.
690  std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator();
691  bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
692  std::unique_ptr<X86Operand> ParseIntelOperator(unsigned OpKind);
693  std::unique_ptr<X86Operand>
694  ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
695  std::unique_ptr<X86Operand>
696  ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc, unsigned Size);
697  std::unique_ptr<X86Operand> ParseRoundingModeOp(SMLoc Start, SMLoc End);
// Drives an IntelExprStateMachine over the token stream (definition not in
// this part of the file).
698  bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
699  std::unique_ptr<X86Operand> ParseIntelBracExpression(unsigned SegReg,
700  SMLoc Start,
701  int64_t ImmDisp,
702  unsigned Size);
703  bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
// NOTE(review): the embedded listing numbers jump from 703 to 705 here, so
// one line of this parameter list appears to be missing from this copy of
// the file - TODO restore it from the upstream source.
705  bool IsUnevaluatedOperand, SMLoc &End);
706 
707  std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
708 
709  std::unique_ptr<X86Operand>
710  CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
711  unsigned IndexReg, unsigned Scale, SMLoc Start,
712  SMLoc End, unsigned Size, StringRef Identifier,
714 
// Directive handlers; definitions are not in this part of the file.
715  bool ParseDirectiveWord(unsigned Size, SMLoc L);
716  bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
717 
// Post-match hooks operating on the matched MCInst; definitions are not in
// this part of the file.
718  bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
719  bool processInstruction(MCInst &Inst, const OperandVector &Ops);
720 
721  /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
722  /// instrumentation around Inst.
723  void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
724 
725  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// NOTE(review): the embedded listing numbers jump from 725 to 727, so one
// parameter line appears to be missing from this declaration - TODO restore
// it from the upstream source.
727  uint64_t &ErrorInfo,
728  bool MatchingInlineAsm) override;
729 
730  void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
731  MCStreamer &Out, bool MatchingInlineAsm);
732 
733  bool ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
734  bool MatchingInlineAsm);
735 
736  bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
// NOTE(review): listing numbers jump from 736 to 738; a parameter line
// appears to be missing here as well - TODO restore.
738  uint64_t &ErrorInfo,
739  bool MatchingInlineAsm);
740 
741  bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
// NOTE(review): listing numbers jump from 741 to 743; a parameter line
// appears to be missing here as well - TODO restore.
743  uint64_t &ErrorInfo,
744  bool MatchingInlineAsm);
745 
746  bool OmitRegisterFromClobberLists(unsigned RegNo) override;
747 
748  /// doSrcDstMatch - Returns true if operands are matching in their
749  /// word size (%si and %di, %esi and %edi, etc.). Order depends on
750  /// the parsing mode (Intel vs. AT&T).
751  bool doSrcDstMatch(X86Operand &Op1, X86Operand &Op2);
752 
753  /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
754  /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
755  /// \return \c true if no parsing errors occurred, \c false otherwise.
756  bool HandleAVX512Operand(OperandVector &Operands,
757  const MCParsedAsmOperand &Op);
758 
759  bool is64BitMode() const {
760  // FIXME: Can tablegen auto-generate this?
761  return STI.getFeatureBits()[X86::Mode64Bit];
762  }
763  bool is32BitMode() const {
764  // FIXME: Can tablegen auto-generate this?
765  return STI.getFeatureBits()[X86::Mode32Bit];
766  }
767  bool is16BitMode() const {
768  // FIXME: Can tablegen auto-generate this?
769  return STI.getFeatureBits()[X86::Mode16Bit];
770  }
771  void SwitchMode(unsigned mode) {
772  FeatureBitset AllModes({X86::Mode64Bit, X86::Mode32Bit, X86::Mode16Bit});
773  FeatureBitset OldMode = STI.getFeatureBits() & AllModes;
774  unsigned FB = ComputeAvailableFeatures(
775  STI.ToggleFeature(OldMode.flip(mode)));
776  setAvailableFeatures(FB);
777 
778  assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes));
779  }
780 
781  unsigned getPointerWidth() {
782  if (is16BitMode()) return 16;
783  if (is32BitMode()) return 32;
784  if (is64BitMode()) return 64;
785  llvm_unreachable("invalid mode");
786  }
787 
788  bool isParsingIntelSyntax() {
789  return getParser().getAssemblerDialect();
790  }
791 
792  /// @name Auto-generated Matcher Functions
793  /// {
794 
795 #define GET_ASSEMBLER_HEADER
796 #include "X86GenAsmMatcher.inc"
797 
798  /// }
799 
800 public:
801  X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &Parser,
802  const MCInstrInfo &mii, const MCTargetOptions &Options)
803  : MCTargetAsmParser(), STI(sti), MII(mii), InstInfo(nullptr) {
804 
805  // Initialize the set of available features.
806  setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
807  Instrumentation.reset(
808  CreateX86AsmInstrumentation(Options, Parser.getContext(), STI));
809  }
810 
// Overrides of the target-parser interface. ParseRegister and
// SetFrameRegister are defined further down in this file; ParseInstruction
// and ParseDirective are defined outside this part of it.
811  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
812 
813  void SetFrameRegister(unsigned RegNo) override;
814 
815  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
816  SMLoc NameLoc, OperandVector &Operands) override;
817 
818  bool ParseDirective(AsmToken DirectiveID) override;
819 };
820 } // end anonymous namespace
821 
822 /// @name Auto-generated Match Functions
823 /// {
824 
825 static unsigned MatchRegisterName(StringRef Name);
826 
827 /// }
828 
829 static bool CheckBaseRegAndIndexReg(unsigned BaseReg, unsigned IndexReg,
830  StringRef &ErrMsg) {
831  // If we have both a base register and an index register make sure they are
832  // both 64-bit or 32-bit registers.
833  // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
834  if (BaseReg != 0 && IndexReg != 0) {
835  if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
836  (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
837  X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
838  IndexReg != X86::RIZ) {
839  ErrMsg = "base register is 64-bit, but index register is not";
840  return true;
841  }
842  if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
843  (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
844  X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
845  IndexReg != X86::EIZ){
846  ErrMsg = "base register is 32-bit, but index register is not";
847  return true;
848  }
849  if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
850  if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
851  X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
852  ErrMsg = "base register is 16-bit, but index register is not";
853  return true;
854  }
855  if (((BaseReg == X86::BX || BaseReg == X86::BP) &&
856  IndexReg != X86::SI && IndexReg != X86::DI) ||
857  ((BaseReg == X86::SI || BaseReg == X86::DI) &&
858  IndexReg != X86::BX && IndexReg != X86::BP)) {
859  ErrMsg = "invalid 16-bit base/index register combination";
860  return true;
861  }
862  }
863  }
864  return false;
865 }
866 
867 bool X86AsmParser::doSrcDstMatch(X86Operand &Op1, X86Operand &Op2)
868 {
869  // Return true and let a normal complaint about bogus operands happen.
870  if (!Op1.isMem() || !Op2.isMem())
871  return true;
872 
873  // Actually these might be the other way round if Intel syntax is
874  // being used. It doesn't matter.
875  unsigned diReg = Op1.Mem.BaseReg;
876  unsigned siReg = Op2.Mem.BaseReg;
877 
878  if (X86MCRegisterClasses[X86::GR16RegClassID].contains(siReg))
879  return X86MCRegisterClasses[X86::GR16RegClassID].contains(diReg);
880  if (X86MCRegisterClasses[X86::GR32RegClassID].contains(siReg))
881  return X86MCRegisterClasses[X86::GR32RegClassID].contains(diReg);
882  if (X86MCRegisterClasses[X86::GR64RegClassID].contains(siReg))
883  return X86MCRegisterClasses[X86::GR64RegClassID].contains(diReg);
884  // Again, return true and let another error happen.
885  return true;
886 }
887 
// Parse a register name at the current token.
//
// On success returns false with RegNo set to the matched register and the
// register's tokens consumed. On failure returns true; in Intel syntax an
// unrecognised name fails silently (no diagnostic), otherwise an "invalid
// register name" error is reported. StartLoc is set to the location of the
// first token examined and EndLoc to the end of the last register token.
888 bool X86AsmParser::ParseRegister(unsigned &RegNo,
889  SMLoc &StartLoc, SMLoc &EndLoc) {
890  MCAsmParser &Parser = getParser();
891  RegNo = 0;
892  const AsmToken &PercentTok = Parser.getTok();
893  StartLoc = PercentTok.getLoc();
894 
895  // If we encounter a %, ignore it. This code handles registers with and
896  // without the prefix, unprefixed registers can occur in cfi directives.
897  if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
898  Parser.Lex(); // Eat percent token.
899 
900  const AsmToken &Tok = Parser.getTok();
901  EndLoc = Tok.getEndLoc();
902 
903  if (Tok.isNot(AsmToken::Identifier)) {
904  if (isParsingIntelSyntax()) return true;
905  return Error(StartLoc, "invalid register name",
906  SMRange(StartLoc, EndLoc));
907  }
908 
909  RegNo = MatchRegisterName(Tok.getString());
910 
911  // If the match failed, try the register name as lowercase.
912  if (RegNo == 0)
913  RegNo = MatchRegisterName(Tok.getString().lower());
914 
915  if (!is64BitMode()) {
916  // FIXME: This should be done using Requires<Not64BitMode> and
917  // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
918  // checked.
919  // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
920  // REX prefix.
921  if (RegNo == X86::RIZ ||
922  X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
// NOTE(review): the condition above ends in `||` and is followed directly
// by `return`, and the embedded listing numbers jump from 922 to 925. The
// remaining operands of this condition and its closing parenthesis appear
// to be missing from this copy of the file - TODO restore them from the
// upstream source before building.
925  return Error(StartLoc, "register %"
926  + Tok.getString() + " is only available in 64-bit mode",
927  SMRange(StartLoc, EndLoc));
928  }
929 
930  // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
931  if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
932  RegNo = X86::ST0;
933  Parser.Lex(); // Eat 'st'
934 
935  // Check to see if we have '(4)' after %st.
// A bare "st" with no parenthesised index is accepted as ST0.
936  if (getLexer().isNot(AsmToken::LParen))
937  return false;
938  // Lex the paren.
939  getParser().Lex();
940 
941  const AsmToken &IntTok = Parser.getTok();
942  if (IntTok.isNot(AsmToken::Integer))
943  return Error(IntTok.getLoc(), "expected stack index");
944  switch (IntTok.getIntVal()) {
945  case 0: RegNo = X86::ST0; break;
946  case 1: RegNo = X86::ST1; break;
947  case 2: RegNo = X86::ST2; break;
948  case 3: RegNo = X86::ST3; break;
949  case 4: RegNo = X86::ST4; break;
950  case 5: RegNo = X86::ST5; break;
951  case 6: RegNo = X86::ST6; break;
952  case 7: RegNo = X86::ST7; break;
953  default: return Error(IntTok.getLoc(), "invalid stack index");
954  }
955 
// This Lex() consumes the index; its result is the token that follows.
956  if (getParser().Lex().isNot(AsmToken::RParen))
957  return Error(Parser.getTok().getLoc(), "expected ')'");
958 
959  EndLoc = Parser.getTok().getEndLoc();
960  Parser.Lex(); // Eat ')'
961  return false;
962  }
963 
964  EndLoc = Parser.getTok().getEndLoc();
965 
966  // If this is "db[0-7]", match it as an alias
967  // for dr[0-7].
968  if (RegNo == 0 && Tok.getString().size() == 3 &&
969  Tok.getString().startswith("db")) {
970  switch (Tok.getString()[2]) {
971  case '0': RegNo = X86::DR0; break;
972  case '1': RegNo = X86::DR1; break;
973  case '2': RegNo = X86::DR2; break;
974  case '3': RegNo = X86::DR3; break;
975  case '4': RegNo = X86::DR4; break;
976  case '5': RegNo = X86::DR5; break;
977  case '6': RegNo = X86::DR6; break;
978  case '7': RegNo = X86::DR7; break;
979  }
980 
981  if (RegNo != 0) {
982  EndLoc = Parser.getTok().getEndLoc();
983  Parser.Lex(); // Eat it.
984  return false;
985  }
986  }
987 
988  if (RegNo == 0) {
989  if (isParsingIntelSyntax()) return true;
990  return Error(StartLoc, "invalid register name",
991  SMRange(StartLoc, EndLoc));
992  }
993 
994  Parser.Lex(); // Eat identifier token.
995  return false;
996 }
997 
998 void X86AsmParser::SetFrameRegister(unsigned RegNo) {
999  Instrumentation->SetInitialFrameRegister(RegNo);
1000 }
1001 
1002 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
1003  unsigned basereg =
1004  is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI);
1005  const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1006  return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1007  /*BaseReg=*/basereg, /*IndexReg=*/0, /*Scale=*/1,
1008  Loc, Loc, 0);
1009 }
1010 
1011 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
1012  unsigned basereg =
1013  is64BitMode() ? X86::RDI : (is32BitMode() ? X86::EDI : X86::DI);
1014  const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1015  return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1016  /*BaseReg=*/basereg, /*IndexReg=*/0, /*Scale=*/1,
1017  Loc, Loc, 0);
1018 }
1019 
1020 void X86AsmParser::AddDefaultSrcDestOperands(
1021  OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
1022  std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst) {
1023  if (isParsingIntelSyntax()) {
1024  Operands.push_back(std::move(Dst));
1025  Operands.push_back(std::move(Src));
1026  }
1027  else {
1028  Operands.push_back(std::move(Src));
1029  Operands.push_back(std::move(Dst));
1030  }
1031 }
1032 
1033 std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
1034  if (isParsingIntelSyntax())
1035  return ParseIntelOperand();
1036  return ParseATTOperand();
1037 }
1038 
1039 /// getIntelMemOperandSize - Return intel memory operand size.
1040 static unsigned getIntelMemOperandSize(StringRef OpStr) {
1041  unsigned Size = StringSwitch<unsigned>(OpStr)
1042  .Cases("BYTE", "byte", 8)
1043  .Cases("WORD", "word", 16)
1044  .Cases("DWORD", "dword", 32)
1045  .Cases("QWORD", "qword", 64)
1046  .Cases("XWORD", "xword", 80)
1047  .Cases("XMMWORD", "xmmword", 128)
1048  .Cases("YMMWORD", "ymmword", 256)
1049  .Cases("ZMMWORD", "zmmword", 512)
1050  .Cases("OPAQUE", "opaque", -1U) // needs to be non-zero, but doesn't matter
1051  .Default(0);
1052  return Size;
1053 }
1054 
1055 std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
1056  unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
1057  unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
1058  InlineAsmIdentifierInfo &Info) {
1059  // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
1060  // some other label reference.
1061  if (isa<MCSymbolRefExpr>(Disp) && Info.OpDecl && !Info.IsVarDecl) {
1062  // Insert an explicit size if the user didn't have one.
1063  if (!Size) {
1064  Size = getPointerWidth();
1065  InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
1066  /*Len=*/0, Size));
1067  }
1068 
1069  // Create an absolute memory reference in order to match against
1070  // instructions taking a PC relative operand.
1071  return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size,
1072  Identifier, Info.OpDecl);
1073  }
1074 
1075  // We either have a direct symbol reference, or an offset from a symbol. The
1076  // parser always puts the symbol on the LHS, so look there for size
1077  // calculation purposes.
1078  const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp);
1079  bool IsSymRef =
1080  isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp);
1081  if (IsSymRef) {
1082  if (!Size) {
1083  Size = Info.Type * 8; // Size is in terms of bits in this context.
1084  if (Size)
1085  InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
1086  /*Len=*/0, Size));
1087  }
1088  }
1089 
1090  // When parsing inline assembly we set the base register to a non-zero value
1091  // if we don't know the actual value at this time. This is necessary to
1092  // get the matching correct in some cases.
1093  BaseReg = BaseReg ? BaseReg : 1;
1094  return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1095  IndexReg, Scale, Start, End, Size, Identifier,
1096  Info.OpDecl);
1097 }
1098 
1099 static void
1101  StringRef SymName, int64_t ImmDisp,
1102  int64_t FinalImmDisp, SMLoc &BracLoc,
1103  SMLoc &StartInBrac, SMLoc &End) {
1104  // Remove the '[' and ']' from the IR string.
1105  AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1));
1106  AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1));
1107 
1108  // If ImmDisp is non-zero, then we parsed a displacement before the
1109  // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp])
1110  // If ImmDisp doesn't match the displacement computed by the state machine
1111  // then we have an additional displacement in the bracketed expression.
1112  if (ImmDisp != FinalImmDisp) {
1113  if (ImmDisp) {
1114  // We have an immediate displacement before the bracketed expression.
1115  // Adjust this to match the final immediate displacement.
1116  bool Found = false;
1117  for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1118  E = AsmRewrites->end(); I != E; ++I) {
1119  if ((*I).Loc.getPointer() > BracLoc.getPointer())
1120  continue;
1121  if ((*I).Kind == AOK_ImmPrefix || (*I).Kind == AOK_Imm) {
1122  assert (!Found && "ImmDisp already rewritten.");
1123  (*I).Kind = AOK_Imm;
1124  (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer();
1125  (*I).Val = FinalImmDisp;
1126  Found = true;
1127  break;
1128  }
1129  }
1130  assert (Found && "Unable to rewrite ImmDisp.");
1131  (void)Found;
1132  } else {
1133  // We have a symbolic and an immediate displacement, but no displacement
1134  // before the bracketed expression. Put the immediate displacement
1135  // before the bracketed expression.
1136  AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0, FinalImmDisp));
1137  }
1138  }
1139  // Remove all the ImmPrefix rewrites within the brackets.
1140  for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1141  E = AsmRewrites->end(); I != E; ++I) {
1142  if ((*I).Loc.getPointer() < StartInBrac.getPointer())
1143  continue;
1144  if ((*I).Kind == AOK_ImmPrefix)
1145  (*I).Kind = AOK_Delete;
1146  }
1147  const char *SymLocPtr = SymName.data();
1148  // Skip everything before the symbol.
1149  if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
1150  assert(Len > 0 && "Expected a non-negative length.");
1151  AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len));
1152  }
1153  // Skip everything after the symbol.
1154  if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
1155  SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
1156  assert(Len > 0 && "Expected a non-negative length.");
1157  AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len));
1158  }
1159 }
1160 
1161 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1162  MCAsmParser &Parser = getParser();
1163  const AsmToken &Tok = Parser.getTok();
1164 
1165  bool Done = false;
1166  while (!Done) {
1167  bool UpdateLocLex = true;
1168 
1169  // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
1170  // identifier. Don't try an parse it as a register.
1171  if (Tok.getString().startswith("."))
1172  break;
1173 
1174  // If we're parsing an immediate expression, we don't expect a '['.
1175  if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
1176  break;
1177 
1178  AsmToken::TokenKind TK = getLexer().getKind();
1179  switch (TK) {
1180  default: {
1181  if (SM.isValidEndState()) {
1182  Done = true;
1183  break;
1184  }
1185  return Error(Tok.getLoc(), "unknown token in expression");
1186  }
1187  case AsmToken::EndOfStatement: {
1188  Done = true;
1189  break;
1190  }
1191  case AsmToken::String:
1192  case AsmToken::Identifier: {
1193  // This could be a register or a symbolic displacement.
1194  unsigned TmpReg;
1195  const MCExpr *Val;
1196  SMLoc IdentLoc = Tok.getLoc();
1197  StringRef Identifier = Tok.getString();
1198  if (TK != AsmToken::String && !ParseRegister(TmpReg, IdentLoc, End)) {
1199  SM.onRegister(TmpReg);
1200  UpdateLocLex = false;
1201  break;
1202  } else {
1203  if (!isParsingInlineAsm()) {
1204  if (getParser().parsePrimaryExpr(Val, End))
1205  return Error(Tok.getLoc(), "Unexpected identifier!");
1206  } else {
1207  // This is a dot operator, not an adjacent identifier.
1208  if (Identifier.find('.') != StringRef::npos) {
1209  return false;
1210  } else {
1211  InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1212  if (ParseIntelIdentifier(Val, Identifier, Info,
1213  /*Unevaluated=*/false, End))
1214  return true;
1215  }
1216  }
1217  SM.onIdentifierExpr(Val, Identifier);
1218  UpdateLocLex = false;
1219  break;
1220  }
1221  return Error(Tok.getLoc(), "Unexpected identifier!");
1222  }
1223  case AsmToken::Integer: {
1224  StringRef ErrMsg;
1225  if (isParsingInlineAsm() && SM.getAddImmPrefix())
1226  InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
1227  Tok.getLoc()));
1228  // Look for 'b' or 'f' following an Integer as a directional label
1229  SMLoc Loc = getTok().getLoc();
1230  int64_t IntVal = getTok().getIntVal();
1231  End = consumeToken();
1232  UpdateLocLex = false;
1233  if (getLexer().getKind() == AsmToken::Identifier) {
1234  StringRef IDVal = getTok().getString();
1235  if (IDVal == "f" || IDVal == "b") {
1236  MCSymbol *Sym =
1237  getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b");
1239  const MCExpr *Val =
1240  MCSymbolRefExpr::create(Sym, Variant, getContext());
1241  if (IDVal == "b" && Sym->isUndefined())
1242  return Error(Loc, "invalid reference to undefined symbol");
1243  StringRef Identifier = Sym->getName();
1244  SM.onIdentifierExpr(Val, Identifier);
1245  End = consumeToken();
1246  } else {
1247  if (SM.onInteger(IntVal, ErrMsg))
1248  return Error(Loc, ErrMsg);
1249  }
1250  } else {
1251  if (SM.onInteger(IntVal, ErrMsg))
1252  return Error(Loc, ErrMsg);
1253  }
1254  break;
1255  }
1256  case AsmToken::Plus: SM.onPlus(); break;
1257  case AsmToken::Minus: SM.onMinus(); break;
1258  case AsmToken::Tilde: SM.onNot(); break;
1259  case AsmToken::Star: SM.onStar(); break;
1260  case AsmToken::Slash: SM.onDivide(); break;
1261  case AsmToken::Pipe: SM.onOr(); break;
1262  case AsmToken::Caret: SM.onXor(); break;
1263  case AsmToken::Amp: SM.onAnd(); break;
1264  case AsmToken::LessLess:
1265  SM.onLShift(); break;
1267  SM.onRShift(); break;
1268  case AsmToken::LBrac: SM.onLBrac(); break;
1269  case AsmToken::RBrac: SM.onRBrac(); break;
1270  case AsmToken::LParen: SM.onLParen(); break;
1271  case AsmToken::RParen: SM.onRParen(); break;
1272  }
1273  if (SM.hadError())
1274  return Error(Tok.getLoc(), "unknown token in expression");
1275 
1276  if (!Done && UpdateLocLex)
1277  End = consumeToken();
1278  }
1279  return false;
1280 }
1281 
1282 std::unique_ptr<X86Operand>
1283 X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
1284  int64_t ImmDisp, unsigned Size) {
1285  MCAsmParser &Parser = getParser();
1286  const AsmToken &Tok = Parser.getTok();
1287  SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
1288  if (getLexer().isNot(AsmToken::LBrac))
1289  return ErrorOperand(BracLoc, "Expected '[' token!");
1290  Parser.Lex(); // Eat '['
1291 
1292  SMLoc StartInBrac = Tok.getLoc();
1293  // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We
1294  // may have already parsed an immediate displacement before the bracketed
1295  // expression.
1296  IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
1297  if (ParseIntelExpression(SM, End))
1298  return nullptr;
1299 
1300  const MCExpr *Disp = nullptr;
1301  if (const MCExpr *Sym = SM.getSym()) {
1302  // A symbolic displacement.
1303  Disp = Sym;
1304  if (isParsingInlineAsm())
1305  RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(),
1306  ImmDisp, SM.getImm(), BracLoc, StartInBrac,
1307  End);
1308  }
1309 
1310  if (SM.getImm() || !Disp) {
1311  const MCExpr *Imm = MCConstantExpr::create(SM.getImm(), getContext());
1312  if (Disp)
1313  Disp = MCBinaryExpr::createAdd(Disp, Imm, getContext());
1314  else
1315  Disp = Imm; // An immediate displacement only.
1316  }
1317 
1318  // Parse struct field access. Intel requires a dot, but MSVC doesn't. MSVC
1319  // will in fact do global lookup the field name inside all global typedefs,
1320  // but we don't emulate that.
1321  if (Tok.getString().find('.') != StringRef::npos) {
1322  const MCExpr *NewDisp;
1323  if (ParseIntelDotOperator(Disp, NewDisp))
1324  return nullptr;
1325 
1326  End = Tok.getEndLoc();
1327  Parser.Lex(); // Eat the field.
1328  Disp = NewDisp;
1329  }
1330 
1331  int BaseReg = SM.getBaseReg();
1332  int IndexReg = SM.getIndexReg();
1333  int Scale = SM.getScale();
1334  if (!isParsingInlineAsm()) {
1335  // handle [-42]
1336  if (!BaseReg && !IndexReg) {
1337  if (!SegReg)
1338  return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size);
1339  return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
1340  Start, End, Size);
1341  }
1342  StringRef ErrMsg;
1343  if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1344  Error(StartInBrac, ErrMsg);
1345  return nullptr;
1346  }
1347  return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1348  IndexReg, Scale, Start, End, Size);
1349  }
1350 
1351  InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1352  return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1353  End, Size, SM.getSymName(), Info);
1354 }
1355 
1356 // Inline assembly may use variable names with namespace alias qualifiers.
1357 bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
1358  StringRef &Identifier,
1360  bool IsUnevaluatedOperand, SMLoc &End) {
1361  MCAsmParser &Parser = getParser();
1362  assert (isParsingInlineAsm() && "Expected to be parsing inline assembly.");
1363  Val = nullptr;
1364 
1365  StringRef LineBuf(Identifier.data());
1366  void *Result =
1367  SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
1368 
1369  const AsmToken &Tok = Parser.getTok();
1370  SMLoc Loc = Tok.getLoc();
1371 
1372  // Advance the token stream until the end of the current token is
1373  // after the end of what the frontend claimed.
1374  const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
1375  while (true) {
1376  End = Tok.getEndLoc();
1377  getLexer().Lex();
1378 
1379  assert(End.getPointer() <= EndPtr && "frontend claimed part of a token?");
1380  if (End.getPointer() == EndPtr) break;
1381  }
1382  Identifier = LineBuf;
1383 
1384  // If the identifier lookup was unsuccessful, assume that we are dealing with
1385  // a label.
1386  if (!Result) {
1387  StringRef InternalName =
1388  SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(),
1389  Loc, false);
1390  assert(InternalName.size() && "We should have an internal name here.");
1391  // Push a rewrite for replacing the identifier name with the internal name.
1392  InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Label, Loc,
1393  Identifier.size(),
1394  InternalName));
1395  }
1396 
1397  // Create the symbol reference.
1398  MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
1400  Val = MCSymbolRefExpr::create(Sym, Variant, getParser().getContext());
1401  return false;
1402 }
1403 
1404 /// \brief Parse intel style segment override.
1405 std::unique_ptr<X86Operand>
1406 X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start,
1407  unsigned Size) {
1408  MCAsmParser &Parser = getParser();
1409  assert(SegReg != 0 && "Tried to parse a segment override without a segment!");
1410  const AsmToken &Tok = Parser.getTok(); // Eat colon.
1411  if (Tok.isNot(AsmToken::Colon))
1412  return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
1413  Parser.Lex(); // Eat ':'
1414 
1415  int64_t ImmDisp = 0;
1416  if (getLexer().is(AsmToken::Integer)) {
1417  ImmDisp = Tok.getIntVal();
1418  AsmToken ImmDispToken = Parser.Lex(); // Eat the integer.
1419 
1420  if (isParsingInlineAsm())
1421  InstInfo->AsmRewrites->push_back(
1422  AsmRewrite(AOK_ImmPrefix, ImmDispToken.getLoc()));
1423 
1424  if (getLexer().isNot(AsmToken::LBrac)) {
1425  // An immediate following a 'segment register', 'colon' token sequence can
1426  // be followed by a bracketed expression. If it isn't we know we have our
1427  // final segment override.
1428  const MCExpr *Disp = MCConstantExpr::create(ImmDisp, getContext());
1429  return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
1430  /*BaseReg=*/0, /*IndexReg=*/0, /*Scale=*/1,
1431  Start, ImmDispToken.getEndLoc(), Size);
1432  }
1433  }
1434 
1435  if (getLexer().is(AsmToken::LBrac))
1436  return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
1437 
1438  const MCExpr *Val;
1439  SMLoc End;
1440  if (!isParsingInlineAsm()) {
1441  if (getParser().parsePrimaryExpr(Val, End))
1442  return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1443 
1444  return X86Operand::CreateMem(getPointerWidth(), Val, Start, End, Size);
1445  }
1446 
1448  StringRef Identifier = Tok.getString();
1449  if (ParseIntelIdentifier(Val, Identifier, Info,
1450  /*Unevaluated=*/false, End))
1451  return nullptr;
1452  return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
1453  /*Scale=*/1, Start, End, Size, Identifier, Info);
1454 }
1455 
1456 //ParseRoundingModeOp - Parse AVX-512 rounding mode operand
1457 std::unique_ptr<X86Operand>
1458 X86AsmParser::ParseRoundingModeOp(SMLoc Start, SMLoc End) {
1459  MCAsmParser &Parser = getParser();
1460  const AsmToken &Tok = Parser.getTok();
1461  // Eat "{" and mark the current place.
1462  const SMLoc consumedToken = consumeToken();
1463  if (Tok.getIdentifier().startswith("r")){
1464  int rndMode = StringSwitch<int>(Tok.getIdentifier())
1469  .Default(-1);
1470  if (-1 == rndMode)
1471  return ErrorOperand(Tok.getLoc(), "Invalid rounding mode.");
1472  Parser.Lex(); // Eat "r*" of r*-sae
1473  if (!getLexer().is(AsmToken::Minus))
1474  return ErrorOperand(Tok.getLoc(), "Expected - at this point");
1475  Parser.Lex(); // Eat "-"
1476  Parser.Lex(); // Eat the sae
1477  if (!getLexer().is(AsmToken::RCurly))
1478  return ErrorOperand(Tok.getLoc(), "Expected } at this point");
1479  Parser.Lex(); // Eat "}"
1480  const MCExpr *RndModeOp =
1481  MCConstantExpr::create(rndMode, Parser.getContext());
1482  return X86Operand::CreateImm(RndModeOp, Start, End);
1483  }
1484  if(Tok.getIdentifier().equals("sae")){
1485  Parser.Lex(); // Eat the sae
1486  if (!getLexer().is(AsmToken::RCurly))
1487  return ErrorOperand(Tok.getLoc(), "Expected } at this point");
1488  Parser.Lex(); // Eat "}"
1489  return X86Operand::CreateToken("{sae}", consumedToken);
1490  }
1491  return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1492 }
1493 /// ParseIntelMemOperand - Parse intel style memory operand.
1494 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp,
1495  SMLoc Start,
1496  unsigned Size) {
1497  MCAsmParser &Parser = getParser();
1498  const AsmToken &Tok = Parser.getTok();
1499  SMLoc End;
1500 
1501  // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1502  if (getLexer().is(AsmToken::LBrac))
1503  return ParseIntelBracExpression(/*SegReg=*/0, Start, ImmDisp, Size);
1504  assert(ImmDisp == 0);
1505 
1506  const MCExpr *Val;
1507  if (!isParsingInlineAsm()) {
1508  if (getParser().parsePrimaryExpr(Val, End))
1509  return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1510 
1511  return X86Operand::CreateMem(getPointerWidth(), Val, Start, End, Size);
1512  }
1513 
1515  StringRef Identifier = Tok.getString();
1516  if (ParseIntelIdentifier(Val, Identifier, Info,
1517  /*Unevaluated=*/false, End))
1518  return nullptr;
1519 
1520  if (!getLexer().is(AsmToken::LBrac))
1521  return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0,
1522  /*Scale=*/1, Start, End, Size, Identifier, Info);
1523 
1524  Parser.Lex(); // Eat '['
1525 
1526  // Parse Identifier [ ImmDisp ]
1527  IntelExprStateMachine SM(/*ImmDisp=*/0, /*StopOnLBrac=*/true,
1528  /*AddImmPrefix=*/false);
1529  if (ParseIntelExpression(SM, End))
1530  return nullptr;
1531 
1532  if (SM.getSym()) {
1533  Error(Start, "cannot use more than one symbol in memory operand");
1534  return nullptr;
1535  }
1536  if (SM.getBaseReg()) {
1537  Error(Start, "cannot use base register with variable reference");
1538  return nullptr;
1539  }
1540  if (SM.getIndexReg()) {
1541  Error(Start, "cannot use index register with variable reference");
1542  return nullptr;
1543  }
1544 
1545  const MCExpr *Disp = MCConstantExpr::create(SM.getImm(), getContext());
1546  // BaseReg is non-zero to avoid assertions. In the context of inline asm,
1547  // we're pointing to a local variable in memory, so the base register is
1548  // really the frame or stack pointer.
1549  return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1550  /*BaseReg=*/1, /*IndexReg=*/0, /*Scale=*/1,
1551  Start, End, Size, Identifier, Info.OpDecl);
1552 }
1553 
1554 /// Parse the '.' operator.
1555 bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
1556  const MCExpr *&NewDisp) {
1557  MCAsmParser &Parser = getParser();
1558  const AsmToken &Tok = Parser.getTok();
1559  int64_t OrigDispVal, DotDispVal;
1560 
1561  // FIXME: Handle non-constant expressions.
1562  if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
1563  OrigDispVal = OrigDisp->getValue();
1564  else
1565  return Error(Tok.getLoc(), "Non-constant offsets are not supported!");
1566 
1567  // Drop the optional '.'.
1568  StringRef DotDispStr = Tok.getString();
1569  if (DotDispStr.startswith("."))
1570  DotDispStr = DotDispStr.drop_front(1);
1571 
1572  // .Imm gets lexed as a real.
1573  if (Tok.is(AsmToken::Real)) {
1574  APInt DotDisp;
1575  DotDispStr.getAsInteger(10, DotDisp);
1576  DotDispVal = DotDisp.getZExtValue();
1577  } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1578  unsigned DotDisp;
1579  std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1580  if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1581  DotDisp))
1582  return Error(Tok.getLoc(), "Unable to lookup field reference!");
1583  DotDispVal = DotDisp;
1584  } else
1585  return Error(Tok.getLoc(), "Unexpected token type!");
1586 
1587  if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1588  SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
1589  unsigned Len = DotDispStr.size();
1590  unsigned Val = OrigDispVal + DotDispVal;
1591  InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len,
1592  Val));
1593  }
1594 
1595  NewDisp = MCConstantExpr::create(OrigDispVal + DotDispVal, getContext());
1596  return false;
1597 }
1598 
1599 /// Parse the 'offset' operator. This operator is used to specify the
1600 /// location rather then the content of a variable.
1601 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
1602  MCAsmParser &Parser = getParser();
1603  const AsmToken &Tok = Parser.getTok();
1604  SMLoc OffsetOfLoc = Tok.getLoc();
1605  Parser.Lex(); // Eat offset.
1606 
1607  const MCExpr *Val;
1609  SMLoc Start = Tok.getLoc(), End;
1610  StringRef Identifier = Tok.getString();
1611  if (ParseIntelIdentifier(Val, Identifier, Info,
1612  /*Unevaluated=*/false, End))
1613  return nullptr;
1614 
1615  // Don't emit the offset operator.
1616  InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
1617 
1618  // The offset operator will have an 'r' constraint, thus we need to create
1619  // register operand to ensure proper matching. Just pick a GPR based on
1620  // the size of a pointer.
1621  unsigned RegNo =
1622  is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
1623  return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1624  OffsetOfLoc, Identifier, Info.OpDecl);
1625 }
1626 
/// Selects which of the 'LENGTH', 'SIZE' and 'TYPE' operators
/// ParseIntelOperator evaluates.
enum IntelOperatorKind {
  IOK_LENGTH,
  IOK_SIZE,
  IOK_TYPE
};
1632 
1633 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
1634 /// returns the number of elements in an array. It returns the value 1 for
1635 /// non-array variables. The SIZE operator returns the size of a C or C++
1636 /// variable. A variable's size is the product of its LENGTH and TYPE. The
1637 /// TYPE operator returns the size of a C or C++ type or variable. If the
1638 /// variable is an array, TYPE returns the size of a single element.
1639 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperator(unsigned OpKind) {
1640  MCAsmParser &Parser = getParser();
1641  const AsmToken &Tok = Parser.getTok();
1642  SMLoc TypeLoc = Tok.getLoc();
1643  Parser.Lex(); // Eat operator.
1644 
1645  const MCExpr *Val = nullptr;
1647  SMLoc Start = Tok.getLoc(), End;
1648  StringRef Identifier = Tok.getString();
1649  if (ParseIntelIdentifier(Val, Identifier, Info,
1650  /*Unevaluated=*/true, End))
1651  return nullptr;
1652 
1653  if (!Info.OpDecl)
1654  return ErrorOperand(Start, "unable to lookup expression");
1655 
1656  unsigned CVal = 0;
1657  switch(OpKind) {
1658  default: llvm_unreachable("Unexpected operand kind!");
1659  case IOK_LENGTH: CVal = Info.Length; break;
1660  case IOK_SIZE: CVal = Info.Size; break;
1661  case IOK_TYPE: CVal = Info.Type; break;
1662  }
1663 
1664  // Rewrite the type operator and the C or C++ type or variable in terms of an
1665  // immediate. E.g. TYPE foo -> $$4
1666  unsigned Len = End.getPointer() - TypeLoc.getPointer();
1667  InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
1668 
1669  const MCExpr *Imm = MCConstantExpr::create(CVal, getContext());
1670  return X86Operand::CreateImm(Imm, Start, End);
1671 }
1672 
1673 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
1674  MCAsmParser &Parser = getParser();
1675  const AsmToken &Tok = Parser.getTok();
1676  SMLoc Start, End;
1677 
1678  // Offset, length, type and size operators.
1679  if (isParsingInlineAsm()) {
1680  StringRef AsmTokStr = Tok.getString();
1681  if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
1682  return ParseIntelOffsetOfOperator();
1683  if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
1684  return ParseIntelOperator(IOK_LENGTH);
1685  if (AsmTokStr == "size" || AsmTokStr == "SIZE")
1686  return ParseIntelOperator(IOK_SIZE);
1687  if (AsmTokStr == "type" || AsmTokStr == "TYPE")
1688  return ParseIntelOperator(IOK_TYPE);
1689  }
1690 
1691  unsigned Size = getIntelMemOperandSize(Tok.getString());
1692  if (Size) {
1693  Parser.Lex(); // Eat operand size (e.g., byte, word).
1694  if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
1695  return ErrorOperand(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
1696  Parser.Lex(); // Eat ptr.
1697  }
1698  Start = Tok.getLoc();
1699 
1700  // Immediate.
1701  if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) ||
1702  getLexer().is(AsmToken::Tilde) || getLexer().is(AsmToken::LParen)) {
1703  AsmToken StartTok = Tok;
1704  IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
1705  /*AddImmPrefix=*/false);
1706  if (ParseIntelExpression(SM, End))
1707  return nullptr;
1708 
1709  int64_t Imm = SM.getImm();
1710  if (isParsingInlineAsm()) {
1711  unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
1712  if (StartTok.getString().size() == Len)
1713  // Just add a prefix if this wasn't a complex immediate expression.
1714  InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
1715  else
1716  // Otherwise, rewrite the complex expression as a single immediate.
1717  InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, Len, Imm));
1718  }
1719 
1720  if (getLexer().isNot(AsmToken::LBrac)) {
1721  // If a directional label (ie. 1f or 2b) was parsed above from
1722  // ParseIntelExpression() then SM.getSym() was set to a pointer to
1723  // to the MCExpr with the directional local symbol and this is a
1724  // memory operand not an immediate operand.
1725  if (SM.getSym())
1726  return X86Operand::CreateMem(getPointerWidth(), SM.getSym(), Start, End,
1727  Size);
1728 
1729  const MCExpr *ImmExpr = MCConstantExpr::create(Imm, getContext());
1730  return X86Operand::CreateImm(ImmExpr, Start, End);
1731  }
1732 
1733  // Only positive immediates are valid.
1734  if (Imm < 0)
1735  return ErrorOperand(Start, "expected a positive immediate displacement "
1736  "before bracketed expr.");
1737 
1738  // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1739  return ParseIntelMemOperand(Imm, Start, Size);
1740  }
1741 
1742  // rounding mode token
1743  if (STI.getFeatureBits()[X86::FeatureAVX512] &&
1744  getLexer().is(AsmToken::LCurly))
1745  return ParseRoundingModeOp(Start, End);
1746 
1747  // Register.
1748  unsigned RegNo = 0;
1749  if (!ParseRegister(RegNo, Start, End)) {
1750  // If this is a segment register followed by a ':', then this is the start
1751  // of a segment override, otherwise this is a normal register reference.
1752  if (getLexer().isNot(AsmToken::Colon))
1753  return X86Operand::CreateReg(RegNo, Start, End);
1754 
1755  return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size);
1756  }
1757 
1758  // Memory operand.
1759  return ParseIntelMemOperand(/*Disp=*/0, Start, Size);
1760 }
1761 
1762 std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
1763  MCAsmParser &Parser = getParser();
1764  switch (getLexer().getKind()) {
1765  default:
1766  // Parse a memory operand with no segment register.
1767  return ParseMemOperand(0, Parser.getTok().getLoc());
1768  case AsmToken::Percent: {
1769  // Read the register.
1770  unsigned RegNo;
1771  SMLoc Start, End;
1772  if (ParseRegister(RegNo, Start, End)) return nullptr;
1773  if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
1774  Error(Start, "%eiz and %riz can only be used as index registers",
1775  SMRange(Start, End));
1776  return nullptr;
1777  }
1778 
1779  // If this is a segment register followed by a ':', then this is the start
1780  // of a memory reference, otherwise this is a normal register reference.
1781  if (getLexer().isNot(AsmToken::Colon))
1782  return X86Operand::CreateReg(RegNo, Start, End);
1783 
1784  if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
1785  return ErrorOperand(Start, "invalid segment register");
1786 
1787  getParser().Lex(); // Eat the colon.
1788  return ParseMemOperand(RegNo, Start);
1789  }
1790  case AsmToken::Dollar: {
1791  // $42 -> immediate.
1792  SMLoc Start = Parser.getTok().getLoc(), End;
1793  Parser.Lex();
1794  const MCExpr *Val;
1795  if (getParser().parseExpression(Val, End))
1796  return nullptr;
1797  return X86Operand::CreateImm(Val, Start, End);
1798  }
1799  case AsmToken::LCurly:{
1800  SMLoc Start = Parser.getTok().getLoc(), End;
1801  if (STI.getFeatureBits()[X86::FeatureAVX512])
1802  return ParseRoundingModeOp(Start, End);
1803  return ErrorOperand(Start, "unknown token in expression");
1804  }
1805  }
1806 }
1807 
1808 bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
1809  const MCParsedAsmOperand &Op) {
1810  MCAsmParser &Parser = getParser();
1811  if(STI.getFeatureBits()[X86::FeatureAVX512]) {
1812  if (getLexer().is(AsmToken::LCurly)) {
1813  // Eat "{" and mark the current place.
1814  const SMLoc consumedToken = consumeToken();
1815  // Distinguish {1to<NUM>} from {%k<NUM>}.
1816  if(getLexer().is(AsmToken::Integer)) {
1817  // Parse memory broadcasting ({1to<NUM>}).
1818  if (getLexer().getTok().getIntVal() != 1)
1819  return !ErrorAndEatStatement(getLexer().getLoc(),
1820  "Expected 1to<NUM> at this point");
1821  Parser.Lex(); // Eat "1" of 1to8
1822  if (!getLexer().is(AsmToken::Identifier) ||
1823  !getLexer().getTok().getIdentifier().startswith("to"))
1824  return !ErrorAndEatStatement(getLexer().getLoc(),
1825  "Expected 1to<NUM> at this point");
1826  // Recognize only reasonable suffixes.
1827  const char *BroadcastPrimitive =
1828  StringSwitch<const char*>(getLexer().getTok().getIdentifier())
1829  .Case("to2", "{1to2}")
1830  .Case("to4", "{1to4}")
1831  .Case("to8", "{1to8}")
1832  .Case("to16", "{1to16}")
1833  .Default(nullptr);
1834  if (!BroadcastPrimitive)
1835  return !ErrorAndEatStatement(getLexer().getLoc(),
1836  "Invalid memory broadcast primitive.");
1837  Parser.Lex(); // Eat "toN" of 1toN
1838  if (!getLexer().is(AsmToken::RCurly))
1839  return !ErrorAndEatStatement(getLexer().getLoc(),
1840  "Expected } at this point");
1841  Parser.Lex(); // Eat "}"
1842  Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
1843  consumedToken));
1844  // No AVX512 specific primitives can pass
1845  // after memory broadcasting, so return.
1846  return true;
1847  } else {
1848  // Parse mask register {%k1}
1849  Operands.push_back(X86Operand::CreateToken("{", consumedToken));
1850  if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
1851  Operands.push_back(std::move(Op));
1852  if (!getLexer().is(AsmToken::RCurly))
1853  return !ErrorAndEatStatement(getLexer().getLoc(),
1854  "Expected } at this point");
1855  Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
1856 
1857  // Parse "zeroing non-masked" semantic {z}
1858  if (getLexer().is(AsmToken::LCurly)) {
1859  Operands.push_back(X86Operand::CreateToken("{z}", consumeToken()));
1860  if (!getLexer().is(AsmToken::Identifier) ||
1861  getLexer().getTok().getIdentifier() != "z")
1862  return !ErrorAndEatStatement(getLexer().getLoc(),
1863  "Expected z at this point");
1864  Parser.Lex(); // Eat the z
1865  if (!getLexer().is(AsmToken::RCurly))
1866  return !ErrorAndEatStatement(getLexer().getLoc(),
1867  "Expected } at this point");
1868  Parser.Lex(); // Eat the }
1869  }
1870  }
1871  }
1872  }
1873  }
1874  return true;
1875 }
1876 
1877 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
1878 /// has already been parsed if present.
///
/// \param SegReg   Segment register supplied by the caller; 0 is treated as
///                 "no segment register".
/// \param MemStart Location used as the start of the resulting operand (the
///                 parenthesized-expression form without a segment register
///                 uses the location of its '(' instead).
/// \returns The parsed operand, or nullptr if parsing fails.
1879 std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
1880  SMLoc MemStart) {
1881 
1882  MCAsmParser &Parser = getParser();
1883  // We have to disambiguate a parenthesized expression "(4+5)" from the start
1884  // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
1885  // only way to do this without lookahead is to eat the '(' and see what is
1886  // after it.
      // The displacement defaults to the constant 0 and is overwritten if an
      // expression is parsed below.
1887  const MCExpr *Disp = MCConstantExpr::create(0, getParser().getContext());
1888  if (getLexer().isNot(AsmToken::LParen)) {
1889  SMLoc ExprEnd;
1890  if (getParser().parseExpression(Disp, ExprEnd)) return nullptr;
1891 
1892  // After parsing the base expression we could either have a parenthesized
1893  // memory address or not. If not, return now. If so, eat the (.
1894  if (getLexer().isNot(AsmToken::LParen)) {
1895  // Unless we have a segment register, treat this as an immediate.
1896  if (SegReg == 0)
1897  return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, ExprEnd);
1898  return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
1899  MemStart, ExprEnd);
1900  }
1901 
1902  // Eat the '('.
1903  Parser.Lex();
1904  } else {
1905  // Okay, we have a '('. We don't know if this is an expression or not, so
1906  // we have to eat the ( to see beyond it.
1907  SMLoc LParenLoc = Parser.getTok().getLoc();
1908  Parser.Lex(); // Eat the '('.
1909 
1910  if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
1911  // Nothing to do here, fall into the code below with the '(' part of the
1912  // memory operand consumed.
1913  } else {
1914  SMLoc ExprEnd;
1915 
1916  // It must be a parenthesized expression, parse it now.
1917  if (getParser().parseParenExpression(Disp, ExprEnd))
1918  return nullptr;
1919 
1920  // After parsing the base expression we could either have a parenthesized
1921  // memory address or not. If not, return now. If so, eat the (.
1922  if (getLexer().isNot(AsmToken::LParen)) {
1923  // Unless we have a segment register, treat this as an immediate.
1924  if (SegReg == 0)
1925  return X86Operand::CreateMem(getPointerWidth(), Disp, LParenLoc,
1926  ExprEnd);
1927  return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
1928  MemStart, ExprEnd);
1929  }
1930 
1931  // Eat the '('.
1932  Parser.Lex();
1933  }
1934  }
1935 
1936  // If we reached here, then we just ate the ( of the memory operand. Process
1937  // the rest of the memory operand.
1938  unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
1939  SMLoc IndexLoc, BaseLoc;
1940 
      // Optional base register.
1941  if (getLexer().is(AsmToken::Percent)) {
1942  SMLoc StartLoc, EndLoc;
1943  BaseLoc = Parser.getTok().getLoc();
1944  if (ParseRegister(BaseReg, StartLoc, EndLoc)) return nullptr;
1945  if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
1946  Error(StartLoc, "eiz and riz can only be used as index registers",
1947  SMRange(StartLoc, EndLoc));
1948  return nullptr;
1949  }
1950  }
1951 
      // Optional ", index [, scale]" or ", scale" part.
1952  if (getLexer().is(AsmToken::Comma)) {
1953  Parser.Lex(); // Eat the comma.
1954  IndexLoc = Parser.getTok().getLoc();
1955 
1956  // Following the comma we should have either an index register, or a scale
1957  // value. We don't support the latter form, but we want to parse it
1958  // correctly.
1959  //
1960  // Note that even though it would be completely consistent to support syntax
1961  // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
1962  if (getLexer().is(AsmToken::Percent)) {
      // The register's start/end locations are not used afterwards, so one
      // scratch SMLoc is passed for both.
1963  SMLoc L;
1964  if (ParseRegister(IndexReg, L, L)) return nullptr;
1965 
1966  if (getLexer().isNot(AsmToken::RParen)) {
1967  // Parse the scale amount:
1968  // ::= ',' [scale-expression]
1969  if (getLexer().isNot(AsmToken::Comma)) {
1970  Error(Parser.getTok().getLoc(),
1971  "expected comma in scale expression");
1972  return nullptr;
1973  }
1974  Parser.Lex(); // Eat the comma.
1975 
      // An empty scale ("(%eax,%ebx,)") leaves Scale at its default of 1.
1976  if (getLexer().isNot(AsmToken::RParen)) {
1977  SMLoc Loc = Parser.getTok().getLoc();
1978 
1979  int64_t ScaleVal;
1980  if (getParser().parseAbsoluteExpression(ScaleVal)){
1981  Error(Loc, "expected scale expression");
1982  return nullptr;
1983  }
1984 
1985  // Validate the scale amount.
1986  if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1987  ScaleVal != 1) {
1988  Error(Loc, "scale factor in 16-bit address must be 1");
1989  return nullptr;
1990  }
1991  if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
1992  Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
1993  return nullptr;
1994  }
1995  Scale = (unsigned)ScaleVal;
1996  }
1997  }
1998  } else if (getLexer().isNot(AsmToken::RParen)) {
1999  // A scale amount without an index register is parsed but ignored; a
2000  // warning is emitted below unless the value is 1.
2001  SMLoc Loc = Parser.getTok().getLoc();
2002 
2003  int64_t Value;
2004  if (getParser().parseAbsoluteExpression(Value))
2005  return nullptr;
2006 
2007  if (Value != 1)
2008  Warning(Loc, "scale factor without index register is ignored");
2009  Scale = 1;
2010  }
2011  }
2012 
2013  // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
2014  if (getLexer().isNot(AsmToken::RParen)) {
2015  Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
2016  return nullptr;
2017  }
2018  SMLoc MemEnd = Parser.getTok().getEndLoc();
2019  Parser.Lex(); // Eat the ')'.
2020 
2021  // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
2022  // and then only in non-64-bit modes. Except for DX, which is a special case
2023  // because an unofficial form of in/out instructions uses it.
2024  if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
2025  (is64BitMode() || (BaseReg != X86::BX && BaseReg != X86::BP &&
2026  BaseReg != X86::SI && BaseReg != X86::DI)) &&
2027  BaseReg != X86::DX) {
2028  Error(BaseLoc, "invalid 16-bit base register");
2029  return nullptr;
2030  }
2031  if (BaseReg == 0 &&
2032  X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
2033  Error(IndexLoc, "16-bit memory operand may not include only index register");
2034  return nullptr;
2035  }
2036 
      // CheckBaseRegAndIndexReg fills ErrMsg when it rejects the register pair;
      // the error is reported at the base register's location.
2037  StringRef ErrMsg;
2038  if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
2039  Error(BaseLoc, ErrMsg);
2040  return nullptr;
2041  }
2042 
      // Use the full segment/base/index/scale form when any register is
      // present; otherwise only the displacement remains.
2043  if (SegReg || BaseReg || IndexReg)
2044  return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
2045  IndexReg, Scale, MemStart, MemEnd);
2046  return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, MemEnd);
2047 }
2048 
/// Parse one instruction statement: push the mnemonic token(s), parse the
/// comma-separated operand list, then apply a series of mnemonic-specific
/// rewrites to the operand vector.
///
/// \param Info     Its address is stored in InstInfo.
/// \param Name     Mnemonic as written in the source.
/// \param NameLoc  Location of the mnemonic; used for the mnemonic token and
///                 for every synthesized operand.
/// \param Operands Receives the mnemonic token(s) followed by the operands.
/// \returns false when the statement was parsed; the failure paths return
///          true or the result of Error()/ErrorAndEatStatement().
2049 bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
2050  SMLoc NameLoc, OperandVector &Operands) {
2051  MCAsmParser &Parser = getParser();
2052  InstInfo = &Info;
      // PatchedName is the mnemonic that is actually pushed as a token; the
      // hacks below may shorten it. Name keeps the original spelling.
2053  StringRef PatchedName = Name;
2054 
2055  // FIXME: Hack to recognize setneb as setne.
2056  if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
2057  PatchedName != "setb" && PatchedName != "setnb")
2058  PatchedName = PatchedName.substr(0, Name.size()-1);
2059 
2060  // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
2061  if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
2062  (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
2063  PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
2064  bool IsVCMP = PatchedName[0] == 'v';
      // Index of the first comparison-code character, i.e. the length of
      // "vcmp" or "cmp".
2065  unsigned CCIdx = IsVCMP ? 4 : 3;
2066  unsigned ComparisonCode = StringSwitch<unsigned>(
2067  PatchedName.slice(CCIdx, PatchedName.size() - 2))
2068  .Case("eq", 0x00)
2069  .Case("lt", 0x01)
2070  .Case("le", 0x02)
2071  .Case("unord", 0x03)
2072  .Case("neq", 0x04)
2073  .Case("nlt", 0x05)
2074  .Case("nle", 0x06)
2075  .Case("ord", 0x07)
2076  /* AVX only from here */
2077  .Case("eq_uq", 0x08)
2078  .Case("nge", 0x09)
2079  .Case("ngt", 0x0A)
2080  .Case("false", 0x0B)
2081  .Case("neq_oq", 0x0C)
2082  .Case("ge", 0x0D)
2083  .Case("gt", 0x0E)
2084  .Case("true", 0x0F)
2085  .Case("eq_os", 0x10)
2086  .Case("lt_oq", 0x11)
2087  .Case("le_oq", 0x12)
2088  .Case("unord_s", 0x13)
2089  .Case("neq_us", 0x14)
2090  .Case("nlt_uq", 0x15)
2091  .Case("nle_uq", 0x16)
2092  .Case("ord_s", 0x17)
2093  .Case("eq_us", 0x18)
2094  .Case("nge_uq", 0x19)
2095  .Case("ngt_uq", 0x1A)
2096  .Case("false_os", 0x1B)
2097  .Case("neq_os", 0x1C)
2098  .Case("ge_oq", 0x1D)
2099  .Case("gt_oq", 0x1E)
2100  .Case("true_us", 0x1F)
2101  .Default(~0U);
      // The non-'v' forms accept only the first eight codes.
2102  if (ComparisonCode != ~0U && (IsVCMP || ComparisonCode < 8)) {
2103 
      // Push "cmp"/"vcmp", then the code as an immediate; the remaining
      // two-character type suffix becomes the mnemonic token pushed later.
2104  Operands.push_back(X86Operand::CreateToken(PatchedName.slice(0, CCIdx),
2105  NameLoc));
2106 
2107  const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2108  getParser().getContext());
2109  Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2110 
2111  PatchedName = PatchedName.substr(PatchedName.size() - 2);
2112  }
2113  }
2114 
2115  // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2116  if (PatchedName.startswith("vpcmp") &&
2117  (PatchedName.endswith("b") || PatchedName.endswith("w") ||
2118  PatchedName.endswith("d") || PatchedName.endswith("q"))) {
      // Here CCIdx is the length of the type suffix: 2 when the character
      // before the last one is 'u', otherwise 1.
2119  unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
2120  unsigned ComparisonCode = StringSwitch<unsigned>(
2121  PatchedName.slice(5, PatchedName.size() - CCIdx))
2122  .Case("eq", 0x0) // Only allowed on unsigned. Checked below.
2123  .Case("lt", 0x1)
2124  .Case("le", 0x2)
2125  //.Case("false", 0x3) // Not a documented alias.
2126  .Case("neq", 0x4)
2127  .Case("nlt", 0x5)
2128  .Case("nle", 0x6)
2129  //.Case("true", 0x7) // Not a documented alias.
2130  .Default(~0U);
2131  if (ComparisonCode != ~0U && (ComparisonCode != 0 || CCIdx == 2)) {
2132  Operands.push_back(X86Operand::CreateToken("vpcmp", NameLoc));
2133 
2134  const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2135  getParser().getContext());
2136  Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2137 
2138  PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
2139  }
2140  }
2141 
2142  // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2143  if (PatchedName.startswith("vpcom") &&
2144  (PatchedName.endswith("b") || PatchedName.endswith("w") ||
2145  PatchedName.endswith("d") || PatchedName.endswith("q"))) {
2146  unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
2147  unsigned ComparisonCode = StringSwitch<unsigned>(
2148  PatchedName.slice(5, PatchedName.size() - CCIdx))
2149  .Case("lt", 0x0)
2150  .Case("le", 0x1)
2151  .Case("gt", 0x2)
2152  .Case("ge", 0x3)
2153  .Case("eq", 0x4)
2154  .Case("neq", 0x5)
2155  .Case("false", 0x6)
2156  .Case("true", 0x7)
2157  .Default(~0U);
2158  if (ComparisonCode != ~0U) {
2159  Operands.push_back(X86Operand::CreateToken("vpcom", NameLoc));
2160 
2161  const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2162  getParser().getContext());
2163  Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2164 
2165  PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
2166  }
2167  }
2168 
      // Push the (possibly shortened) mnemonic token.
2169  Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
2170 
2171  // Determine whether this is an instruction prefix.
2172  bool isPrefix =
2173  Name == "lock" || Name == "rep" ||
2174  Name == "repe" || Name == "repz" ||
2175  Name == "repne" || Name == "repnz" ||
2176  Name == "rex64" || Name == "data16";
2177 
2178 
2179  // This does the actual operand parsing. Don't parse any more if we have a
2180  // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
2181  // just want to parse the "lock" as the first instruction and the "incl" as
2182  // the next one.
2183  if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
2184 
2185  // Parse '*' modifier.
2186  if (getLexer().is(AsmToken::Star))
2187  Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
2188 
2189  // Read the operands.
2190  while(1) {
2191  if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
2192  Operands.push_back(std::move(Op));
      // A false result from HandleAVX512Operand is treated as a parse
      // failure.
2193  if (!HandleAVX512Operand(Operands, *Operands.back()))
2194  return true;
2195  } else {
      // ParseOperand returned null: skip the rest of the statement and
      // report failure.
2196  Parser.eatToEndOfStatement();
2197  return true;
2198  }
2199  // check for comma and eat it
2200  if (getLexer().is(AsmToken::Comma))
2201  Parser.Lex();
2202  else
2203  break;
2204  }
2205 
2206  if (getLexer().isNot(AsmToken::EndOfStatement))
2207  return ErrorAndEatStatement(getLexer().getLoc(),
2208  "unexpected token in argument list");
2209  }
2210 
2211  // Consume the EndOfStatement or the prefix separator Slash
2212  if (getLexer().is(AsmToken::EndOfStatement) ||
2213  (isPrefix && getLexer().is(AsmToken::Slash)))
2214  Parser.Lex();
2215 
2216  // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
2217  // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
2218  // documented form in various unofficial manuals, so a lot of code uses it.
2219  if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
2220  Operands.size() == 3) {
2221  X86Operand &Op = (X86Operand &)*Operands.back();
      // Only a bare "(%dx)" qualifies: no segment, constant zero
      // displacement, base DX and no index register.
2222  if (Op.isMem() && Op.Mem.SegReg == 0 &&
2223  isa<MCConstantExpr>(Op.Mem.Disp) &&
2224  cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2225  Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2226  SMLoc Loc = Op.getEndLoc();
2227  Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2228  }
2229  }
2230  // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
2231  if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
2232  Operands.size() == 3) {
2233  X86Operand &Op = (X86Operand &)*Operands[1];
2234  if (Op.isMem() && Op.Mem.SegReg == 0 &&
2235  isa<MCConstantExpr>(Op.Mem.Disp) &&
2236  cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2237  Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2238  SMLoc Loc = Op.getEndLoc();
2239  Operands[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2240  }
2241  }
2242 
      // In the blocks below, Operands.size() == 1 means that only the
      // mnemonic token is present, i.e. no explicit operands were written.
2243  // Append default arguments to "ins[bwld]"
2244  if (Name.startswith("ins") && Operands.size() == 1 &&
2245  (Name == "insb" || Name == "insw" || Name == "insl" ||
2246  Name == "insd" )) {
2247  AddDefaultSrcDestOperands(Operands,
2248  X86Operand::CreateReg(X86::DX, NameLoc, NameLoc),
2249  DefaultMemDIOperand(NameLoc));
2250  }
2251 
2252  // Append default arguments to "outs[bwld]"
2253  if (Name.startswith("outs") && Operands.size() == 1 &&
2254  (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
2255  Name == "outsd" )) {
2256  AddDefaultSrcDestOperands(Operands,
2257  DefaultMemSIOperand(NameLoc),
2258  X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2259  }
2260 
2261  // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
2262  // values of $SIREG according to the mode. It would be nice if this
2263  // could be achieved with InstAlias in the tables.
2264  if (Name.startswith("lods") && Operands.size() == 1 &&
2265  (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
2266  Name == "lodsl" || Name == "lodsd" || Name == "lodsq"))
2267  Operands.push_back(DefaultMemSIOperand(NameLoc));
2268 
2269  // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
2270  // values of $DIREG according to the mode. It would be nice if this
2271  // could be achieved with InstAlias in the tables.
2272  if (Name.startswith("stos") && Operands.size() == 1 &&
2273  (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2274  Name == "stosl" || Name == "stosd" || Name == "stosq"))
2275  Operands.push_back(DefaultMemDIOperand(NameLoc));
2276 
2277  // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
2278  // values of $DIREG according to the mode. It would be nice if this
2279  // could be achieved with InstAlias in the tables.
2280  if (Name.startswith("scas") && Operands.size() == 1 &&
2281  (Name == "scas" || Name == "scasb" || Name == "scasw" ||
2282  Name == "scasl" || Name == "scasd" || Name == "scasq"))
2283  Operands.push_back(DefaultMemDIOperand(NameLoc));
2284 
2285  // Add default SI and DI operands to "cmps[bwlq]".
2286  if (Name.startswith("cmps") &&
2287  (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
2288  Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
2289  if (Operands.size() == 1) {
2290  AddDefaultSrcDestOperands(Operands,
2291  DefaultMemDIOperand(NameLoc),
2292  DefaultMemSIOperand(NameLoc));
2293  } else if (Operands.size() == 3) {
      // Both operands were written explicitly: they must pass
      // doSrcDstMatch.
2294  X86Operand &Op = (X86Operand &)*Operands[1];
2295  X86Operand &Op2 = (X86Operand &)*Operands[2];
2296  if (!doSrcDstMatch(Op, Op2))
2297  return Error(Op.getStartLoc(),
2298  "mismatching source and destination index registers");
2299  }
2300  }
2301 
2302  // Add default SI and DI operands to "movs[bwlq]".
2303  if ((Name.startswith("movs") &&
2304  (Name == "movs" || Name == "movsb" || Name == "movsw" ||
2305  Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
2306  (Name.startswith("smov") &&
2307  (Name == "smov" || Name == "smovb" || Name == "smovw" ||
2308  Name == "smovl" || Name == "smovd" || Name == "smovq"))) {
2309  if (Operands.size() == 1) {
      // An operand-less "movsd" has its mnemonic token replaced by "movsl"
      // before the default operands are added.
2310  if (Name == "movsd")
2311  Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
2312  AddDefaultSrcDestOperands(Operands,
2313  DefaultMemSIOperand(NameLoc),
2314  DefaultMemDIOperand(NameLoc));
2315  } else if (Operands.size() == 3) {
2316  X86Operand &Op = (X86Operand &)*Operands[1];
2317  X86Operand &Op2 = (X86Operand &)*Operands[2];
2318  if (!doSrcDstMatch(Op, Op2))
2319  return Error(Op.getStartLoc(),
2320  "mismatching source and destination index registers");
2321  }
2322  }
2323 
2324  // FIXME: Hack to recognize s{hr,ar,hl} $1, <op>. Canonicalize to
2325  // "shift <op>".
2326  if ((Name.startswith("shr") || Name.startswith("sar") ||
2327  Name.startswith("shl") || Name.startswith("sal") ||
2328  Name.startswith("rcl") || Name.startswith("rcr") ||
2329  Name.startswith("rol") || Name.startswith("ror")) &&
2330  Operands.size() == 3) {
2331  if (isParsingIntelSyntax()) {
2332  // Intel syntax
      // The count is the last operand here; drop it when it is the
      // constant 1.
2333  X86Operand &Op1 = static_cast<X86Operand &>(*Operands[2]);
2334  if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2335  cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2336  Operands.pop_back();
2337  } else {
      // Otherwise the count is the first operand after the mnemonic.
2338  X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2339  if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2340  cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2341  Operands.erase(Operands.begin() + 1);
2342  }
2343  }
2344 
2345  // Transforms "int $3" into "int3" as a size optimization. We can't write an
2346  // instalias with an immediate operand yet.
2347  if (Name == "int" && Operands.size() == 2) {
2348  X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2349  if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2350  cast<MCConstantExpr>(Op1.getImm())->getValue() == 3) {
2351  Operands.erase(Operands.begin() + 1);
2352  static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3");
2353  }
2354  }
2355 
2356  return false;
2357 }
2358 
2359 static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
2360  bool isCmp) {
2361  MCInst TmpInst;
2362  TmpInst.setOpcode(Opcode);
2363  if (!isCmp)
2364  TmpInst.addOperand(MCOperand::createReg(Reg));
2365  TmpInst.addOperand(MCOperand::createReg(Reg));
2366  TmpInst.addOperand(Inst.getOperand(0));
2367  Inst = TmpInst;
2368  return true;
2369 }
2370 
2371 static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
2372  bool isCmp = false) {
2373  if (!Inst.getOperand(0).isImm() ||
2375  return false;
2376 
2377  return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
2378 }
2379 
2380 static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
2381  bool isCmp = false) {
2382  if (!Inst.getOperand(0).isImm() ||
2384  return false;
2385 
2386  return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
2387 }
2388 
2389 static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
2390  bool isCmp = false) {
2391  if (!Inst.getOperand(0).isImm() ||
2393  return false;
2394 
2395  return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
2396 }
2397 
2398 bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
2399  switch (Inst.getOpcode()) {
2400  default: return true;
2401  case X86::INT:
2402  X86Operand &Op = static_cast<X86Operand &>(*Ops[1]);
2403  assert(Op.isImm() && "expected immediate");
2404  int64_t Res;
2405  if (!Op.getImm()->evaluateAsAbsolute(Res) || Res > 255) {
2406  Error(Op.getStartLoc(), "interrupt vector must be in range [0-255]");
2407  return false;
2408  }
2409  return true;
2410  }
2411  llvm_unreachable("handle the instruction appropriately");
2412 }
2413 
2414 bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
2415  switch (Inst.getOpcode()) {
2416  default: return false;
2417  case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
2418  case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
2419  case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
2420  case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
2421  case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
2422  case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
2423  case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8);
2424  case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8);
2425  case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8);
2426  case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
2427  case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
2428  case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
2429  case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
2430  case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
2431  case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
2432  case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
2433  case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
2434  case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
2435  case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
2436  case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
2437  case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
2438  case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
2439  case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
2440  case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
2441  case X86::VMOVAPDrr:
2442  case X86::VMOVAPDYrr:
2443  case X86::VMOVAPSrr:
2444  case X86::VMOVAPSYrr:
2445  case X86::VMOVDQArr:
2446  case X86::VMOVDQAYrr:
2447  case X86::VMOVDQUrr:
2448  case X86::VMOVDQUYrr:
2449  case X86::VMOVUPDrr:
2450  case X86::VMOVUPDYrr:
2451  case X86::VMOVUPSrr:
2452  case X86::VMOVUPSYrr: {
2453  if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2455  return false;
2456 
2457  unsigned NewOpc;
2458  switch (Inst.getOpcode()) {
2459  default: llvm_unreachable("Invalid opcode");
2460  case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
2461  case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
2462  case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
2463  case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
2464  case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
2465  case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
2466  case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
2467  case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
2468  case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
2469  case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
2470  case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
2471  case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
2472  }
2473  Inst.setOpcode(NewOpc);
2474  return true;
2475  }
2476  case X86::VMOVSDrr:
2477  case X86::VMOVSSrr: {
2478  if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2480  return false;
2481  unsigned NewOpc;
2482  switch (Inst.getOpcode()) {
2483  default: llvm_unreachable("Invalid opcode");
2484  case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
2485  case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
2486  }
2487  Inst.setOpcode(NewOpc);
2488  return true;
2489  }
2490  }
2491 }
2492 
2493 static const char *getSubtargetFeatureName(uint64_t Val);
2494 
2495 void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
2496  MCStreamer &Out) {
2497  Instrumentation->InstrumentAndEmitInstruction(Inst, Operands, getContext(),
2498  MII, Out);
2499 }
2500 
2501 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2502  OperandVector &Operands,
2503  MCStreamer &Out, uint64_t &ErrorInfo,
2504  bool MatchingInlineAsm) {
2505  if (isParsingIntelSyntax())
2506  return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
2507  MatchingInlineAsm);
2508  return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
2509  MatchingInlineAsm);
2510 }
2511 
2512 void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
2513  OperandVector &Operands, MCStreamer &Out,
2514  bool MatchingInlineAsm) {
2515  // FIXME: This should be replaced with a real .td file alias mechanism.
2516  // Also, MatchInstructionImpl should actually *do* the EmitInstruction
2517  // call.
2518  const char *Repl = StringSwitch<const char *>(Op.getToken())
2519  .Case("finit", "fninit")
2520  .Case("fsave", "fnsave")
2521  .Case("fstcw", "fnstcw")
2522  .Case("fstcww", "fnstcw")
2523  .Case("fstenv", "fnstenv")
2524  .Case("fstsw", "fnstsw")
2525  .Case("fstsww", "fnstsw")
2526  .Case("fclex", "fnclex")
2527  .Default(nullptr);
2528  if (Repl) {
2529  MCInst Inst;
2530  Inst.setOpcode(X86::WAIT);
2531  Inst.setLoc(IDLoc);
2532  if (!MatchingInlineAsm)
2533  EmitInstruction(Inst, Operands, Out);
2534  Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
2535  }
2536 }
2537 
2538 bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
2539  bool MatchingInlineAsm) {
2540  assert(ErrorInfo && "Unknown missing feature!");
2541  ArrayRef<SMRange> EmptyRanges = None;
2542  SmallString<126> Msg;
2543  raw_svector_ostream OS(Msg);
2544  OS << "instruction requires:";
2545  uint64_t Mask = 1;
2546  for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2547  if (ErrorInfo & Mask)
2548  OS << ' ' << getSubtargetFeatureName(ErrorInfo & Mask);
2549  Mask <<= 1;
2550  }
2551  return Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
2552 }
2553 
2554 bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
2555  OperandVector &Operands,
2556  MCStreamer &Out,
2557  uint64_t &ErrorInfo,
2558  bool MatchingInlineAsm) {
2559  assert(!Operands.empty() && "Unexpect empty operand list!");
2560  X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2561  assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2562  ArrayRef<SMRange> EmptyRanges = None;
2563 
2564  // First, handle aliases that expand to multiple instructions.
2565  MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
2566 
2567  bool WasOriginallyInvalidOperand = false;
2568  MCInst Inst;
2569 
2570  // First, try a direct match.
2571  switch (MatchInstructionImpl(Operands, Inst,
2572  ErrorInfo, MatchingInlineAsm,
2573  isParsingIntelSyntax())) {
2574  default: llvm_unreachable("Unexpected match result!");
2575  case Match_Success:
2576  if (!validateInstruction(Inst, Operands))
2577  return true;
2578 
2579  // Some instructions need post-processing to, for example, tweak which
2580  // encoding is selected. Loop on it while changes happen so the
2581  // individual transformations can chain off each other.
2582  if (!MatchingInlineAsm)
2583  while (processInstruction(Inst, Operands))
2584  ;
2585 
2586  Inst.setLoc(IDLoc);
2587  if (!MatchingInlineAsm)
2588  EmitInstruction(Inst, Operands, Out);
2589  Opcode = Inst.getOpcode();
2590  return false;
2591  case Match_MissingFeature:
2592  return ErrorMissingFeature(IDLoc, ErrorInfo, MatchingInlineAsm);
2593  case Match_InvalidOperand:
2594  WasOriginallyInvalidOperand = true;
2595  break;
2596  case Match_MnemonicFail:
2597  break;
2598  }
2599 
2600  // FIXME: Ideally, we would only attempt suffix matches for things which are
2601  // valid prefixes, and we could just infer the right unambiguous
2602  // type. However, that requires substantially more matcher support than the
2603  // following hack.
2604 
2605  // Change the operand to point to a temporary token.
2606  StringRef Base = Op.getToken();
2607  SmallString<16> Tmp;
2608  Tmp += Base;
2609  Tmp += ' ';
2610  Op.setTokenValue(Tmp);
2611 
2612  // If this instruction starts with an 'f', then it is a floating point stack
2613  // instruction. These come in up to three forms for 32-bit, 64-bit, and
2614  // 80-bit floating point, which use the suffixes s,l,t respectively.
2615  //
2616  // Otherwise, we assume that this may be an integer instruction, which comes
2617  // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
2618  const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
2619 
2620  // Check for the various suffix matches.
2621  uint64_t ErrorInfoIgnore;
2622  uint64_t ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
2623  unsigned Match[4];
2624 
2625  for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) {
2626  Tmp.back() = Suffixes[I];
2627  Match[I] = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2628  MatchingInlineAsm, isParsingIntelSyntax());
2629  // If this returned as a missing feature failure, remember that.
2630  if (Match[I] == Match_MissingFeature)
2631  ErrorInfoMissingFeature = ErrorInfoIgnore;
2632  }
2633 
2634  // Restore the old token.
2635  Op.setTokenValue(Base);
2636 
2637  // If exactly one matched, then we treat that as a successful match (and the
2638  // instruction will already have been filled in correctly, since the failing
2639  // matches won't have modified it).
2640  unsigned NumSuccessfulMatches =
2641  std::count(std::begin(Match), std::end(Match), Match_Success);
2642  if (NumSuccessfulMatches == 1) {
2643  Inst.setLoc(IDLoc);
2644  if (!MatchingInlineAsm)
2645  EmitInstruction(Inst, Operands, Out);
2646  Opcode = Inst.getOpcode();
2647  return false;
2648  }
2649 
2650  // Otherwise, the match failed, try to produce a decent error message.
2651 
2652  // If we had multiple suffix matches, then identify this as an ambiguous
2653  // match.
2654  if (NumSuccessfulMatches > 1) {
2655  char MatchChars[4];
2656  unsigned NumMatches = 0;
2657  for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I)
2658  if (Match[I] == Match_Success)
2659  MatchChars[NumMatches++] = Suffixes[I];
2660 
2661  SmallString<126> Msg;
2662  raw_svector_ostream OS(Msg);
2663  OS << "ambiguous instructions require an explicit suffix (could be ";
2664  for (unsigned i = 0; i != NumMatches; ++i) {
2665  if (i != 0)
2666  OS << ", ";
2667  if (i + 1 == NumMatches)
2668  OS << "or ";
2669  OS << "'" << Base << MatchChars[i] << "'";
2670  }
2671  OS << ")";
2672  Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
2673  return true;
2674  }
2675 
2676  // Okay, we know that none of the variants matched successfully.
2677 
2678  // If all of the instructions reported an invalid mnemonic, then the original
2679  // mnemonic was invalid.
2680  if (std::count(std::begin(Match), std::end(Match), Match_MnemonicFail) == 4) {
2681  if (!WasOriginallyInvalidOperand) {
2683  MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
2684  return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
2685  Ranges, MatchingInlineAsm);
2686  }
2687 
2688  // Recover location info for the operand if we know which was the problem.
2689  if (ErrorInfo != ~0ULL) {
2690  if (ErrorInfo >= Operands.size())
2691  return Error(IDLoc, "too few operands for instruction",
2692  EmptyRanges, MatchingInlineAsm);
2693 
2694  X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
2695  if (Operand.getStartLoc().isValid()) {
2696  SMRange OperandRange = Operand.getLocRange();
2697  return Error(Operand.getStartLoc(), "invalid operand for instruction",
2698  OperandRange, MatchingInlineAsm);
2699  }
2700  }
2701 
2702  return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2703  MatchingInlineAsm);
2704  }
2705 
2706  // If one instruction matched with a missing feature, report this as a
2707  // missing feature.
2708  if (std::count(std::begin(Match), std::end(Match),
2709  Match_MissingFeature) == 1) {
2710  ErrorInfo = ErrorInfoMissingFeature;
2711  return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
2712  MatchingInlineAsm);
2713  }
2714 
2715  // If one instruction matched with an invalid operand, report this as an
2716  // operand failure.
2717  if (std::count(std::begin(Match), std::end(Match),
2718  Match_InvalidOperand) == 1) {
2719  return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2720  MatchingInlineAsm);
2721  }
2722 
2723  // If all of these were an outright failure, report it in a useless way.
2724  Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
2725  EmptyRanges, MatchingInlineAsm);
2726  return true;
2727 }
2728 
2729 bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
2730  OperandVector &Operands,
2731  MCStreamer &Out,
2732  uint64_t &ErrorInfo,
2733  bool MatchingInlineAsm) {
2734  assert(!Operands.empty() && "Unexpect empty operand list!");
2735  X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2736  assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2737  StringRef Mnemonic = Op.getToken();
2738  ArrayRef<SMRange> EmptyRanges = None;
2739 
2740  // First, handle aliases that expand to multiple instructions.
2741  MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
2742 
2743  MCInst Inst;
2744 
2745  // Find one unsized memory operand, if present.
2746  X86Operand *UnsizedMemOp = nullptr;
2747  for (const auto &Op : Operands) {
2748  X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
2749  if (X86Op->isMemUnsized())
2750  UnsizedMemOp = X86Op;
2751  }
2752 
2753  // Allow some instructions to have implicitly pointer-sized operands. This is
2754  // compatible with gas.
2755  if (UnsizedMemOp) {
2756  static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
2757  for (const char *Instr : PtrSizedInstrs) {
2758  if (Mnemonic == Instr) {
2759  UnsizedMemOp->Mem.Size = getPointerWidth();
2760  break;
2761  }
2762  }
2763  }
2764 
2765  // If an unsized memory operand is present, try to match with each memory
2766  // operand size. In Intel assembly, the size is not part of the instruction
2767  // mnemonic.
2769  uint64_t ErrorInfoMissingFeature = 0;
2770  if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
2771  static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
2772  for (unsigned Size : MopSizes) {
2773  UnsizedMemOp->Mem.Size = Size;
2774  uint64_t ErrorInfoIgnore;
2775  unsigned LastOpcode = Inst.getOpcode();
2776  unsigned M =
2777  MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2778  MatchingInlineAsm, isParsingIntelSyntax());
2779  if (Match.empty() || LastOpcode != Inst.getOpcode())
2780  Match.push_back(M);
2781 
2782  // If this returned as a missing feature failure, remember that.
2783  if (Match.back() == Match_MissingFeature)
2784  ErrorInfoMissingFeature = ErrorInfoIgnore;
2785  }
2786 
2787  // Restore the size of the unsized memory operand if we modified it.
2788  if (UnsizedMemOp)
2789  UnsizedMemOp->Mem.Size = 0;
2790  }
2791 
2792  // If we haven't matched anything yet, this is not a basic integer or FPU
2793  // operation. There shouldn't be any ambiguity in our mnemonic table, so try
2794  // matching with the unsized operand.
2795  if (Match.empty()) {
2796  Match.push_back(MatchInstructionImpl(Operands, Inst, ErrorInfo,
2797  MatchingInlineAsm,
2798  isParsingIntelSyntax()));
2799  // If this returned as a missing feature failure, remember that.
2800  if (Match.back() == Match_MissingFeature)
2801  ErrorInfoMissingFeature = ErrorInfo;
2802  }
2803 
2804  // Restore the size of the unsized memory operand if we modified it.
2805  if (UnsizedMemOp)
2806  UnsizedMemOp->Mem.Size = 0;
2807 
2808  // If it's a bad mnemonic, all results will be the same.
2809  if (Match.back() == Match_MnemonicFail) {
2810  ArrayRef<SMRange> Ranges =
2811  MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
2812  return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
2813  Ranges, MatchingInlineAsm);
2814  }
2815 
2816  // If exactly one matched, then we treat that as a successful match (and the
2817  // instruction will already have been filled in correctly, since the failing
2818  // matches won't have modified it).
2819  unsigned NumSuccessfulMatches =
2820  std::count(std::begin(Match), std::end(Match), Match_Success);
2821  if (NumSuccessfulMatches == 1) {
2822  if (!validateInstruction(Inst, Operands))
2823  return true;
2824 
2825  // Some instructions need post-processing to, for example, tweak which
2826  // encoding is selected. Loop on it while changes happen so the individual
2827  // transformations can chain off each other.
2828  if (!MatchingInlineAsm)
2829  while (processInstruction(Inst, Operands))
2830  ;
2831  Inst.setLoc(IDLoc);
2832  if (!MatchingInlineAsm)
2833  EmitInstruction(Inst, Operands, Out);
2834  Opcode = Inst.getOpcode();
2835  return false;
2836  } else if (NumSuccessfulMatches > 1) {
2837  assert(UnsizedMemOp &&
2838  "multiple matches only possible with unsized memory operands");
2839  ArrayRef<SMRange> Ranges =
2840  MatchingInlineAsm ? EmptyRanges : UnsizedMemOp->getLocRange();
2841  return Error(UnsizedMemOp->getStartLoc(),
2842  "ambiguous operand size for instruction '" + Mnemonic + "\'",
2843  Ranges, MatchingInlineAsm);
2844  }
2845 
2846  // If one instruction matched with a missing feature, report this as a
2847  // missing feature.
2848  if (std::count(std::begin(Match), std::end(Match),
2849  Match_MissingFeature) == 1) {
2850  ErrorInfo = ErrorInfoMissingFeature;
2851  return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
2852  MatchingInlineAsm);
2853  }
2854 
2855  // If one instruction matched with an invalid operand, report this as an
2856  // operand failure.
2857  if (std::count(std::begin(Match), std::end(Match),
2858  Match_InvalidOperand) == 1) {
2859  return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2860  MatchingInlineAsm);
2861  }
2862 
2863  // If all of these were an outright failure, report it in a useless way.
2864  return Error(IDLoc, "unknown instruction mnemonic", EmptyRanges,
2865  MatchingInlineAsm);
2866 }
2867 
2868 bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
2869  return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
2870 }
2871 
2872 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
2873  MCAsmParser &Parser = getParser();
2874  StringRef IDVal = DirectiveID.getIdentifier();
2875  if (IDVal == ".word")
2876  return ParseDirectiveWord(2, DirectiveID.getLoc());
2877  else if (IDVal.startswith(".code"))
2878  return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
2879  else if (IDVal.startswith(".att_syntax")) {
2880  if (getLexer().isNot(AsmToken::EndOfStatement)) {
2881  if (Parser.getTok().getString() == "prefix")
2882  Parser.Lex();
2883  else if (Parser.getTok().getString() == "noprefix")
2884  return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
2885  "supported: registers must have a "
2886  "'%' prefix in .att_syntax");
2887  }
2888  getParser().setAssemblerDialect(0);
2889  return false;
2890  } else if (IDVal.startswith(".intel_syntax")) {
2891  getParser().setAssemblerDialect(1);
2892  if (getLexer().isNot(AsmToken::EndOfStatement)) {
2893  if (Parser.getTok().getString() == "noprefix")
2894  Parser.Lex();
2895  else if (Parser.getTok().getString() == "prefix")
2896  return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
2897  "supported: registers must not have "
2898  "a '%' prefix in .intel_syntax");
2899  }
2900  return false;
2901  }
2902  return true;
2903 }
2904 
2905 /// ParseDirectiveWord
2906 /// ::= .word [ expression (, expression)* ]
2907 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
2908  MCAsmParser &Parser = getParser();
2909  if (getLexer().isNot(AsmToken::EndOfStatement)) {
2910  for (;;) {
2911  const MCExpr *Value;
2912  if (getParser().parseExpression(Value))
2913  return false;
2914 
2915  getParser().getStreamer().EmitValue(Value, Size);
2916 
2917  if (getLexer().is(AsmToken::EndOfStatement))
2918  break;
2919 
2920  // FIXME: Improve diagnostic.
2921  if (getLexer().isNot(AsmToken::Comma)) {
2922  Error(L, "unexpected token in directive");
2923  return false;
2924  }
2925  Parser.Lex();
2926  }
2927  }
2928 
2929  Parser.Lex();
2930  return false;
2931 }
2932 
2933 /// ParseDirectiveCode
2934 /// ::= .code16 | .code32 | .code64
2935 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
2936  MCAsmParser &Parser = getParser();
2937  if (IDVal == ".code16") {
2938  Parser.Lex();
2939  if (!is16BitMode()) {
2940  SwitchMode(X86::Mode16Bit);
2941  getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
2942  }
2943  } else if (IDVal == ".code32") {
2944  Parser.Lex();
2945  if (!is32BitMode()) {
2946  SwitchMode(X86::Mode32Bit);
2947  getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
2948  }
2949  } else if (IDVal == ".code64") {
2950  Parser.Lex();
2951  if (!is64BitMode()) {
2952  SwitchMode(X86::Mode64Bit);
2953  getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
2954  }
2955  } else {
2956  Error(L, "unknown directive " + IDVal);
2957  return false;
2958  }
2959 
2960  return false;
2961 }
2962 
2963 // Force static initialization.
2964 extern "C" void LLVMInitializeX86AsmParser() {
2967 }
2968 
2969 #define GET_REGISTER_MATCHER
2970 #define GET_MATCHER_IMPLEMENTATION
2971 #define GET_SUBTARGET_FEATURE_NAME
2972 #include "X86GenAsmMatcher.inc"
const NoneType None
Definition: None.h:23
StringRef getToken() const
Definition: X86Operand.h:87
static const char * getSubtargetFeatureName(uint64_t Val)
Represents a range in source code.
Definition: SMLoc.h:47
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:240
X86AsmInstrumentation * CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions, const MCContext &Ctx, const MCSubtargetInfo &STI)
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1327
bool isX86_64NonExtLowByteReg(unsigned reg)
Definition: X86BaseInfo.h:757
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:315
const char * getPointer() const
Definition: SMLoc.h:33
size_t size() const
size - Get the string size.
Definition: StringRef.h:113
TokenKind getKind() const
Definition: MCAsmLexer.h:71
static void Found()
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:39
Generic assembler parser interface, for use by target specific assembly parsers.
Definition: MCAsmParser.h:64
MCTargetAsmParser - Generic interface to target specific assembly parsers.
size_t find(char C, size_t From=0) const
Search for the first character C in the string.
Definition: StringRef.h:240
bool endswith(StringRef Suffix) const
Check if this string ends with the given Suffix.
Definition: StringRef.h:224
static std::unique_ptr< X86Operand > CreateMem(unsigned ModeSize, const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc, unsigned Size=0, StringRef SymName=StringRef(), void *OpDecl=nullptr)
Create an absolute memory operand.
Definition: X86Operand.h:496
StringRef getString() const
Get the string for the current token, this includes all characters (for example, the quotes on string...
Definition: MCAsmLexer.h:100
StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:405
virtual const AsmToken & Lex()=0
Get the next AsmToken in the stream, possibly handling file inclusion first.
bool isNot(TokenKind K) const
Definition: MCAsmLexer.h:73
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:488
const_iterator begin(StringRef path)
Get begin iterator over path.
Definition: Path.cpp:232
static void RewriteIntelBracExpression(SmallVectorImpl< AsmRewrite > *AsmRewrites, StringRef SymName, int64_t ImmDisp, int64_t FinalImmDisp, SMLoc &BracLoc, SMLoc &StartInBrac, SMLoc &End)
StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Definition: StringRef.h:419
StringSwitch & Case(const char(&S)[N], const T &Value)
Definition: StringSwitch.h:55
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const char *reason, bool gen_crash_diag=true)
Reports a serious error, calling any installed error handler.
static bool CheckBaseRegAndIndexReg(unsigned BaseReg, unsigned IndexReg, StringRef &ErrMsg)
}
static MCOperand createReg(unsigned Reg)
Definition: MCInst.h:111
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:79
void LLVMInitializeX86AsmParser()
bool isImmSExti64i8Value(uint64_t Value)
SMLoc getEndLoc() const override
getEndLoc - Get the location of the last token of this operand.
Definition: X86Operand.h:78
T LLVM_ATTRIBUTE_UNUSED_RESULT pop_back_val()
Definition: SmallVector.h:406
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:98
Target TheX86_64Target
bool isImm() const override
isImm - Is this an immediate operand?
Definition: X86Operand.h:134
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APInt.h:33
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:33
Reg
All possible values of the reg field in the ModR/M byte.
Target independent representation for an assembler token.
Definition: MCAsmLexer.h:22
static unsigned getIntelMemOperandSize(StringRef OpStr)
getIntelMemOperandSize - Return intel memory operand size.
static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode, bool isCmp=false)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:317
const MCExpr * getImm() const
Definition: X86Operand.h:102
MCParsedAsmOperand - This abstract class represents a source-level assembly instruction operand...
static std::unique_ptr< X86Operand > CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc)
Definition: X86Operand.h:487
bool isValid() const
Definition: SMLoc.h:28
.code16 (X86) / .code 16 (ARM)
Definition: MCDirectives.h:50
bool LLVM_ATTRIBUTE_UNUSED_RESULT empty() const
Definition: SmallVector.h:57
unsigned getReg() const
Returns the register number.
Definition: MCInst.h:63
const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:107
bool isImmSExti16i8Value(uint64_t Value)
static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT, const SmallVectorImpl< BasicBlock * > &ExitBlocks, PredIteratorCache &PredCache, LoopInfo *LI)
Given an instruction in the loop, check to see if it has any uses that are outside the current loop...
Definition: LCSSA.cpp:62
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: ArrayRef.h:31
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:25
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:446
int64_t getIntVal() const
Definition: MCAsmLexer.h:105
const AsmToken & getTok() const
Get the current AsmToken from the stream.
Definition: MCAsmParser.cpp:32
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:150
virtual void eatToEndOfStatement()=0
Skip to the end of the current statement, for error recovery.
bool isPrefix(MCInstrInfo const &MCII, MCInst const &MCI)
bool isX86_64ExtendedReg(unsigned RegNo)
isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended (r8 or higher) register? e.g.
Definition: X86BaseInfo.h:722
bool isImm() const
Definition: MCInst.h:57
LLVM_CONSTEXPR size_t array_lengthof(T(&)[N])
Find the length of an array.
Definition: STLExtras.h:247
const MCExpr * getLHS() const
Get the left-hand side expression of the binary operator.
Definition: MCExpr.h:531
virtual MCContext & getContext()=0
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:42
Streaming machine code generation interface.
Definition: MCStreamer.h:157
X86Operand - Instances of this class represent a parsed X86 machine instruction.
Definition: X86Operand.h:25
SMRange getLocRange() const
getLocRange - Get the range between the first and last token of this operand.
Definition: X86Operand.h:81
SMLoc getLoc() const
Definition: MCAsmLexer.cpp:26
char back() const
back - Get the last character in the string.
Definition: StringRef.h:122
Target TheX86_32Target
Interface to description of machine instruction set.
Definition: MCInstrInfo.h:24
This file declares a class to represent arbitrary precision floating point values and provide a varie...
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang","erlang-compatible garbage collector")
SI Fold Operands
iterator erase(iterator I)
Definition: SmallVector.h:455
Binary assembler expressions.
Definition: MCExpr.h:405
virtual bool Error(SMLoc L, const Twine &Msg, ArrayRef< SMRange > Ranges=None)=0
Emit an error at the location L, with the message Msg.
void setLoc(SMLoc loc)
Definition: MCInst.h:161
SMLoc getStartLoc() const override
getStartLoc - Get the location of the first token of this operand.
Definition: X86Operand.h:76
static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode, bool isCmp=false)
void setOpcode(unsigned Op)
Definition: MCInst.h:158
bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:215
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition: StringRef.h:412
static unsigned MatchRegisterName(StringRef Name)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:861
struct MemOp Mem
Definition: X86Operand.h:66
bool is(TokenKind K) const
Definition: MCAsmLexer.h:72
R Default(const T &Value) const
Definition: StringSwitch.h:111
bool isMemUnsized() const
Definition: X86Operand.h:209
unsigned getOpcode() const
Definition: MCInst.h:159
Class for arbitrary precision integers.
Definition: APInt.h:73
static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg, bool isCmp)
LLVM_ATTRIBUTE_UNUSED_RESULT std::enable_if< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:285
int64_t getImm() const
Definition: MCInst.h:74
bool equals(StringRef RHS) const
equals - Check for string equality, this is more efficient than compare() when the relative ordering ...
Definition: StringRef.h:136
static std::unique_ptr< X86Operand > CreateToken(StringRef Str, SMLoc Loc)
Definition: X86Operand.h:466
.code32 (X86) / .code 32 (ARM)
Definition: MCDirectives.h:51
static SMLoc getFromPointer(const char *Ptr)
Definition: SMLoc.h:35
SI Fix SGPR Live Ranges
static std::unique_ptr< X86Operand > CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc, bool AddressOf=false, SMLoc OffsetOfLoc=SMLoc(), StringRef SymName=StringRef(), void *OpDecl=nullptr)
Definition: X86Operand.h:475
.code64 (X86)
Definition: MCDirectives.h:52
StringRef getIdentifier() const
Get the identifier string for the current token, which should be an identifier or a string...
Definition: MCAsmLexer.h:89
RegisterMCAsmParser - Helper template for registering a target specific assembly parser, for use in the target machine initialization function.
StringRef getName() const
getName - Get the symbol name.
Definition: MCSymbol.h:205
static const size_t npos
Definition: StringRef.h:44
#define I(x, y, z)
Definition: MD5.cpp:54
static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode, bool isCmp=false)
MCSubtargetInfo - Generic base class for all target subtargets.
IntelOperatorKind
LLVM Value Representation.
Definition: Value.h:69
SMLoc getEndLoc() const
Definition: MCAsmLexer.cpp:30
StringSwitch & Cases(const char(&S0)[N0], const char(&S1)[N1], const T &Value)
Definition: StringSwitch.h:85
bool isImmSExti32i8Value(uint64_t Value)
void addOperand(const MCOperand &Op)
Definition: MCInst.h:168
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:40
bool isUndefined() const
isUndefined - Check if this symbol undefined (i.e., implicitly defined).
Definition: MCSymbol.h:258
const MCExpr * Disp
Definition: X86Operand.h:54
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml","ocaml 3.10-compatible collector")
StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition: StringRef.h:434
Represents a location in source code.
Definition: SMLoc.h:23
std::string lower() const
Definition: StringRef.cpp:117
static const MCConstantExpr * create(int64_t Value, MCContext &Ctx)
Definition: MCExpr.cpp:150
const MCOperand & getOperand(unsigned i) const
Definition: MCInst.h:164
bool isMem() const override
isMem - Is this a memory operand?
Definition: X86Operand.h:208