LLVM  15.0.0git
X86AsmParser.cpp
Go to the documentation of this file.
1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
11 #include "MCTargetDesc/X86MCExpr.h"
15 #include "X86AsmParserCommon.h"
16 #include "X86Operand.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/SmallString.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/StringSwitch.h"
21 #include "llvm/ADT/Twine.h"
22 #include "llvm/MC/MCContext.h"
23 #include "llvm/MC/MCExpr.h"
24 #include "llvm/MC/MCInst.h"
25 #include "llvm/MC/MCInstrInfo.h"
30 #include "llvm/MC/MCRegisterInfo.h"
31 #include "llvm/MC/MCSection.h"
32 #include "llvm/MC/MCStreamer.h"
34 #include "llvm/MC/MCSymbol.h"
35 #include "llvm/MC/TargetRegistry.h"
37 #include "llvm/Support/Compiler.h"
38 #include "llvm/Support/SourceMgr.h"
40 #include <algorithm>
41 #include <memory>
42 
43 using namespace llvm;
44 
46  "x86-experimental-lvi-inline-asm-hardening",
47  cl::desc("Harden inline assembly code that may be vulnerable to Load Value"
48  " Injection (LVI). This feature is experimental."), cl::Hidden);
49 
50 static bool checkScale(unsigned Scale, StringRef &ErrMsg) {
51  if (Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
52  ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
53  return true;
54  }
55  return false;
56 }
57 
58 namespace {
59 
// Binding strength for each infix operator, indexed by InfixCalculatorTok.
// The entries MUST stay in the same order as the InfixCalculatorTok enum
// below; a larger value binds tighter. Parentheses carry the highest values
// so they always win the precedence comparison in pushOperator().
static const char OpPrecedence[] = {
    0,  // IC_OR
    1,  // IC_XOR
    2,  // IC_AND
    4,  // IC_LSHIFT
    4,  // IC_RSHIFT
    5,  // IC_PLUS
    5,  // IC_MINUS
    6,  // IC_MULTIPLY
    6,  // IC_DIVIDE
    6,  // IC_MOD
    7,  // IC_NOT
    8,  // IC_NEG
    9,  // IC_RPAREN
    10, // IC_LPAREN
    0,  // IC_IMM
    0,  // IC_REGISTER
    3,  // IC_EQ
    3,  // IC_NE
    3,  // IC_LT
    3,  // IC_LE
    3,  // IC_GT
    3   // IC_GE
};
84 
85 class X86AsmParser : public MCTargetAsmParser {
  // Per-instruction parsing context supplied by ParseInstruction().
  ParseInstructionInfo *InstInfo;
  // True when parsing under ".code16gcc": 16-bit mode, but instructions are
  // matched as 32-bit (see MatchInstruction()).
  bool Code16GCC;
  // Non-zero when a data-size prefix ({data16}/{data32}) was forced.
  unsigned ForcedDataPrefix = 0;

  // Encoding explicitly requested by a {vex}/{vex2}/{vex3}/{evex} prefix.
  enum VEXEncoding {
    VEXEncoding_Default,
    VEXEncoding_VEX,
    VEXEncoding_VEX2,
    VEXEncoding_VEX3,
    VEXEncoding_EVEX,
  };

  VEXEncoding ForcedVEXEncoding = VEXEncoding_Default;

  // Displacement size explicitly requested by a {disp8}/{disp32} prefix.
  enum DispEncoding {
    DispEncoding_Default,
    DispEncoding_Disp8,
    DispEncoding_Disp32,
  };

  DispEncoding ForcedDispEncoding = DispEncoding_Default;

private:
109  SMLoc consumeToken() {
110  MCAsmParser &Parser = getParser();
111  SMLoc Result = Parser.getTok().getLoc();
112  Parser.Lex();
113  return Result;
114  }
115 
116  X86TargetStreamer &getTargetStreamer() {
117  assert(getParser().getStreamer().getTargetStreamer() &&
118  "do not have a target streamer");
119  MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
120  return static_cast<X86TargetStreamer &>(TS);
121  }
122 
123  unsigned MatchInstruction(const OperandVector &Operands, MCInst &Inst,
124  uint64_t &ErrorInfo, FeatureBitset &MissingFeatures,
125  bool matchingInlineAsm, unsigned VariantID = 0) {
126  // In Code16GCC mode, match as 32-bit.
127  if (Code16GCC)
128  SwitchMode(X86::Is32Bit);
129  unsigned rv = MatchInstructionImpl(Operands, Inst, ErrorInfo,
130  MissingFeatures, matchingInlineAsm,
131  VariantID);
132  if (Code16GCC)
133  SwitchMode(X86::Is16Bit);
134  return rv;
135  }
136 
  // Token kinds consumed by InfixCalculator. Order MUST match the
  // OpPrecedence table above, which is indexed by these values.
  enum InfixCalculatorTok {
    IC_OR = 0,
    IC_XOR,
    IC_AND,
    IC_LSHIFT,
    IC_RSHIFT,
    IC_PLUS,
    IC_MINUS,
    IC_MULTIPLY,
    IC_DIVIDE,
    IC_MOD,
    IC_NOT,
    IC_NEG,
    IC_RPAREN,
    IC_LPAREN,
    IC_IMM,
    IC_REGISTER,
    IC_EQ,
    IC_NE,
    IC_LT,
    IC_LE,
    IC_GT,
    IC_GE
  };

  // Intel-syntax pseudo operators (LENGTH/SIZE/TYPE) recognized in
  // MS inline assembly.
  enum IntelOperatorKind {
    IOK_INVALID = 0,
    IOK_LENGTH,
    IOK_SIZE,
    IOK_TYPE,
  };

  // MASM pseudo operators (LENGTHOF/SIZEOF/TYPE).
  enum MasmOperatorKind {
    MOK_INVALID = 0,
    MOK_LENGTHOF,
    MOK_SIZEOF,
    MOK_TYPE,
  };
175 
  // Shunting-yard style evaluator for Intel-syntax integer expressions.
  // Operands and operators are fed in infix order via pushOperand() /
  // pushOperator(); pushOperator() converts to postfix on the fly, and
  // execute() evaluates the resulting postfix stack.
  class InfixCalculator {
    typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
    // Pending operators not yet moved to the postfix stack.
    SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
    // Expression in postfix (RPN) order, evaluated by execute().
    SmallVector<ICToken, 4> PostfixStack;

    bool isUnaryOperator(InfixCalculatorTok Op) const {
      return Op == IC_NEG || Op == IC_NOT;
    }

  public:
    // Pop the most recent operand; returns -1 for a non-operand token so the
    // bogus value is later rejected by checkScale().
    int64_t popOperand() {
      assert (!PostfixStack.empty() && "Poped an empty stack!");
      ICToken Op = PostfixStack.pop_back_val();
      if (!(Op.first == IC_IMM || Op.first == IC_REGISTER))
        return -1; // The invalid Scale value will be caught later by checkScale
      return Op.second;
    }
    // Push an immediate or register operand straight onto the postfix stack.
    void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
      assert ((Op == IC_IMM || Op == IC_REGISTER) &&
              "Unexpected operand!");
      PostfixStack.push_back(std::make_pair(Op, Val));
    }

    void popOperator() { InfixOperatorStack.pop_back(); }
    // Shunting-yard step: move higher-precedence operators from the infix
    // stack to the postfix stack before pushing the new operator.
    void pushOperator(InfixCalculatorTok Op) {
      // Push the new operator if the stack is empty.
      if (InfixOperatorStack.empty()) {
        InfixOperatorStack.push_back(Op);
        return;
      }

      // Push the new operator if it has a higher precedence than the operator
      // on the top of the stack or the operator on the top of the stack is a
      // left parentheses.
      unsigned Idx = InfixOperatorStack.size() - 1;
      InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
      if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
        InfixOperatorStack.push_back(Op);
        return;
      }

      // The operator on the top of the stack has higher precedence than the
      // new operator.
      unsigned ParenCount = 0;
      while (true) {
        // Nothing to process.
        if (InfixOperatorStack.empty())
          break;

        Idx = InfixOperatorStack.size() - 1;
        StackOp = InfixOperatorStack[Idx];
        if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
          break;

        // If we have an even parentheses count and we see a left parentheses,
        // then stop processing.
        if (!ParenCount && StackOp == IC_LPAREN)
          break;

        if (StackOp == IC_RPAREN) {
          ++ParenCount;
          InfixOperatorStack.pop_back();
        } else if (StackOp == IC_LPAREN) {
          --ParenCount;
          InfixOperatorStack.pop_back();
        } else {
          // Real operator: emit it to the postfix stream.
          InfixOperatorStack.pop_back();
          PostfixStack.push_back(std::make_pair(StackOp, 0));
        }
      }
      // Push the new operator.
      InfixOperatorStack.push_back(Op);
    }

    // Evaluate the accumulated postfix expression and return its value.
    // Comparison operators yield -1 (all-ones) for true and 0 for false,
    // matching MASM semantics.
    int64_t execute() {
      // Push any remaining operators onto the postfix stack.
      while (!InfixOperatorStack.empty()) {
        InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
        if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
          PostfixStack.push_back(std::make_pair(StackOp, 0));
      }

      if (PostfixStack.empty())
        return 0;

      SmallVector<ICToken, 16> OperandStack;
      for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
        ICToken Op = PostfixStack[i];
        if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
          OperandStack.push_back(Op);
        } else if (isUnaryOperator(Op.first)) {
          assert (OperandStack.size() > 0 && "Too few operands.");
          ICToken Operand = OperandStack.pop_back_val();
          assert (Operand.first == IC_IMM &&
                  "Unary operation with a register!");
          switch (Op.first) {
          default:
            report_fatal_error("Unexpected operator!");
            break;
          case IC_NEG:
            OperandStack.push_back(std::make_pair(IC_IMM, -Operand.second));
            break;
          case IC_NOT:
            OperandStack.push_back(std::make_pair(IC_IMM, ~Operand.second));
            break;
          }
        } else {
          // Binary operator: pop two operands (Op2 was pushed last).
          assert (OperandStack.size() > 1 && "Too few operands.");
          int64_t Val;
          ICToken Op2 = OperandStack.pop_back_val();
          ICToken Op1 = OperandStack.pop_back_val();
          switch (Op.first) {
          default:
            report_fatal_error("Unexpected operator!");
            break;
          case IC_PLUS:
            Val = Op1.second + Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_MINUS:
            Val = Op1.second - Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_MULTIPLY:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Multiply operation with an immediate and a register!");
            Val = Op1.second * Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_DIVIDE:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Divide operation with an immediate and a register!");
            assert (Op2.second != 0 && "Division by zero!");
            Val = Op1.second / Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_MOD:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Modulo operation with an immediate and a register!");
            Val = Op1.second % Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_OR:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Or operation with an immediate and a register!");
            Val = Op1.second | Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_XOR:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Xor operation with an immediate and a register!");
            Val = Op1.second ^ Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_AND:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "And operation with an immediate and a register!");
            Val = Op1.second & Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_LSHIFT:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Left shift operation with an immediate and a register!");
            Val = Op1.second << Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_RSHIFT:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Right shift operation with an immediate and a register!");
            Val = Op1.second >> Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_EQ:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Equals operation with an immediate and a register!");
            Val = (Op1.second == Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_NE:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Not-equals operation with an immediate and a register!");
            Val = (Op1.second != Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_LT:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Less-than operation with an immediate and a register!");
            Val = (Op1.second < Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_LE:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Less-than-or-equal operation with an immediate and a "
                   "register!");
            Val = (Op1.second <= Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_GT:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Greater-than operation with an immediate and a register!");
            Val = (Op1.second > Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_GE:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Greater-than-or-equal operation with an immediate and a "
                   "register!");
            Val = (Op1.second >= Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          }
        }
      }
      assert (OperandStack.size() == 1 && "Expected a single result.");
      return OperandStack.pop_back_val().second;
    }
  };
393 
  // States of the IntelExprStateMachine below; each state names the token
  // most recently consumed. IES_ERROR is sticky once entered.
  enum IntelExprState {
    IES_INIT,
    IES_OR,
    IES_XOR,
    IES_AND,
    IES_EQ,
    IES_NE,
    IES_LT,
    IES_LE,
    IES_GT,
    IES_GE,
    IES_LSHIFT,
    IES_RSHIFT,
    IES_PLUS,
    IES_MINUS,
    IES_OFFSET,
    IES_CAST,
    IES_NOT,
    IES_MULTIPLY,
    IES_DIVIDE,
    IES_MOD,
    IES_LBRAC,
    IES_RBRAC,
    IES_LPAREN,
    IES_RPAREN,
    IES_REGISTER,
    IES_INTEGER,
    IES_IDENTIFIER,
    IES_ERROR
  };
424 
425  class IntelExprStateMachine {
426  IntelExprState State = IES_INIT, PrevState = IES_ERROR;
427  unsigned BaseReg = 0, IndexReg = 0, TmpReg = 0, Scale = 0;
428  int64_t Imm = 0;
429  const MCExpr *Sym = nullptr;
430  StringRef SymName;
431  InfixCalculator IC;
433  short BracCount = 0;
434  bool MemExpr = false;
435  bool OffsetOperator = false;
436  bool AttachToOperandIdx = false;
437  bool IsPIC = false;
438  SMLoc OffsetOperatorLoc;
439  AsmTypeInfo CurType;
440 
441  bool setSymRef(const MCExpr *Val, StringRef ID, StringRef &ErrMsg) {
442  if (Sym) {
443  ErrMsg = "cannot use more than one symbol in memory operand";
444  return true;
445  }
446  Sym = Val;
447  SymName = ID;
448  return false;
449  }
450 
451  public:
452  IntelExprStateMachine() = default;
453 
454  void addImm(int64_t imm) { Imm += imm; }
455  short getBracCount() const { return BracCount; }
456  bool isMemExpr() const { return MemExpr; }
457  bool isOffsetOperator() const { return OffsetOperator; }
458  SMLoc getOffsetLoc() const { return OffsetOperatorLoc; }
459  unsigned getBaseReg() const { return BaseReg; }
460  unsigned getIndexReg() const { return IndexReg; }
461  unsigned getScale() const { return Scale; }
462  const MCExpr *getSym() const { return Sym; }
463  StringRef getSymName() const { return SymName; }
464  StringRef getType() const { return CurType.Name; }
465  unsigned getSize() const { return CurType.Size; }
466  unsigned getElementSize() const { return CurType.ElementSize; }
467  unsigned getLength() const { return CurType.Length; }
468  int64_t getImm() { return Imm + IC.execute(); }
469  bool isValidEndState() const {
470  return State == IES_RBRAC || State == IES_INTEGER;
471  }
472 
473  // Is the intel expression appended after an operand index.
474  // [OperandIdx][Intel Expression]
475  // This is neccessary for checking if it is an independent
476  // intel expression at back end when parse inline asm.
477  void setAppendAfterOperand() { AttachToOperandIdx = true; }
478 
479  bool isPIC() const { return IsPIC; }
480  void setPIC() { IsPIC = true; }
481 
482  bool hadError() const { return State == IES_ERROR; }
483  const InlineAsmIdentifierInfo &getIdentifierInfo() const { return Info; }
484 
485  bool regsUseUpError(StringRef &ErrMsg) {
486  // This case mostly happen in inline asm, e.g. Arr[BaseReg + IndexReg]
487  // can not intruduce additional register in inline asm in PIC model.
488  if (IsPIC && AttachToOperandIdx)
489  ErrMsg = "Don't use 2 or more regs for mem offset in PIC model!";
490  else
491  ErrMsg = "BaseReg/IndexReg already set!";
492  return true;
493  }
494 
495  void onOr() {
496  IntelExprState CurrState = State;
497  switch (State) {
498  default:
499  State = IES_ERROR;
500  break;
501  case IES_INTEGER:
502  case IES_RPAREN:
503  case IES_REGISTER:
504  State = IES_OR;
505  IC.pushOperator(IC_OR);
506  break;
507  }
508  PrevState = CurrState;
509  }
510  void onXor() {
511  IntelExprState CurrState = State;
512  switch (State) {
513  default:
514  State = IES_ERROR;
515  break;
516  case IES_INTEGER:
517  case IES_RPAREN:
518  case IES_REGISTER:
519  State = IES_XOR;
520  IC.pushOperator(IC_XOR);
521  break;
522  }
523  PrevState = CurrState;
524  }
525  void onAnd() {
526  IntelExprState CurrState = State;
527  switch (State) {
528  default:
529  State = IES_ERROR;
530  break;
531  case IES_INTEGER:
532  case IES_RPAREN:
533  case IES_REGISTER:
534  State = IES_AND;
535  IC.pushOperator(IC_AND);
536  break;
537  }
538  PrevState = CurrState;
539  }
540  void onEq() {
541  IntelExprState CurrState = State;
542  switch (State) {
543  default:
544  State = IES_ERROR;
545  break;
546  case IES_INTEGER:
547  case IES_RPAREN:
548  case IES_REGISTER:
549  State = IES_EQ;
550  IC.pushOperator(IC_EQ);
551  break;
552  }
553  PrevState = CurrState;
554  }
555  void onNE() {
556  IntelExprState CurrState = State;
557  switch (State) {
558  default:
559  State = IES_ERROR;
560  break;
561  case IES_INTEGER:
562  case IES_RPAREN:
563  case IES_REGISTER:
564  State = IES_NE;
565  IC.pushOperator(IC_NE);
566  break;
567  }
568  PrevState = CurrState;
569  }
570  void onLT() {
571  IntelExprState CurrState = State;
572  switch (State) {
573  default:
574  State = IES_ERROR;
575  break;
576  case IES_INTEGER:
577  case IES_RPAREN:
578  case IES_REGISTER:
579  State = IES_LT;
580  IC.pushOperator(IC_LT);
581  break;
582  }
583  PrevState = CurrState;
584  }
585  void onLE() {
586  IntelExprState CurrState = State;
587  switch (State) {
588  default:
589  State = IES_ERROR;
590  break;
591  case IES_INTEGER:
592  case IES_RPAREN:
593  case IES_REGISTER:
594  State = IES_LE;
595  IC.pushOperator(IC_LE);
596  break;
597  }
598  PrevState = CurrState;
599  }
600  void onGT() {
601  IntelExprState CurrState = State;
602  switch (State) {
603  default:
604  State = IES_ERROR;
605  break;
606  case IES_INTEGER:
607  case IES_RPAREN:
608  case IES_REGISTER:
609  State = IES_GT;
610  IC.pushOperator(IC_GT);
611  break;
612  }
613  PrevState = CurrState;
614  }
615  void onGE() {
616  IntelExprState CurrState = State;
617  switch (State) {
618  default:
619  State = IES_ERROR;
620  break;
621  case IES_INTEGER:
622  case IES_RPAREN:
623  case IES_REGISTER:
624  State = IES_GE;
625  IC.pushOperator(IC_GE);
626  break;
627  }
628  PrevState = CurrState;
629  }
630  void onLShift() {
631  IntelExprState CurrState = State;
632  switch (State) {
633  default:
634  State = IES_ERROR;
635  break;
636  case IES_INTEGER:
637  case IES_RPAREN:
638  case IES_REGISTER:
639  State = IES_LSHIFT;
640  IC.pushOperator(IC_LSHIFT);
641  break;
642  }
643  PrevState = CurrState;
644  }
645  void onRShift() {
646  IntelExprState CurrState = State;
647  switch (State) {
648  default:
649  State = IES_ERROR;
650  break;
651  case IES_INTEGER:
652  case IES_RPAREN:
653  case IES_REGISTER:
654  State = IES_RSHIFT;
655  IC.pushOperator(IC_RSHIFT);
656  break;
657  }
658  PrevState = CurrState;
659  }
660  bool onPlus(StringRef &ErrMsg) {
661  IntelExprState CurrState = State;
662  switch (State) {
663  default:
664  State = IES_ERROR;
665  break;
666  case IES_INTEGER:
667  case IES_RPAREN:
668  case IES_REGISTER:
669  case IES_OFFSET:
670  State = IES_PLUS;
671  IC.pushOperator(IC_PLUS);
672  if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
673  // If we already have a BaseReg, then assume this is the IndexReg with
674  // no explicit scale.
675  if (!BaseReg) {
676  BaseReg = TmpReg;
677  } else {
678  if (IndexReg)
679  return regsUseUpError(ErrMsg);
680  IndexReg = TmpReg;
681  Scale = 0;
682  }
683  }
684  break;
685  }
686  PrevState = CurrState;
687  return false;
688  }
689  bool onMinus(StringRef &ErrMsg) {
690  IntelExprState CurrState = State;
691  switch (State) {
692  default:
693  State = IES_ERROR;
694  break;
695  case IES_OR:
696  case IES_XOR:
697  case IES_AND:
698  case IES_EQ:
699  case IES_NE:
700  case IES_LT:
701  case IES_LE:
702  case IES_GT:
703  case IES_GE:
704  case IES_LSHIFT:
705  case IES_RSHIFT:
706  case IES_PLUS:
707  case IES_NOT:
708  case IES_MULTIPLY:
709  case IES_DIVIDE:
710  case IES_MOD:
711  case IES_LPAREN:
712  case IES_RPAREN:
713  case IES_LBRAC:
714  case IES_RBRAC:
715  case IES_INTEGER:
716  case IES_REGISTER:
717  case IES_INIT:
718  case IES_OFFSET:
719  State = IES_MINUS;
720  // push minus operator if it is not a negate operator
721  if (CurrState == IES_REGISTER || CurrState == IES_RPAREN ||
722  CurrState == IES_INTEGER || CurrState == IES_RBRAC ||
723  CurrState == IES_OFFSET)
724  IC.pushOperator(IC_MINUS);
725  else if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
726  // We have negate operator for Scale: it's illegal
727  ErrMsg = "Scale can't be negative";
728  return true;
729  } else
730  IC.pushOperator(IC_NEG);
731  if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
732  // If we already have a BaseReg, then assume this is the IndexReg with
733  // no explicit scale.
734  if (!BaseReg) {
735  BaseReg = TmpReg;
736  } else {
737  if (IndexReg)
738  return regsUseUpError(ErrMsg);
739  IndexReg = TmpReg;
740  Scale = 0;
741  }
742  }
743  break;
744  }
745  PrevState = CurrState;
746  return false;
747  }
748  void onNot() {
749  IntelExprState CurrState = State;
750  switch (State) {
751  default:
752  State = IES_ERROR;
753  break;
754  case IES_OR:
755  case IES_XOR:
756  case IES_AND:
757  case IES_EQ:
758  case IES_NE:
759  case IES_LT:
760  case IES_LE:
761  case IES_GT:
762  case IES_GE:
763  case IES_LSHIFT:
764  case IES_RSHIFT:
765  case IES_PLUS:
766  case IES_MINUS:
767  case IES_NOT:
768  case IES_MULTIPLY:
769  case IES_DIVIDE:
770  case IES_MOD:
771  case IES_LPAREN:
772  case IES_LBRAC:
773  case IES_INIT:
774  State = IES_NOT;
775  IC.pushOperator(IC_NOT);
776  break;
777  }
778  PrevState = CurrState;
779  }
780  bool onRegister(unsigned Reg, StringRef &ErrMsg) {
781  IntelExprState CurrState = State;
782  switch (State) {
783  default:
784  State = IES_ERROR;
785  break;
786  case IES_PLUS:
787  case IES_LPAREN:
788  case IES_LBRAC:
789  State = IES_REGISTER;
790  TmpReg = Reg;
791  IC.pushOperand(IC_REGISTER);
792  break;
793  case IES_MULTIPLY:
794  // Index Register - Scale * Register
795  if (PrevState == IES_INTEGER) {
796  if (IndexReg)
797  return regsUseUpError(ErrMsg);
798  State = IES_REGISTER;
799  IndexReg = Reg;
800  // Get the scale and replace the 'Scale * Register' with '0'.
801  Scale = IC.popOperand();
802  if (checkScale(Scale, ErrMsg))
803  return true;
804  IC.pushOperand(IC_IMM);
805  IC.popOperator();
806  } else {
807  State = IES_ERROR;
808  }
809  break;
810  }
811  PrevState = CurrState;
812  return false;
813  }
814  bool onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName,
815  const InlineAsmIdentifierInfo &IDInfo,
816  const AsmTypeInfo &Type, bool ParsingMSInlineAsm,
817  StringRef &ErrMsg) {
818  // InlineAsm: Treat an enum value as an integer
819  if (ParsingMSInlineAsm)
821  return onInteger(IDInfo.Enum.EnumVal, ErrMsg);
822  // Treat a symbolic constant like an integer
823  if (auto *CE = dyn_cast<MCConstantExpr>(SymRef))
824  return onInteger(CE->getValue(), ErrMsg);
825  PrevState = State;
826  switch (State) {
827  default:
828  State = IES_ERROR;
829  break;
830  case IES_CAST:
831  case IES_PLUS:
832  case IES_MINUS:
833  case IES_NOT:
834  case IES_INIT:
835  case IES_LBRAC:
836  case IES_LPAREN:
837  if (setSymRef(SymRef, SymRefName, ErrMsg))
838  return true;
839  MemExpr = true;
840  State = IES_INTEGER;
841  IC.pushOperand(IC_IMM);
842  if (ParsingMSInlineAsm)
843  Info = IDInfo;
844  setTypeInfo(Type);
845  break;
846  }
847  return false;
848  }
849  bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
850  IntelExprState CurrState = State;
851  switch (State) {
852  default:
853  State = IES_ERROR;
854  break;
855  case IES_PLUS:
856  case IES_MINUS:
857  case IES_NOT:
858  case IES_OR:
859  case IES_XOR:
860  case IES_AND:
861  case IES_EQ:
862  case IES_NE:
863  case IES_LT:
864  case IES_LE:
865  case IES_GT:
866  case IES_GE:
867  case IES_LSHIFT:
868  case IES_RSHIFT:
869  case IES_DIVIDE:
870  case IES_MOD:
871  case IES_MULTIPLY:
872  case IES_LPAREN:
873  case IES_INIT:
874  case IES_LBRAC:
875  State = IES_INTEGER;
876  if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
877  // Index Register - Register * Scale
878  if (IndexReg)
879  return regsUseUpError(ErrMsg);
880  IndexReg = TmpReg;
881  Scale = TmpInt;
882  if (checkScale(Scale, ErrMsg))
883  return true;
884  // Get the scale and replace the 'Register * Scale' with '0'.
885  IC.popOperator();
886  } else {
887  IC.pushOperand(IC_IMM, TmpInt);
888  }
889  break;
890  }
891  PrevState = CurrState;
892  return false;
893  }
894  void onStar() {
895  PrevState = State;
896  switch (State) {
897  default:
898  State = IES_ERROR;
899  break;
900  case IES_INTEGER:
901  case IES_REGISTER:
902  case IES_RPAREN:
903  State = IES_MULTIPLY;
904  IC.pushOperator(IC_MULTIPLY);
905  break;
906  }
907  }
908  void onDivide() {
909  PrevState = State;
910  switch (State) {
911  default:
912  State = IES_ERROR;
913  break;
914  case IES_INTEGER:
915  case IES_RPAREN:
916  State = IES_DIVIDE;
917  IC.pushOperator(IC_DIVIDE);
918  break;
919  }
920  }
921  void onMod() {
922  PrevState = State;
923  switch (State) {
924  default:
925  State = IES_ERROR;
926  break;
927  case IES_INTEGER:
928  case IES_RPAREN:
929  State = IES_MOD;
930  IC.pushOperator(IC_MOD);
931  break;
932  }
933  }
934  bool onLBrac() {
935  if (BracCount)
936  return true;
937  PrevState = State;
938  switch (State) {
939  default:
940  State = IES_ERROR;
941  break;
942  case IES_RBRAC:
943  case IES_INTEGER:
944  case IES_RPAREN:
945  State = IES_PLUS;
946  IC.pushOperator(IC_PLUS);
947  CurType.Length = 1;
948  CurType.Size = CurType.ElementSize;
949  break;
950  case IES_INIT:
951  case IES_CAST:
952  assert(!BracCount && "BracCount should be zero on parsing's start");
953  State = IES_LBRAC;
954  break;
955  }
956  MemExpr = true;
957  BracCount++;
958  return false;
959  }
960  bool onRBrac(StringRef &ErrMsg) {
961  IntelExprState CurrState = State;
962  switch (State) {
963  default:
964  State = IES_ERROR;
965  break;
966  case IES_INTEGER:
967  case IES_OFFSET:
968  case IES_REGISTER:
969  case IES_RPAREN:
970  if (BracCount-- != 1) {
971  ErrMsg = "unexpected bracket encountered";
972  return true;
973  }
974  State = IES_RBRAC;
975  if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
976  // If we already have a BaseReg, then assume this is the IndexReg with
977  // no explicit scale.
978  if (!BaseReg) {
979  BaseReg = TmpReg;
980  } else {
981  if (IndexReg)
982  return regsUseUpError(ErrMsg);
983  IndexReg = TmpReg;
984  Scale = 0;
985  }
986  }
987  break;
988  }
989  PrevState = CurrState;
990  return false;
991  }
992  void onLParen() {
993  IntelExprState CurrState = State;
994  switch (State) {
995  default:
996  State = IES_ERROR;
997  break;
998  case IES_PLUS:
999  case IES_MINUS:
1000  case IES_NOT:
1001  case IES_OR:
1002  case IES_XOR:
1003  case IES_AND:
1004  case IES_EQ:
1005  case IES_NE:
1006  case IES_LT:
1007  case IES_LE:
1008  case IES_GT:
1009  case IES_GE:
1010  case IES_LSHIFT:
1011  case IES_RSHIFT:
1012  case IES_MULTIPLY:
1013  case IES_DIVIDE:
1014  case IES_MOD:
1015  case IES_LPAREN:
1016  case IES_INIT:
1017  case IES_LBRAC:
1018  State = IES_LPAREN;
1019  IC.pushOperator(IC_LPAREN);
1020  break;
1021  }
1022  PrevState = CurrState;
1023  }
1024  void onRParen() {
1025  PrevState = State;
1026  switch (State) {
1027  default:
1028  State = IES_ERROR;
1029  break;
1030  case IES_INTEGER:
1031  case IES_OFFSET:
1032  case IES_REGISTER:
1033  case IES_RBRAC:
1034  case IES_RPAREN:
1035  State = IES_RPAREN;
1036  IC.pushOperator(IC_RPAREN);
1037  break;
1038  }
1039  }
1040  bool onOffset(const MCExpr *Val, SMLoc OffsetLoc, StringRef ID,
1041  const InlineAsmIdentifierInfo &IDInfo,
1042  bool ParsingMSInlineAsm, StringRef &ErrMsg) {
1043  PrevState = State;
1044  switch (State) {
1045  default:
1046  ErrMsg = "unexpected offset operator expression";
1047  return true;
1048  case IES_PLUS:
1049  case IES_INIT:
1050  case IES_LBRAC:
1051  if (setSymRef(Val, ID, ErrMsg))
1052  return true;
1053  OffsetOperator = true;
1054  OffsetOperatorLoc = OffsetLoc;
1055  State = IES_OFFSET;
1056  // As we cannot yet resolve the actual value (offset), we retain
1057  // the requested semantics by pushing a '0' to the operands stack
1058  IC.pushOperand(IC_IMM);
1059  if (ParsingMSInlineAsm) {
1060  Info = IDInfo;
1061  }
1062  break;
1063  }
1064  return false;
1065  }
1066  void onCast(AsmTypeInfo Info) {
1067  PrevState = State;
1068  switch (State) {
1069  default:
1070  State = IES_ERROR;
1071  break;
1072  case IES_LPAREN:
1073  setTypeInfo(Info);
1074  State = IES_CAST;
1075  break;
1076  }
1077  }
1078  void setTypeInfo(AsmTypeInfo Type) { CurType = Type; }
1079  };
1080 
1081  bool Error(SMLoc L, const Twine &Msg, SMRange Range = None,
1082  bool MatchingInlineAsm = false) {
1083  MCAsmParser &Parser = getParser();
1084  if (MatchingInlineAsm) {
1085  if (!getLexer().isAtStartOfStatement())
1086  Parser.eatToEndOfStatement();
1087  return false;
1088  }
1089  return Parser.Error(L, Msg, Range);
1090  }
1091 
  // Register-name resolution.
  bool MatchRegisterByName(unsigned &RegNo, StringRef RegName, SMLoc StartLoc,
                           SMLoc EndLoc);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);

  // Implicit string-instruction operands ((E/R)SI / (E/R)DI based).
  std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
  std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
  bool IsSIReg(unsigned Reg);
  unsigned GetSIDIForRegClass(unsigned RegClassID, unsigned Reg, bool IsSIReg);
  void
  AddDefaultSrcDestOperands(OperandVector &Operands,
                            std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
                            std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst);
  bool VerifyAndAdjustOperands(OperandVector &OrigOperands,
                               OperandVector &FinalOperands);

  // Operand parsing (AT&T and Intel dialects).
  bool parseOperand(OperandVector &Operands, StringRef Name);
  bool parseATTOperand(OperandVector &Operands);
  bool parseIntelOperand(OperandVector &Operands, StringRef Name);
  // NOTE(review): signature appears truncated in this view — remaining
  // parameters not visible here.
  bool ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID,
  bool ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End);
  unsigned IdentifyIntelInlineAsmOperator(StringRef Name);
  unsigned ParseIntelInlineAsmOperator(unsigned OpKind);
  unsigned IdentifyMasmOperator(StringRef Name);
  bool ParseMasmOperator(unsigned OpKind, int64_t &Val);
  bool ParseRoundingModeOp(SMLoc Start, OperandVector &Operands);
  bool ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM,
                               bool &ParseError, SMLoc &End);
  bool ParseMasmNamedOperator(StringRef Name, IntelExprStateMachine &SM,
                              bool &ParseError, SMLoc &End);
  void RewriteIntelExpression(IntelExprStateMachine &SM, SMLoc Start,
                              SMLoc End);
  bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
  // NOTE(review): one parameter line appears missing from this view.
  bool ParseIntelInlineAsmIdentifier(const MCExpr *&Val, StringRef &Identifier,
                                     bool IsUnevaluatedOperand, SMLoc &End,
                                     bool IsParsingOffsetOperator = false);
  void tryParseOperandIdx(AsmToken::TokenKind PrevTK,
                          IntelExprStateMachine &SM);

  bool ParseMemOperand(unsigned SegReg, const MCExpr *Disp, SMLoc StartLoc,
                       SMLoc EndLoc, OperandVector &Operands);

  X86::CondCode ParseConditionCode(StringRef CCode);

  bool ParseIntelMemoryOperandSize(unsigned &Size);
  // NOTE(review): signature appears truncated in this view — trailing
  // parameters not visible here.
  bool CreateMemForMSInlineAsm(unsigned SegReg, const MCExpr *Disp,
                               unsigned BaseReg, unsigned IndexReg,
                               unsigned Scale, SMLoc Start, SMLoc End,
                               unsigned Size, StringRef Identifier,

  // Target-specific assembler directives.
  bool parseDirectiveArch();
  bool parseDirectiveNops(SMLoc L);
  bool parseDirectiveEven(SMLoc L);
  bool ParseDirectiveCode(StringRef IDVal, SMLoc L);

  /// CodeView FPO data directives.
  bool parseDirectiveFPOProc(SMLoc L);
  bool parseDirectiveFPOSetFrame(SMLoc L);
  bool parseDirectiveFPOPushReg(SMLoc L);
  bool parseDirectiveFPOStackAlloc(SMLoc L);
  bool parseDirectiveFPOStackAlign(SMLoc L);
  bool parseDirectiveFPOEndPrologue(SMLoc L);
  bool parseDirectiveFPOEndProc(SMLoc L);

  /// SEH directives.
  bool parseSEHRegisterNumber(unsigned RegClassID, unsigned &RegNo);
  bool parseDirectiveSEHPushReg(SMLoc);
  bool parseDirectiveSEHSetFrame(SMLoc);
  bool parseDirectiveSEHSaveReg(SMLoc);
  bool parseDirectiveSEHSaveXMM(SMLoc);
  bool parseDirectiveSEHPushFrame(SMLoc);

  unsigned checkTargetMatchPredicate(MCInst &Inst) override;

  bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
  bool processInstruction(MCInst &Inst, const OperandVector &Ops);

  // Load Value Injection (LVI) Mitigations for machine code
  void emitWarningForSpecialLVIInstruction(SMLoc Loc);
  void applyLVICFIMitigation(MCInst &Inst, MCStreamer &Out);
  void applyLVILoadHardeningMitigation(MCInst &Inst, MCStreamer &Out);

  /// Wrapper around MCStreamer::emitInstruction(). Possibly adds
  /// instrumentation around Inst.
  void emitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);

  // NOTE(review): middle parameter lines of the next three declarations
  // appear missing from this view.
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               bool MatchingInlineAsm) override;

  void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
                         MCStreamer &Out, bool MatchingInlineAsm);

  bool ErrorMissingFeature(SMLoc IDLoc, const FeatureBitset &MissingFeatures,
                           bool MatchingInlineAsm);

  bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
                                  bool MatchingInlineAsm);

  bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
                                    bool MatchingInlineAsm);

  bool OmitRegisterFromClobberLists(unsigned RegNo) override;

  /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
  /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
  /// return false if no parsing errors occurred, true otherwise.
  bool HandleAVX512Operand(OperandVector &Operands);

  bool ParseZ(std::unique_ptr<X86Operand> &Z, const SMLoc &StartLoc);
1210 
  /// \name Subtarget mode queries
  /// Report which x86 submode the subtarget is currently in. The three mode
  /// feature bits are kept mutually exclusive (see the assert in SwitchMode).
  bool is64BitMode() const {
    // FIXME: Can tablegen auto-generate this?
    return getSTI().getFeatureBits()[X86::Is64Bit];
  }
  bool is32BitMode() const {
    // FIXME: Can tablegen auto-generate this?
    return getSTI().getFeatureBits()[X86::Is32Bit];
  }
  bool is16BitMode() const {
    // FIXME: Can tablegen auto-generate this?
    return getSTI().getFeatureBits()[X86::Is16Bit];
  }
  /// Switch the parser to another code mode.
  /// \p mode is one of X86::Is16Bit / X86::Is32Bit / X86::Is64Bit. Toggles
  /// the mode feature bits so that only \p mode remains set, then recomputes
  /// the available-feature mask used by the generated matcher.
  void SwitchMode(unsigned mode) {
    MCSubtargetInfo &STI = copySTI();
    FeatureBitset AllModes({X86::Is64Bit, X86::Is32Bit, X86::Is16Bit});
    FeatureBitset OldMode = STI.getFeatureBits() & AllModes;
    // OldMode.flip(mode) is the symmetric difference between the currently
    // active mode bits and the requested one: toggling that set clears the
    // old mode and sets the new one (and is a no-op if already in \p mode).
    FeatureBitset FB = ComputeAvailableFeatures(
      STI.ToggleFeature(OldMode.flip(mode)));
    setAvailableFeatures(FB);

    assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes));
  }
1233 
1234  unsigned getPointerWidth() {
1235  if (is16BitMode()) return 16;
1236  if (is32BitMode()) return 32;
1237  if (is64BitMode()) return 64;
1238  llvm_unreachable("invalid mode");
1239  }
1240 
  /// True when the active assembler dialect is Intel syntax; the dialect
  /// number is nonzero for Intel and zero for AT&T.
  bool isParsingIntelSyntax() {
    return getParser().getAssemblerDialect();
  }
1244 
1245  /// @name Auto-generated Matcher Functions
1246  /// {
1247 
1248 #define GET_ASSEMBLER_HEADER
1249 #include "X86GenAsmMatcher.inc"
1250 
1251  /// }
1252 
1253 public:
1254  enum X86MatchResultTy {
1255  Match_Unsupported = FIRST_TARGET_MATCH_RESULT_TY,
1256 #define GET_OPERAND_DIAGNOSTIC_TYPES
1257 #include "X86GenAsmMatcher.inc"
1258  };
1259 
  /// Construct the X86 target assembly parser.
  /// Registers the x86 meaning of ".word" (2 bytes) and seeds the matcher's
  /// available-feature mask from the subtarget's feature bits.
  X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
               const MCInstrInfo &mii, const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, sti, mii), InstInfo(nullptr),
        Code16GCC(false) {

    // On x86, ".word" emits a 2-byte value.
    Parser.addAliasForDirective(".word", ".2byte");

    // Initialize the set of available features.
    setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
  }
1270 
1271  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1272  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1273  SMLoc &EndLoc) override;
1274 
1275  bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1276 
1277  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1278  SMLoc NameLoc, OperandVector &Operands) override;
1279 
1280  bool ParseDirective(AsmToken DirectiveID) override;
1281 };
1282 } // end anonymous namespace
1283 
1284 /// @name Auto-generated Match Functions
1285 /// {
1286 
1287 static unsigned MatchRegisterName(StringRef Name);
1288 
1289 /// }
1290 
1291 static bool CheckBaseRegAndIndexRegAndScale(unsigned BaseReg, unsigned IndexReg,
1292  unsigned Scale, bool Is64BitMode,
1293  StringRef &ErrMsg) {
1294  // If we have both a base register and an index register make sure they are
1295  // both 64-bit or 32-bit registers.
1296  // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
1297 
1298  if (BaseReg != 0 &&
1299  !(BaseReg == X86::RIP || BaseReg == X86::EIP ||
1300  X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) ||
1301  X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) ||
1302  X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg))) {
1303  ErrMsg = "invalid base+index expression";
1304  return true;
1305  }
1306 
1307  if (IndexReg != 0 &&
1308  !(IndexReg == X86::EIZ || IndexReg == X86::RIZ ||
1309  X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1310  X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
1311  X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
1312  X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) ||
1313  X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) ||
1314  X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg))) {
1315  ErrMsg = "invalid base+index expression";
1316  return true;
1317  }
1318 
1319  if (((BaseReg == X86::RIP || BaseReg == X86::EIP) && IndexReg != 0) ||
1320  IndexReg == X86::EIP || IndexReg == X86::RIP ||
1321  IndexReg == X86::ESP || IndexReg == X86::RSP) {
1322  ErrMsg = "invalid base+index expression";
1323  return true;
1324  }
1325 
1326  // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
1327  // and then only in non-64-bit modes.
1328  if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1329  (Is64BitMode || (BaseReg != X86::BX && BaseReg != X86::BP &&
1330  BaseReg != X86::SI && BaseReg != X86::DI))) {
1331  ErrMsg = "invalid 16-bit base register";
1332  return true;
1333  }
1334 
1335  if (BaseReg == 0 &&
1336  X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
1337  ErrMsg = "16-bit memory operand may not include only index register";
1338  return true;
1339  }
1340 
1341  if (BaseReg != 0 && IndexReg != 0) {
1342  if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
1343  (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1344  X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
1345  IndexReg == X86::EIZ)) {
1346  ErrMsg = "base register is 64-bit, but index register is not";
1347  return true;
1348  }
1349  if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
1350  (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1351  X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
1352  IndexReg == X86::RIZ)) {
1353  ErrMsg = "base register is 32-bit, but index register is not";
1354  return true;
1355  }
1356  if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
1357  if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
1358  X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
1359  ErrMsg = "base register is 16-bit, but index register is not";
1360  return true;
1361  }
1362  if ((BaseReg != X86::BX && BaseReg != X86::BP) ||
1363  (IndexReg != X86::SI && IndexReg != X86::DI)) {
1364  ErrMsg = "invalid 16-bit base/index register combination";
1365  return true;
1366  }
1367  }
1368  }
1369 
1370  // RIP/EIP-relative addressing is only supported in 64-bit mode.
1371  if (!Is64BitMode && BaseReg != 0 &&
1372  (BaseReg == X86::RIP || BaseReg == X86::EIP)) {
1373  ErrMsg = "IP-relative addressing requires 64-bit mode";
1374  return true;
1375  }
1376 
1377  return checkScale(Scale, ErrMsg);
1378 }
1379 
1380 bool X86AsmParser::MatchRegisterByName(unsigned &RegNo, StringRef RegName,
1381  SMLoc StartLoc, SMLoc EndLoc) {
1382  // If we encounter a %, ignore it. This code handles registers with and
1383  // without the prefix, unprefixed registers can occur in cfi directives.
1384  RegName.consume_front("%");
1385 
1386  RegNo = MatchRegisterName(RegName);
1387 
1388  // If the match failed, try the register name as lowercase.
1389  if (RegNo == 0)
1390  RegNo = MatchRegisterName(RegName.lower());
1391 
1392  // The "flags" and "mxcsr" registers cannot be referenced directly.
1393  // Treat it as an identifier instead.
1394  if (isParsingMSInlineAsm() && isParsingIntelSyntax() &&
1395  (RegNo == X86::EFLAGS || RegNo == X86::MXCSR))
1396  RegNo = 0;
1397 
1398  if (!is64BitMode()) {
1399  // FIXME: This should be done using Requires<Not64BitMode> and
1400  // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
1401  // checked.
1402  if (RegNo == X86::RIZ || RegNo == X86::RIP ||
1403  X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
1405  X86II::isX86_64ExtendedReg(RegNo)) {
1406  return Error(StartLoc,
1407  "register %" + RegName + " is only available in 64-bit mode",
1408  SMRange(StartLoc, EndLoc));
1409  }
1410  }
1411 
1412  // If this is "db[0-15]", match it as an alias
1413  // for dr[0-15].
1414  if (RegNo == 0 && RegName.startswith("db")) {
1415  if (RegName.size() == 3) {
1416  switch (RegName[2]) {
1417  case '0':
1418  RegNo = X86::DR0;
1419  break;
1420  case '1':
1421  RegNo = X86::DR1;
1422  break;
1423  case '2':
1424  RegNo = X86::DR2;
1425  break;
1426  case '3':
1427  RegNo = X86::DR3;
1428  break;
1429  case '4':
1430  RegNo = X86::DR4;
1431  break;
1432  case '5':
1433  RegNo = X86::DR5;
1434  break;
1435  case '6':
1436  RegNo = X86::DR6;
1437  break;
1438  case '7':
1439  RegNo = X86::DR7;
1440  break;
1441  case '8':
1442  RegNo = X86::DR8;
1443  break;
1444  case '9':
1445  RegNo = X86::DR9;
1446  break;
1447  }
1448  } else if (RegName.size() == 4 && RegName[2] == '1') {
1449  switch (RegName[3]) {
1450  case '0':
1451  RegNo = X86::DR10;
1452  break;
1453  case '1':
1454  RegNo = X86::DR11;
1455  break;
1456  case '2':
1457  RegNo = X86::DR12;
1458  break;
1459  case '3':
1460  RegNo = X86::DR13;
1461  break;
1462  case '4':
1463  RegNo = X86::DR14;
1464  break;
1465  case '5':
1466  RegNo = X86::DR15;
1467  break;
1468  }
1469  }
1470  }
1471 
1472  if (RegNo == 0) {
1473  if (isParsingIntelSyntax())
1474  return true;
1475  return Error(StartLoc, "invalid register name", SMRange(StartLoc, EndLoc));
1476  }
1477  return false;
1478 }
1479 
/// Parse a register reference, including the AT&T '%' prefix and the
/// multi-token "%st(N)" FPU stack-register form.
///
/// \param RegNo [out] the matched register number (0 when no match).
/// \param StartLoc [out] location of the first token of the register.
/// \param EndLoc [out] location just past the last token consumed.
/// \param RestoreOnFailure when true, all tokens consumed during the attempt
///        are pushed back to the lexer on failure so the caller can re-parse.
/// \returns true on failure; a diagnostic is emitted except in Intel syntax,
///          where a non-register identifier may still be meaningful.
bool X86AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                 SMLoc &EndLoc, bool RestoreOnFailure) {
  MCAsmParser &Parser = getParser();
  MCAsmLexer &Lexer = getLexer();
  RegNo = 0;

  // Record every token consumed; OnFailure replays them in reverse via
  // UnLex when RestoreOnFailure is set.
  SmallVector<AsmToken, 5> Tokens;
  auto OnFailure = [RestoreOnFailure, &Lexer, &Tokens]() {
    if (RestoreOnFailure) {
      while (!Tokens.empty()) {
        Lexer.UnLex(Tokens.pop_back_val());
      }
    }
  };

  const AsmToken &PercentTok = Parser.getTok();
  StartLoc = PercentTok.getLoc();

  // If we encounter a %, ignore it. This code handles registers with and
  // without the prefix, unprefixed registers can occur in cfi directives.
  if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent)) {
    Tokens.push_back(PercentTok);
    Parser.Lex(); // Eat percent token.
  }

  const AsmToken &Tok = Parser.getTok();
  EndLoc = Tok.getEndLoc();

  if (Tok.isNot(AsmToken::Identifier)) {
    OnFailure();
    // Intel syntax: stay silent; the caller may parse this differently.
    if (isParsingIntelSyntax()) return true;
    return Error(StartLoc, "invalid register name",
                 SMRange(StartLoc, EndLoc));
  }

  if (MatchRegisterByName(RegNo, Tok.getString(), StartLoc, EndLoc)) {
    OnFailure();
    return true;
  }

  // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
  if (RegNo == X86::ST0) {
    Tokens.push_back(Tok);
    Parser.Lex(); // Eat 'st'

    // Check to see if we have '(4)' after %st.
    if (Lexer.isNot(AsmToken::LParen))
      return false; // Bare "%st" means ST0.
    // Lex the paren.
    Tokens.push_back(Parser.getTok());
    Parser.Lex();

    const AsmToken &IntTok = Parser.getTok();
    if (IntTok.isNot(AsmToken::Integer)) {
      OnFailure();
      return Error(IntTok.getLoc(), "expected stack index");
    }
    // Map the stack index onto ST0..ST7.
    switch (IntTok.getIntVal()) {
    case 0: RegNo = X86::ST0; break;
    case 1: RegNo = X86::ST1; break;
    case 2: RegNo = X86::ST2; break;
    case 3: RegNo = X86::ST3; break;
    case 4: RegNo = X86::ST4; break;
    case 5: RegNo = X86::ST5; break;
    case 6: RegNo = X86::ST6; break;
    case 7: RegNo = X86::ST7; break;
    default:
      OnFailure();
      return Error(IntTok.getLoc(), "invalid stack index");
    }

    // Lex IntTok
    Tokens.push_back(IntTok);
    Parser.Lex();
    if (Lexer.isNot(AsmToken::RParen)) {
      OnFailure();
      return Error(Parser.getTok().getLoc(), "expected ')'");
    }

    EndLoc = Parser.getTok().getEndLoc();
    Parser.Lex(); // Eat ')'
    return false;
  }

  EndLoc = Parser.getTok().getEndLoc();

  if (RegNo == 0) {
    OnFailure();
    if (isParsingIntelSyntax()) return true;
    return Error(StartLoc, "invalid register name",
                 SMRange(StartLoc, EndLoc));
  }

  Parser.Lex(); // Eat identifier token.
  return false;
}
1576 
/// MCTargetAsmParser entry point: a failed register parse is a hard error,
/// so the consumed tokens are not restored.
bool X86AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                 SMLoc &EndLoc) {
  return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
}
1581 
1582 OperandMatchResultTy X86AsmParser::tryParseRegister(unsigned &RegNo,
1583  SMLoc &StartLoc,
1584  SMLoc &EndLoc) {
1585  bool Result =
1586  ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
1587  bool PendingErrors = getParser().hasPendingError();
1588  getParser().clearPendingErrors();
1589  if (PendingErrors)
1590  return MatchOperand_ParseFail;
1591  if (Result)
1592  return MatchOperand_NoMatch;
1593  return MatchOperand_Success;
1594 }
1595 
1596 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
1597  bool Parse32 = is32BitMode() || Code16GCC;
1598  unsigned Basereg = is64BitMode() ? X86::RSI : (Parse32 ? X86::ESI : X86::SI);
1599  const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1600  return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1601  /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
1602  Loc, Loc, 0);
1603 }
1604 
1605 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
1606  bool Parse32 = is32BitMode() || Code16GCC;
1607  unsigned Basereg = is64BitMode() ? X86::RDI : (Parse32 ? X86::EDI : X86::DI);
1608  const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1609  return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1610  /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
1611  Loc, Loc, 0);
1612 }
1613 
1614 bool X86AsmParser::IsSIReg(unsigned Reg) {
1615  switch (Reg) {
1616  default: llvm_unreachable("Only (R|E)SI and (R|E)DI are expected!");
1617  case X86::RSI:
1618  case X86::ESI:
1619  case X86::SI:
1620  return true;
1621  case X86::RDI:
1622  case X86::EDI:
1623  case X86::DI:
1624  return false;
1625  }
1626 }
1627 
1628 unsigned X86AsmParser::GetSIDIForRegClass(unsigned RegClassID, unsigned Reg,
1629  bool IsSIReg) {
1630  switch (RegClassID) {
1631  default: llvm_unreachable("Unexpected register class");
1632  case X86::GR64RegClassID:
1633  return IsSIReg ? X86::RSI : X86::RDI;
1634  case X86::GR32RegClassID:
1635  return IsSIReg ? X86::ESI : X86::EDI;
1636  case X86::GR16RegClassID:
1637  return IsSIReg ? X86::SI : X86::DI;
1638  }
1639 }
1640 
1641 void X86AsmParser::AddDefaultSrcDestOperands(
1642  OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
1643  std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst) {
1644  if (isParsingIntelSyntax()) {
1645  Operands.push_back(std::move(Dst));
1646  Operands.push_back(std::move(Src));
1647  }
1648  else {
1649  Operands.push_back(std::move(Src));
1650  Operands.push_back(std::move(Dst));
1651  }
1652 }
1653 
/// Compare the operands the user wrote (OrigOperands, which still carries the
/// mnemonic at index 0) against the canonical default operands
/// (FinalOperands), then replace the originals with the canonical forms.
/// An explicit memory operand only determines size/segment; its base register
/// is normalized to the default (R|E)SI / (R|E)DI, with a warning if the user
/// wrote something else.
bool X86AsmParser::VerifyAndAdjustOperands(OperandVector &OrigOperands,
                                           OperandVector &FinalOperands) {

  if (OrigOperands.size() > 1) {
    // Check if sizes match, OrigOperands also contains the instruction name
    assert(OrigOperands.size() == FinalOperands.size() + 1 &&
           "Operand size mismatch");

    // Verify types match
    int RegClassID = -1;
    for (unsigned int i = 0; i < FinalOperands.size(); ++i) {
      X86Operand &OrigOp = static_cast<X86Operand &>(*OrigOperands[i + 1]);
      X86Operand &FinalOp = static_cast<X86Operand &>(*FinalOperands[i]);

      if (FinalOp.isReg() &&
          (!OrigOp.isReg() || FinalOp.getReg() != OrigOp.getReg()))
        // Return false and let a normal complaint about bogus operands happen
        return false;

      if (FinalOp.isMem()) {

        if (!OrigOp.isMem())
          // Return false and let a normal complaint about bogus operands happen
          return false;

        unsigned OrigReg = OrigOp.Mem.BaseReg;
        unsigned FinalReg = FinalOp.Mem.BaseReg;

        // If we've already encounterd a register class, make sure all register
        // bases are of the same register class
        if (RegClassID != -1 &&
            !X86MCRegisterClasses[RegClassID].contains(OrigReg)) {
          return Error(OrigOp.getStartLoc(),
                       "mismatching source and destination index registers");
        }

        // Classify the base register's width; all memory operands of the
        // instruction must share it.
        if (X86MCRegisterClasses[X86::GR64RegClassID].contains(OrigReg))
          RegClassID = X86::GR64RegClassID;
        else if (X86MCRegisterClasses[X86::GR32RegClassID].contains(OrigReg))
          RegClassID = X86::GR32RegClassID;
        else if (X86MCRegisterClasses[X86::GR16RegClassID].contains(OrigReg))
          RegClassID = X86::GR16RegClassID;
        else
          // Unexpected register class type
          // Return false and let a normal complaint about bogus operands happen
          return false;

        // Canonicalize to the default SI/DI register of the detected width.
        bool IsSI = IsSIReg(FinalReg);
        FinalReg = GetSIDIForRegClass(RegClassID, FinalReg, IsSI);

        if (FinalReg != OrigReg) {
          std::string RegName = IsSI ? "ES:(R|E)SI" : "ES:(R|E)DI";
          // NOTE(review): `Warnings` (a list of (loc, message) pairs) is
          // declared earlier in this function on a line elided from this
          // excerpt.
          Warnings.push_back(std::make_pair(
              OrigOp.getStartLoc(),
              "memory operand is only for determining the size, " + RegName +
                  " will be used for the location"));
        }

        // Keep the user's size and segment; force the canonical base register.
        FinalOp.Mem.Size = OrigOp.Mem.Size;
        FinalOp.Mem.SegReg = OrigOp.Mem.SegReg;
        FinalOp.Mem.BaseReg = FinalReg;
      }
    }

    // Produce warnings only if all the operands passed the adjustment - prevent
    // legal cases like "movsd (%rax), %xmm0" mistakenly produce warnings
    for (auto &WarningMsg : Warnings) {
      Warning(WarningMsg.first, WarningMsg.second);
    }

    // Remove old operands
    for (unsigned int i = 0; i < FinalOperands.size(); ++i)
      OrigOperands.pop_back();
  }
  // OrigOperands.append(FinalOperands.begin(), FinalOperands.end());
  for (unsigned int i = 0; i < FinalOperands.size(); ++i)
    OrigOperands.push_back(std::move(FinalOperands[i]));

  return false;
}
1735 
1736 bool X86AsmParser::parseOperand(OperandVector &Operands, StringRef Name) {
1737  if (isParsingIntelSyntax())
1738  return parseIntelOperand(Operands, Name);
1739 
1740  return parseATTOperand(Operands);
1741 }
1742 
/// Build the X86Operand for a memory reference inside MS-style inline asm,
/// using the frontend-supplied identifier information.
/// NOTE(review): the tail of the parameter list and the label-reference guard
/// fall on lines elided from this excerpt.
bool X86AsmParser::CreateMemForMSInlineAsm(
    unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
    unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
  // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
  // some other label reference.
  // Insert an explicit size if the user didn't have one.
  if (!Size) {
    Size = getPointerWidth();
    InstInfo->AsmRewrites->emplace_back(AOK_SizeDirective, Start,
                                        /*Len=*/0, Size);
  }
  // Create an absolute memory reference in order to match against
  // instructions taking a PC relative operand.
  Operands.push_back(X86Operand::CreateMem(getPointerWidth(), Disp, Start,
                                           End, Size, Identifier,
                                           Info.Label.Decl));
  return false;
  }
  // We either have a direct symbol reference, or an offset from a symbol. The
  // parser always puts the symbol on the LHS, so look there for size
  // calculation purposes.
  unsigned FrontendSize = 0;
  void *Decl = nullptr;
  bool IsGlobalLV = false;
  if (Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
    // Size is in terms of bits in this context.
    FrontendSize = Info.Var.Type * 8;
    Decl = Info.Var.Decl;
    IsGlobalLV = Info.Var.IsGlobalLV;
  }
  // It is widely common for MS InlineAsm to use a global variable and one/two
  // registers in a memory expression, and though inaccessible via rip/eip.
  if (IsGlobalLV && (BaseReg || IndexReg)) {
    Operands.push_back(X86Operand::CreateMem(getPointerWidth(), Disp, Start,
                                             End, Size, Identifier, Decl, 0,
                                             BaseReg && IndexReg));
    return false;
  }
  // Otherwise, we set the base register to a non-zero value
  // if we don't know the actual value at this time. This is necessary to
  // get the matching correct in some cases.
  BaseReg = BaseReg ? BaseReg : 1;
  Operands.push_back(X86Operand::CreateMem(
      getPointerWidth(), SegReg, Disp, BaseReg, IndexReg, Scale, Start, End,
      Size,
      /*DefaultBaseReg=*/X86::RIP, Identifier, Decl, FrontendSize));
  return false;
}
1793 
// Some binary bitwise operators have a named synonym (e.g. '|' may be
// spelled "or"). Query a candidate string for being such a named operator
// and, if so, invoke the appropriate state-machine handler.
bool X86AsmParser::ParseIntelNamedOperator(StringRef Name,
                                           IntelExprStateMachine &SM,
                                           bool &ParseError, SMLoc &End) {
  // A named operator should be either lower or upper case, but not a mix...
  // except in MASM, which uses full case-insensitivity.
  if (Name.compare(Name.lower()) && Name.compare(Name.upper()) &&
      !getParser().isParsingMasm())
    return false;
  if (Name.equals_insensitive("not")) {
    SM.onNot();
  } else if (Name.equals_insensitive("or")) {
    SM.onOr();
  } else if (Name.equals_insensitive("shl")) {
    SM.onLShift();
  } else if (Name.equals_insensitive("shr")) {
    SM.onRShift();
  } else if (Name.equals_insensitive("xor")) {
    SM.onXor();
  } else if (Name.equals_insensitive("and")) {
    SM.onAnd();
  } else if (Name.equals_insensitive("mod")) {
    SM.onMod();
  } else if (Name.equals_insensitive("offset")) {
    // "offset <id>" consumes a full sub-operand, not just one token.
    SMLoc OffsetLoc = getTok().getLoc();
    const MCExpr *Val = nullptr;
    StringRef ID;
    // NOTE(review): the declaration of `Info` falls on a line elided from
    // this excerpt.
    ParseError = ParseIntelOffsetOperator(Val, ID, Info, End);
    if (ParseError)
      return true;
    StringRef ErrMsg;
    ParseError =
        SM.onOffset(Val, OffsetLoc, ID, Info, isParsingMSInlineAsm(), ErrMsg);
    if (ParseError)
      return Error(SMLoc::getFromPointer(Name.data()), ErrMsg);
  } else {
    return false;
  }
  // "offset" advanced the lexer itself; every other operator eats one token.
  if (!Name.equals_insensitive("offset"))
    End = consumeToken();
  return true;
}
1839 bool X86AsmParser::ParseMasmNamedOperator(StringRef Name,
1840  IntelExprStateMachine &SM,
1841  bool &ParseError, SMLoc &End) {
1842  if (Name.equals_insensitive("eq")) {
1843  SM.onEq();
1844  } else if (Name.equals_insensitive("ne")) {
1845  SM.onNE();
1846  } else if (Name.equals_insensitive("lt")) {
1847  SM.onLT();
1848  } else if (Name.equals_insensitive("le")) {
1849  SM.onLE();
1850  } else if (Name.equals_insensitive("gt")) {
1851  SM.onGT();
1852  } else if (Name.equals_insensitive("ge")) {
1853  SM.onGE();
1854  } else {
1855  return false;
1856  }
1857  End = consumeToken();
1858  return true;
1859 }
1860 
1861 // Check if current intel expression append after an operand.
1862 // Like: [Operand][Intel Expression]
1863 void X86AsmParser::tryParseOperandIdx(AsmToken::TokenKind PrevTK,
1864  IntelExprStateMachine &SM) {
1865  if (PrevTK != AsmToken::RBrac)
1866  return;
1867 
1868  SM.setAppendAfterOperand();
1869 }
1870 
1871 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1872  MCAsmParser &Parser = getParser();
1873  StringRef ErrMsg;
1874 
1876 
1877  if (getContext().getObjectFileInfo()->isPositionIndependent())
1878  SM.setPIC();
1879 
1880  bool Done = false;
1881  while (!Done) {
1882  // Get a fresh reference on each loop iteration in case the previous
1883  // iteration moved the token storage during UnLex().
1884  const AsmToken &Tok = Parser.getTok();
1885 
1886  bool UpdateLocLex = true;
1887  AsmToken::TokenKind TK = getLexer().getKind();
1888 
1889  switch (TK) {
1890  default:
1891  if ((Done = SM.isValidEndState()))
1892  break;
1893  return Error(Tok.getLoc(), "unknown token in expression");
1894  case AsmToken::Error:
1895  return Error(getLexer().getErrLoc(), getLexer().getErr());
1896  break;
1898  Done = true;
1899  break;
1900  case AsmToken::Real:
1901  // DotOperator: [ebx].0
1902  UpdateLocLex = false;
1903  if (ParseIntelDotOperator(SM, End))
1904  return true;
1905  break;
1906  case AsmToken::Dot:
1907  if (!Parser.isParsingMasm()) {
1908  if ((Done = SM.isValidEndState()))
1909  break;
1910  return Error(Tok.getLoc(), "unknown token in expression");
1911  }
1912  // MASM allows spaces around the dot operator (e.g., "var . x")
1913  Lex();
1914  UpdateLocLex = false;
1915  if (ParseIntelDotOperator(SM, End))
1916  return true;
1917  break;
1918  case AsmToken::Dollar:
1919  if (!Parser.isParsingMasm()) {
1920  if ((Done = SM.isValidEndState()))
1921  break;
1922  return Error(Tok.getLoc(), "unknown token in expression");
1923  }
1925  case AsmToken::String: {
1926  if (Parser.isParsingMasm()) {
1927  // MASM parsers handle strings in expressions as constants.
1928  SMLoc ValueLoc = Tok.getLoc();
1929  int64_t Res;
1930  const MCExpr *Val;
1931  if (Parser.parsePrimaryExpr(Val, End, nullptr))
1932  return true;
1933  UpdateLocLex = false;
1934  if (!Val->evaluateAsAbsolute(Res, getStreamer().getAssemblerPtr()))
1935  return Error(ValueLoc, "expected absolute value");
1936  if (SM.onInteger(Res, ErrMsg))
1937  return Error(ValueLoc, ErrMsg);
1938  break;
1939  }
1941  }
1942  case AsmToken::At:
1943  case AsmToken::Identifier: {
1944  SMLoc IdentLoc = Tok.getLoc();
1945  StringRef Identifier = Tok.getString();
1946  UpdateLocLex = false;
1947  if (Parser.isParsingMasm()) {
1948  size_t DotOffset = Identifier.find_first_of('.');
1949  if (DotOffset != StringRef::npos) {
1950  consumeToken();
1951  StringRef LHS = Identifier.slice(0, DotOffset);
1952  StringRef Dot = Identifier.slice(DotOffset, DotOffset + 1);
1953  StringRef RHS = Identifier.slice(DotOffset + 1, StringRef::npos);
1954  if (!RHS.empty()) {
1955  getLexer().UnLex(AsmToken(AsmToken::Identifier, RHS));
1956  }
1957  getLexer().UnLex(AsmToken(AsmToken::Dot, Dot));
1958  if (!LHS.empty()) {
1959  getLexer().UnLex(AsmToken(AsmToken::Identifier, LHS));
1960  }
1961  break;
1962  }
1963  }
1964  // (MASM only) <TYPE> PTR operator
1965  if (Parser.isParsingMasm()) {
1966  const AsmToken &NextTok = getLexer().peekTok();
1967  if (NextTok.is(AsmToken::Identifier) &&
1968  NextTok.getIdentifier().equals_insensitive("ptr")) {
1969  AsmTypeInfo Info;
1970  if (Parser.lookUpType(Identifier, Info))
1971  return Error(Tok.getLoc(), "unknown type");
1972  SM.onCast(Info);
1973  // Eat type and PTR.
1974  consumeToken();
1975  End = consumeToken();
1976  break;
1977  }
1978  }
1979  // Register, or (MASM only) <register>.<field>
1980  unsigned Reg;
1981  if (Tok.is(AsmToken::Identifier)) {
1982  if (!ParseRegister(Reg, IdentLoc, End, /*RestoreOnFailure=*/true)) {
1983  if (SM.onRegister(Reg, ErrMsg))
1984  return Error(IdentLoc, ErrMsg);
1985  break;
1986  }
1987  if (Parser.isParsingMasm()) {
1988  const std::pair<StringRef, StringRef> IDField =
1989  Tok.getString().split('.');
1990  const StringRef ID = IDField.first, Field = IDField.second;
1991  SMLoc IDEndLoc = SMLoc::getFromPointer(ID.data() + ID.size());
1992  if (!Field.empty() &&
1993  !MatchRegisterByName(Reg, ID, IdentLoc, IDEndLoc)) {
1994  if (SM.onRegister(Reg, ErrMsg))
1995  return Error(IdentLoc, ErrMsg);
1996 
1998  SMLoc FieldStartLoc = SMLoc::getFromPointer(Field.data());
1999  if (Parser.lookUpField(Field, Info))
2000  return Error(FieldStartLoc, "unknown offset");
2001  else if (SM.onPlus(ErrMsg))
2002  return Error(getTok().getLoc(), ErrMsg);
2003  else if (SM.onInteger(Info.Offset, ErrMsg))
2004  return Error(IdentLoc, ErrMsg);
2005  SM.setTypeInfo(Info.Type);
2006 
2007  End = consumeToken();
2008  break;
2009  }
2010  }
2011  }
2012  // Operator synonymous ("not", "or" etc.)
2013  bool ParseError = false;
2014  if (ParseIntelNamedOperator(Identifier, SM, ParseError, End)) {
2015  if (ParseError)
2016  return true;
2017  break;
2018  }
2019  if (Parser.isParsingMasm() &&
2020  ParseMasmNamedOperator(Identifier, SM, ParseError, End)) {
2021  if (ParseError)
2022  return true;
2023  break;
2024  }
2025  // Symbol reference, when parsing assembly content
2027  AsmFieldInfo FieldInfo;
2028  const MCExpr *Val;
2029  if (isParsingMSInlineAsm() || Parser.isParsingMasm()) {
2030  // MS Dot Operator expression
2031  if (Identifier.count('.') &&
2032  (PrevTK == AsmToken::RBrac || PrevTK == AsmToken::RParen)) {
2033  if (ParseIntelDotOperator(SM, End))
2034  return true;
2035  break;
2036  }
2037  }
2038  if (isParsingMSInlineAsm()) {
2039  // MS InlineAsm operators (TYPE/LENGTH/SIZE)
2040  if (unsigned OpKind = IdentifyIntelInlineAsmOperator(Identifier)) {
2041  if (int64_t Val = ParseIntelInlineAsmOperator(OpKind)) {
2042  if (SM.onInteger(Val, ErrMsg))
2043  return Error(IdentLoc, ErrMsg);
2044  } else {
2045  return true;
2046  }
2047  break;
2048  }
2049  // MS InlineAsm identifier
2050  // Call parseIdentifier() to combine @ with the identifier behind it.
2051  if (TK == AsmToken::At && Parser.parseIdentifier(Identifier))
2052  return Error(IdentLoc, "expected identifier");
2053  if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info, false, End))
2054  return true;
2055  else if (SM.onIdentifierExpr(Val, Identifier, Info, FieldInfo.Type,
2056  true, ErrMsg))
2057  return Error(IdentLoc, ErrMsg);
2058  break;
2059  }
2060  if (Parser.isParsingMasm()) {
2061  if (unsigned OpKind = IdentifyMasmOperator(Identifier)) {
2062  int64_t Val;
2063  if (ParseMasmOperator(OpKind, Val))
2064  return true;
2065  if (SM.onInteger(Val, ErrMsg))
2066  return Error(IdentLoc, ErrMsg);
2067  break;
2068  }
2069  if (!getParser().lookUpType(Identifier, FieldInfo.Type)) {
2070  // Field offset immediate; <TYPE>.<field specification>
2071  Lex(); // eat type
2072  bool EndDot = parseOptionalToken(AsmToken::Dot);
2073  while (EndDot || (getTok().is(AsmToken::Identifier) &&
2074  getTok().getString().startswith("."))) {
2075  getParser().parseIdentifier(Identifier);
2076  if (!EndDot)
2077  Identifier.consume_front(".");
2078  EndDot = Identifier.consume_back(".");
2079  if (getParser().lookUpField(FieldInfo.Type.Name, Identifier,
2080  FieldInfo)) {
2081  SMLoc IDEnd =
2082  SMLoc::getFromPointer(Identifier.data() + Identifier.size());
2083  return Error(IdentLoc, "Unable to lookup field reference!",
2084  SMRange(IdentLoc, IDEnd));
2085  }
2086  if (!EndDot)
2087  EndDot = parseOptionalToken(AsmToken::Dot);
2088  }
2089  if (SM.onInteger(FieldInfo.Offset, ErrMsg))
2090  return Error(IdentLoc, ErrMsg);
2091  break;
2092  }
2093  }
2094  if (getParser().parsePrimaryExpr(Val, End, &FieldInfo.Type)) {
2095  return Error(Tok.getLoc(), "Unexpected identifier!");
2096  } else if (SM.onIdentifierExpr(Val, Identifier, Info, FieldInfo.Type,
2097  false, ErrMsg)) {
2098  return Error(IdentLoc, ErrMsg);
2099  }
2100  break;
2101  }
2102  case AsmToken::Integer: {
2103  // Look for 'b' or 'f' following an Integer as a directional label
2104  SMLoc Loc = getTok().getLoc();
2105  int64_t IntVal = getTok().getIntVal();
2106  End = consumeToken();
2107  UpdateLocLex = false;
2108  if (getLexer().getKind() == AsmToken::Identifier) {
2109  StringRef IDVal = getTok().getString();
2110  if (IDVal == "f" || IDVal == "b") {
2111  MCSymbol *Sym =
2112  getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b");
2114  const MCExpr *Val =
2115  MCSymbolRefExpr::create(Sym, Variant, getContext());
2116  if (IDVal == "b" && Sym->isUndefined())
2117  return Error(Loc, "invalid reference to undefined symbol");
2118  StringRef Identifier = Sym->getName();
2120  AsmTypeInfo Type;
2121  if (SM.onIdentifierExpr(Val, Identifier, Info, Type,
2122  isParsingMSInlineAsm(), ErrMsg))
2123  return Error(Loc, ErrMsg);
2124  End = consumeToken();
2125  } else {
2126  if (SM.onInteger(IntVal, ErrMsg))
2127  return Error(Loc, ErrMsg);
2128  }
2129  } else {
2130  if (SM.onInteger(IntVal, ErrMsg))
2131  return Error(Loc, ErrMsg);
2132  }
2133  break;
2134  }
2135  case AsmToken::Plus:
2136  if (SM.onPlus(ErrMsg))
2137  return Error(getTok().getLoc(), ErrMsg);
2138  break;
2139  case AsmToken::Minus:
2140  if (SM.onMinus(ErrMsg))
2141  return Error(getTok().getLoc(), ErrMsg);
2142  break;
2143  case AsmToken::Tilde: SM.onNot(); break;
2144  case AsmToken::Star: SM.onStar(); break;
2145  case AsmToken::Slash: SM.onDivide(); break;
2146  case AsmToken::Percent: SM.onMod(); break;
2147  case AsmToken::Pipe: SM.onOr(); break;
2148  case AsmToken::Caret: SM.onXor(); break;
2149  case AsmToken::Amp: SM.onAnd(); break;
2150  case AsmToken::LessLess:
2151  SM.onLShift(); break;
2153  SM.onRShift(); break;
2154  case AsmToken::LBrac:
2155  if (SM.onLBrac())
2156  return Error(Tok.getLoc(), "unexpected bracket encountered");
2157  tryParseOperandIdx(PrevTK, SM);
2158  break;
2159  case AsmToken::RBrac:
2160  if (SM.onRBrac(ErrMsg)) {
2161  return Error(Tok.getLoc(), ErrMsg);
2162  }
2163  break;
2164  case AsmToken::LParen: SM.onLParen(); break;
2165  case AsmToken::RParen: SM.onRParen(); break;
2166  }
2167  if (SM.hadError())
2168  return Error(Tok.getLoc(), "unknown token in expression");
2169 
2170  if (!Done && UpdateLocLex)
2171  End = consumeToken();
2172 
2173  PrevTK = TK;
2174  }
2175  return false;
2176 }
2177 
// Record AsmRewrites so the MS inline-asm frontend can re-emit this Intel
// operand expression: raw source text before/after a symbol displacement is
// skipped, and base/index/scale/imm are captured as an IntelExpr rewrite.
2178 void X86AsmParser::RewriteIntelExpression(IntelExprStateMachine &SM,
2179  SMLoc Start, SMLoc End) {
2180  SMLoc Loc = Start;
2181  unsigned ExprLen = End.getPointer() - Start.getPointer();
2182  // Skip everything before a symbol displacement (if we have one)
2183  if (SM.getSym() && !SM.isOffsetOperator()) {
2184  StringRef SymName = SM.getSymName();
     // Non-zero distance from operand start to symbol text => skip that prefix.
2185  if (unsigned Len = SymName.data() - Start.getPointer())
2186  InstInfo->AsmRewrites->emplace_back(AOK_Skip, Start, Len);
     // Re-anchor the rewrite just past the symbol name.
2187  Loc = SMLoc::getFromPointer(SymName.data() + SymName.size());
2188  ExprLen = End.getPointer() - (SymName.data() + SymName.size());
2189  // If we have only a symbol than there's no need for complex rewrite,
2190  // simply skip everything after it
2191  if (!(SM.getBaseReg() || SM.getIndexReg() || SM.getImm())) {
2192  if (ExprLen)
2193  InstInfo->AsmRewrites->emplace_back(AOK_Skip, Loc, ExprLen);
2194  return;
2195  }
2196  }
2197  // Build an Intel Expression rewrite
2198  StringRef BaseRegStr;
2199  StringRef IndexRegStr;
2200  StringRef OffsetNameStr;
2201  if (SM.getBaseReg())
2202  BaseRegStr = X86IntelInstPrinter::getRegisterName(SM.getBaseReg());
2203  if (SM.getIndexReg())
2204  IndexRegStr = X86IntelInstPrinter::getRegisterName(SM.getIndexReg());
2205  if (SM.isOffsetOperator())
2206  OffsetNameStr = SM.getSymName();
2207  // Emit it
2208  IntelExpr Expr(BaseRegStr, IndexRegStr, SM.getScale(), OffsetNameStr,
2209  SM.getImm(), SM.isMemExpr());
2210  InstInfo->AsmRewrites->emplace_back(Loc, ExprLen, Expr);
2211 }
2212 
2213 // Inline assembly may use variable names with namespace alias qualifiers.
// Resolve an identifier in MS inline asm via the Sema callback, advance the
// lexer past the full (possibly multi-token) identifier text, and produce a
// symbol-ref expression in Val. Returns true only on hard error.
2214 bool X86AsmParser::ParseIntelInlineAsmIdentifier(
2215  const MCExpr *&Val, StringRef &Identifier, InlineAsmIdentifierInfo &Info,
2216  bool IsUnevaluatedOperand, SMLoc &End, bool IsParsingOffsetOperator) {
2217  MCAsmParser &Parser = getParser();
2218  assert(isParsingMSInlineAsm() && "Expected to be parsing inline assembly.");
2219  Val = nullptr;
2220 
     // LineBuf covers from the identifier start to end-of-line; the frontend
     // callback trims it to the part it actually recognized.
2221  StringRef LineBuf(Identifier.data());
2222  SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
2223 
2224  const AsmToken &Tok = Parser.getTok();
2225  SMLoc Loc = Tok.getLoc();
2226 
2227  // Advance the token stream until the end of the current token is
2228  // after the end of what the frontend claimed.
2229  const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
2230  do {
2231  End = Tok.getEndLoc();
2232  getLexer().Lex();
2233  } while (End.getPointer() < EndPtr);
2234  Identifier = LineBuf;
2235 
2236  // The frontend should end parsing on an assembler token boundary, unless it
2237  // failed parsing.
2238  assert((End.getPointer() == EndPtr ||
     // NOTE(review): the listing extraction dropped a line here (upstream has
     // `Info.isKind(InlineAsmIdentifierInfo::IK_Invalid)) &&` as the second
     // assert operand) — verify against the original source.
2240  "frontend claimed part of a token?");
2241 
2242  // If the identifier lookup was unsuccessful, assume that we are dealing with
2243  // a label.
     // NOTE(review): a guard line appears to be missing here (upstream:
     // `if (Info.isKind(InlineAsmIdentifierInfo::IK_Invalid)) {`) — the
     // dangling `} else if` below confirms the gap; verify upstream.
2245  StringRef InternalName =
2246  SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(),
2247  Loc, false);
2248  assert(InternalName.size() && "We should have an internal name here.");
2249  // Push a rewrite for replacing the identifier name with the internal name,
2250  // unless we are parsing the operand of an offset operator
2251  if (!IsParsingOffsetOperator)
2252  InstInfo->AsmRewrites->emplace_back(AOK_Label, Loc, Identifier.size(),
2253  InternalName);
2254  else
2255  Identifier = InternalName;
2256  } else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
2257  return false;
2258  // Create the symbol reference.
2259  MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
     // NOTE(review): the declaration of `Variant` (upstream:
     // `MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;`)
     // was dropped by the extraction — verify upstream.
2261  Val = MCSymbolRefExpr::create(Sym, Variant, getParser().getContext());
2262  return false;
2263 }
2264 
2265 //ParseRoundingModeOp - Parse AVX-512 rounding mode operand
// Parses either "{rX-sae}" (embedded rounding, emitted as an immediate
// operand) or "{sae}" (suppress-all-exceptions, emitted as a token operand).
// On entry the lexer sits on '{'. Returns true on parse error.
2266 bool X86AsmParser::ParseRoundingModeOp(SMLoc Start, OperandVector &Operands) {
2267  MCAsmParser &Parser = getParser();
2268  const AsmToken &Tok = Parser.getTok();
2269  // Eat "{" and mark the current place.
2270  const SMLoc consumedToken = consumeToken();
2271  if (Tok.isNot(AsmToken::Identifier))
2272  return Error(Tok.getLoc(), "Expected an identifier after {");
2273  if (Tok.getIdentifier().startswith("r")){
2274  int rndMode = StringSwitch<int>(Tok.getIdentifier())
     // NOTE(review): the listing extraction dropped the .Cases lines here
     // (upstream maps "rn"/"rd"/"ru"/"rz" to the X86::STATIC_ROUNDING
     // TO_NEAREST_INT/TO_NEG_INF/TO_POS_INF/TO_ZERO values) — verify upstream.
2279  .Default(-1);
2280  if (-1 == rndMode)
2281  return Error(Tok.getLoc(), "Invalid rounding mode.");
2282  Parser.Lex(); // Eat "r*" of r*-sae
2283  if (!getLexer().is(AsmToken::Minus))
2284  return Error(Tok.getLoc(), "Expected - at this point");
2285  Parser.Lex(); // Eat "-"
2286  Parser.Lex(); // Eat the sae
2287  if (!getLexer().is(AsmToken::RCurly))
2288  return Error(Tok.getLoc(), "Expected } at this point");
2289  SMLoc End = Tok.getEndLoc();
2290  Parser.Lex(); // Eat "}"
     // Rounding mode becomes an immediate operand on the instruction.
2291  const MCExpr *RndModeOp =
2292  MCConstantExpr::create(rndMode, Parser.getContext());
2293  Operands.push_back(X86Operand::CreateImm(RndModeOp, Start, End));
2294  return false;
2295  }
2296  if(Tok.getIdentifier().equals("sae")){
2297  Parser.Lex(); // Eat the sae
2298  if (!getLexer().is(AsmToken::RCurly))
2299  return Error(Tok.getLoc(), "Expected } at this point");
2300  Parser.Lex(); // Eat "}"
2301  Operands.push_back(X86Operand::CreateToken("{sae}", consumedToken));
2302  return false;
2303  }
2304  return Error(Tok.getLoc(), "unknown token in expression");
2305 }
2306 
2307 /// Parse the '.' operator.
/// Handles Intel-syntax field access (".field" / "struct.field"): resolves
/// the field offset (numeric ".Imm" or a named field via type/symbol lookup)
/// and folds it into the expression state machine as an immediate.
2308 bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM,
2309  SMLoc &End) {
2310  const AsmToken &Tok = getTok();
     // NOTE(review): the listing extraction dropped a declaration line here
     // (upstream: `AsmFieldInfo Info;`); `Info` is used throughout below —
     // verify upstream.
2312 
2313  // Drop the optional '.'.
2314  StringRef DotDispStr = Tok.getString();
2315  if (DotDispStr.startswith("."))
2316  DotDispStr = DotDispStr.drop_front(1);
2317  StringRef TrailingDot;
2318 
2319  // .Imm gets lexed as a real.
2320  if (Tok.is(AsmToken::Real)) {
2321  APInt DotDisp;
2322  DotDispStr.getAsInteger(10, DotDisp);
2323  Info.Offset = DotDisp.getZExtValue();
2324  } else if ((isParsingMSInlineAsm() || getParser().isParsingMasm()) &&
2325  Tok.is(AsmToken::Identifier)) {
     // A trailing '.' belongs to the next field access; peel it off and
     // re-queue it after this lookup (see UnLex below).
2326  if (DotDispStr.endswith(".")) {
2327  TrailingDot = DotDispStr.substr(DotDispStr.size() - 1);
2328  DotDispStr = DotDispStr.drop_back(1);
2329  }
2330  const std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
2331  const StringRef Base = BaseMember.first, Member = BaseMember.second;
     // Try each lookup scope in turn; only fail if every one misses.
2332  if (getParser().lookUpField(SM.getType(), DotDispStr, Info) &&
2333  getParser().lookUpField(SM.getSymName(), DotDispStr, Info) &&
2334  getParser().lookUpField(DotDispStr, Info) &&
2335  (!SemaCallback ||
2336  SemaCallback->LookupInlineAsmField(Base, Member, Info.Offset)))
2337  return Error(Tok.getLoc(), "Unable to lookup field reference!");
2338  } else {
2339  return Error(Tok.getLoc(), "Unexpected token type!");
2340  }
2341 
2342  // Eat the DotExpression and update End
2343  End = SMLoc::getFromPointer(DotDispStr.data());
2344  const char *DotExprEndLoc = DotDispStr.data() + DotDispStr.size();
2345  while (Tok.getLoc().getPointer() < DotExprEndLoc)
2346  Lex();
2347  if (!TrailingDot.empty())
2348  getLexer().UnLex(AsmToken(AsmToken::Dot, TrailingDot));
2349  SM.addImm(Info.Offset);
2350  SM.setTypeInfo(Info.Type);
2351  return false;
2352 }
2353 
2354 /// Parse the 'offset' operator.
2355 /// This operator is used to specify the location of a given operand
/// On success Val holds the address expression and ID the operand's name.
/// Returns true (with a diagnostic) on any parse or lookup failure.
2356 bool X86AsmParser::ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID,
     // NOTE(review): the listing extraction dropped a parameter line here
     // (upstream: `InlineAsmIdentifierInfo &Info,`); `Info` is used in the
     // body below — verify upstream.
2358  SMLoc &End) {
2359  // Eat offset, mark start of identifier.
2360  SMLoc Start = Lex().getLoc();
2361  ID = getTok().getString();
2362  if (!isParsingMSInlineAsm()) {
     // Plain assembly: accept only an identifier/string primary expression.
2363  if ((getTok().isNot(AsmToken::Identifier) &&
2364  getTok().isNot(AsmToken::String)) ||
2365  getParser().parsePrimaryExpr(Val, End, nullptr))
2366  return Error(Start, "unexpected token!");
2367  } else if (ParseIntelInlineAsmIdentifier(Val, ID, Info, false, End, true)) {
2368  return Error(Start, "unable to lookup expression");
2369  } else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal)) {
2370  return Error(Start, "offset operator cannot yet handle constants");
2371  }
2372  return false;
2373 }
2374 
2375 // Query a candidate string for being an Intel assembly operator
2376 // Report back its kind, or IOK_INVALID if does not evaluated as a known one
2377 unsigned X86AsmParser::IdentifyIntelInlineAsmOperator(StringRef Name) {
     // NOTE(review): the listing extraction dropped a line here (upstream:
     // `return StringSwitch<unsigned>(Name)`) — verify upstream. Matching is
     // exact-case on the upper/lower spellings listed below.
2379  .Cases("TYPE","type",IOK_TYPE)
2380  .Cases("SIZE","size",IOK_SIZE)
2381  .Cases("LENGTH","length",IOK_LENGTH)
2382  .Default(IOK_INVALID);
2383 }
2384 
2385 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
2386 /// returns the number of elements in an array. It returns the value 1 for
2387 /// non-array variables. The SIZE operator returns the size of a C or C++
2388 /// variable. A variable's size is the product of its LENGTH and TYPE. The
2389 /// TYPE operator returns the size of a C or C++ type or variable. If the
2390 /// variable is an array, TYPE returns the size of a single element.
/// Returns the computed value, or 0 on failure (a diagnostic may have been
/// emitted) — note 0 is also not a meaningful operator result here.
2391 unsigned X86AsmParser::ParseIntelInlineAsmOperator(unsigned OpKind) {
2392  MCAsmParser &Parser = getParser();
2393  const AsmToken &Tok = Parser.getTok();
2394  Parser.Lex(); // Eat operator.
2395 
2396  const MCExpr *Val = nullptr;
     // NOTE(review): the listing extraction dropped a declaration line here
     // (upstream: `InlineAsmIdentifierInfo Info;`) — verify upstream.
2398  SMLoc Start = Tok.getLoc(), End;
2399  StringRef Identifier = Tok.getString();
     // Unevaluated: we only need the variable's metadata, not its value.
2400  if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info,
2401  /*IsUnevaluatedOperand=*/true, End))
2402  return 0;
2403 
2404  if (!Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
2405  Error(Start, "unable to lookup expression");
2406  return 0;
2407  }
2408 
2409  unsigned CVal = 0;
2410  switch(OpKind) {
2411  default: llvm_unreachable("Unexpected operand kind!");
2412  case IOK_LENGTH: CVal = Info.Var.Length; break;
2413  case IOK_SIZE: CVal = Info.Var.Size; break;
2414  case IOK_TYPE: CVal = Info.Var.Type; break;
2415  }
2416 
2417  return CVal;
2418 }
2419 
2420 // Query a candidate string for being an Intel assembly operator
2421 // Report back its kind, or IOK_INVALID if does not evaluated as a known one
2422 unsigned X86AsmParser::IdentifyMasmOperator(StringRef Name) {
2423  return StringSwitch<unsigned>(Name.lower())
2424  .Case("type", MOK_TYPE)
2425  .Cases("size", "sizeof", MOK_SIZEOF)
2426  .Cases("length", "lengthof", MOK_LENGTHOF)
2427  .Default(MOK_INVALID);
2428 }
2429 
2430 /// Parse the 'LENGTHOF', 'SIZEOF', and 'TYPE' operators. The LENGTHOF operator
2431 /// returns the number of elements in an array. It returns the value 1 for
2432 /// non-array variables. The SIZEOF operator returns the size of a type or
2433 /// variable in bytes. A variable's size is the product of its LENGTH and TYPE.
2434 /// The TYPE operator returns the size of a variable. If the variable is an
2435 /// array, TYPE returns the size of a single element.
/// On success Val receives the computed value; returns true on error.
2436 bool X86AsmParser::ParseMasmOperator(unsigned OpKind, int64_t &Val) {
2437  MCAsmParser &Parser = getParser();
2438  SMLoc OpLoc = Parser.getTok().getLoc();
2439  Parser.Lex(); // Eat operator.
2440 
2441  Val = 0;
2442  if (OpKind == MOK_SIZEOF || OpKind == MOK_TYPE) {
2443  // Check for SIZEOF(<type>) and TYPE(<type>).
2444  bool InParens = Parser.getTok().is(AsmToken::LParen);
     // Peek past the '(' (if any) without consuming, so a failed type lookup
     // leaves the stream untouched for the expression path below.
2445  const AsmToken &IDTok = InParens ? getLexer().peekTok() : Parser.getTok();
2446  AsmTypeInfo Type;
2447  if (IDTok.is(AsmToken::Identifier) &&
2448  !Parser.lookUpType(IDTok.getIdentifier(), Type)) {
2449  Val = Type.Size;
2450 
2451  // Eat tokens.
2452  if (InParens)
2453  parseToken(AsmToken::LParen);
2454  parseToken(AsmToken::Identifier);
2455  if (InParens)
2456  parseToken(AsmToken::RParen);
2457  }
2458  }
2459 
     // Not a bare type name: evaluate a full Intel expression and query the
     // state machine for the requested property.
2460  if (!Val) {
2461  IntelExprStateMachine SM;
2462  SMLoc End, Start = Parser.getTok().getLoc();
2463  if (ParseIntelExpression(SM, End))
2464  return true;
2465 
2466  switch (OpKind) {
2467  default:
2468  llvm_unreachable("Unexpected operand kind!");
2469  case MOK_SIZEOF:
2470  Val = SM.getSize();
2471  break;
2472  case MOK_LENGTHOF:
2473  Val = SM.getLength();
2474  break;
2475  case MOK_TYPE:
2476  Val = SM.getElementSize();
2477  break;
2478  }
2479 
2480  if (!Val)
2481  return Error(OpLoc, "expression has unknown type", SMRange(Start, End));
2482  }
2483 
2484  return false;
2485 }
2486 
2487 bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size) {
2488  Size = StringSwitch<unsigned>(getTok().getString())
2489  .Cases("BYTE", "byte", 8)
2490  .Cases("WORD", "word", 16)
2491  .Cases("DWORD", "dword", 32)
2492  .Cases("FLOAT", "float", 32)
2493  .Cases("LONG", "long", 32)
2494  .Cases("FWORD", "fword", 48)
2495  .Cases("DOUBLE", "double", 64)
2496  .Cases("QWORD", "qword", 64)
2497  .Cases("MMWORD","mmword", 64)
2498  .Cases("XWORD", "xword", 80)
2499  .Cases("TBYTE", "tbyte", 80)
2500  .Cases("XMMWORD", "xmmword", 128)
2501  .Cases("YMMWORD", "ymmword", 256)
2502  .Cases("ZMMWORD", "zmmword", 512)
2503  .Default(0);
2504  if (Size) {
2505  const AsmToken &Tok = Lex(); // Eat operand size (e.g., byte, word).
2506  if (!(Tok.getString().equals("PTR") || Tok.getString().equals("ptr")))
2507  return Error(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
2508  Lex(); // Eat ptr.
2509  }
2510  return false;
2511 }
2512 
// Parse one Intel-syntax operand: an optional size prefix, then a rounding
// mode, a plain register, a segment override, an immediate, or a memory
// reference. Pushes the resulting operand(s) and returns true on error.
2513 bool X86AsmParser::parseIntelOperand(OperandVector &Operands, StringRef Name) {
2514  MCAsmParser &Parser = getParser();
2515  const AsmToken &Tok = Parser.getTok();
2516  SMLoc Start, End;
2517 
2518  // Parse optional Size directive.
2519  unsigned Size;
2520  if (ParseIntelMemoryOperandSize(Size))
2521  return true;
2522  bool PtrInOperand = bool(Size);
2523 
2524  Start = Tok.getLoc();
2525 
2526  // Rounding mode operand.
2527  if (getLexer().is(AsmToken::LCurly))
2528  return ParseRoundingModeOp(Start, Operands);
2529 
2530  // Register operand.
2531  unsigned RegNo = 0;
2532  if (Tok.is(AsmToken::Identifier) && !ParseRegister(RegNo, Start, End)) {
2533  if (RegNo == X86::RIP)
2534  return Error(Start, "rip can only be used as a base register");
2535  // A Register followed by ':' is considered a segment override
2536  if (Tok.isNot(AsmToken::Colon)) {
2537  if (PtrInOperand)
2538  return Error(Start, "expected memory operand after 'ptr', "
2539  "found register operand instead");
2540  Operands.push_back(X86Operand::CreateReg(RegNo, Start, End));
2541  return false;
2542  }
2543  // An alleged segment override. check if we have a valid segment register
2544  if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
2545  return Error(Start, "invalid segment register");
2546  // Eat ':' and update Start location
2547  Start = Lex().getLoc();
2548  }
2549 
2550  // Immediates and Memory
2551  IntelExprStateMachine SM;
2552  if (ParseIntelExpression(SM, End))
2553  return true;
2554 
2555  if (isParsingMSInlineAsm())
2556  RewriteIntelExpression(SM, Start, Tok.getLoc());
2557 
     // Fold any symbol and constant parts into a single displacement expr.
2558  int64_t Imm = SM.getImm();
2559  const MCExpr *Disp = SM.getSym();
2560  const MCExpr *ImmDisp = MCConstantExpr::create(Imm, getContext());
2561  if (Disp && Imm)
2562  Disp = MCBinaryExpr::createAdd(Disp, ImmDisp, getContext());
2563  if (!Disp)
2564  Disp = ImmDisp;
2565 
2566  // RegNo != 0 specifies a valid segment register,
2567  // and we are parsing a segment override
2568  if (!SM.isMemExpr() && !RegNo) {
2569  if (isParsingMSInlineAsm() && SM.isOffsetOperator()) {
2570  const InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
2571  if (Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
2572  // Disp includes the address of a variable; make sure this is recorded
2573  // for later handling.
2574  Operands.push_back(X86Operand::CreateImm(Disp, Start, End,
2575  SM.getSymName(), Info.Var.Decl,
2576  Info.Var.IsGlobalLV));
2577  return false;
2578  }
2579  }
2580 
2581  Operands.push_back(X86Operand::CreateImm(Disp, Start, End));
2582  return false;
2583  }
2584 
2585  StringRef ErrMsg;
2586  unsigned BaseReg = SM.getBaseReg();
2587  unsigned IndexReg = SM.getIndexReg();
     // RIP may only be a base; with an index present the RIP "base" is
     // implicit and must be dropped.
2588  if (IndexReg && BaseReg == X86::RIP)
2589  BaseReg = 0;
2590  unsigned Scale = SM.getScale();
2591  if (!PtrInOperand)
2592  Size = SM.getElementSize() << 3;
2593 
     // ESP/RSP cannot be an index; without an explicit scale, swap it into
     // the base slot.
2594  if (Scale == 0 && BaseReg != X86::ESP && BaseReg != X86::RSP &&
2595  (IndexReg == X86::ESP || IndexReg == X86::RSP))
2596  std::swap(BaseReg, IndexReg);
2597 
2598  // If BaseReg is a vector register and IndexReg is not, swap them unless
2599  // Scale was specified in which case it would be an error.
2600  if (Scale == 0 &&
2601  !(X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) ||
2602  X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) ||
2603  X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg)) &&
2604  (X86MCRegisterClasses[X86::VR128XRegClassID].contains(BaseReg) ||
2605  X86MCRegisterClasses[X86::VR256XRegClassID].contains(BaseReg) ||
2606  X86MCRegisterClasses[X86::VR512RegClassID].contains(BaseReg)))
2607  std::swap(BaseReg, IndexReg);
2608 
2609  if (Scale != 0 &&
2610  X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg))
2611  return Error(Start, "16-bit addresses cannot have a scale");
2612 
2613  // If there was no explicit scale specified, change it to 1.
2614  if (Scale == 0)
2615  Scale = 1;
2616 
2617  // If this is a 16-bit addressing mode with the base and index in the wrong
2618  // order, swap them so CheckBaseRegAndIndexRegAndScale doesn't fail. It is
2619  // shared with att syntax where order matters.
2620  if ((BaseReg == X86::SI || BaseReg == X86::DI) &&
2621  (IndexReg == X86::BX || IndexReg == X86::BP))
2622  std::swap(BaseReg, IndexReg);
2623 
2624  if ((BaseReg || IndexReg) &&
2625  CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
2626  ErrMsg))
2627  return Error(Start, ErrMsg);
2628  if (isParsingMSInlineAsm())
2629  return CreateMemForMSInlineAsm(RegNo, Disp, BaseReg, IndexReg, Scale, Start,
2630  End, Size, SM.getSymName(),
2631  SM.getIdentifierInfo(), Operands);
2632 
2633  // When parsing x64 MS-style assembly, all non-absolute references to a named
2634  // variable default to RIP-relative.
2635  unsigned DefaultBaseReg = X86::NoRegister;
2636  bool MaybeDirectBranchDest = true;
2637 
2638  if (Parser.isParsingMasm()) {
2639  bool IsUnconditionalBranch =
2640  Name.equals_insensitive("jmp") || Name.equals_insensitive("call");
2641  if (is64BitMode() && SM.getElementSize() > 0) {
2642  DefaultBaseReg = X86::RIP;
2643  }
2644  if (IsUnconditionalBranch) {
2645  if (PtrInOperand) {
2646  MaybeDirectBranchDest = false;
2647  if (is64BitMode())
2648  DefaultBaseReg = X86::RIP;
2649  } else if (!BaseReg && !IndexReg && Disp &&
2650  Disp->getKind() == MCExpr::SymbolRef) {
2651  if (is64BitMode()) {
2652  if (SM.getSize() == 8) {
2653  MaybeDirectBranchDest = false;
2654  DefaultBaseReg = X86::RIP;
2655  }
2656  } else {
2657  if (SM.getSize() == 4 || SM.getSize() == 2)
2658  MaybeDirectBranchDest = false;
2659  }
2660  }
2661  }
2662  }
2663 
2664  if ((BaseReg || IndexReg || RegNo || DefaultBaseReg != X86::NoRegister))
2665  Operands.push_back(X86Operand::CreateMem(
2666  getPointerWidth(), RegNo, Disp, BaseReg, IndexReg, Scale, Start, End,
2667  Size, DefaultBaseReg, /*SymName=*/StringRef(), /*OpDecl=*/nullptr,
2668  /*FrontendSize=*/0, /*UseUpRegs=*/false, MaybeDirectBranchDest));
2669  else
2670  Operands.push_back(X86Operand::CreateMem(
2671  getPointerWidth(), Disp, Start, End, Size, /*SymName=*/StringRef(),
2672  /*OpDecl=*/nullptr, /*FrontendSize=*/0, /*UseUpRegs=*/false,
2673  MaybeDirectBranchDest));
2674  return false;
2675 }
2676 
// Parse one AT&T-syntax operand: '$imm', a rounding-mode '{...}', a plain
// register, a segment override, or a memory reference. Returns true on error.
2677 bool X86AsmParser::parseATTOperand(OperandVector &Operands) {
2678  MCAsmParser &Parser = getParser();
2679  switch (getLexer().getKind()) {
2680  case AsmToken::Dollar: {
2681  // $42 or $ID -> immediate.
2682  SMLoc Start = Parser.getTok().getLoc(), End;
2683  Parser.Lex();
2684  const MCExpr *Val;
2685  // This is an immediate, so we should not parse a register. Do a precheck
2686  // for '%' to supercede intra-register parse errors.
2687  SMLoc L = Parser.getTok().getLoc();
2688  if (check(getLexer().is(AsmToken::Percent), L,
2689  "expected immediate expression") ||
2690  getParser().parseExpression(Val, End) ||
2691  check(isa<X86MCExpr>(Val), L, "expected immediate expression"))
2692  return true;
2693  Operands.push_back(X86Operand::CreateImm(Val, Start, End));
2694  return false;
2695  }
2696  case AsmToken::LCurly: {
2697  SMLoc Start = Parser.getTok().getLoc();
2698  return ParseRoundingModeOp(Start, Operands);
2699  }
2700  default: {
2701  // This a memory operand or a register. We have some parsing complications
2702  // as a '(' may be part of an immediate expression or the addressing mode
2703  // block. This is complicated by the fact that an assembler-level variable
2704  // may refer either to a register or an immediate expression.
2705 
2706  SMLoc Loc = Parser.getTok().getLoc(), EndLoc;
2707  const MCExpr *Expr = nullptr;
2708  unsigned Reg = 0;
2709  if (getLexer().isNot(AsmToken::LParen)) {
2710  // No '(' so this is either a displacement expression or a register.
2711  if (Parser.parseExpression(Expr, EndLoc))
2712  return true;
     // Registers parse as X86MCExpr; unwrap to a register number.
2713  if (auto *RE = dyn_cast<X86MCExpr>(Expr)) {
2714  // Segment Register. Reset Expr and copy value to register.
2715  Expr = nullptr;
2716  Reg = RE->getRegNo();
2717 
2718  // Check the register.
2719  if (Reg == X86::EIZ || Reg == X86::RIZ)
2720  return Error(
2721  Loc, "%eiz and %riz can only be used as index registers",
2722  SMRange(Loc, EndLoc));
2723  if (Reg == X86::RIP)
2724  return Error(Loc, "%rip can only be used as a base register",
2725  SMRange(Loc, EndLoc));
2726  // Return register that are not segment prefixes immediately.
2727  if (!Parser.parseOptionalToken(AsmToken::Colon)) {
2728  Operands.push_back(X86Operand::CreateReg(Reg, Loc, EndLoc));
2729  return false;
2730  }
2731  if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(Reg))
2732  return Error(Loc, "invalid segment register");
2733  // Accept a '*' absolute memory reference after the segment. Place it
2734  // before the full memory operand.
2735  if (getLexer().is(AsmToken::Star))
2736  Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
2737  }
2738  }
2739  // This is a Memory operand.
2740  return ParseMemOperand(Reg, Expr, Loc, EndLoc, Operands);
2741  }
2742  }
2743 }
2744 
2745 // X86::COND_INVALID if not a recognized condition code or alternate mnemonic,
2746 // otherwise the EFLAGS Condition Code enumerator.
2747 X86::CondCode X86AsmParser::ParseConditionCode(StringRef CC) {
2748  return StringSwitch<X86::CondCode>(CC)
2749  .Case("o", X86::COND_O) // Overflow
2750  .Case("no", X86::COND_NO) // No Overflow
2751  .Cases("b", "nae", X86::COND_B) // Below/Neither Above nor Equal
2752  .Cases("ae", "nb", X86::COND_AE) // Above or Equal/Not Below
2753  .Cases("e", "z", X86::COND_E) // Equal/Zero
2754  .Cases("ne", "nz", X86::COND_NE) // Not Equal/Not Zero
2755  .Cases("be", "na", X86::COND_BE) // Below or Equal/Not Above
2756  .Cases("a", "nbe", X86::COND_A) // Above/Neither Below nor Equal
2757  .Case("s", X86::COND_S) // Sign
2758  .Case("ns", X86::COND_NS) // No Sign
2759  .Cases("p", "pe", X86::COND_P) // Parity/Parity Even
2760  .Cases("np", "po", X86::COND_NP) // No Parity/Parity Odd
2761  .Cases("l", "nge", X86::COND_L) // Less/Neither Greater nor Equal
2762  .Cases("ge", "nl", X86::COND_GE) // Greater or Equal/Not Less
2763  .Cases("le", "ng", X86::COND_LE) // Less or Equal/Not Greater
2764  .Cases("g", "nle", X86::COND_G) // Greater/Neither Less nor Equal
     // NOTE(review): the listing extraction dropped the terminating line here
     // (upstream: `.Default(X86::COND_INVALID);`) — verify upstream.
2766 }
2767 
2768 // true on failure, false otherwise
2769 // If no {z} mark was found - Parser doesn't advance
2770 bool X86AsmParser::ParseZ(std::unique_ptr<X86Operand> &Z,
2771  const SMLoc &StartLoc) {
2772  MCAsmParser &Parser = getParser();
2773  // Assuming we are just pass the '{' mark, quering the next token
2774  // Searched for {z}, but none was found. Return false, as no parsing error was
2775  // encountered
2776  if (!(getLexer().is(AsmToken::Identifier) &&
2777  (getLexer().getTok().getIdentifier() == "z")))
2778  return false;
2779  Parser.Lex(); // Eat z
2780  // Query and eat the '}' mark
2781  if (!getLexer().is(AsmToken::RCurly))
2782  return Error(getLexer().getLoc(), "Expected } at this point");
2783  Parser.Lex(); // Eat '}'
2784  // Assign Z with the {z} mark operand
2785  Z = X86Operand::CreateToken("{z}", StartLoc);
2786  return false;
2787 }
2788 
2789 // true on failure, false otherwise
// Parse the optional AVX-512 decorations that may follow an operand:
// memory broadcast "{1toN}", an op-mask "{%kN}", and/or a zeroing "{z}".
2790 bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands) {
2791  MCAsmParser &Parser = getParser();
2792  if (getLexer().is(AsmToken::LCurly)) {
2793  // Eat "{" and mark the current place.
2794  const SMLoc consumedToken = consumeToken();
2795  // Distinguish {1to<NUM>} from {%k<NUM>}.
2796  if(getLexer().is(AsmToken::Integer)) {
2797  // Parse memory broadcasting ({1to<NUM>}).
2798  if (getLexer().getTok().getIntVal() != 1)
2799  return TokError("Expected 1to<NUM> at this point");
2800  StringRef Prefix = getLexer().getTok().getString();
2801  Parser.Lex(); // Eat first token of 1to8
2802  if (!getLexer().is(AsmToken::Identifier))
2803  return TokError("Expected 1to<NUM> at this point");
2804  // Recognize only reasonable suffixes.
     // "1to8" lexes as integer "1" + identifier "to8"; re-join the pieces.
2805  SmallVector<char, 5> BroadcastVector;
2806  StringRef BroadcastString = (Prefix + getLexer().getTok().getIdentifier())
2807  .toStringRef(BroadcastVector);
2808  if (!BroadcastString.startswith("1to"))
2809  return TokError("Expected 1to<NUM> at this point");
2810  const char *BroadcastPrimitive =
2811  StringSwitch<const char *>(BroadcastString)
2812  .Case("1to2", "{1to2}")
2813  .Case("1to4", "{1to4}")
2814  .Case("1to8", "{1to8}")
2815  .Case("1to16", "{1to16}")
2816  .Case("1to32", "{1to32}")
2817  .Default(nullptr);
2818  if (!BroadcastPrimitive)
2819  return TokError("Invalid memory broadcast primitive.")
;
2820  Parser.Lex(); // Eat trailing token of 1toN
2821  if (!getLexer().is(AsmToken::RCurly))
2822  return TokError("Expected } at this point");
2823  Parser.Lex(); // Eat "}"
2824  Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
2825  consumedToken));
2826  // No AVX512 specific primitives can pass
2827  // after memory broadcasting, so return.
2828  return false;
2829  } else {
2830  // Parse either {k}{z}, {z}{k}, {k} or {z}
2831  // last one have no meaning, but GCC accepts it
2832  // Currently, we're just pass a '{' mark
2833  std::unique_ptr<X86Operand> Z;
2834  if (ParseZ(Z, consumedToken))
2835  return true;
2836  // Reaching here means that parsing of the allegadly '{z}' mark yielded
2837  // no errors.
2838  // Query for the need of further parsing for a {%k<NUM>} mark
2839  if (!Z || getLexer().is(AsmToken::LCurly)) {
2840  SMLoc StartLoc = Z ? consumeToken() : consumedToken;
2841  // Parse an op-mask register mark ({%k<NUM>}), which is now to be
2842  // expected
2843  unsigned RegNo;
2844  SMLoc RegLoc;
2845  if (!ParseRegister(RegNo, RegLoc, StartLoc) &&
2846  X86MCRegisterClasses[X86::VK1RegClassID].contains(RegNo)) {
2847  if (RegNo == X86::K0)
2848  return Error(RegLoc, "Register k0 can't be used as write mask");
2849  if (!getLexer().is(AsmToken::RCurly))
2850  return Error(getLexer().getLoc(), "Expected } at this point");
2851  Operands.push_back(X86Operand::CreateToken("{", StartLoc));
2852  Operands.push_back(
2853  X86Operand::CreateReg(RegNo, StartLoc, StartLoc));
2854  Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
2855  } else
2856  return Error(getLexer().getLoc(),
2857  "Expected an op-mask register at this point");
2858  // {%k<NUM>} mark is found, inquire for {z}
2859  if (getLexer().is(AsmToken::LCurly) && !Z) {
2860  // Have we've found a parsing error, or found no (expected) {z} mark
2861  // - report an error
2862  if (ParseZ(Z, consumeToken()) || !Z)
2863  return Error(getLexer().getLoc(),
2864  "Expected a {z} mark at this point");
2865 
2866  }
2867  // '{z}' on its own is meaningless, hence should be ignored.
2868  // on the contrary - have it been accompanied by a K register,
2869  // allow it.
2870  if (Z)
2871  Operands.push_back(std::move(Z));
2872  }
2873  }
2874  }
2875  return false;
2876 }
2877 
2878 /// ParseMemOperand: 'seg : disp(basereg, indexreg, scale)'. The '%ds:' prefix
2879 /// has already been parsed if present. disp may be provided as well.
2880 bool X86AsmParser::ParseMemOperand(unsigned SegReg, const MCExpr *Disp,
2881  SMLoc StartLoc, SMLoc EndLoc,
2883  MCAsmParser &Parser = getParser();
2884  SMLoc Loc;
2885  // Based on the initial passed values, we may be in any of these cases, we are
2886  // in one of these cases (with current position (*)):
2887 
2888  // 1. seg : * disp (base-index-scale-expr)
2889  // 2. seg : *(disp) (base-index-scale-expr)
2890  // 3. seg : *(base-index-scale-expr)
2891  // 4. disp *(base-index-scale-expr)
2892  // 5. *(disp) (base-index-scale-expr)
2893  // 6. *(base-index-scale-expr)
2894  // 7. disp *
2895  // 8. *(disp)
2896 
2897  // If we do not have an displacement yet, check if we're in cases 4 or 6 by
2898  // checking if the first object after the parenthesis is a register (or an
2899  // identifier referring to a register) and parse the displacement or default
2900  // to 0 as appropriate.
2901  auto isAtMemOperand = [this]() {
2902  if (this->getLexer().isNot(AsmToken::LParen))
2903  return false;
2904  AsmToken Buf[2];
2905  StringRef Id;
2906  auto TokCount = this->getLexer().peekTokens(Buf, true);
2907  if (TokCount == 0)
2908  return false;
2909  switch (Buf[0].getKind()) {
2910  case AsmToken::Percent:
2911  case AsmToken::Comma:
2912  return true;
2913  // These lower cases are doing a peekIdentifier.
2914  case AsmToken::At:
2915  case AsmToken::Dollar:
2916  if ((TokCount > 1) &&
2917  (Buf[1].is(AsmToken::Identifier) || Buf[1].is(AsmToken::String)) &&
2918  (Buf[0].getLoc().getPointer() + 1 == Buf[1].getLoc().getPointer()))
2919  Id = StringRef(Buf[0].getLoc().getPointer(),
2920  Buf[1].getIdentifier().size() + 1);
2921  break;
2922  case AsmToken::Identifier:
2923  case AsmToken::String:
2924  Id = Buf[0].getIdentifier();
2925  break;
2926  default:
2927  return false;
2928  }
2929  // We have an ID. Check if it is bound to a register.
2930  if (!Id.empty()) {
2931  MCSymbol *Sym = this->getContext().getOrCreateSymbol(Id);
2932  if (Sym->isVariable()) {
2933  auto V = Sym->getVariableValue(/*SetUsed*/ false);
2934  return isa<X86MCExpr>(V);
2935  }
2936  }
2937  return false;
2938  };
2939 
2940  if (!Disp) {
2941  // Parse immediate if we're not at a mem operand yet.
2942  if (!isAtMemOperand()) {
2943  if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(Disp, EndLoc))
2944  return true;
2945  assert(!isa<X86MCExpr>(Disp) && "Expected non-register here.");
2946  } else {
2947  // Disp is implicitly zero if we haven't parsed it yet.
2948  Disp = MCConstantExpr::create(0, Parser.getContext());
2949  }
2950  }
2951 
2952  // We are now either at the end of the operand or at the '(' at the start of a
2953  // base-index-scale-expr.
2954 
2955  if (!parseOptionalToken(AsmToken::LParen)) {
2956  if (SegReg == 0)
2957  Operands.push_back(
2958  X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc));
2959  else
2960  Operands.push_back(X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
2961  0, 0, 1, StartLoc, EndLoc));
2962  return false;
2963  }
2964 
2965  // If we reached here, then eat the '(' and Process
2966  // the rest of the memory operand.
2967  unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
2968  SMLoc BaseLoc = getLexer().getLoc();
2969  const MCExpr *E;
2970  StringRef ErrMsg;
2971 
2972  // Parse BaseReg if one is provided.
2973  if (getLexer().isNot(AsmToken::Comma) && getLexer().isNot(AsmToken::RParen)) {
2974  if (Parser.parseExpression(E, EndLoc) ||
2975  check(!isa<X86MCExpr>(E), BaseLoc, "expected register here"))
2976  return true;
2977 
2978  // Check the register.
2979  BaseReg = cast<X86MCExpr>(E)->getRegNo();
2980  if (BaseReg == X86::EIZ || BaseReg == X86::RIZ)
2981  return Error(BaseLoc, "eiz and riz can only be used as index registers",
2982  SMRange(BaseLoc, EndLoc));
2983  }
2984 
2985  if (parseOptionalToken(AsmToken::Comma)) {
2986  // Following the comma we should have either an index register, or a scale
2987  // value. We don't support the later form, but we want to parse it
2988  // correctly.
2989  //
2990  // Even though it would be completely consistent to support syntax like
2991  // "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
2992  if (getLexer().isNot(AsmToken::RParen)) {
2993  if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(E, EndLoc))
2994  return true;
2995 
2996  if (!isa<X86MCExpr>(E)) {
2997  // We've parsed an unexpected Scale Value instead of an index
2998  // register. Interpret it as an absolute.
2999  int64_t ScaleVal;
3000  if (!E->evaluateAsAbsolute(ScaleVal, getStreamer().getAssemblerPtr()))
3001  return Error(Loc, "expected absolute expression");
3002  if (ScaleVal != 1)
3003  Warning(Loc, "scale factor without index register is ignored");
3004  Scale = 1;
3005  } else { // IndexReg Found.
3006  IndexReg = cast<X86MCExpr>(E)->getRegNo();
3007 
3008  if (BaseReg == X86::RIP)
3009  return Error(Loc,
3010  "%rip as base register can not have an index register");
3011  if (IndexReg == X86::RIP)
3012  return Error(Loc, "%rip is not allowed as an index register");
3013 
3014  if (parseOptionalToken(AsmToken::Comma)) {
3015  // Parse the scale amount:
3016  // ::= ',' [scale-expression]
3017 
3018  // A scale amount without an index is ignored.
3019  if (getLexer().isNot(AsmToken::RParen)) {
3020  int64_t ScaleVal;
3021  if (Parser.parseTokenLoc(Loc) ||
3022  Parser.parseAbsoluteExpression(ScaleVal))
3023  return Error(Loc, "expected scale expression");
3024  Scale = (unsigned)ScaleVal;
3025  // Validate the scale amount.
3026  if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
3027  Scale != 1)
3028  return Error(Loc, "scale factor in 16-bit address must be 1");
3029  if (checkScale(Scale, ErrMsg))
3030  return Error(Loc, ErrMsg);
3031  }
3032  }
3033  }
3034  }
3035  }
3036 
3037  // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
3038  if (parseToken(AsmToken::RParen, "unexpected token in memory operand"))
3039  return true;
3040 
3041  // This is to support otherwise illegal operand (%dx) found in various
3042  // unofficial manuals examples (e.g. "out[s]?[bwl]? %al, (%dx)") and must now
3043  // be supported. Mark such DX variants separately fix only in special cases.
3044  if (BaseReg == X86::DX && IndexReg == 0 && Scale == 1 && SegReg == 0 &&
3045  isa<MCConstantExpr>(Disp) &&
3046  cast<MCConstantExpr>(Disp)->getValue() == 0) {
3047  Operands.push_back(X86Operand::CreateDXReg(BaseLoc, BaseLoc));
3048  return false;
3049  }
3050 
3051  if (CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
3052  ErrMsg))
3053  return Error(BaseLoc, ErrMsg);
3054 
3055  if (SegReg || BaseReg || IndexReg)
3056  Operands.push_back(X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
3057  BaseReg, IndexReg, Scale, StartLoc,
3058  EndLoc));
3059  else
3060  Operands.push_back(
3061  X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc));
3062  return false;
3063 }
3064 
3065 // Parse either a standard primary expression or a register.
3066 bool X86AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
3067  MCAsmParser &Parser = getParser();
3068  // See if this is a register first.
3069  if (getTok().is(AsmToken::Percent) ||
3070  (isParsingIntelSyntax() && getTok().is(AsmToken::Identifier) &&
3071  MatchRegisterName(Parser.getTok().getString()))) {
3072  SMLoc StartLoc = Parser.getTok().getLoc();
3073  unsigned RegNo;
3074  if (ParseRegister(RegNo, StartLoc, EndLoc))
3075  return true;
3076  Res = X86MCExpr::create(RegNo, Parser.getContext());
3077  return false;
3078  }
3079  return Parser.parsePrimaryExpr(Res, EndLoc, nullptr);
3080 }
3081 
3082 bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
3083  SMLoc NameLoc, OperandVector &Operands) {
3084  MCAsmParser &Parser = getParser();
3085  InstInfo = &Info;
3086 
3087  // Reset the forced VEX encoding.
3088  ForcedVEXEncoding = VEXEncoding_Default;
3089  ForcedDispEncoding = DispEncoding_Default;
3090 
3091  // Parse pseudo prefixes.
3092  while (true) {
3093  if (Name == "{") {
3094  if (getLexer().isNot(AsmToken::Identifier))
3095  return Error(Parser.getTok().getLoc(), "Unexpected token after '{'");
3096  std::string Prefix = Parser.getTok().getString().lower();
3097  Parser.Lex(); // Eat identifier.
3098  if (getLexer().isNot(AsmToken::RCurly))
3099  return Error(Parser.getTok().getLoc(), "Expected '}'");
3100  Parser.Lex(); // Eat curly.
3101 
3102  if (Prefix == "vex")
3103  ForcedVEXEncoding = VEXEncoding_VEX;
3104  else if (Prefix == "vex2")
3105  ForcedVEXEncoding = VEXEncoding_VEX2;
3106  else if (Prefix == "vex3")
3107  ForcedVEXEncoding = VEXEncoding_VEX3;
3108  else if (Prefix == "evex")
3109  ForcedVEXEncoding = VEXEncoding_EVEX;
3110  else if (Prefix == "disp8")
3111  ForcedDispEncoding = DispEncoding_Disp8;
3112  else if (Prefix == "disp32")
3113  ForcedDispEncoding = DispEncoding_Disp32;
3114  else
3115  return Error(NameLoc, "unknown prefix");
3116 
3117  NameLoc = Parser.getTok().getLoc();
3118  if (getLexer().is(AsmToken::LCurly)) {
3119  Parser.Lex();
3120  Name = "{";
3121  } else {
3122  if (getLexer().isNot(AsmToken::Identifier))
3123  return Error(Parser.getTok().getLoc(), "Expected identifier");
3124  // FIXME: The mnemonic won't match correctly if its not in lower case.
3125  Name = Parser.getTok().getString();
3126  Parser.Lex();
3127  }
3128  continue;
3129  }
3130  // Parse MASM style pseudo prefixes.
3131  if (isParsingMSInlineAsm()) {
3132  if (Name.equals_insensitive("vex"))
3133  ForcedVEXEncoding = VEXEncoding_VEX;
3134  else if (Name.equals_insensitive("vex2"))
3135  ForcedVEXEncoding = VEXEncoding_VEX2;
3136  else if (Name.equals_insensitive("vex3"))
3137  ForcedVEXEncoding = VEXEncoding_VEX3;
3138  else if (Name.equals_insensitive("evex"))
3139  ForcedVEXEncoding = VEXEncoding_EVEX;
3140 
3141  if (ForcedVEXEncoding != VEXEncoding_Default) {
3142  if (getLexer().isNot(AsmToken::Identifier))
3143  return Error(Parser.getTok().getLoc(), "Expected identifier");
3144  // FIXME: The mnemonic won't match correctly if its not in lower case.
3145  Name = Parser.getTok().getString();
3146  NameLoc = Parser.getTok().getLoc();
3147  Parser.Lex();
3148  }
3149  }
3150  break;
3151  }
3152 
3153  // Support the suffix syntax for overriding displacement size as well.
3154  if (Name.consume_back(".d32")) {
3155  ForcedDispEncoding = DispEncoding_Disp32;
3156  } else if (Name.consume_back(".d8")) {
3157  ForcedDispEncoding = DispEncoding_Disp8;
3158  }
3159 
3160  StringRef PatchedName = Name;
3161 
3162  // Hack to skip "short" following Jcc.
3163  if (isParsingIntelSyntax() &&
3164  (PatchedName == "jmp" || PatchedName == "jc" || PatchedName == "jnc" ||
3165  PatchedName == "jcxz" || PatchedName == "jecxz" ||
3166  (PatchedName.startswith("j") &&
3167  ParseConditionCode(PatchedName.substr(1)) != X86::COND_INVALID))) {
3168  StringRef NextTok = Parser.getTok().getString();
3169  if (Parser.isParsingMasm() ? NextTok.equals_insensitive("short")
3170  : NextTok == "short") {
3171  SMLoc NameEndLoc =
3172  NameLoc.getFromPointer(NameLoc.getPointer() + Name.size());
3173  // Eat the short keyword.
3174  Parser.Lex();
3175  // MS and GAS ignore the short keyword; they both determine the jmp type
3176  // based on the distance of the label. (NASM does emit different code with
3177  // and without "short," though.)
3178  InstInfo->AsmRewrites->emplace_back(AOK_Skip, NameEndLoc,
3179  NextTok.size() + 1);
3180  }
3181  }
3182 
3183  // FIXME: Hack to recognize setneb as setne.
3184  if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
3185  PatchedName != "setb" && PatchedName != "setnb")
3186  PatchedName = PatchedName.substr(0, Name.size()-1);
3187 
3188  unsigned ComparisonPredicate = ~0U;
3189 
3190  // FIXME: Hack to recognize cmp<comparison code>{sh,ss,sd,ph,ps,pd}.
3191  if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
3192  (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
3193  PatchedName.endswith("sh") || PatchedName.endswith("ph") ||
3194  PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
3195  bool IsVCMP = PatchedName[0] == 'v';
3196  unsigned CCIdx = IsVCMP ? 4 : 3;
3197  unsigned CC = StringSwitch<unsigned>(
3198  PatchedName.slice(CCIdx, PatchedName.size() - 2))
3199  .Case("eq", 0x00)
3200  .Case("eq_oq", 0x00)
3201  .Case("lt", 0x01)
3202  .Case("lt_os", 0x01)
3203  .Case("le", 0x02)
3204  .Case("le_os", 0x02)
3205  .Case("unord", 0x03)
3206  .Case("unord_q", 0x03)
3207  .Case("neq", 0x04)
3208  .Case("neq_uq", 0x04)
3209  .Case("nlt", 0x05)
3210  .Case("nlt_us", 0x05)
3211  .Case("nle", 0x06)
3212  .Case("nle_us", 0x06)
3213  .Case("ord", 0x07)
3214  .Case("ord_q", 0x07)
3215  /* AVX only from here */
3216  .Case("eq_uq", 0x08)
3217  .Case("nge", 0x09)
3218  .Case("nge_us", 0x09)
3219  .Case("ngt", 0x0A)
3220  .Case("ngt_us", 0x0A)
3221  .Case("false", 0x0B)
3222  .Case("false_oq", 0x0B)
3223  .Case("neq_oq", 0x0C)
3224  .Case("ge", 0x0D)
3225  .Case("ge_os", 0x0D)
3226  .Case("gt", 0x0E)
3227  .Case("gt_os", 0x0E)
3228  .Case("true", 0x0F)
3229  .Case("true_uq", 0x0F)
3230  .Case("eq_os", 0x10)
3231  .Case("lt_oq", 0x11)
3232  .Case("le_oq", 0x12)
3233  .Case("unord_s", 0x13)
3234  .Case("neq_us", 0x14)
3235  .Case("nlt_uq", 0x15)
3236  .Case("nle_uq", 0x16)
3237  .Case("ord_s", 0x17)
3238  .Case("eq_us", 0x18)
3239  .Case("nge_uq", 0x19)
3240  .Case("ngt_uq", 0x1A)
3241  .Case("false_os", 0x1B)
3242  .Case("neq_os", 0x1C)
3243  .Case("ge_oq", 0x1D)
3244  .Case("gt_oq", 0x1E)
3245  .Case("true_us", 0x1F)
3246  .Default(~0U);
3247  if (CC != ~0U && (IsVCMP || CC < 8) &&
3248  (IsVCMP || PatchedName.back() != 'h')) {
3249  if (PatchedName.endswith("ss"))
3250  PatchedName = IsVCMP ? "vcmpss" : "cmpss";
3251  else if (PatchedName.endswith("sd"))
3252  PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
3253  else if (PatchedName.endswith("ps"))
3254  PatchedName = IsVCMP ? "vcmpps" : "cmpps";
3255  else if (PatchedName.endswith("pd"))
3256  PatchedName = IsVCMP ? "vcmppd" : "cmppd";
3257  else if (PatchedName.endswith("sh"))
3258  PatchedName = "vcmpsh";
3259  else if (PatchedName.endswith("ph"))
3260  PatchedName = "vcmpph";
3261  else
3262  llvm_unreachable("Unexpected suffix!");
3263 
3264  ComparisonPredicate = CC;
3265  }
3266  }
3267 
3268  // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
3269  if (PatchedName.startswith("vpcmp") &&
3270  (PatchedName.back() == 'b' || PatchedName.back() == 'w' ||
3271  PatchedName.back() == 'd' || PatchedName.back() == 'q')) {
3272  unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1;
3273  unsigned CC = StringSwitch<unsigned>(
3274  PatchedName.slice(5, PatchedName.size() - SuffixSize))
3275  .Case("eq", 0x0) // Only allowed on unsigned. Checked below.
3276  .Case("lt", 0x1)
3277  .Case("le", 0x2)
3278  //.Case("false", 0x3) // Not a documented alias.
3279  .Case("neq", 0x4)
3280  .Case("nlt", 0x5)
3281  .Case("nle", 0x6)
3282  //.Case("true", 0x7) // Not a documented alias.
3283  .Default(~0U);
3284  if (CC != ~0U && (CC != 0 || SuffixSize == 2)) {
3285  switch (PatchedName.back()) {
3286  default: llvm_unreachable("Unexpected character!");
3287  case 'b': PatchedName = SuffixSize == 2 ? "vpcmpub" : "vpcmpb"; break;
3288  case 'w': PatchedName = SuffixSize == 2 ? "vpcmpuw" : "vpcmpw"; break;
3289  case 'd': PatchedName = SuffixSize == 2 ? "vpcmpud" : "vpcmpd"; break;
3290  case 'q': PatchedName = SuffixSize == 2 ? "vpcmpuq" : "vpcmpq"; break;
3291  }
3292  // Set up the immediate to push into the operands later.
3293  ComparisonPredicate = CC;
3294  }
3295  }
3296 
3297  // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
3298  if (PatchedName.startswith("vpcom") &&
3299  (PatchedName.back() == 'b' || PatchedName.back() == 'w' ||
3300  PatchedName.back() == 'd' || PatchedName.back() == 'q')) {
3301  unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1;
3302  unsigned CC = StringSwitch<unsigned>(
3303  PatchedName.slice(5, PatchedName.size() - SuffixSize))
3304  .Case("lt", 0x0)
3305  .Case("le", 0x1)
3306  .Case("gt", 0x2)
3307  .Case("ge", 0x3)
3308  .Case("eq", 0x4)
3309  .Case("neq", 0x5)
3310  .Case("false", 0x6)
3311  .Case("true", 0x7)
3312  .Default(~0U);
3313  if (CC != ~0U) {
3314  switch (PatchedName.back()) {
3315  default: llvm_unreachable("Unexpected character!");
3316  case 'b': PatchedName = SuffixSize == 2 ? "vpcomub" : "vpcomb"; break;
3317  case 'w': PatchedName = SuffixSize == 2 ? "vpcomuw" : "vpcomw"; break;
3318  case 'd': PatchedName = SuffixSize == 2 ? "vpcomud" : "vpcomd"; break;
3319  case 'q': PatchedName = SuffixSize == 2 ? "vpcomuq" : "vpcomq"; break;
3320  }
3321  // Set up the immediate to push into the operands later.
3322  ComparisonPredicate = CC;
3323  }
3324  }
3325 
3326 
3327  // Determine whether this is an instruction prefix.
3328  // FIXME:
3329  // Enhance prefixes integrity robustness. for example, following forms
3330  // are currently tolerated:
3331  // repz repnz <insn> ; GAS errors for the use of two similar prefixes
3332  // lock addq %rax, %rbx ; Destination operand must be of memory type
3333  // xacquire <insn> ; xacquire must be accompanied by 'lock'
3334  bool IsPrefix =
3336  .Cases("cs", "ds", "es", "fs", "gs", "ss", true)
3337  .Cases("rex64", "data32", "data16", "addr32", "addr16", true)
3338  .Cases("xacquire", "xrelease", true)
3339  .Cases("acquire", "release", isParsingIntelSyntax())
3340  .Default(false);
3341 
3342  auto isLockRepeatNtPrefix = [](StringRef N) {
3343  return StringSwitch<bool>(N)
3344  .Cases("lock", "rep", "repe", "repz", "repne", "repnz", "notrack", true)
3345  .Default(false);
3346  };
3347 
3348  bool CurlyAsEndOfStatement = false;
3349 
3350  unsigned Flags = X86::IP_NO_PREFIX;
3351  while (isLockRepeatNtPrefix(Name.lower())) {
3352  unsigned Prefix =
3354  .Cases("lock", "lock", X86::IP_HAS_LOCK)
3355  .Cases("rep", "repe", "repz", X86::IP_HAS_REPEAT)
3356  .Cases("repne", "repnz", X86::IP_HAS_REPEAT_NE)
3357  .Cases("notrack", "notrack", X86::IP_HAS_NOTRACK)
3358  .Default(X86::IP_NO_PREFIX); // Invalid prefix (impossible)
3359  Flags |= Prefix;
3360  if (getLexer().is(AsmToken::EndOfStatement)) {
3361  // We don't have real instr with the given prefix
3362  // let's use the prefix as the instr.
3363  // TODO: there could be several prefixes one after another
3364  Flags = X86::IP_NO_PREFIX;
3365  break;
3366  }
3367  // FIXME: The mnemonic won't match correctly if its not in lower case.
3368  Name = Parser.getTok().getString();
3369  Parser.Lex(); // eat the prefix
3370  // Hack: we could have something like "rep # some comment" or
3371  // "lock; cmpxchg16b $1" or "lock\0A\09incl" or "lock/incl"
3372  while (Name.startswith(";") || Name.startswith("\n") ||
3373  Name.startswith("#") || Name.startswith("\t") ||
3374  Name.startswith("/")) {
3375  // FIXME: The mnemonic won't match correctly if its not in lower case.
3376  Name = Parser.getTok().getString();
3377  Parser.Lex(); // go to next prefix or instr
3378  }
3379  }
3380 
3381  if (Flags)
3382  PatchedName = Name;
3383 
3384  // Hacks to handle 'data16' and 'data32'
3385  if (PatchedName == "data16" && is16BitMode()) {
3386  return Error(NameLoc, "redundant data16 prefix");
3387  }
3388  if (PatchedName == "data32") {
3389  if (is32BitMode())
3390  return Error(NameLoc, "redundant data32 prefix");
3391  if (is64BitMode())
3392  return Error(NameLoc, "'data32' is not supported in 64-bit mode");
3393  // Hack to 'data16' for the table lookup.
3394  PatchedName = "data16";
3395 
3396  if (getLexer().isNot(AsmToken::EndOfStatement)) {
3397  StringRef Next = Parser.getTok().getString();
3398  getLexer().Lex();
3399  // data32 effectively changes the instruction suffix.
3400  // TODO Generalize.
3401  if (Next == "callw")
3402  Next = "calll";
3403  if (Next == "ljmpw")
3404  Next = "ljmpl";
3405 
3406  Name = Next;
3407  PatchedName = Name;
3408  ForcedDataPrefix = X86::Is32Bit;
3409  IsPrefix = false;
3410  }
3411  }
3412 
3413  Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
3414 
3415  // Push the immediate if we extracted one from the mnemonic.
3416  if (ComparisonPredicate != ~0U && !isParsingIntelSyntax()) {
3417  const MCExpr *ImmOp = MCConstantExpr::create(ComparisonPredicate,
3418  getParser().getContext());
3419  Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
3420  }
3421 
3422  // This does the actual operand parsing. Don't parse any more if we have a
3423  // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
3424  // just want to parse the "lock" as the first instruction and the "incl" as
3425  // the next one.
3426  if (getLexer().isNot(AsmToken::EndOfStatement) && !IsPrefix) {
3427  // Parse '*' modifier.
3428  if (getLexer().is(AsmToken::Star))
3429  Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
3430 
3431  // Read the operands.
3432  while (true) {
3433  if (parseOperand(Operands, Name))
3434  return true;
3435  if (HandleAVX512Operand(Operands))
3436  return true;
3437 
3438  // check for comma and eat it
3439  if (getLexer().is(AsmToken::Comma))
3440  Parser.Lex();
3441  else
3442  break;
3443  }
3444 
3445  // In MS inline asm curly braces mark the beginning/end of a block,
3446  // therefore they should be interepreted as end of statement
3447  CurlyAsEndOfStatement =
3448  isParsingIntelSyntax() && isParsingMSInlineAsm() &&
3449  (getLexer().is(AsmToken::LCurly) || getLexer().is(AsmToken::RCurly));
3450  if (getLexer().isNot(AsmToken::EndOfStatement) && !CurlyAsEndOfStatement)
3451  return TokError("unexpected token in argument list");
3452  }
3453 
3454  // Push the immediate if we extracted one from the mnemonic.
3455  if (ComparisonPredicate != ~0U && isParsingIntelSyntax()) {
3456  const MCExpr *ImmOp = MCConstantExpr::create(ComparisonPredicate,
3457  getParser().getContext());
3458  Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
3459  }
3460 
3461  // Consume the EndOfStatement or the prefix separator Slash
3462  if (getLexer().is(AsmToken::EndOfStatement) ||
3463  (IsPrefix && getLexer().is(AsmToken::Slash)))
3464  Parser.Lex();
3465  else if (CurlyAsEndOfStatement)
3466  // Add an actual EndOfStatement before the curly brace
3467  Info.AsmRewrites->emplace_back(AOK_EndOfStatement,
3468  getLexer().getTok().getLoc(), 0);
3469 
3470  // This is for gas compatibility and cannot be done in td.
3471  // Adding "p" for some floating point with no argument.
3472  // For example: fsub --> fsubp
3473  bool IsFp =
3474  Name == "fsub" || Name == "fdiv" || Name == "fsubr" || Name == "fdivr";
3475  if (IsFp && Operands.size() == 1) {
3476  const char *Repl = StringSwitch<const char *>(Name)
3477  .Case("fsub", "fsubp")
3478  .Case("fdiv", "fdivp")
3479  .Case("fsubr", "fsubrp")
3480  .Case("fdivr", "fdivrp");
3481  static_cast<X86Operand &>(*Operands[0]).setTokenValue(Repl);
3482  }
3483 
3484  if ((Name == "mov" || Name == "movw" || Name == "movl") &&
3485  (Operands.size() == 3)) {
3486  X86Operand &Op1 = (X86Operand &)*Operands[1];
3487  X86Operand &Op2 = (X86Operand &)*Operands[2];
3488  SMLoc Loc = Op1.getEndLoc();
3489  // Moving a 32 or 16 bit value into a segment register has the same
3490  // behavior. Modify such instructions to always take shorter form.
3491  if (Op1.isReg() && Op2.isReg() &&
3492  X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(
3493  Op2.getReg()) &&
3494  (X86MCRegisterClasses[X86::GR16RegClassID].contains(Op1.getReg()) ||
3495  X86MCRegisterClasses[X86::GR32RegClassID].contains(Op1.getReg()))) {
3496  // Change instruction name to match new instruction.
3497  if (Name != "mov" && Name[3] == (is16BitMode() ? 'l' : 'w')) {
3498  Name = is16BitMode() ? "movw" : "movl";
3499  Operands[0] = X86Operand::CreateToken(Name, NameLoc);
3500  }
3501  // Select the correct equivalent 16-/32-bit source register.
3502  unsigned Reg =
3503  getX86SubSuperRegisterOrZero(Op1.getReg(), is16BitMode() ? 16 : 32);
3504  Operands[1] = X86Operand::CreateReg(Reg, Loc, Loc);
3505  }
3506  }
3507 
3508  // This is a terrible hack to handle "out[s]?[bwl]? %al, (%dx)" ->
3509  // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
3510  // documented form in various unofficial manuals, so a lot of code uses it.
3511  if ((Name == "outb" || Name == "outsb" || Name == "outw" || Name == "outsw" ||
3512  Name == "outl" || Name == "outsl" || Name == "out" || Name == "outs") &&
3513  Operands.size() == 3) {
3514  X86Operand &Op = (X86Operand &)*Operands.back();
3515  if (Op.isDXReg())
3516  Operands.back() = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
3517  Op.getEndLoc());
3518  }
3519  // Same hack for "in[s]?[bwl]? (%dx), %al" -> "inb %dx, %al".
3520  if ((Name == "inb" || Name == "insb" || Name == "inw" || Name == "insw" ||
3521  Name == "inl" || Name == "insl" || Name == "in" || Name == "ins") &&
3522  Operands.size() == 3) {
3523  X86Operand &Op = (X86Operand &)*Operands[1];
3524  if (Op.isDXReg())
3525  Operands[1] = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
3526  Op.getEndLoc());
3527  }
3528 
3530  bool HadVerifyError = false;
3531 
3532  // Append default arguments to "ins[bwld]"
3533  if (Name.startswith("ins") &&
3534  (Operands.size() == 1 || Operands.size() == 3) &&
3535  (Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd" ||
3536  Name == "ins")) {
3537 
3538  AddDefaultSrcDestOperands(TmpOperands,
3539  X86Operand::CreateReg(X86::DX, NameLoc, NameLoc),
3540  DefaultMemDIOperand(NameLoc));
3541  HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3542  }
3543 
3544  // Append default arguments to "outs[bwld]"
3545  if (Name.startswith("outs") &&
3546  (Operands.size() == 1 || Operands.size() == 3) &&
3547  (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
3548  Name == "outsd" || Name == "outs")) {
3549  AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
3550  X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
3551  HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3552  }
3553 
3554  // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
3555  // values of $SIREG according to the mode. It would be nice if this
3556  // could be achieved with InstAlias in the tables.
3557  if (Name.startswith("lods") &&
3558  (Operands.size() == 1 || Operands.size() == 2) &&
3559  (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
3560  Name == "lodsl" || Name == "lodsd" || Name == "lodsq")) {
3561  TmpOperands.push_back(DefaultMemSIOperand(NameLoc));
3562  HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3563  }
3564 
3565  // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
3566  // values of $DIREG according to the mode. It would be nice if this
3567  // could be achieved with InstAlias in the tables.
3568  if (Name.startswith("stos") &&
3569  (Operands.size() == 1 || Operands.size() == 2) &&
3570  (Name == "stos" || Name == "stosb" || Name == "stosw" ||
3571  Name == "stosl" || Name == "stosd" || Name == "stosq")) {
3572  TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
3573  HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3574  }
3575 
3576  // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
3577  // values of $DIREG according to the mode. It would be nice if this
3578  // could be achieved with InstAlias in the tables.
3579  if (Name.startswith("scas") &&
3580  (Operands.size() == 1 || Operands.size() == 2) &&
3581  (Name == "scas" || Name == "scasb" || Name == "scasw" ||
3582  Name == "scasl" || Name == "scasd" || Name == "scasq")) {
3583  TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
3584  HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3585  }
3586 
3587  // Add default SI and DI operands to "cmps[bwlq]".
3588  if (Name.startswith("cmps") &&
3589  (Operands.size() == 1 || Operands.size() == 3) &&
3590  (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
3591  Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
3592  AddDefaultSrcDestOperands(TmpOperands, DefaultMemDIOperand(NameLoc),
3593  DefaultMemSIOperand(NameLoc));
3594  HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3595  }
3596 
3597  // Add default SI and DI operands to "movs[bwlq]".
3598  if (((Name.startswith("movs") &&
3599  (Name == "movs" || Name == "movsb" || Name == "movsw" ||
3600  Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
3601  (Name.startswith("smov") &&
3602  (Name == "smov" || Name == "smovb" || Name == "smovw" ||
3603  Name == "smovl" || Name == "smovd" || Name == "smovq"))) &&
3604  (Operands.size() == 1 || Operands.size() == 3)) {
3605  if (Name == "movsd" && Operands.size() == 1 && !isParsingIntelSyntax())
3606  Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
3607  AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
3608  DefaultMemDIOperand(NameLoc));
3609  HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3610  }
3611 
3612  // Check if we encountered an error for one the string insturctions
3613  if (HadVerifyError) {
3614  return HadVerifyError;
3615  }
3616 
3617  // Transforms "xlat mem8" into "xlatb"
3618  if ((Name == "xlat" || Name == "xlatb") && Operands.size() == 2) {
3619  X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
3620  if (Op1.isMem8()) {
3621  Warning(Op1.getStartLoc(), "memory operand is only for determining the "
3622  "size, (R|E)BX will be used for the location");
3623  Operands.pop_back();
3624  static_cast<X86Operand &>(*Operands[0]).setTokenValue("xlatb");
3625  }
3626  }
3627 
3628  if (Flags)
3629  Operands.push_back(X86Operand::CreatePrefix(Flags, NameLoc, NameLoc));
3630  return false;
3631 }
3632 
3633 bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
3634  const MCRegisterInfo *MRI = getContext().getRegisterInfo();
3635 
3636  switch (Inst.getOpcode()) {
3637  default: return false;
3638  case X86::JMP_1:
3639  // {disp32} forces a larger displacement as if the instruction was relaxed.
3640  // NOTE: 16-bit mode uses 16-bit displacement even though it says {disp32}.
3641  // This matches GNU assembler.
3642  if (ForcedDispEncoding == DispEncoding_Disp32) {
3643  Inst.setOpcode(is16BitMode() ? X86::JMP_2 : X86::JMP_4);
3644  return true;
3645  }
3646 
3647  return false;
3648  case X86::JCC_1:
3649  // {disp32} forces a larger displacement as if the instruction was relaxed.
3650  // NOTE: 16-bit mode uses 16-bit displacement even though it says {disp32}.
3651  // This matches GNU assembler.
3652  if (ForcedDispEncoding == DispEncoding_Disp32) {
3653  Inst.setOpcode(is16BitMode() ? X86::JCC_2 : X86::JCC_4);
3654  return true;
3655  }
3656 
3657  return false;
3658  case X86::VMOVZPQILo2PQIrr:
3659  case X86::VMOVAPDrr:
3660  case X86::VMOVAPDYrr:
3661  case X86::VMOVAPSrr:
3662  case X86::VMOVAPSYrr:
3663  case X86::VMOVDQArr:
3664  case X86::VMOVDQAYrr:
3665  case X86::VMOVDQUrr:
3666  case X86::VMOVDQUYrr:
3667  case X86::VMOVUPDrr:
3668  case X86::VMOVUPDYrr:
3669  case X86::VMOVUPSrr:
3670  case X86::VMOVUPSYrr: {
3671  // We can get a smaller encoding by using VEX.R instead of VEX.B if one of
3672  // the registers is extended, but other isn't.
3673  if (ForcedVEXEncoding == VEXEncoding_VEX3 ||
3674  MRI->getEncodingValue(Inst.getOperand(0).getReg()) >= 8 ||
3675  MRI->getEncodingValue(Inst.getOperand(1).getReg()) < 8)
3676  return false;
3677 
3678  unsigned NewOpc;
3679  switch (Inst.getOpcode()) {
3680  default: llvm_unreachable("Invalid opcode");
3681  case X86::VMOVZPQILo2PQIrr: NewOpc = X86::VMOVPQI2QIrr; break;
3682  case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
3683  case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
3684  case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
3685  case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
3686  case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
3687  case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
3688  case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
3689  case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
3690  case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
3691  case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
3692  case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
3693  case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
3694  }
3695  Inst.setOpcode(NewOpc);
3696  return true;
3697  }
3698  case X86::VMOVSDrr:
3699  case X86::VMOVSSrr: {
3700  // We can get a smaller encoding by using VEX.R instead of VEX.B if one of
3701  // the registers is extended, but other isn't.
3702  if (ForcedVEXEncoding == VEXEncoding_VEX3 ||
3703  MRI->getEncodingValue(Inst.getOperand(0).getReg()) >= 8 ||
3704  MRI->getEncodingValue(Inst.getOperand(2).getReg()) < 8)
3705  return false;
3706 
3707  unsigned NewOpc;
3708  switch (Inst.getOpcode()) {
3709  default: llvm_unreachable("Invalid opcode");
3710  case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
3711  case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
3712  }
3713  Inst.setOpcode(NewOpc);
3714  return true;
3715  }
3716  case X86::RCR8ri: case X86::RCR16ri: case X86::RCR32ri: case X86::RCR64ri:
3717  case X86::RCL8ri: case X86::RCL16ri: case X86::RCL32ri: case X86::RCL64ri:
3718  case X86::ROR8ri: case X86::ROR16ri: case X86::ROR32ri: case X86::ROR64ri:
3719  case X86::ROL8ri: case X86::ROL16ri: case X86::ROL32ri: case X86::ROL64ri:
3720  case X86::SAR8ri: case X86::SAR16ri: case X86::SAR32ri: case X86::SAR64ri:
3721  case X86::SHR8ri: case X86::SHR16ri: case X86::SHR32ri: case X86::SHR64ri:
3722  case X86::SHL8ri: case X86::SHL16ri: case X86::SHL32ri: case X86::SHL64ri: {
3723  // Optimize s{hr,ar,hl} $1, <op> to "shift <op>". Similar for rotate.
3724  // FIXME: It would be great if we could just do this with an InstAlias.
3725  if (!Inst.getOperand(2).isImm() || Inst.getOperand(2).getImm() != 1)
3726  return false;
3727 
3728  unsigned NewOpc;
3729  switch (Inst.getOpcode()) {
3730  default: llvm_unreachable("Invalid opcode");
3731  case X86::RCR8ri: NewOpc = X86::RCR8r1; break;
3732  case X86::RCR16ri: NewOpc = X86::RCR16r1; break;
3733  case X86::RCR32ri: NewOpc = X86::RCR32r1; break;
3734  case X86::RCR64ri: NewOpc = X86::RCR64r1; break;
3735  case X86::RCL8ri: NewOpc = X86::RCL8r1; break;
3736  case X86::RCL16ri: NewOpc = X86::RCL16r1; break;
3737  case X86::RCL32ri: NewOpc = X86::RCL32r1; break;
3738  case X86::RCL64ri: NewOpc = X86::RCL64r1; break;
3739  case X86::ROR8ri: NewOpc = X86::ROR8r1; break;
3740  case X86::ROR16ri: NewOpc = X86::ROR16r1; break;
3741  case X86::ROR32ri: NewOpc = X86::ROR32r1; break;
3742  case X86::ROR64ri: NewOpc = X86::ROR64r1; break;
3743  case X86::ROL8ri: NewOpc = X86::ROL8r1; break;
3744  case X86::ROL16ri: NewOpc = X86::ROL16r1; break;
3745  case X86::ROL32ri: NewOpc = X86::ROL32r1; break;
3746  case X86::ROL64ri: NewOpc = X86::ROL64r1; break;
3747  case X86::SAR8ri: NewOpc = X86::SAR8r1; break;
3748  case X86::SAR16ri: NewOpc = X86::SAR16r1; break;
3749  case X86::SAR32ri: NewOpc = X86::SAR32r1; break;
3750  case X86::SAR64ri: NewOpc = X86::SAR64r1; break;
3751  case X86::SHR8ri: NewOpc = X86::SHR8r1; break;
3752  case X86::SHR16ri: NewOpc = X86::SHR16r1; break;
3753  case X86::SHR32ri: NewOpc = X86::SHR32r1; break;
3754  case X86::SHR64ri: NewOpc = X86::SHR64r1; break;
3755  case X86::SHL8ri: NewOpc = X86::SHL8r1; break;
3756  case X86::SHL16ri: NewOpc = X86::SHL16r1; break;
3757  case X86::SHL32ri: NewOpc = X86::SHL32r1; break;
3758  case X86::SHL64ri: NewOpc = X86::SHL64r1; break;
3759  }
3760 
3761  MCInst TmpInst;
3762  TmpInst.setOpcode(NewOpc);
3763  TmpInst.addOperand(Inst.getOperand(0));
3764  TmpInst.addOperand(Inst.getOperand(1));
3765  Inst = TmpInst;
3766  return true;
3767  }
3768  case X86::RCR8mi: case X86::RCR16mi: case X86::RCR32mi: case X86::RCR64mi:
3769  case X86::RCL8mi: case X86::RCL16mi: case X86::RCL32mi: case X86::RCL64mi:
3770  case X86::ROR8mi: case X86::ROR16mi: case X86::ROR32mi: case X86::ROR64mi:
3771  case X86::ROL8mi: case X86::ROL16mi: case X86::ROL32mi: case X86::ROL64mi:
3772  case X86::SAR8mi: case X86::SAR16mi: case X86::SAR32mi: case X86::SAR64mi:
3773  case X86::SHR8mi: case X86::SHR16mi: case X86::SHR32mi: case X86::SHR64mi:
3774  case X86::SHL8mi: case X86::SHL16mi: case X86::SHL32mi: case X86::SHL64mi: {
3775  // Optimize s{hr,ar,hl} $1, <op> to "shift <op>". Similar for rotate.
3776  // FIXME: It would be great if we could just do this with an InstAlias.
3777  if (!Inst.getOperand(X86::AddrNumOperands).isImm() ||
3778  Inst.getOperand(X86::AddrNumOperands).getImm() != 1)
3779  return false;
3780 
3781  unsigned NewOpc;
3782  switch (Inst.getOpcode()) {
3783  default: llvm_unreachable("Invalid opcode");
3784  case X86::RCR8mi: NewOpc = X86::RCR8m1; break;
3785  case X86::RCR16mi: NewOpc = X86::RCR16m1; break;
3786  case X86::RCR32mi: NewOpc = X86::RCR32m1; break;
3787  case X86::RCR64mi: NewOpc = X86::RCR64m1; break;
3788  case X86::RCL8mi: NewOpc = X86::RCL8m1; break;
3789  case X86::RCL16mi: NewOpc = X86::RCL16m1; break;
3790  case X86::RCL32mi: NewOpc = X86::RCL32m1; break;
3791  case X86::RCL64mi: NewOpc = X86::RCL64m1; break;
3792  case X86::ROR8mi: NewOpc = X86::ROR8m1; break;
3793  case X86::ROR16mi: NewOpc = X86::ROR16m1; break;
3794  case X86::ROR32mi: NewOpc = X86::ROR32m1; break;
3795  case X86::ROR64mi: NewOpc = X86::ROR64m1; break;
3796  case X86::ROL8mi: NewOpc = X86::ROL8m1; break;
3797  case X86::ROL16mi: NewOpc = X86::ROL16m1; break;
3798  case X86::ROL32mi: NewOpc = X86::ROL32m1; break;
3799  case X86::ROL64mi: NewOpc = X86::ROL64m1; break;
3800  case X86::SAR8mi: NewOpc = X86::SAR8m1; break;
3801  case X86::SAR16mi: NewOpc = X86::SAR16m1; break;
3802  case X86::SAR32mi: NewOpc = X86::SAR32m1; break;
3803  case X86::SAR64mi: NewOpc = X86::SAR64m1; break;
3804  case X86::SHR8mi: NewOpc = X86::SHR8m1; break;
3805  case X86::SHR16mi: NewOpc = X86::SHR16m1; break;
3806  case X86::SHR32mi: NewOpc = X86::SHR32m1; break;
3807  case X86::SHR64mi: NewOpc = X86::SHR64m1; break;
3808  case X86::SHL8mi: NewOpc = X86::SHL8m1; break;
3809  case X86::SHL16mi: NewOpc = X86::SHL16m1; break;
3810  case X86::SHL32mi: NewOpc = X86::SHL32m1; break;
3811  case X86::SHL64mi: NewOpc = X86::SHL64m1; break;
3812  }
3813 
3814  MCInst TmpInst;
3815  TmpInst.setOpcode(NewOpc);
3816  for (int i = 0; i != X86::AddrNumOperands; ++i)
3817  TmpInst.addOperand(Inst.getOperand(i));
3818  Inst = TmpInst;
3819  return true;
3820  }
3821  case X86::INT: {
3822  // Transforms "int $3" into "int3" as a size optimization. We can't write an
3823  // instalias with an immediate operand yet.
3824  if (!Inst.getOperand(0).isImm() || Inst.getOperand(0).getImm() != 3)
3825  return false;
3826 
3827  MCInst TmpInst;
3828  TmpInst.setOpcode(X86::INT3);
3829  Inst = TmpInst;
3830  return true;
3831  }
3832  }
3833 }
3834 
// Post-match semantic validation that the table-generated matcher cannot
// express: register-aliasing constraints for complex-FP16 FMA/FMUL, 4FMA /
// VP4DPWSSD source-group alignment, gather operand distinctness, and the
// AH/BH/CH/DH-with-REX restriction. Emits a Warning or Error anchored at the
// mnemonic operand's location; returns that diagnostic's result (false when
// no problem is found).
3835 bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
3836  using namespace X86;
3837  const MCRegisterInfo *MRI = getContext().getRegisterInfo();
3838  unsigned Opcode = Inst.getOpcode();
3839  uint64_t TSFlags = MII.get(Opcode).TSFlags;
// Complex FMADD forms: sources start at operand 2 (operand 1 is an
// accumulator tied differently), and none may alias the destination.
3840  if (isVFCMADDCPH(Opcode) || isVFCMADDCSH(Opcode) || isVFMADDCPH(Opcode) ||
3841  isVFMADDCSH(Opcode)) {
3842  unsigned Dest = Inst.getOperand(0).getReg();
3843  for (unsigned i = 2; i < Inst.getNumOperands(); i++)
3844  if (Inst.getOperand(i).isReg() && Dest == Inst.getOperand(i).getReg())
3845  return Warning(Ops[0]->getStartLoc(), "Destination register should be "
3846  "distinct from source registers");
// Complex FMUL forms: all register operands from index 1 on are sources.
3847  } else if (isVFCMULCPH(Opcode) || isVFCMULCSH(Opcode) || isVFMULCPH(Opcode) ||
3848  isVFMULCSH(Opcode)) {
3849  unsigned Dest = Inst.getOperand(0).getReg();
3850  for (unsigned i = 1; i < Inst.getNumOperands(); i++)
3851  if (Inst.getOperand(i).isReg() && Dest == Inst.getOperand(i).getReg())
3852  return Warning(Ops[0]->getStartLoc(), "Destination register should be "
3853  "distinct from source registers");
// 4FMA / VP4DPWSSD: the second source implicitly names a 4-register group,
// so its encoding must be a multiple of 4; otherwise warn which group the
// hardware will actually read.
3854  } else if (isV4FMADDPS(Opcode) || isV4FMADDSS(Opcode) ||
3855  isV4FNMADDPS(Opcode) || isV4FNMADDSS(Opcode) ||
3856  isVP4DPWSSDS(Opcode) || isVP4DPWSSD(Opcode)) {
// NOTE(review): the extraction dropped original lines 3858 and 3861 here
// (the operand-index continuation and the `RegName` definition used below)
// — verify against upstream before editing this clause.
3857  unsigned Src2 = Inst.getOperand(Inst.getNumOperands() -
3859  unsigned Src2Enc = MRI->getEncodingValue(Src2);
3860  if (Src2Enc % 4 != 0) {
3862  unsigned GroupStart = (Src2Enc / 4) * 4;
3863  unsigned GroupEnd = GroupStart + 3;
3864  return Warning(Ops[0]->getStartLoc(),
3865  "source register '" + RegName + "' implicitly denotes '" +
3866  RegName.take_front(3) + Twine(GroupStart) + "' to '" +
3867  RegName.take_front(3) + Twine(GroupEnd) +
3868  "' source group");
3869  }
// Gathers: the operand layout differs between EVEX (no separate mask
// register operand) and VEX encodings, hence the two index computations.
3870  } else if (isVGATHERDPD(Opcode) || isVGATHERDPS(Opcode) ||
3871  isVGATHERQPD(Opcode) || isVGATHERQPS(Opcode) ||
3872  isVPGATHERDD(Opcode) || isVPGATHERDQ(Opcode) ||
3873  isVPGATHERQD(Opcode) || isVPGATHERQQ(Opcode)) {
3874  bool HasEVEX = (TSFlags & X86II::EncodingMask) == X86II::EVEX;
3875  if (HasEVEX) {
3876  unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
3877  unsigned Index = MRI->getEncodingValue(
3878  Inst.getOperand(4 + X86::AddrIndexReg).getReg());
3879  if (Dest == Index)
3880  return Warning(Ops[0]->getStartLoc(), "index and destination registers "
3881  "should be distinct");
3882  } else {
3883  unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
3884  unsigned Mask = MRI->getEncodingValue(Inst.getOperand(1).getReg());
3885  unsigned Index = MRI->getEncodingValue(
3886  Inst.getOperand(3 + X86::AddrIndexReg).getReg());
3887  if (Dest == Mask || Dest == Index || Mask == Index)
3888  return Warning(Ops[0]->getStartLoc(), "mask, index, and destination "
3889  "registers should be distinct");
3890  }
3891  }
3892 
3893  // Check that we aren't mixing AH/BH/CH/DH with REX prefix. We only need to
3894  // check this with the legacy encoding, VEX/EVEX/XOP don't use REX.
3895  if ((TSFlags & X86II::EncodingMask) == 0) {
3896  MCPhysReg HReg = X86::NoRegister;
3897  bool UsesRex = TSFlags & X86II::REX_W;
3898  unsigned NumOps = Inst.getNumOperands();
3899  for (unsigned i = 0; i != NumOps; ++i) {
3900  const MCOperand &MO = Inst.getOperand(i);
3901  if (!MO.isReg())
3902  continue;
3903  unsigned Reg = MO.getReg();
3904  if (Reg == X86::AH || Reg == X86::BH || Reg == X86::CH || Reg == X86::DH)
3905  HReg = Reg;
// NOTE(review): extraction dropped original lines 3906-3907 here —
// presumably the else-if that sets UsesRex for REX-requiring byte registers
// (SPL/BPL/SIL/DIL or R8B-R15B); confirm against upstream.
3908  UsesRex = true;
3909  }
3910 
3911  if (UsesRex && HReg != X86::NoRegister) {
// NOTE(review): extraction dropped original line 3912 (the `RegName`
// definition for the high-byte register) — confirm against upstream.
3913  return Error(Ops[0]->getStartLoc(),
3914  "can't encode '" + RegName + "' in an instruction requiring "
3915  "REX prefix");
3916  }
3917  }
3918 
3919  return false;
3920 }
3921 
3922 static const char *getSubtargetFeatureName(uint64_t Val);
3923 
3924 void X86AsmParser::emitWarningForSpecialLVIInstruction(SMLoc Loc) {
3925  Warning(Loc, "Instruction may be vulnerable to LVI and "
3926  "requires manual mitigation");
3927  Note(SMLoc(), "See https://software.intel.com/"
3928  "security-software-guidance/insights/"
3929  "deep-dive-load-value-injection#specialinstructions"
3930  " for more information");
3931 }
3932 
3933 /// RET instructions and also instructions that indirect calls/jumps from memory
3934 /// combine a load and a branch within a single instruction. To mitigate these
3935 /// instructions against LVI, they must be decomposed into separate load and
3936 /// branch instructions, with an LFENCE in between. For more details, see:
3937 /// - X86LoadValueInjectionRetHardening.cpp
3938 /// - X86LoadValueInjectionIndirectThunks.cpp
3939 /// - https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection
3940 ///
3941 /// Note: this function returns void; it either emits a mitigation sequence
/// ahead of the instruction (the caller emits \p Inst itself afterwards) or
/// emits a warning for forms that need manual mitigation.
3942 void X86AsmParser::applyLVICFIMitigation(MCInst &Inst, MCStreamer &Out) {
3943  // Information on control-flow instructions that require manual mitigation can
3944  // be found here:
3945  // https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
3946  switch (Inst.getOpcode()) {
3947  case X86::RET16:
3948  case X86::RET32:
3949  case X86::RET64:
3950  case X86::RETI16:
3951  case X86::RETI32:
3952  case X86::RETI64: {
// For returns: emit a no-op read-modify-write of the return-address slot
// (shl $0, (%sp-reg)) followed by an LFENCE, before the RET itself is
// emitted by the caller. This serializes the load of the return address.
3953  MCInst ShlInst, FenceInst;
3954  bool Parse32 = is32BitMode() || Code16GCC;
3955  unsigned Basereg =
3956  is64BitMode() ? X86::RSP : (Parse32 ? X86::ESP : X86::SP);
3957  const MCExpr *Disp = MCConstantExpr::create(0, getContext());
3958  auto ShlMemOp = X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
3959  /*BaseReg=*/Basereg, /*IndexReg=*/0,
3960  /*Scale=*/1, SMLoc{}, SMLoc{}, 0);
3961  ShlInst.setOpcode(X86::SHL64mi);
3962  ShlMemOp->addMemOperands(ShlInst, 5);
3963  ShlInst.addOperand(MCOperand::createImm(0));
3964  FenceInst.setOpcode(X86::LFENCE);
3965  Out.emitInstruction(ShlInst, getSTI());
3966  Out.emitInstruction(FenceInst, getSTI());
3967  return;
3968  }
// Memory-indirect jumps/calls cannot be decomposed here; warn instead.
3969  case X86::JMP16m:
3970  case X86::JMP32m:
3971  case X86::JMP64m:
3972  case X86::CALL16m:
3973  case X86::CALL32m:
3974  case X86::CALL64m:
3975  emitWarningForSpecialLVIInstruction(Inst.getLoc());
3976  return;
3977  }
3978 }
3979 
3980 /// To mitigate LVI, every instruction that performs a load can be followed by
3981 /// an LFENCE instruction to squash any potential mis-speculation. There are
3982 /// some instructions that require additional considerations, and may require
3983 /// manual mitigation. For more details, see:
3984 /// https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection
3985 ///
3986 /// Note: this function returns void; it either emits a trailing LFENCE
/// (the caller has already emitted \p Inst) or emits a warning for forms
/// that need manual mitigation.
3987 void X86AsmParser::applyLVILoadHardeningMitigation(MCInst &Inst,
3988  MCStreamer &Out) {
3989  auto Opcode = Inst.getOpcode();
3990  auto Flags = Inst.getFlags();
3991  if ((Flags & X86::IP_HAS_REPEAT) || (Flags & X86::IP_HAS_REPEAT_NE)) {
3992  // Information on REP string instructions that require manual mitigation can
3993  // be found here:
3994  // https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
3995  switch (Opcode) {
3996  case X86::CMPSB:
3997  case X86::CMPSW:
3998  case X86::CMPSL:
3999  case X86::CMPSQ:
4000  case X86::SCASB:
4001  case X86::SCASW:
4002  case X86::SCASL:
4003  case X86::SCASQ:
4004  emitWarningForSpecialLVIInstruction(Inst.getLoc());
4005  return;
4006  }
4007  } else if (Opcode == X86::REP_PREFIX || Opcode == X86::REPNE_PREFIX) {
4008  // If a REP instruction is found on its own line, it may or may not be
4009  // followed by a vulnerable instruction. Emit a warning just in case.
4010  emitWarningForSpecialLVIInstruction(Inst.getLoc());
4011  return;
4012  }
4013 
4014  const MCInstrDesc &MCID = MII.get(Inst.getOpcode());
4015 
4016  // Can't mitigate after terminators or calls. A control flow change may have
4017  // already occurred.
4018  if (MCID.isTerminator() || MCID.isCall())
4019  return;
4020 
4021  // LFENCE has the mayLoad property, don't double fence.
4022  if (MCID.mayLoad() && Inst.getOpcode() != X86::LFENCE) {
4023  MCInst FenceInst;
4024  FenceInst.setOpcode(X86::LFENCE);
4025  Out.emitInstruction(FenceInst, getSTI());
4026  }
4027 }
4028 
4029 void X86AsmParser::emitInstruction(MCInst &Inst, OperandVector &Operands,
4030  MCStreamer &Out) {
4031  if (LVIInlineAsmHardening &&
4032  getSTI().getFeatureBits()[X86::FeatureLVIControlFlowIntegrity])
4033  applyLVICFIMitigation(Inst, Out);
4034 
4035  Out.emitInstruction(Inst, getSTI());
4036 
4037  if (LVIInlineAsmHardening &&
4038  getSTI().getFeatureBits()[X86::FeatureLVILoadHardening])
4039  applyLVILoadHardeningMitigation(Inst, Out);
4040 }
4041 
// Top-level match-and-emit entry point: dispatches to the Intel- or
// AT&T-syntax matcher based on the active assembler dialect.
// NOTE(review): the extraction dropped original line 4043 between the two
// visible parameter lines (presumably `OperandVector &Operands,`) — verify
// against upstream.
4042 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4044  MCStreamer &Out, uint64_t &ErrorInfo,
4045  bool MatchingInlineAsm) {
4046  if (isParsingIntelSyntax())
4047  return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
4048  MatchingInlineAsm);
4049  return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
4050  MatchingInlineAsm);
4051 }
4052 
// Expand FPU wait-form mnemonics (e.g. "fstsw") into an explicit WAIT
// instruction followed by the no-wait form ("fnstsw"): the WAIT is emitted
// here (unless matching inline asm) and the mnemonic token is rewritten so
// the normal matcher sees the fn* spelling.
// NOTE(review): the extraction dropped original line 4054 (the
// `OperandVector &Operands, MCStreamer &Out,` parameter line) — verify
// against upstream.
4053 void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
4055  bool MatchingInlineAsm) {
4056  // FIXME: This should be replaced with a real .td file alias mechanism.
4057  // Also, MatchInstructionImpl should actually *do* the EmitInstruction
4058  // call.
4059  const char *Repl = StringSwitch<const char *>(Op.getToken())
4060  .Case("finit", "fninit")
4061  .Case("fsave", "fnsave")
4062  .Case("fstcw", "fnstcw")
4063  .Case("fstcww", "fnstcw")
4064  .Case("fstenv", "fnstenv")
4065  .Case("fstsw", "fnstsw")
4066  .Case("fstsww", "fnstsw")
4067  .Case("fclex", "fnclex")
4068  .Default(nullptr);
4069  if (Repl) {
4070  MCInst Inst;
4071  Inst.setOpcode(X86::WAIT);
4072  Inst.setLoc(IDLoc);
4073  if (!MatchingInlineAsm)
4074  emitInstruction(Inst, Operands, Out);
// Replace the mnemonic operand so the subsequent match uses the fn* form.
4075  Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
4076  }
4077 }
4078 
// Report a Match_MissingFeature failure: builds an "instruction requires:"
// message listing every set bit in \p MissingFeatures by name and emits it
// as an Error at \p IDLoc. Always returns the Error() result (true).
// NOTE(review): the extraction dropped original lines 4083-4084 here —
// presumably the `SmallString` buffer and `raw_svector_ostream OS`
// declarations that the code below writes into; verify against upstream.
4079 bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc,
4080  const FeatureBitset &MissingFeatures,
4081  bool MatchingInlineAsm) {
4082  assert(MissingFeatures.any() && "Unknown missing feature!");
4085  OS << "instruction requires:";
4086  for (unsigned i = 0, e = MissingFeatures.size(); i != e; ++i) {
4087  if (MissingFeatures[i])
4088  OS << ' ' << getSubtargetFeatureName(i);
4089  }
4090  return Error(IDLoc, OS.str(), SMRange(), MatchingInlineAsm);
4091 }
4092 
// Pop a trailing prefix pseudo-operand, if present, and return its encoded
// prefix value; returns 0 when the last operand is not a prefix.
// NOTE(review): the extraction dropped original line 4093 — the function
// signature (upstream this is `getPrefixes(OperandVector &Operands)`);
// verify against upstream before editing.
4094  unsigned Result = 0;
4095  X86Operand &Prefix = static_cast<X86Operand &>(*Operands.back());
4096  if (Prefix.isPrefix()) {
4097  Result = Prefix.getPrefix();
4098  Operands.pop_back();
4099  }
4100  return Result;
4101 }
4102 
// Target hook run after the generated matcher: rejects matches that
// contradict a user-forced {vex}/{vex2}/{vex3}/{evex} encoding prefix.
4103 unsigned X86AsmParser::checkTargetMatchPredicate(MCInst &Inst) {
4104  unsigned Opc = Inst.getOpcode();
4105  const MCInstrDesc &MCID = MII.get(Opc);
4106 
// NOTE(review): the extraction dropped original line 4108 (the second half
// of this condition — presumably a TSFlags encoding check that the opcode
// is not EVEX-encodable); verify against upstream.
4107  if (ForcedVEXEncoding == VEXEncoding_EVEX &&
4109  return Match_Unsupported;
4110 
// NOTE(review): original line 4114 (the closing half of this condition,
// presumably the complementary VEX-encoding TSFlags check) was dropped by
// the extraction; verify against upstream.
4111  if ((ForcedVEXEncoding == VEXEncoding_VEX ||
4112  ForcedVEXEncoding == VEXEncoding_VEX2 ||
4113  ForcedVEXEncoding == VEXEncoding_VEX3) &&
4115  return Match_Unsupported;
4116 
4117  // These instructions are only available with {vex}, {vex2} or {vex3} prefix
4118  if (MCID.TSFlags & X86II::ExplicitVEXPrefix &&
4119  (ForcedVEXEncoding != VEXEncoding_VEX &&
4120  ForcedVEXEncoding != VEXEncoding_VEX2 &&
4121  ForcedVEXEncoding != VEXEncoding_VEX3))
4122  return Match_Unsupported;
4123 
4124  return Match_Success;
4125 }
4126 
// AT&T-syntax match-and-emit: expands FPU wait aliases, applies forced
// VEX/EVEX/disp encoding flags, tries a direct match, and on failure retries
// with each size suffix (b/w/l/q or s/l/t for FP) appended to the mnemonic,
// producing the best diagnostic it can from the combined results.
// Returns true on error, false on successful emission.
// NOTE(review): the extraction dropped original lines 4128 and 4130 between
// the visible parameter lines (presumably `OperandVector &Operands,` and
// `uint64_t &ErrorInfo,`) — verify against upstream.
4127 bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
4129  MCStreamer &Out,
4131  bool MatchingInlineAsm) {
4132  assert(!Operands.empty() && "Unexpect empty operand list!");
4133  assert((*Operands[0]).isToken() && "Leading operand should always be a mnemonic!");
4134  SMRange EmptyRange = None;
4135 
4136  // First, handle aliases that expand to multiple instructions.
4137  MatchFPUWaitAlias(IDLoc, static_cast<X86Operand &>(*Operands[0]), Operands,
4138  Out, MatchingInlineAsm);
4139  X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
4140  unsigned Prefixes = getPrefixes(Operands);
4141 
4142  MCInst Inst;
4143 
4144  // If VEX/EVEX encoding is forced, we need to pass the USE_* flag to the
4145  // encoder and printer.
4146  if (ForcedVEXEncoding == VEXEncoding_VEX)
4147  Prefixes |= X86::IP_USE_VEX;
4148  else if (ForcedVEXEncoding == VEXEncoding_VEX2)
4149  Prefixes |= X86::IP_USE_VEX2;
4150  else if (ForcedVEXEncoding == VEXEncoding_VEX3)
4151  Prefixes |= X86::IP_USE_VEX3;
4152  else if (ForcedVEXEncoding == VEXEncoding_EVEX)
4153  Prefixes |= X86::IP_USE_EVEX;
4154 
4155  // Set encoded flags for {disp8} and {disp32}.
4156  if (ForcedDispEncoding == DispEncoding_Disp8)
4157  Prefixes |= X86::IP_USE_DISP8;
4158  else if (ForcedDispEncoding == DispEncoding_Disp32)
4159  Prefixes |= X86::IP_USE_DISP32;
4160 
4161  if (Prefixes)
4162  Inst.setFlags(Prefixes);
4163 
4164  // In 16-bit mode, if data32 is specified, temporarily switch to 32-bit mode
4165  // when matching the instruction.
4166  if (ForcedDataPrefix == X86::Is32Bit)
4167  SwitchMode(X86::Is32Bit);
4168  // First, try a direct match.
4169  FeatureBitset MissingFeatures;
4170  unsigned OriginalError = MatchInstruction(Operands, Inst, ErrorInfo,
4171  MissingFeatures, MatchingInlineAsm,
4172  isParsingIntelSyntax());
4173  if (ForcedDataPrefix == X86::Is32Bit) {
4174  SwitchMode(X86::Is16Bit);
4175  ForcedDataPrefix = 0;
4176  }
// Direct-match outcome: success emits immediately; the three "soft"
// failures fall through to the suffix-retry logic below.
4177  switch (OriginalError) {
4178  default: llvm_unreachable("Unexpected match result!");
4179  case Match_Success:
4180  if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
4181  return true;
4182  // Some instructions need post-processing to, for example, tweak which
4183  // encoding is selected. Loop on it while changes happen so the
4184  // individual transformations can chain off each other.
4185  if (!MatchingInlineAsm)
4186  while (processInstruction(Inst, Operands))
4187  ;
4188 
4189  Inst.setLoc(IDLoc);
4190  if (!MatchingInlineAsm)
4191  emitInstruction(Inst, Operands, Out);
4192  Opcode = Inst.getOpcode();
4193  return false;
4194  case Match_InvalidImmUnsignedi4: {
4195  SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc();
4196  if (ErrorLoc == SMLoc())
4197  ErrorLoc = IDLoc;
4198  return Error(ErrorLoc, "immediate must be an integer in range [0, 15]",
4199  EmptyRange, MatchingInlineAsm);
4200  }
4201  case Match_MissingFeature:
4202  return ErrorMissingFeature(IDLoc, MissingFeatures, MatchingInlineAsm);
4203  case Match_InvalidOperand:
4204  case Match_MnemonicFail:
4205  case Match_Unsupported:
4206  break;
4207  }
4208  if (Op.getToken().empty()) {
4209  Error(IDLoc, "instruction must have size higher than 0", EmptyRange,
4210  MatchingInlineAsm);
4211  return true;
4212  }
4213 
4214  // FIXME: Ideally, we would only attempt suffix matches for things which are
4215  // valid prefixes, and we could just infer the right unambiguous
4216  // type. However, that requires substantially more matcher support than the
4217  // following hack.
4218 
4219  // Change the operand to point to a temporary token.
4220  StringRef Base = Op.getToken();
4221  SmallString<16> Tmp;
4222  Tmp += Base;
4223  Tmp += ' ';
4224  Op.setTokenValue(Tmp);
4225 
4226  // If this instruction starts with an 'f', then it is a floating point stack
4227  // instruction. These come in up to three forms for 32-bit, 64-bit, and
4228  // 80-bit floating point, which use the suffixes s,l,t respectively.
4229  //
4230  // Otherwise, we assume that this may be an integer instruction, which comes
4231  // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
4232  const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
4233  // MemSize corresponding to Suffixes. { 8, 16, 32, 64 } { 32, 64, 80, 0 }
4234  const char *MemSize = Base[0] != 'f' ? "\x08\x10\x20\x40" : "\x20\x40\x50\0";
4235 
4236  // Check for the various suffix matches.
4237  uint64_t ErrorInfoIgnore;
4238  FeatureBitset ErrorInfoMissingFeatures; // Init suppresses compiler warnings.
4239  unsigned Match[4];
4240 
4241  // Some instruction like VPMULDQ is NOT the variant of VPMULD but a new one.
4242  // So we should make sure the suffix matcher only works for memory variant
4243  // that has the same size with the suffix.
4244  // FIXME: This flag is a workaround for legacy instructions that didn't
4245  // declare non suffix variant assembly.
4246  bool HasVectorReg = false;
4247  X86Operand *MemOp = nullptr;
4248  for (const auto &Op : Operands) {
4249  X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
4250  if (X86Op->isVectorReg())
4251  HasVectorReg = true;
4252  else if (X86Op->isMem()) {
4253  MemOp = X86Op;
4254  assert(MemOp->Mem.Size == 0 && "Memory size always 0 under ATT syntax");
4255  // Have we found an unqualified memory operand,
4256  // break. IA allows only one memory operand.
4257  break;
4258  }
4259  }
4260 
// Retry the match once per candidate suffix, recording each outcome.
4261  for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) {
4262  Tmp.back() = Suffixes[I];
4263  if (MemOp && HasVectorReg)
4264  MemOp->Mem.Size = MemSize[I];
4265  Match[I] = Match_MnemonicFail;
4266  if (MemOp || !HasVectorReg) {
4267  Match[I] =
4268  MatchInstruction(Operands, Inst, ErrorInfoIgnore, MissingFeatures,
4269  MatchingInlineAsm, isParsingIntelSyntax());
4270  // If this returned as a missing feature failure, remember that.
4271  if (Match[I] == Match_MissingFeature)
4272  ErrorInfoMissingFeatures = MissingFeatures;
4273  }
4274  }
4275 
4276  // Restore the old token.
4277  Op.setTokenValue(Base);
4278 
4279  // If exactly one matched, then we treat that as a successful match (and the
4280  // instruction will already have been filled in correctly, since the failing
4281  // matches won't have modified it).
4282  unsigned NumSuccessfulMatches = llvm::count(Match, Match_Success);
4283  if (NumSuccessfulMatches == 1) {
4284  if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
4285  return true;
4286  // Some instructions need post-processing to, for example, tweak which
4287  // encoding is selected. Loop on it while changes happen so the
4288  // individual transformations can chain off each other.
4289  if (!MatchingInlineAsm)
4290  while (processInstruction(Inst, Operands))
4291  ;
4292 
4293  Inst.setLoc(IDLoc);
4294  if (!MatchingInlineAsm)
4295  emitInstruction(Inst, Operands, Out);
4296  Opcode = Inst.getOpcode();
4297  return false;
4298  }
4299 
4300  // Otherwise, the match failed, try to produce a decent error message.
4301 
4302  // If we had multiple suffix matches, then identify this as an ambiguous
4303  // match.
4304  if (NumSuccessfulMatches > 1) {
4305  char MatchChars[4];
4306  unsigned NumMatches = 0;
4307  for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I)
4308  if (Match[I] == Match_Success)
4309  MatchChars[NumMatches++] = Suffixes[I];
4310 
// NOTE(review): the extraction dropped original lines 4311-4312 here —
// presumably the `SmallString` buffer and `raw_svector_ostream OS`
// declarations used below; verify against upstream.
4313  OS << "ambiguous instructions require an explicit suffix (could be ";
4314  for (unsigned i = 0; i != NumMatches; ++i) {
4315  if (i != 0)
4316  OS << ", ";
4317  if (i + 1 == NumMatches)
4318  OS << "or ";
4319  OS << "'" << Base << MatchChars[i] << "'";
4320  }
4321  OS << ")";
4322  Error(IDLoc, OS.str(), EmptyRange, MatchingInlineAsm);
4323  return true;
4324  }
4325 
4326  // Okay, we know that none of the variants matched successfully.
4327 
4328  // If all of the instructions reported an invalid mnemonic, then the original
4329  // mnemonic was invalid.
4330  if (llvm::count(Match, Match_MnemonicFail) == 4) {
4331  if (OriginalError == Match_MnemonicFail)
4332  return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
4333  Op.getLocRange(), MatchingInlineAsm);
4334 
4335  if (OriginalError == Match_Unsupported)
4336  return Error(IDLoc, "unsupported instruction", EmptyRange,
4337  MatchingInlineAsm);
4338 
4339  assert(OriginalError == Match_InvalidOperand && "Unexpected error");
4340  // Recover location info for the operand if we know which was the problem.
4341  if (ErrorInfo != ~0ULL) {
4342  if (ErrorInfo >= Operands.size())
4343  return Error(IDLoc, "too few operands for instruction", EmptyRange,
4344  MatchingInlineAsm);
4345 
4346  X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
4347  if (Operand.getStartLoc().isValid()) {
4348  SMRange OperandRange = Operand.getLocRange();
4349  return Error(Operand.getStartLoc(), "invalid operand for instruction",
4350  OperandRange, MatchingInlineAsm);
4351  }
4352  }
4353 
4354  return Error(IDLoc, "invalid operand for instruction", EmptyRange,
4355  MatchingInlineAsm);
4356  }
4357 
4358  // If one instruction matched as unsupported, report this as unsupported.
4359  if (llvm::count(Match, Match_Unsupported) == 1) {
4360  return Error(IDLoc, "unsupported instruction", EmptyRange,
4361  MatchingInlineAsm);
4362  }
4363 
4364  // If one instruction matched with a missing feature, report this as a
4365  // missing feature.
4366  if (llvm::count(Match, Match_MissingFeature) == 1) {
4367  ErrorInfo = Match_MissingFeature;
4368  return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures,
4369  MatchingInlineAsm);
4370  }
4371 
4372  // If one instruction matched with an invalid operand, report this as an
4373  // operand failure.
4374  if (llvm::count(Match, Match_InvalidOperand) == 1) {
4375  return Error(IDLoc, "invalid operand for instruction", EmptyRange,
4376  MatchingInlineAsm);
4377  }
4378 
4379  // If all of these were an outright failure, report it in a useless way.
4380  Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
4381  EmptyRange, MatchingInlineAsm);
4382  return true;
4383 }
4384 
4385 bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
4387  MCStreamer &Out,
4389  bool MatchingInlineAsm) {
4390  assert(!Operands.empty() && "Unexpect empty operand list!");
4391  assert((*Operands[0]).isToken() && "Leading operand should always be a mnemonic!");
4392  StringRef Mnemonic = (static_cast<X86Operand &>(*Operands[0])).getToken();
4393  SMRange EmptyRange = None;
4394  StringRef Base = (static_cast<X86Operand &>(*Operands[0])).getToken();
4395  unsigned Prefixes = getPrefixes(Operands);
4396 
4397  // First, handle aliases that expand to multiple instructions.
4398  MatchFPUWaitAlias(IDLoc, static_cast<X86Operand &>(*Operands[0]), Operands, Out, MatchingInlineAsm);
4399  X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
4400 
4401  MCInst Inst;
4402 
4403  // If VEX/EVEX encoding is forced, we need to pass the USE_* flag to the
4404  // encoder and printer.
4405  if (ForcedVEXEncoding == VEXEncoding_VEX)
4406  Prefixes |= X86::IP_USE_VEX;
4407  else if (ForcedVEXEncoding == VEXEncoding_VEX2)
4408  Prefixes |= X86::IP_USE_VEX2;
4409  else if (ForcedVEXEncoding == VEXEncoding_VEX3)
4410  Prefixes |= X86::IP_USE_VEX3;
4411  else if (ForcedVEXEncoding == VEXEncoding_EVEX)
4412  Prefixes |= X86::IP_USE_EVEX;
4413 
4414  // Set encoded flags for {disp8} and {disp32}.
4415  if (ForcedDispEncoding == DispEncoding_Disp8)
4416  Prefixes |= X86::IP_USE_DISP8;
4417  else if (ForcedDispEncoding == DispEncoding_Disp32)
4418  Prefixes |= X86::IP_USE_DISP32;
4419 
4420  if (Prefixes)
4421  Inst.setFlags(Prefixes);
4422 
4423  // Find one unsized memory operand, if present.
4424  X86Operand *UnsizedMemOp = nullptr;
4425  for (const auto &Op : Operands) {
4426  X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
4427  if (X86Op->isMemUnsized()) {
4428  UnsizedMemOp = X86Op;
4429  // Have we found an unqualified memory operand,
4430  // break. IA allows only one memory operand.
4431  break;
4432  }
4433  }
4434 
4435  // Allow some instructions to have implicitly pointer-sized operands. This is
4436  // compatible with gas.
4437  if (UnsizedMemOp) {
4438  static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
4439  for (const char *Instr : PtrSizedInstrs) {
4440  if (Mnemonic == Instr) {
4441  UnsizedMemOp->Mem.Size = getPointerWidth();
4442  break;
4443  }
4444  }
4445  }
4446 
4448  FeatureBitset ErrorInfoMissingFeatures;
4449  FeatureBitset MissingFeatures;
4450 
4451  // If unsized push has immediate operand we should default the default pointer
4452  // size for the size.
4453  if (Mnemonic == "push" && Operands.size() == 2) {
4454  auto *X86Op = static_cast<X86Operand *>(Operands[1].get());
4455  if (X86Op->isImm()) {
4456  // If it's not a constant fall through and let remainder take care of it.
4457  const auto *CE = dyn_cast<MCConstantExpr>(X86Op->getImm());
4458  unsigned Size = getPointerWidth();
4459  if (CE &&
4460  (isIntN(Size, CE->getValue()) || isUIntN(Size, CE->getValue()))) {
4461  SmallString<16> Tmp;
4462  Tmp += Base;
4463  Tmp += (is64BitMode())
4464  ? "q"
4465  : (is32BitMode()) ? "l" : (is16BitMode()) ? "w" : " ";
4466  Op.setTokenValue(Tmp);
4467  // Do match in ATT mode to allow explicit suffix usage.
4468  Match.push_back(MatchInstruction(Operands, Inst, ErrorInfo,
4469  MissingFeatures, MatchingInlineAsm,
4470  false /*isParsingIntelSyntax()*/));
4471  Op.setTokenValue(Base);
4472  }
4473  }
4474  }
4475 
4476  // If an unsized memory operand is present, try to match with each memory
4477  // operand size. In Intel assembly, the size is not part of the instruction
4478  // mnemonic.
4479  if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
4480  static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
4481  for (unsigned Size : MopSizes) {
4482  UnsizedMemOp->Mem.Size = Size;
4483  uint64_t ErrorInfoIgnore;
4484  unsigned LastOpcode = Inst.getOpcode();
4485  unsigned M = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
4486  MissingFeatures, MatchingInlineAsm,
4487  isParsingIntelSyntax());
4488  if (Match.empty() || LastOpcode != Inst.getOpcode())
4489  Match.push_back(M);
4490 
4491  // If this returned as a missing feature failure, remember that.
4492  if (Match.back() == Match_MissingFeature)
4493  ErrorInfoMissingFeatures = MissingFeatures;
4494  }
4495 
4496  // Restore the size of the unsized memory operand if we modified it.
4497  UnsizedMemOp->Mem.Size = 0;
4498  }
4499 
4500  // If we haven't matched anything yet, this is not a basic integer or FPU
4501  // operation. There shouldn't be any ambiguity in our mnemonic table, so try
4502  // matching with the unsized operand.
4503  if (Match.empty()) {
4504  Match.push_back(MatchInstruction(
4505  Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm,
4506  isParsingIntelSyntax()));
4507  // If this returned as a missing feature failure, remember that.
4508  if (Match.back() == Match_MissingFeature)
4509  ErrorInfoMissingFeatures = MissingFeatures;
4510  }
4511 
4512  // Restore the size of the unsized memory operand if we modified it.
4513  if (UnsizedMemOp)
4514  UnsizedMemOp->Mem.Size = 0;
4515 
4516  // If it's a bad mnemonic, all results will be the same.
4517  if (Match.back() == Match_MnemonicFail) {
4518  return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
4519  Op.getLocRange(), MatchingInlineAsm);
4520  }
4521 
4522  unsigned NumSuccessfulMatches = llvm::count(Match, Match_Success);
4523 
4524  // If matching was ambiguous and we had size information from the frontend,
4525  // try again with that. This handles cases like "movxz eax, m8/m16".
4526  if (UnsizedMemOp && NumSuccessfulMatches > 1 &&
4527  UnsizedMemOp->getMemFrontendSize()) {
4528  UnsizedMemOp->Mem.Size = UnsizedMemOp->getMemFrontendSize();
4529  unsigned M = MatchInstruction(
4530  Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm,
4531  isParsingIntelSyntax());
4532  if (M == Match_Success)
4533  NumSuccessfulMatches = 1;
4534 
4535  // Add a rewrite that encodes the size information we used from the
4536  // frontend.
4537  InstInfo->AsmRewrites->emplace_back(
4538  AOK_SizeDirective, UnsizedMemOp->getStartLoc(),
4539  /*Len=*/0, UnsizedMemOp->getMemFrontendSize());
4540  }
4541 
4542  // If exactly one matched, then we treat that as a successful match (and the
4543  // instruction will already have been filled in correctly, since the failing
4544  // matches won't have modified it).
4545  if (NumSuccessfulMatches == 1) {
4546  if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
4547  return true;
4548  // Some instructions need post-processing to, for example, tweak which
4549  // encoding is selected. Loop on it while changes happen so the individual
4550  // transformations can chain off each other.
4551  if (!MatchingInlineAsm)
4552  while (processInstruction(Inst, Operands))
4553  ;
4554  Inst.setLoc(IDLoc);
4555  if (!MatchingInlineAsm)
4556  emitInstruction(Inst, Operands, Out);
4557  Opcode = Inst.getOpcode();
4558  return false;
4559  } else if (NumSuccessfulMatches > 1) {
4560  assert(UnsizedMemOp &&
4561  "multiple matches only possible with unsized memory operands");
4562  return Error(UnsizedMemOp->getStartLoc(),
4563  "ambiguous operand size for instruction '" + Mnemonic + "\'",
4564  UnsizedMemOp->getLocRange());
4565  }
4566 
4567  // If one instruction matched as unsupported, report this as unsupported.
4568  if (llvm::count(Match, Match_Unsupported) == 1) {
4569  return Error(IDLoc, "unsupported instruction", EmptyRange,
4570  MatchingInlineAsm);
4571  }
4572 
4573  // If one instruction matched with a missing feature, report this as a
4574  // missing feature.
4575  if (llvm::count(Match, Match_MissingFeature) == 1) {
4576  ErrorInfo = Match_MissingFeature;
4577  return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures,
4578  MatchingInlineAsm);
4579  }
4580 
4581  // If one instruction matched with an invalid operand, report this as an
4582  // operand failure.
4583  if (llvm::count(Match, Match_InvalidOperand) == 1) {
4584  return Error(IDLoc, "invalid operand for instruction", EmptyRange,
4585  MatchingInlineAsm);
4586  }
4587 
4588  if (llvm::count(Match, Match_InvalidImmUnsignedi4) == 1) {
4589  SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc();
4590  if (ErrorLoc == SMLoc())
4591  ErrorLoc = IDLoc;
4592  return Error(ErrorLoc, "immediate must be an integer in range [0, 15]",
4593  EmptyRange, MatchingInlineAsm);
4594  }
4595 
4596  // If all of these were an outright failure, report it in a useless way.
4597  return Error(IDLoc, "unknown instruction mnemonic", EmptyRange,
4598  MatchingInlineAsm);
4599 }
4600 
4601 bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
4602  return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
4603 }
4604 
/// Top-level dispatcher for X86-specific assembler directives.
///
/// Returns false when the directive was recognized and handled (even if it
/// produced a diagnostic), and true when it is unknown so the generic parser
/// can try other handlers. Note the ordering matters: prefix matches
/// (.arch/.code/.att_syntax/.intel_syntax) are tested before exact matches.
bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
  MCAsmParser &Parser = getParser();
  StringRef IDVal = DirectiveID.getIdentifier();
  if (IDVal.startswith(".arch"))
    return parseDirectiveArch();
  if (IDVal.startswith(".code"))
    // Handles .code16 / .code16gcc / .code32 / .code64.
    return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
  else if (IDVal.startswith(".att_syntax")) {
    if (getLexer().isNot(AsmToken::EndOfStatement)) {
      // Optional "prefix"/"noprefix" operand. Only the (default) "prefix"
      // form is supported in AT&T mode; "noprefix" is rejected because the
      // parser requires '%' on register names.
      if (Parser.getTok().getString() == "prefix")
        Parser.Lex();
      else if (Parser.getTok().getString() == "noprefix")
        return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
                                           "supported: registers must have a "
                                           "'%' prefix in .att_syntax");
    }
    // Dialect 0 = AT&T syntax.
    getParser().setAssemblerDialect(0);
    return false;
  } else if (IDVal.startswith(".intel_syntax")) {
    // Dialect 1 = Intel syntax. Mirror image of the .att_syntax case:
    // "noprefix" is the default/accepted operand, "prefix" is rejected.
    getParser().setAssemblerDialect(1);
    if (getLexer().isNot(AsmToken::EndOfStatement)) {
      if (Parser.getTok().getString() == "noprefix")
        Parser.Lex();
      else if (Parser.getTok().getString() == "prefix")
        return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
                                           "supported: registers must not have "
                                           "a '%' prefix in .intel_syntax");
    }
    return false;
  } else if (IDVal == ".nops")
    return parseDirectiveNops(DirectiveID.getLoc());
  else if (IDVal == ".even")
    return parseDirectiveEven(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_proc")
    // CodeView frame-pointer-omission (FPO) debug directives.
    return parseDirectiveFPOProc(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_setframe")
    return parseDirectiveFPOSetFrame(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_pushreg")
    return parseDirectiveFPOPushReg(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_stackalloc")
    return parseDirectiveFPOStackAlloc(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_stackalign")
    return parseDirectiveFPOStackAlign(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_endprologue")
    return parseDirectiveFPOEndPrologue(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_endproc")
    return parseDirectiveFPOEndProc(DirectiveID.getLoc());
  // Win64 SEH directives. When parsing MASM, the MASM-style spellings
  // (.pushreg etc., matched case-insensitively) are accepted as aliases.
  else if (IDVal == ".seh_pushreg" ||
           (Parser.isParsingMasm() && IDVal.equals_insensitive(".pushreg")))
    return parseDirectiveSEHPushReg(DirectiveID.getLoc());
  else if (IDVal == ".seh_setframe" ||
           (Parser.isParsingMasm() && IDVal.equals_insensitive(".setframe")))
    return parseDirectiveSEHSetFrame(DirectiveID.getLoc());
  else if (IDVal == ".seh_savereg" ||
           (Parser.isParsingMasm() && IDVal.equals_insensitive(".savereg")))
    return parseDirectiveSEHSaveReg(DirectiveID.getLoc());
  else if (IDVal == ".seh_savexmm" ||
           (Parser.isParsingMasm() && IDVal.equals_insensitive(".savexmm128")))
    return parseDirectiveSEHSaveXMM(DirectiveID.getLoc());
  else if (IDVal == ".seh_pushframe" ||
           (Parser.isParsingMasm() && IDVal.equals_insensitive(".pushframe")))
    return parseDirectiveSEHPushFrame(DirectiveID.getLoc());

  // Not an X86 directive; let the generic parser handle it.
  return true;
}
4670 
4671 bool X86AsmParser::parseDirectiveArch() {
4672  // Ignore .arch for now.
4673  getParser().parseStringToEndOfStatement();
4674  return false;
4675 }
4676 
/// parseDirectiveNops
/// ::= .nops size[, control]
///
/// Emits `size` bytes of NOPs; the optional `control` operand bounds the
/// size of each individual NOP instruction. Returns true on a hard parse
/// error. Semantic errors (non-positive size, negative control) are
/// diagnosed but return false, since the statement was fully consumed.
bool X86AsmParser::parseDirectiveNops(SMLoc L) {
  int64_t NumBytes = 0, Control = 0;
  SMLoc NumBytesLoc, ControlLoc;
  const MCSubtargetInfo& STI = getSTI();
  // Remember where the size operand starts for diagnostics below.
  NumBytesLoc = getTok().getLoc();
  if (getParser().checkForValidSection() ||
      getParser().parseAbsoluteExpression(NumBytes))
    return true;

  // The control operand is optional; Control stays 0 when absent.
  if (parseOptionalToken(AsmToken::Comma)) {
    ControlLoc = getTok().getLoc();
    if (getParser().parseAbsoluteExpression(Control))
      return true;
  }
  if (getParser().parseEOL())
    return true;

  if (NumBytes <= 0) {
    Error(NumBytesLoc, "'.nops' directive with non-positive size");
    return false;
  }

  // Only reachable when a control operand was parsed, so ControlLoc is valid.
  if (Control < 0) {
    Error(ControlLoc, "'.nops' directive with negative NOP size");
    return false;
  }

  /// Emit nops
  getParser().getStreamer().emitNops(NumBytes, Control, L, STI);

  return false;
}
4711 
/// parseDirectiveEven
/// ::= .even
///
/// Aligns the current location to a 2-byte boundary. In code sections the
/// padding is NOP-like code alignment; elsewhere it is zero fill.
bool X86AsmParser::parseDirectiveEven(SMLoc L) {
  // NOTE: parseEOL() returning true (an error) makes us return false here;
  // this preserves the existing behavior of swallowing trailing-token errors.
  if (parseEOL())
    return false;

  const MCSection *Section = getStreamer().getCurrentSectionOnly();
  if (!Section) {
    // .even before any section directive: force default section setup so we
    // have something to align in.
    getStreamer().initSections(false, getSTI());
    Section = getStreamer().getCurrentSectionOnly();
  }
  if (Section->useCodeAlign())
    getStreamer().emitCodeAlignment(2, &getSTI(), 0);
  else
    getStreamer().emitValueToAlignment(2, 0, 1, 0);
  return false;
}
4729 
/// ParseDirectiveCode
/// ::= .code16 | .code16gcc | .code32 | .code64
///
/// Switches the parser/streamer between 16/32/64-bit modes and emits the
/// matching assembler-flag so downstream consumers (e.g. the object writer)
/// track the mode change. Always returns false: even an unknown ".code*"
/// variant only emits a diagnostic, since the directive was consumed.
bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
  MCAsmParser &Parser = getParser();
  // Any .code* directive cancels a previous .code16gcc state; it is re-set
  // below only for the .code16gcc case.
  Code16GCC = false;
  if (IDVal == ".code16") {
    Parser.Lex();
    if (!is16BitMode()) {
      SwitchMode(X86::Is16Bit);
      getParser().getStreamer().emitAssemblerFlag(MCAF_Code16);
    }
  } else if (IDVal == ".code16gcc") {
    // .code16gcc parses as if in 32-bit mode, but emits code in 16-bit mode.
    Parser.Lex();
    Code16GCC = true;
    if (!is16BitMode()) {
      SwitchMode(X86::Is16Bit);
      getParser().getStreamer().emitAssemblerFlag(MCAF_Code16);
    }
  } else if (IDVal == ".code32") {
    Parser.Lex();
    if (!is32BitMode()) {
      SwitchMode(X86::Is32Bit);
      getParser().getStreamer().emitAssemblerFlag(MCAF_Code32);
    }
  } else if (IDVal == ".code64") {
    Parser.Lex();
    if (!is64BitMode()) {
      SwitchMode(X86::Is64Bit);
      getParser().getStreamer().emitAssemblerFlag(MCAF_Code64);
    }
  } else {
    // Diagnose but do not propagate failure; the directive is consumed.
    Error(L, "unknown directive " + IDVal);
    return false;
  }

  return false;
}
4768 
4769 // .cv_fpo_proc foo
4770 bool X86AsmParser::parseDirectiveFPOProc(SMLoc L) {
4771  MCAsmParser &Parser = getParser();
4772  StringRef ProcName;
4773  int64_t ParamsSize;
4774  if (Parser.parseIdentifier(ProcName))
4775  return Parser.TokError("expected symbol name");
4776  if (Parser.parseIntToken(ParamsSize, "expected parameter byte count"))
4777  return true;
4778  if (!isUIntN(32, ParamsSize))
4779  return Parser.TokError("parameters size out of range");
4780  if (parseEOL())
4781  return true;
4782  MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName);
4783  return getTargetStreamer().emitFPOProc(ProcSym, ParamsSize, L);
4784 }
4785 
4786 // .cv_fpo_setframe ebp
4787 bool X86AsmParser::parseDirectiveFPOSetFrame(SMLoc L) {
4788  unsigned Reg;
4789  SMLoc DummyLoc;
4790  if (ParseRegister(Reg, DummyLoc, DummyLoc) || parseEOL())
4791  return true;
4792  return getTargetStreamer().emitFPOSetFrame(Reg, L);
4793 }
4794 
4795 // .cv_fpo_pushreg ebx
4796 bool X86AsmParser::parseDirectiveFPOPushReg(SMLoc L) {
4797  unsigned Reg;
4798  SMLoc DummyLoc;
4799  if (ParseRegister(Reg, DummyLoc, DummyLoc) || parseEOL())
4800  return true;
4801  return getTargetStreamer().emitFPOPushReg(Reg, L);
4802 }
4803 
4804 // .cv_fpo_stackalloc 20
4805 bool X86AsmParser::parseDirectiveFPOStackAlloc(SMLoc L) {
4806  MCAsmParser &Parser = getParser();
4807  int64_t Offset;
4808  if (Parser.parseIntToken(Offset, "expected offset") || parseEOL())
4809  return true;
4810  return getTargetStreamer().emitFPOStackAlloc(Offset, L);
4811 }
4812 
4813 // .cv_fpo_stackalign 8
4814 bool X86AsmParser::parseDirectiveFPOStackAlign(SMLoc L) {
4815  MCAsmParser &Parser = getParser();
4816  int64_t Offset;
4817  if (Parser.parseIntToken(Offset, "expected offset") || parseEOL())
4818  return true;
4819  return getTargetStreamer().emitFPOStackAlign(Offset, L);
4820 }
4821 
4822 // .cv_fpo_endprologue
4823 bool X86AsmParser::parseDirectiveFPOEndPrologue(SMLoc L) {
4824  MCAsmParser &Parser = getParser();
4825  if (Parser.parseEOL())
4826  return true;
4827  return getTargetStreamer().emitFPOEndPrologue(L);
4828 }
4829 
4830 // .cv_fpo_endproc
4831 bool X86AsmParser::parseDirectiveFPOEndProc(SMLoc L) {
4832  MCAsmParser &Parser = getParser();
4833  if (Parser.parseEOL())
4834  return true;
4835  return getTargetStreamer().emitFPOEndProc(L);
4836 }
4837 
/// Parse the register operand of a Win64 SEH directive into \p RegNo.
///
/// The operand may be either a register name (which must belong to
/// \p RegClassID) or an integer giving the register's machine encoding,
/// which is mapped back to the LLVM register number. Returns true and
/// emits a diagnostic on failure.
bool X86AsmParser::parseSEHRegisterNumber(unsigned RegClassID,
                                          unsigned &RegNo) {
  SMLoc startLoc = getLexer().getLoc();
  const MCRegisterInfo *MRI = getContext().getRegisterInfo();

  // Try parsing the argument as a register first.
  if (getLexer().getTok().isNot(AsmToken::Integer)) {
    SMLoc endLoc;
    if (ParseRegister(RegNo, startLoc, endLoc))
      return true;

    // The named register must be in the class this directive expects
    // (e.g. GR64 for .seh_pushreg, VR128X for .seh_savexmm).
    if (!X86MCRegisterClasses[RegClassID].contains(RegNo)) {
      return Error(startLoc,
                   "register is not supported for use with this directive");
    }
  } else {
    // Otherwise, an integer number matching the encoding of the desired
    // register may appear.
    int64_t EncodedReg;
    if (getParser().parseAbsoluteExpression(EncodedReg))
      return true;

    // The SEH register number is the same as the encoding register number. Map
    // from the encoding back to the LLVM register number.
    RegNo = 0;
    for (MCPhysReg Reg : X86MCRegisterClasses[RegClassID]) {
      if (MRI->getEncodingValue(Reg) == EncodedReg) {
        RegNo = Reg;
        break;
      }
    }
    // No register in the class carries this encoding (relies on register 0
    // never being a real member of the class).
    if (RegNo == 0) {
      return Error(startLoc,
                   "incorrect register number for use with this directive");
    }
  }

  return false;
}
4877 
4878 bool X86AsmParser::parseDirectiveSEHPushReg(SMLoc Loc) {
4879  unsigned Reg = 0;
4880  if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg))
4881  return true;
4882 
4883  if (getLexer().isNot(AsmToken::EndOfStatement))
4884  return TokError("unexpected token in directive");
4885 
4886  getParser().Lex();
4887  getStreamer().emitWinCFIPushReg(Reg, Loc);
4888  return false;
4889 }
4890 
4891 bool X86AsmParser::parseDirectiveSEHSetFrame(SMLoc Loc) {
4892  unsigned Reg = 0;
4893  int64_t Off;
4894  if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg))
4895  return true;
4896  if (getLexer().isNot(AsmToken::Comma))
4897  return TokError("you must specify a stack pointer offset");
4898 
4899  getParser().Lex();
4900  if (getParser().parseAbsoluteExpression(Off))
4901  return true;
4902 
4903  if (getLexer().isNot(AsmToken::EndOfStatement))
4904  return TokError("unexpected token in directive");
4905 
4906  getParser().Lex();
4907  getStreamer().emitWinCFISetFrame(Reg, Off, Loc);
4908  return false;
4909 }
4910 
4911 bool X86AsmParser::parseDirectiveSEHSaveReg(SMLoc Loc) {
4912  unsigned Reg = 0;
4913  int64_t Off;
4914  if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg))
4915  return true;
4916  if (getLexer().isNot(AsmToken::Comma))
4917  return TokError("you must specify an offset on the stack");
4918 
4919  getParser().Lex();
4920  if (getParser().parseAbsoluteExpression(Off))
4921  return true;
4922 
4923  if (getLexer().isNot(AsmToken::EndOfStatement))
4924  return TokError("unexpected token in directive");
4925 
4926  getParser().Lex();
4927  getStreamer().emitWinCFISaveReg(Reg, Off, Loc);
4928  return false;
4929 }
4930 
4931 bool X86AsmParser::parseDirectiveSEHSaveXMM(SMLoc Loc) {
4932  unsigned Reg = 0;
4933  int64_t Off;
4934  if (parseSEHRegisterNumber(X86::VR128XRegClassID, Reg))
4935  return true;
4936  if (getLexer().isNot(AsmToken::Comma))
4937  return TokError("you must specify an offset on the stack");
4938 
4939  getParser().Lex();
4940  if (getParser().parseAbsoluteExpression(Off))
4941  return true;
4942 
4943  if (getLexer().isNot(AsmToken::EndOfStatement))
4944  return TokError("unexpected token in directive");
4945 
4946  getParser().Lex();
4947  getStreamer().emitWinCFISaveXMM(Reg, Off, Loc);
4948  return false;
4949 }
4950 
4951 bool X86AsmParser::parseDirectiveSEHPushFrame(SMLoc Loc) {
4952  bool Code = false;
4953  StringRef CodeID;
4954  if (getLexer().is(AsmToken::At)) {
4955  SMLoc startLoc = getLexer().getLoc();
4956  getParser().Lex();
4957  if (!getParser().parseIdentifier(CodeID)) {
4958  if (CodeID != "code")
4959  return Error(startLoc, "expected @code");
4960  Code = true;
4961  }
4962  }
4963 
4964  if (getLexer().isNot(AsmToken::EndOfStatement))
4965  return TokError("unexpected token in directive");
4966 
4967  getParser().Lex();
4968  getStreamer().emitWinCFIPushFrame(Code, Loc);
4969  return false;
4970 }
4971 
4972 // Force static initialization.
4976 }
4977 
4978 #define GET_REGISTER_MATCHER
4979 #define GET_MATCHER_IMPLEMENTATION
4980 #define GET_SUBTARGET_FEATURE_NAME
4981 #include "X86GenAsmMatcher.inc"
llvm::Check::Size
@ Size
Definition: FileCheck.h:77
llvm::StringSwitch::Case
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
llvm::MCTargetStreamer::getStreamer
MCStreamer & getStreamer()
Definition: MCStreamer.h:101
checkScale
static bool checkScale(unsigned Scale, StringRef &ErrMsg)
Definition: X86AsmParser.cpp:50
i
i
Definition: README.txt:29
llvm::MCAsmParser
Generic assembler parser interface, for use by target specific assembly parsers.
Definition: MCAsmParser.h:124
llvm::StringRef::back
LLVM_NODISCARD char back() const
back - Get the last character in the string.
Definition: StringRef.h:168
llvm::AsmFieldInfo::Offset
unsigned Offset
Definition: MCAsmParser.h:105
is
should just be implemented with a CLZ instruction Since there are other e that share this it would be best to implement this in a target independent as zero is the default value for the binary encoder e add r0 add r5 Register operands should be distinct That is
Definition: README.txt:725
llvm::StringRef::startswith
LLVM_NODISCARD bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:290
llvm::MCAsmParser::Error
bool Error(SMLoc L, const Twine &Msg, SMRange Range=None)
Return an error at the location L, with the message Msg.
Definition: MCAsmParser.cpp:99
getSubtargetFeatureName
static const char * getSubtargetFeatureName(uint64_t Val)
llvm::X86Operand::getEndLoc
SMLoc getEndLoc() const override
getEndLoc - Get the location of the last token of this operand.
Definition: X86Operand.h:101
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::X86Operand::CreateMem
static std::unique_ptr< X86Operand > CreateMem(unsigned ModeSize, const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc, unsigned Size=0, StringRef SymName=StringRef(), void *OpDecl=nullptr, unsigned FrontendSize=0, bool UseUpRegs=false, bool MaybeDirectBranchDest=true)
Create an absolute memory operand.
Definition: X86Operand.h:682
llvm::X86::AddrIndexReg
@ AddrIndexReg
Definition: X86BaseInfo.h:34
llvm::MCSymbol
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::AsmToken::is
bool is(TokenKind K) const
Definition: MCAsmMacro.h:82
llvm::X86Operand::Mem
struct MemOp Mem
Definition: X86Operand.h:86
llvm::MCAsmLexer
Generic assembler lexer interface, for use by target specific assembly lexers.
Definition: MCAsmLexer.h:37
llvm::MCOperand::isReg
bool isReg() const
Definition: MCInst.h:61
llvm::cl::Prefix
@ Prefix
Definition: CommandLine.h:160
llvm::pdb::PDB_DataKind::Member
@ Member
llvm::StringRef::endswith
LLVM_NODISCARD bool endswith(StringRef Suffix) const
Check if this string ends with the given Suffix.
Definition: StringRef.h:301
llvm::X86::TO_POS_INF
@ TO_POS_INF
Definition: X86BaseInfo.h:49
llvm::MCOperand::createImm
static MCOperand createImm(int64_t Val)
Definition: MCInst.h:141
llvm::AsmToken::LBrac
@ LBrac
Definition: MCAsmMacro.h:48
Note
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles Note
Definition: README.txt:239
llvm::AsmToken::Dot
@ Dot
Definition: MCAsmMacro.h:49
llvm::StringSwitch::Default
LLVM_NODISCARD R Default(T Value)
Definition: StringSwitch.h:183
llvm::getTheX86_64Target
Target & getTheX86_64Target()
Definition: X86TargetInfo.cpp:17
llvm::AsmToken::EndOfStatement
@ EndOfStatement
Definition: MCAsmMacro.h:42
llvm::X86Operand::isMem8
bool isMem8() const
Definition: X86Operand.h:309
llvm::MCAsmParser::parseEOL
bool parseEOL()
Definition: MCAsmParser.cpp:47
llvm::AsmToken::getIntVal
int64_t getIntVal() const
Definition: MCAsmMacro.h:115
llvm::MCConstantExpr::create
static const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition: MCExpr.cpp:194
contains
return AArch64::GPR64RegClass contains(Reg)
llvm::StringRef::npos
static constexpr size_t npos
Definition: StringRef.h:60
llvm::X86::COND_BE
@ COND_BE
Definition: X86BaseInfo.h:87
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
llvm::tgtok::Code
@ Code
Definition: TGLexer.h:50
MCParsedAsmOperand.h
llvm::MCAsmParser::parseIdentifier
virtual bool parseIdentifier(StringRef &Res)=0
Parse an identifier or string (as a quoted identifier) and set Res to the identifier contents.
llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:462
CH
AMD64 Optimization Manual has some nice information about optimizing integer multiplication by a constant How much of it applies to Intel s X86 implementation There are definite trade offs to xmm0 cvttss2siq rdx jb L3 subss xmm0 rax cvttss2siq rdx xorq rdx rax ret instead of xmm1 cvttss2siq rcx movaps xmm2 subss xmm2 cvttss2siq rax rdx xorq rax ucomiss xmm0 cmovb rax ret Seems like the jb branch has high likelihood of being taken It would have saved a few instructions It s not possible to reference CH
Definition: README-X86-64.txt:44
llvm::X86::COND_P
@ COND_P
Definition: X86BaseInfo.h:91
llvm::X86::COND_GE
@ COND_GE
Definition: X86BaseInfo.h:94
llvm::MCSymbol::isUndefined
bool isUndefined(bool SetUsed=true) const
isUndefined - Check if this symbol undefined (i.e., implicitly defined).
Definition: MCSymbol.h:252
llvm::pdb::PDB_BuiltinType::Variant
@ Variant
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
llvm::MemOp
Definition: TargetLowering.h:111
llvm::X86::COND_L
@ COND_L
Definition: X86BaseInfo.h:93
llvm::X86::IP_HAS_LOCK
@ IP_HAS_LOCK
Definition: X86BaseInfo.h:62
startswith
static bool startswith(StringRef Magic, const char(&S)[N])
Definition: Magic.cpp:28
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::X86::IP_USE_DISP8
@ IP_USE_DISP8
Definition: X86BaseInfo.h:68
llvm::AsmToken::Integer
@ Integer
Definition: MCAsmMacro.h:32
llvm::MCAsmParser::parseOptionalToken
bool parseOptionalToken(AsmToken::TokenKind T)
Attempt to parse and consume token, returning true on success.
Definition: MCAsmParser.cpp:78
llvm::AsmTypeInfo::Name
StringRef Name
Definition: MCAsmParser.h:97
llvm::FenceInst
An instruction for ordering other memory operations.
Definition: Instructions.h:432
llvm::X86Operand::isMem
bool isMem() const override
isMem - Is this a memory operand?
Definition: X86Operand.h:305
llvm::FeatureBitset
Container class for subtarget features.
Definition: SubtargetFeature.h:40
llvm::InlineAsmIdentifierInfo
Definition: MCAsmParser.h:37
STLExtras.h
llvm::DiagnosticPredicateTy::Match
@ Match
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:654
llvm::MCInst
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:184
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
llvm::X86::CondCode
CondCode
Definition: X86BaseInfo.h:80
llvm::InlineAsmIdentifierInfo::IK_EnumVal
@ IK_EnumVal
Definition: MCAsmParser.h:41
llvm::StringRef::slice
LLVM_NODISCARD StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition: StringRef.h:736
llvm::X86::COND_INVALID
@ COND_INVALID
Definition: X86BaseInfo.h:107
llvm::getX86SubSuperRegisterOrZero
MCRegister getX86SubSuperRegisterOrZero(MCRegister, unsigned, bool High=false)
Returns the sub or super register of a specific X86 register.
Definition: X86MCTargetDesc.cpp:740
getSym
static ModuleSymbolTable::Symbol getSym(DataRefImpl &Symb)
Definition: IRObjectFile.cpp:37
MCAsmParser.h
llvm::X86::COND_S
@ COND_S
Definition: X86BaseInfo.h:89
llvm::MCInst::getNumOperands
unsigned getNumOperands() const
Definition: MCInst.h:208
MCTargetAsmParser.h
llvm::X86::IP_HAS_NOTRACK
@ IP_HAS_NOTRACK
Definition: X86BaseInfo.h:63
llvm::X86Operand::isReg
bool isReg() const override
isReg - Is this a register operand?
Definition: X86Operand.h:478
llvm::MCAsmParser::parseAbsoluteExpression
virtual bool parseAbsoluteExpression(int64_t &Res)=0
Parse an expression which must evaluate to an absolute value.
llvm::AsmToken
Target independent representation for an assembler token.
Definition: MCAsmMacro.h:21
llvm::X86::COND_O
@ COND_O
Definition: X86BaseInfo.h:81
llvm::X86Operand::CreatePrefix
static std::unique_ptr< X86Operand > CreatePrefix(unsigned Prefixes, SMLoc StartLoc, SMLoc EndLoc)
Definition: X86Operand.h:660
llvm::MCInst::setOpcode
void setOpcode(unsigned Op)
Definition: MCInst.h:197
llvm::AsmToken::Minus
@ Minus
Definition: MCAsmMacro.h:45
llvm::ARMBuildAttrs::Section
@ Section
Legacy Tags.
Definition: ARMBuildAttributes.h:82
llvm::StringRef::substr
LLVM_NODISCARD StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:615
llvm::AsmToken::LParen
@ LParen
Definition: MCAsmMacro.h:48
llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
CommandLine.h
llvm::MCAsmParser::Lex
virtual const AsmToken & Lex()=0
Get the next AsmToken in the stream, possibly handling file inclusion first.
llvm::MCStreamer
Streaming machine code generation interface.
Definition: MCStreamer.h:212
LHS
Value * LHS
Definition: X86PartialReduction.cpp:75
x3
In x86 we generate this spiffy xmm0 xmm0 ret in x86 we generate this which could be xmm1 movss xmm1 xmm0 ret In sse4 we could use insertps to make both better Here s another testcase that could use x3
Definition: README-SSE.txt:547
llvm::AsmToken::Dollar
@ Dollar
Definition: MCAsmMacro.h:49
llvm::Intrinsic::getType
FunctionType * getType(LLVMContext &Context, ID id, ArrayRef< Type * > Tys=None)
Return the function type for an intrinsic.
Definition: Function.cpp:1374
llvm::MCInstrDesc::TSFlags
uint64_t TSFlags
Definition: MCInstrDesc.h:205
llvm::MCAsmParser::parseExpression
virtual bool parseExpression(const MCExpr *&Res, SMLoc &EndLoc)=0
Parse an arbitrary expression.
llvm::RegisterMCAsmParser
RegisterMCAsmParser - Helper template for registering a target specific assembly parser,...
Definition: TargetRegistry.h:1360
llvm::MatchOperand_Success
@ MatchOperand_Success
Definition: MCTargetAsmParser.h:127
llvm::SMLoc
Represents a location in source code.
Definition: SMLoc.h:23
SmallString.h
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::StringRef::split
LLVM_NODISCARD std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:753
getPrefixes
static unsigned getPrefixes(OperandVector &Operands)
Definition: X86AsmParser.cpp:4093
llvm::AsmToken::GreaterGreater
@ GreaterGreater
Definition: MCAsmMacro.h:54
llvm::AsmTypeInfo::Size
unsigned Size
Definition: MCAsmParser.h:98
Twine.h
llvm::MCAsmParser::parseIntToken
bool parseIntToken(int64_t &V, const Twine &ErrMsg)
Definition: MCAsmParser.cpp:70
llvm::X86::IP_USE_EVEX
@ IP_USE_EVEX
Definition: X86BaseInfo.h:67
MCContext.h
Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
llvm::AsmTypeInfo
Definition: MCAsmParser.h:96
MCInstrInfo.h
llvm::MCOperand::getImm
int64_t getImm() const
Definition: MCInst.h:80
MCSymbol.h
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::X86II::isX86_64ExtendedReg
bool isX86_64ExtendedReg(unsigned RegNo)
Definition: X86BaseInfo.h:1185
llvm::MCAF_Code16
@ MCAF_Code16
.code16 (X86) / .code 16 (ARM)
Definition: MCDirectives.h:54
MCInst.h
false
Definition: StackSlotColoring.cpp:141
llvm::X86II::VEX
@ VEX
Definition: X86BaseInfo.h:908
llvm::MCInstrDesc::isTerminator
bool isTerminator() const
Returns true if this instruction part of the terminator for a basic block.
Definition: MCInstrDesc.h:298
check
#define check(cond)
llvm::MCSymbol::getVariableValue
const MCExpr * getVariableValue(bool SetUsed=true) const
getVariableValue - Get the value for variable symbols.
Definition: MCSymbol.h:298
llvm::dwarf::Index
Index
Definition: Dwarf.h:472
llvm::dwarf::toStringRef
StringRef toStringRef(const Optional< DWARFFormValue > &V, StringRef Default={})
Take an optional DWARFFormValue and try to extract a string value from it.
Definition: DWARFFormValue.h:193
llvm::MCInstrDesc
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:197
llvm::isUIntN
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:455
MCSubtargetInfo.h
llvm::X86II::REX_W
@ REX_W
Definition: X86BaseInfo.h:838
llvm::MCSubtargetInfo::getFeatureBits
const FeatureBitset & getFeatureBits() const
Definition: MCSubtargetInfo.h:112
llvm::AsmToken::Star
@ Star
Definition: MCAsmMacro.h:49
llvm::MCAsmParser::getContext
virtual MCContext & getContext()=0
llvm::ParseInstructionInfo::AsmRewrites
SmallVectorImpl< AsmRewrite > * AsmRewrites
Definition: MCTargetAsmParser.h:119
llvm::MCAsmParser::isParsingMasm
virtual bool isParsingMasm() const
Definition: MCAsmParser.h:188
llvm::X86::COND_A
@ COND_A
Definition: X86BaseInfo.h:88
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:143
llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1466
Options
const char LLVMTargetMachineRef LLVMPassBuilderOptionsRef Options
Definition: PassBuilderBindings.cpp:48
llvm::X86Operand::isImm
bool isImm() const override
isImm - Is this an immediate operand?
Definition: X86Operand.h:224
llvm::MCAsmParser::addAliasForDirective
virtual void addAliasForDirective(StringRef Directive, StringRef Alias)=0
llvm::MCInst::setFlags
void setFlags(unsigned F)
Definition: MCInst.h:200
llvm::AsmToken::getKind
TokenKind getKind() const
Definition: MCAsmMacro.h:81
llvm::MCTargetStreamer
Target specific streamer interface.
Definition: MCStreamer.h:93
llvm::InlineAsmIdentifierInfo::IK_Label
@ IK_Label
Definition: MCAsmParser.h:40
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::X86::TO_NEG_INF
@ TO_NEG_INF
Definition: X86BaseInfo.h:48
llvm::MCSymbol::getName
StringRef getName() const
getName - Get the symbol name.
Definition: MCSymbol.h:198
X86Operand.h
llvm::MCExpr::getKind
ExprKind getKind() const
Definition: MCExpr.h:81
isNot
static bool isNot(const MachineRegisterInfo &MRI, const MachineInstr &MI)
Definition: AMDGPULegalizerInfo.cpp:3219
llvm::AsmToken::Percent
@ Percent
Definition: MCAsmMacro.h:52
llvm::array_lengthof
constexpr size_t array_lengthof(T(&)[N])
Find the length of an array.
Definition: STLArrayExtras.h:29
llvm::isIntN
bool isIntN(unsigned N, int64_t x)
Checks if an signed integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:460
llvm::StringRef::getAsInteger
std::enable_if_t< std::numeric_limits< T >::is_signed, bool > getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:514
llvm::None
const NoneType None
Definition: None.h:24
llvm::X86::IP_USE_DISP32
@ IP_USE_DISP32
Definition: X86BaseInfo.h:69
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
SourceMgr.h
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
llvm::MCAsmParser::parsePrimaryExpr
virtual bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc, AsmTypeInfo *TypeInfo)=0
Parse a primary expression.
llvm::StringRef::equals
LLVM_NODISCARD bool equals(StringRef RHS) const
equals - Check for string equality, this is more efficient than compare() when the relative ordering ...
Definition: StringRef.h:187
llvm::X86Operand::getLocRange
SMRange getLocRange() const
getLocRange - Get the range between the first and last token of this operand.
Definition: X86Operand.h:105
llvm::MCInst::addOperand
void addOperand(const MCOperand Op)
Definition: MCInst.h:210
llvm::SmallString
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
Operands
mir Rename Register Operands
Definition: MIRNamerPass.cpp:74
llvm::MCSubtargetInfo::ToggleFeature
FeatureBitset ToggleFeature(uint64_t FB)
Toggle a feature and return the re-computed feature bits.
Definition: MCSubtargetInfo.cpp:240
llvm::AsmToken::Error
@ Error
Definition: MCAsmMacro.h:25
X86MCExpr.h
llvm::X86::COND_NO
@ COND_NO
Definition: X86BaseInfo.h:82
llvm::MCInstrDesc::mayLoad
bool mayLoad() const
Return true if this instruction could possibly read memory.
Definition: MCInstrDesc.h:435
llvm::X86::AddrNumOperands
@ AddrNumOperands
AddrNumOperands - Total number of operands in a memory reference.
Definition: X86BaseInfo.h:41
llvm::cl::opt< bool >
llvm::AsmTypeInfo::ElementSize
unsigned ElementSize
Definition: MCAsmParser.h:99
llvm::getTheX86_32Target
Target & getTheX86_32Target()
Definition: X86TargetInfo.cpp:13
x2
gcc mainline compiles it x2(%rip)
MCAsmLexer.h
llvm::MCOperand::isImm
bool isImm() const
Definition: MCInst.h:62
X86MCTargetDesc.h
llvm::StringRef::equals_insensitive
LLVM_NODISCARD bool equals_insensitive(StringRef RHS) const
Check for string equality, ignoring case.
Definition: StringRef.h:194
llvm::AMDGPU::Hwreg::Offset
Offset
Definition: SIDefines.h:413
llvm::X86::COND_AE
@ COND_AE
Definition: X86BaseInfo.h:84
llvm::ParseInstructionInfo
Definition: MCTargetAsmParser.h:118
llvm::MCSymbolRefExpr::VariantKind
VariantKind
Definition: MCExpr.h:194
llvm::StringRef::empty
constexpr LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:153
llvm::count
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1709
llvm::InlineAsmIdentifierInfo::IK_Invalid
@ IK_Invalid
Definition: MCAsmParser.h:39
llvm::X86::IP_HAS_REPEAT
@ IP_HAS_REPEAT
Definition: X86BaseInfo.h:61
llvm::SMLoc::isValid
bool isValid() const
Definition: SMLoc.h:29
llvm::IntelExpr
Definition: MCTargetAsmParser.h:64
uint64_t
llvm::X86Operand::isVectorReg
bool isVectorReg() const
Definition: X86Operand.h:494
llvm::X86Operand
X86Operand - Instances of this class represent a parsed X86 machine instruction.
Definition: X86Operand.h:31
llvm::MCInstrDesc::isCall
bool isCall() const
Return true if the instruction is a call.
Definition: MCInstrDesc.h:285
LLVM_EXTERNAL_VISIBILITY
#define LLVM_EXTERNAL_VISIBILITY
Definition: Compiler.h:126
AH
AMD64 Optimization Manual has some nice information about optimizing integer multiplication by a constant How much of it applies to Intel s X86 implementation There are definite trade offs to xmm0 cvttss2siq rdx jb L3 subss xmm0 rax cvttss2siq rdx xorq rdx rax ret instead of xmm1 cvttss2siq rcx movaps xmm2 subss xmm2 cvttss2siq rax rdx xorq rax ucomiss xmm0 cmovb rax ret Seems like the jb branch has high likelihood of being taken It would have saved a few instructions It s not possible to reference AH
Definition: README-X86-64.txt:44
llvm::AsmToken::At
@ At
Definition: MCAsmMacro.h:54
LLVMInitializeX86AsmParser
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86AsmParser()
Definition: X86AsmParser.cpp:4973
X86TargetStreamer.h
llvm::X86Operand::CreateImm
static std::unique_ptr< X86Operand > CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc, StringRef SymName=StringRef(), void *OpDecl=nullptr, bool GlobalRef=true)
Definition: X86Operand.h:666
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::InlineAsmIdentifierInfo::Enum
EnumIdentifier Enum
Definition: MCAsmParser.h:62
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::MCStr