// Extracted from the LLVM 17.0.0git doxygen page for X86AsmParser.cpp
// (lib/Target/X86/AsmParser). Embedded source line numbers and some dropped
// lines are artifacts of that extraction.
1//===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
16#include "X86AsmParserCommon.h"
17#include "X86Operand.h"
18#include "llvm/ADT/STLExtras.h"
22#include "llvm/ADT/Twine.h"
23#include "llvm/MC/MCContext.h"
24#include "llvm/MC/MCExpr.h"
25#include "llvm/MC/MCInst.h"
26#include "llvm/MC/MCInstrInfo.h"
32#include "llvm/MC/MCSection.h"
33#include "llvm/MC/MCStreamer.h"
35#include "llvm/MC/MCSymbol.h"
41#include <algorithm>
42#include <memory>
43
44using namespace llvm;
45
47 "x86-experimental-lvi-inline-asm-hardening",
48 cl::desc("Harden inline assembly code that may be vulnerable to Load Value"
49 " Injection (LVI). This feature is experimental."), cl::Hidden);
50
51static bool checkScale(unsigned Scale, StringRef &ErrMsg) {
52 if (Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
53 ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
54 return true;
55 }
56 return false;
57}
58
59namespace {
60
// Operator precedence table for the infix calculator, indexed by
// InfixCalculatorTok (the comment on each entry names the indexing token, so
// the two orderings must stay in sync). Higher values bind more tightly.
static const char OpPrecedence[] = {
    0,  // IC_OR
    1,  // IC_XOR
    2,  // IC_AND
    4,  // IC_LSHIFT
    4,  // IC_RSHIFT
    5,  // IC_PLUS
    5,  // IC_MINUS
    6,  // IC_MULTIPLY
    6,  // IC_DIVIDE
    6,  // IC_MOD
    7,  // IC_NOT
    8,  // IC_NEG
    9,  // IC_RPAREN
    10, // IC_LPAREN
    0,  // IC_IMM
    0,  // IC_REGISTER
    3,  // IC_EQ
    3,  // IC_NE
    3,  // IC_LT
    3,  // IC_LE
    3,  // IC_GT
    3   // IC_GE
};
85
86class X86AsmParser : public MCTargetAsmParser {
87 ParseInstructionInfo *InstInfo;
88 bool Code16GCC;
89 unsigned ForcedDataPrefix = 0;
90
// Explicitly requested VEX/EVEX instruction encoding (stored in
// ForcedVEXEncoding). VEXEncoding_Default presumably means "no explicit
// override" — the consumers are outside this chunk; confirm against the
// match/emit code.
enum VEXEncoding {
  VEXEncoding_Default,
  VEXEncoding_VEX,
  VEXEncoding_VEX2,
  VEXEncoding_VEX3,
  VEXEncoding_EVEX,
};
98
99 VEXEncoding ForcedVEXEncoding = VEXEncoding_Default;
100
// Explicitly requested displacement size (stored in ForcedDispEncoding):
// 8-bit, 32-bit, or no override.
enum DispEncoding {
  DispEncoding_Default,
  DispEncoding_Disp8,
  DispEncoding_Disp32,
};
106
107 DispEncoding ForcedDispEncoding = DispEncoding_Default;
108
109private:
110 SMLoc consumeToken() {
111 MCAsmParser &Parser = getParser();
112 SMLoc Result = Parser.getTok().getLoc();
113 Parser.Lex();
114 return Result;
115 }
116
117 X86TargetStreamer &getTargetStreamer() {
118 assert(getParser().getStreamer().getTargetStreamer() &&
119 "do not have a target streamer");
121 return static_cast<X86TargetStreamer &>(TS);
122 }
123
124 unsigned MatchInstruction(const OperandVector &Operands, MCInst &Inst,
125 uint64_t &ErrorInfo, FeatureBitset &MissingFeatures,
126 bool matchingInlineAsm, unsigned VariantID = 0) {
127 // In Code16GCC mode, match as 32-bit.
128 if (Code16GCC)
129 SwitchMode(X86::Is32Bit);
130 unsigned rv = MatchInstructionImpl(Operands, Inst, ErrorInfo,
131 MissingFeatures, matchingInlineAsm,
132 VariantID);
133 if (Code16GCC)
134 SwitchMode(X86::Is16Bit);
135 return rv;
136 }
137
// Token kinds consumed by InfixCalculator. These enumerators index the
// OpPrecedence table above, so the two orderings must stay in sync.
enum InfixCalculatorTok {
  IC_OR = 0,
  IC_XOR,
  IC_AND,
  IC_LSHIFT,
  IC_RSHIFT,
  IC_PLUS,
  IC_MINUS,
  IC_MULTIPLY,
  IC_DIVIDE,
  IC_MOD,
  IC_NOT,
  IC_NEG,
  IC_RPAREN,
  IC_LPAREN,
  IC_IMM,      // immediate operand
  IC_REGISTER, // register operand
  IC_EQ,
  IC_NE,
  IC_LT,
  IC_LE,
  IC_GT,
  IC_GE
};
162
// Intel-syntax operator kinds (see IdentifyIntelInlineAsmOperator).
enum IntelOperatorKind {
  IOK_INVALID = 0,
  IOK_LENGTH,
  IOK_SIZE,
  IOK_TYPE,
};
169
// MASM operator kinds (see IdentifyMasmOperator).
enum MasmOperatorKind {
  MOK_INVALID = 0,
  MOK_LENGTHOF,
  MOK_SIZEOF,
  MOK_TYPE,
};
176
// Evaluates infix integer expressions via the shunting-yard scheme:
// pushOperand/pushOperator convert the infix token stream to postfix form
// (PostfixStack), and execute() then evaluates the postfix stream.
class InfixCalculator {
  // A postfix token: the kind plus its value (meaningful for IC_IMM).
  typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
  SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
  SmallVector<ICToken, 4> PostfixStack;

  // NEG and NOT are the only unary operators this calculator handles.
  bool isUnaryOperator(InfixCalculatorTok Op) const {
    return Op == IC_NEG || Op == IC_NOT;
  }

public:
  // Pop the most recently pushed operand value. If the top of the postfix
  // stack is not an operand, return -1, which is an invalid scale value and
  // is diagnosed later by checkScale.
  int64_t popOperand() {
    assert (!PostfixStack.empty() && "Poped an empty stack!");
    ICToken Op = PostfixStack.pop_back_val();
    if (!(Op.first == IC_IMM || Op.first == IC_REGISTER))
      return -1; // The invalid Scale value will be caught later by checkScale
    return Op.second;
  }
  // Push an operand (immediate with its value, or register placeholder)
  // directly onto the postfix stream.
  void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
    assert ((Op == IC_IMM || Op == IC_REGISTER) &&
            "Unexpected operand!");
    PostfixStack.push_back(std::make_pair(Op, Val));
  }

  void popOperator() { InfixOperatorStack.pop_back(); }
  // Shunting-yard operator handling: pop higher-or-equal-precedence
  // operators to the postfix stream before pushing the new operator.
  void pushOperator(InfixCalculatorTok Op) {
    // Push the new operator if the stack is empty.
    if (InfixOperatorStack.empty()) {
      InfixOperatorStack.push_back(Op);
      return;
    }

    // Push the new operator if it has a higher precedence than the operator
    // on the top of the stack or the operator on the top of the stack is a
    // left parentheses.
    unsigned Idx = InfixOperatorStack.size() - 1;
    InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
    if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
      InfixOperatorStack.push_back(Op);
      return;
    }

    // The operator on the top of the stack has higher precedence than the
    // new operator.
    unsigned ParenCount = 0;
    while (true) {
      // Nothing to process.
      if (InfixOperatorStack.empty())
        break;

      Idx = InfixOperatorStack.size() - 1;
      StackOp = InfixOperatorStack[Idx];
      if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
        break;

      // If we have an even parentheses count and we see a left parentheses,
      // then stop processing.
      if (!ParenCount && StackOp == IC_LPAREN)
        break;

      if (StackOp == IC_RPAREN) {
        ++ParenCount;
        InfixOperatorStack.pop_back();
      } else if (StackOp == IC_LPAREN) {
        --ParenCount;
        InfixOperatorStack.pop_back();
      } else {
        InfixOperatorStack.pop_back();
        PostfixStack.push_back(std::make_pair(StackOp, 0));
      }
    }
    // Push the new operator.
    InfixOperatorStack.push_back(Op);
  }

  // Flush remaining operators to the postfix stream and evaluate it with a
  // classic operand stack. Comparison operators yield MASM-style -1/0.
  int64_t execute() {
    // Push any remaining operators onto the postfix stack.
    while (!InfixOperatorStack.empty()) {
      InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
      if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
        PostfixStack.push_back(std::make_pair(StackOp, 0));
    }

    if (PostfixStack.empty())
      return 0;

    SmallVector<ICToken, 16> OperandStack;
    for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
      ICToken Op = PostfixStack[i];
      if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
        OperandStack.push_back(Op);
      } else if (isUnaryOperator(Op.first)) {
        assert (OperandStack.size() > 0 && "Too few operands.");
        ICToken Operand = OperandStack.pop_back_val();
        assert (Operand.first == IC_IMM &&
                "Unary operation with a register!");
        switch (Op.first) {
        default:
          report_fatal_error("Unexpected operator!");
          break;
        case IC_NEG:
          OperandStack.push_back(std::make_pair(IC_IMM, -Operand.second));
          break;
        case IC_NOT:
          OperandStack.push_back(std::make_pair(IC_IMM, ~Operand.second));
          break;
        }
      } else {
        assert (OperandStack.size() > 1 && "Too few operands.");
        int64_t Val;
        ICToken Op2 = OperandStack.pop_back_val();
        ICToken Op1 = OperandStack.pop_back_val();
        switch (Op.first) {
        default:
          report_fatal_error("Unexpected operator!");
          break;
        case IC_PLUS:
          Val = Op1.second + Op2.second;
          OperandStack.push_back(std::make_pair(IC_IMM, Val));
          break;
        case IC_MINUS:
          Val = Op1.second - Op2.second;
          OperandStack.push_back(std::make_pair(IC_IMM, Val));
          break;
        case IC_MULTIPLY:
          assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                  "Multiply operation with an immediate and a register!");
          Val = Op1.second * Op2.second;
          OperandStack.push_back(std::make_pair(IC_IMM, Val));
          break;
        case IC_DIVIDE:
          assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                  "Divide operation with an immediate and a register!");
          assert (Op2.second != 0 && "Division by zero!");
          Val = Op1.second / Op2.second;
          OperandStack.push_back(std::make_pair(IC_IMM, Val));
          break;
        case IC_MOD:
          assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                  "Modulo operation with an immediate and a register!");
          Val = Op1.second % Op2.second;
          OperandStack.push_back(std::make_pair(IC_IMM, Val));
          break;
        case IC_OR:
          assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                  "Or operation with an immediate and a register!");
          Val = Op1.second | Op2.second;
          OperandStack.push_back(std::make_pair(IC_IMM, Val));
          break;
        case IC_XOR:
          assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                 "Xor operation with an immediate and a register!");
          Val = Op1.second ^ Op2.second;
          OperandStack.push_back(std::make_pair(IC_IMM, Val));
          break;
        case IC_AND:
          assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                  "And operation with an immediate and a register!");
          Val = Op1.second & Op2.second;
          OperandStack.push_back(std::make_pair(IC_IMM, Val));
          break;
        case IC_LSHIFT:
          assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                  "Left shift operation with an immediate and a register!");
          Val = Op1.second << Op2.second;
          OperandStack.push_back(std::make_pair(IC_IMM, Val));
          break;
        case IC_RSHIFT:
          assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                  "Right shift operation with an immediate and a register!");
          Val = Op1.second >> Op2.second;
          OperandStack.push_back(std::make_pair(IC_IMM, Val));
          break;
        case IC_EQ:
          assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                 "Equals operation with an immediate and a register!");
          Val = (Op1.second == Op2.second) ? -1 : 0;
          OperandStack.push_back(std::make_pair(IC_IMM, Val));
          break;
        case IC_NE:
          assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                 "Not-equals operation with an immediate and a register!");
          Val = (Op1.second != Op2.second) ? -1 : 0;
          OperandStack.push_back(std::make_pair(IC_IMM, Val));
          break;
        case IC_LT:
          assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                 "Less-than operation with an immediate and a register!");
          Val = (Op1.second < Op2.second) ? -1 : 0;
          OperandStack.push_back(std::make_pair(IC_IMM, Val));
          break;
        case IC_LE:
          assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                 "Less-than-or-equal operation with an immediate and a "
                 "register!");
          Val = (Op1.second <= Op2.second) ? -1 : 0;
          OperandStack.push_back(std::make_pair(IC_IMM, Val));
          break;
        case IC_GT:
          assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                 "Greater-than operation with an immediate and a register!");
          Val = (Op1.second > Op2.second) ? -1 : 0;
          OperandStack.push_back(std::make_pair(IC_IMM, Val));
          break;
        case IC_GE:
          assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                 "Greater-than-or-equal operation with an immediate and a "
                 "register!");
          Val = (Op1.second >= Op2.second) ? -1 : 0;
          OperandStack.push_back(std::make_pair(IC_IMM, Val));
          break;
        }
      }
    }
    assert (OperandStack.size() == 1 && "Expected a single result.");
    return OperandStack.pop_back_val().second;
  }
};
394
// Parser states for IntelExprStateMachine: one state per operator just seen,
// plus operand states (REGISTER/INTEGER/IDENTIFIER), bracket/paren states,
// and the IES_ERROR sink state.
enum IntelExprState {
  IES_INIT,
  IES_OR,
  IES_XOR,
  IES_AND,
  IES_EQ,
  IES_NE,
  IES_LT,
  IES_LE,
  IES_GT,
  IES_GE,
  IES_LSHIFT,
  IES_RSHIFT,
  IES_PLUS,
  IES_MINUS,
  IES_OFFSET,
  IES_CAST,
  IES_NOT,
  IES_MULTIPLY,
  IES_DIVIDE,
  IES_MOD,
  IES_LBRAC,
  IES_RBRAC,
  IES_LPAREN,
  IES_RPAREN,
  IES_REGISTER,
  IES_INTEGER,
  IES_IDENTIFIER,
  IES_ERROR
};
425
426 class IntelExprStateMachine {
427 IntelExprState State = IES_INIT, PrevState = IES_ERROR;
428 unsigned BaseReg = 0, IndexReg = 0, TmpReg = 0, Scale = 0;
429 int64_t Imm = 0;
430 const MCExpr *Sym = nullptr;
431 StringRef SymName;
432 InfixCalculator IC;
434 short BracCount = 0;
435 bool MemExpr = false;
436 bool BracketUsed = false;
437 bool OffsetOperator = false;
438 bool AttachToOperandIdx = false;
439 bool IsPIC = false;
440 SMLoc OffsetOperatorLoc;
441 AsmTypeInfo CurType;
442
443 bool setSymRef(const MCExpr *Val, StringRef ID, StringRef &ErrMsg) {
444 if (Sym) {
445 ErrMsg = "cannot use more than one symbol in memory operand";
446 return true;
447 }
448 Sym = Val;
449 SymName = ID;
450 return false;
451 }
452
453 public:
454 IntelExprStateMachine() = default;
455
456 void addImm(int64_t imm) { Imm += imm; }
457 short getBracCount() const { return BracCount; }
458 bool isMemExpr() const { return MemExpr; }
459 bool isBracketUsed() const { return BracketUsed; }
460 bool isOffsetOperator() const { return OffsetOperator; }
461 SMLoc getOffsetLoc() const { return OffsetOperatorLoc; }
462 unsigned getBaseReg() const { return BaseReg; }
463 unsigned getIndexReg() const { return IndexReg; }
464 unsigned getScale() const { return Scale; }
465 const MCExpr *getSym() const { return Sym; }
466 StringRef getSymName() const { return SymName; }
467 StringRef getType() const { return CurType.Name; }
468 unsigned getSize() const { return CurType.Size; }
469 unsigned getElementSize() const { return CurType.ElementSize; }
470 unsigned getLength() const { return CurType.Length; }
471 int64_t getImm() { return Imm + IC.execute(); }
472 bool isValidEndState() const {
473 return State == IES_RBRAC || State == IES_INTEGER;
474 }
475
476 // Is the intel expression appended after an operand index.
477 // [OperandIdx][Intel Expression]
478 // This is neccessary for checking if it is an independent
479 // intel expression at back end when parse inline asm.
480 void setAppendAfterOperand() { AttachToOperandIdx = true; }
481
482 bool isPIC() const { return IsPIC; }
483 void setPIC() { IsPIC = true; }
484
485 bool hadError() const { return State == IES_ERROR; }
486 const InlineAsmIdentifierInfo &getIdentifierInfo() const { return Info; }
487
488 bool regsUseUpError(StringRef &ErrMsg) {
489 // This case mostly happen in inline asm, e.g. Arr[BaseReg + IndexReg]
490 // can not intruduce additional register in inline asm in PIC model.
491 if (IsPIC && AttachToOperandIdx)
492 ErrMsg = "Don't use 2 or more regs for mem offset in PIC model!";
493 else
494 ErrMsg = "BaseReg/IndexReg already set!";
495 return true;
496 }
497
498 void onOr() {
499 IntelExprState CurrState = State;
500 switch (State) {
501 default:
502 State = IES_ERROR;
503 break;
504 case IES_INTEGER:
505 case IES_RPAREN:
506 case IES_REGISTER:
507 State = IES_OR;
508 IC.pushOperator(IC_OR);
509 break;
510 }
511 PrevState = CurrState;
512 }
513 void onXor() {
514 IntelExprState CurrState = State;
515 switch (State) {
516 default:
517 State = IES_ERROR;
518 break;
519 case IES_INTEGER:
520 case IES_RPAREN:
521 case IES_REGISTER:
522 State = IES_XOR;
523 IC.pushOperator(IC_XOR);
524 break;
525 }
526 PrevState = CurrState;
527 }
528 void onAnd() {
529 IntelExprState CurrState = State;
530 switch (State) {
531 default:
532 State = IES_ERROR;
533 break;
534 case IES_INTEGER:
535 case IES_RPAREN:
536 case IES_REGISTER:
537 State = IES_AND;
538 IC.pushOperator(IC_AND);
539 break;
540 }
541 PrevState = CurrState;
542 }
543 void onEq() {
544 IntelExprState CurrState = State;
545 switch (State) {
546 default:
547 State = IES_ERROR;
548 break;
549 case IES_INTEGER:
550 case IES_RPAREN:
551 case IES_REGISTER:
552 State = IES_EQ;
553 IC.pushOperator(IC_EQ);
554 break;
555 }
556 PrevState = CurrState;
557 }
558 void onNE() {
559 IntelExprState CurrState = State;
560 switch (State) {
561 default:
562 State = IES_ERROR;
563 break;
564 case IES_INTEGER:
565 case IES_RPAREN:
566 case IES_REGISTER:
567 State = IES_NE;
568 IC.pushOperator(IC_NE);
569 break;
570 }
571 PrevState = CurrState;
572 }
573 void onLT() {
574 IntelExprState CurrState = State;
575 switch (State) {
576 default:
577 State = IES_ERROR;
578 break;
579 case IES_INTEGER:
580 case IES_RPAREN:
581 case IES_REGISTER:
582 State = IES_LT;
583 IC.pushOperator(IC_LT);
584 break;
585 }
586 PrevState = CurrState;
587 }
588 void onLE() {
589 IntelExprState CurrState = State;
590 switch (State) {
591 default:
592 State = IES_ERROR;
593 break;
594 case IES_INTEGER:
595 case IES_RPAREN:
596 case IES_REGISTER:
597 State = IES_LE;
598 IC.pushOperator(IC_LE);
599 break;
600 }
601 PrevState = CurrState;
602 }
603 void onGT() {
604 IntelExprState CurrState = State;
605 switch (State) {
606 default:
607 State = IES_ERROR;
608 break;
609 case IES_INTEGER:
610 case IES_RPAREN:
611 case IES_REGISTER:
612 State = IES_GT;
613 IC.pushOperator(IC_GT);
614 break;
615 }
616 PrevState = CurrState;
617 }
618 void onGE() {
619 IntelExprState CurrState = State;
620 switch (State) {
621 default:
622 State = IES_ERROR;
623 break;
624 case IES_INTEGER:
625 case IES_RPAREN:
626 case IES_REGISTER:
627 State = IES_GE;
628 IC.pushOperator(IC_GE);
629 break;
630 }
631 PrevState = CurrState;
632 }
633 void onLShift() {
634 IntelExprState CurrState = State;
635 switch (State) {
636 default:
637 State = IES_ERROR;
638 break;
639 case IES_INTEGER:
640 case IES_RPAREN:
641 case IES_REGISTER:
642 State = IES_LSHIFT;
643 IC.pushOperator(IC_LSHIFT);
644 break;
645 }
646 PrevState = CurrState;
647 }
648 void onRShift() {
649 IntelExprState CurrState = State;
650 switch (State) {
651 default:
652 State = IES_ERROR;
653 break;
654 case IES_INTEGER:
655 case IES_RPAREN:
656 case IES_REGISTER:
657 State = IES_RSHIFT;
658 IC.pushOperator(IC_RSHIFT);
659 break;
660 }
661 PrevState = CurrState;
662 }
663 bool onPlus(StringRef &ErrMsg) {
664 IntelExprState CurrState = State;
665 switch (State) {
666 default:
667 State = IES_ERROR;
668 break;
669 case IES_INTEGER:
670 case IES_RPAREN:
671 case IES_REGISTER:
672 case IES_OFFSET:
673 State = IES_PLUS;
674 IC.pushOperator(IC_PLUS);
675 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
676 // If we already have a BaseReg, then assume this is the IndexReg with
677 // no explicit scale.
678 if (!BaseReg) {
679 BaseReg = TmpReg;
680 } else {
681 if (IndexReg)
682 return regsUseUpError(ErrMsg);
683 IndexReg = TmpReg;
684 Scale = 0;
685 }
686 }
687 break;
688 }
689 PrevState = CurrState;
690 return false;
691 }
692 bool onMinus(StringRef &ErrMsg) {
693 IntelExprState CurrState = State;
694 switch (State) {
695 default:
696 State = IES_ERROR;
697 break;
698 case IES_OR:
699 case IES_XOR:
700 case IES_AND:
701 case IES_EQ:
702 case IES_NE:
703 case IES_LT:
704 case IES_LE:
705 case IES_GT:
706 case IES_GE:
707 case IES_LSHIFT:
708 case IES_RSHIFT:
709 case IES_PLUS:
710 case IES_NOT:
711 case IES_MULTIPLY:
712 case IES_DIVIDE:
713 case IES_MOD:
714 case IES_LPAREN:
715 case IES_RPAREN:
716 case IES_LBRAC:
717 case IES_RBRAC:
718 case IES_INTEGER:
719 case IES_REGISTER:
720 case IES_INIT:
721 case IES_OFFSET:
722 State = IES_MINUS;
723 // push minus operator if it is not a negate operator
724 if (CurrState == IES_REGISTER || CurrState == IES_RPAREN ||
725 CurrState == IES_INTEGER || CurrState == IES_RBRAC ||
726 CurrState == IES_OFFSET)
727 IC.pushOperator(IC_MINUS);
728 else if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
729 // We have negate operator for Scale: it's illegal
730 ErrMsg = "Scale can't be negative";
731 return true;
732 } else
733 IC.pushOperator(IC_NEG);
734 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
735 // If we already have a BaseReg, then assume this is the IndexReg with
736 // no explicit scale.
737 if (!BaseReg) {
738 BaseReg = TmpReg;
739 } else {
740 if (IndexReg)
741 return regsUseUpError(ErrMsg);
742 IndexReg = TmpReg;
743 Scale = 0;
744 }
745 }
746 break;
747 }
748 PrevState = CurrState;
749 return false;
750 }
751 void onNot() {
752 IntelExprState CurrState = State;
753 switch (State) {
754 default:
755 State = IES_ERROR;
756 break;
757 case IES_OR:
758 case IES_XOR:
759 case IES_AND:
760 case IES_EQ:
761 case IES_NE:
762 case IES_LT:
763 case IES_LE:
764 case IES_GT:
765 case IES_GE:
766 case IES_LSHIFT:
767 case IES_RSHIFT:
768 case IES_PLUS:
769 case IES_MINUS:
770 case IES_NOT:
771 case IES_MULTIPLY:
772 case IES_DIVIDE:
773 case IES_MOD:
774 case IES_LPAREN:
775 case IES_LBRAC:
776 case IES_INIT:
777 State = IES_NOT;
778 IC.pushOperator(IC_NOT);
779 break;
780 }
781 PrevState = CurrState;
782 }
783 bool onRegister(unsigned Reg, StringRef &ErrMsg) {
784 IntelExprState CurrState = State;
785 switch (State) {
786 default:
787 State = IES_ERROR;
788 break;
789 case IES_PLUS:
790 case IES_LPAREN:
791 case IES_LBRAC:
792 State = IES_REGISTER;
793 TmpReg = Reg;
794 IC.pushOperand(IC_REGISTER);
795 break;
796 case IES_MULTIPLY:
797 // Index Register - Scale * Register
798 if (PrevState == IES_INTEGER) {
799 if (IndexReg)
800 return regsUseUpError(ErrMsg);
801 State = IES_REGISTER;
802 IndexReg = Reg;
803 // Get the scale and replace the 'Scale * Register' with '0'.
804 Scale = IC.popOperand();
805 if (checkScale(Scale, ErrMsg))
806 return true;
807 IC.pushOperand(IC_IMM);
808 IC.popOperator();
809 } else {
810 State = IES_ERROR;
811 }
812 break;
813 }
814 PrevState = CurrState;
815 return false;
816 }
817 bool onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName,
818 const InlineAsmIdentifierInfo &IDInfo,
819 const AsmTypeInfo &Type, bool ParsingMSInlineAsm,
820 StringRef &ErrMsg) {
821 // InlineAsm: Treat an enum value as an integer
822 if (ParsingMSInlineAsm)
824 return onInteger(IDInfo.Enum.EnumVal, ErrMsg);
825 // Treat a symbolic constant like an integer
826 if (auto *CE = dyn_cast<MCConstantExpr>(SymRef))
827 return onInteger(CE->getValue(), ErrMsg);
828 PrevState = State;
829 switch (State) {
830 default:
831 State = IES_ERROR;
832 break;
833 case IES_CAST:
834 case IES_PLUS:
835 case IES_MINUS:
836 case IES_NOT:
837 case IES_INIT:
838 case IES_LBRAC:
839 case IES_LPAREN:
840 if (setSymRef(SymRef, SymRefName, ErrMsg))
841 return true;
842 MemExpr = true;
843 State = IES_INTEGER;
844 IC.pushOperand(IC_IMM);
845 if (ParsingMSInlineAsm)
846 Info = IDInfo;
847 setTypeInfo(Type);
848 break;
849 }
850 return false;
851 }
852 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
853 IntelExprState CurrState = State;
854 switch (State) {
855 default:
856 State = IES_ERROR;
857 break;
858 case IES_PLUS:
859 case IES_MINUS:
860 case IES_NOT:
861 case IES_OR:
862 case IES_XOR:
863 case IES_AND:
864 case IES_EQ:
865 case IES_NE:
866 case IES_LT:
867 case IES_LE:
868 case IES_GT:
869 case IES_GE:
870 case IES_LSHIFT:
871 case IES_RSHIFT:
872 case IES_DIVIDE:
873 case IES_MOD:
874 case IES_MULTIPLY:
875 case IES_LPAREN:
876 case IES_INIT:
877 case IES_LBRAC:
878 State = IES_INTEGER;
879 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
880 // Index Register - Register * Scale
881 if (IndexReg)
882 return regsUseUpError(ErrMsg);
883 IndexReg = TmpReg;
884 Scale = TmpInt;
885 if (checkScale(Scale, ErrMsg))
886 return true;
887 // Get the scale and replace the 'Register * Scale' with '0'.
888 IC.popOperator();
889 } else {
890 IC.pushOperand(IC_IMM, TmpInt);
891 }
892 break;
893 }
894 PrevState = CurrState;
895 return false;
896 }
897 void onStar() {
898 PrevState = State;
899 switch (State) {
900 default:
901 State = IES_ERROR;
902 break;
903 case IES_INTEGER:
904 case IES_REGISTER:
905 case IES_RPAREN:
906 State = IES_MULTIPLY;
907 IC.pushOperator(IC_MULTIPLY);
908 break;
909 }
910 }
911 void onDivide() {
912 PrevState = State;
913 switch (State) {
914 default:
915 State = IES_ERROR;
916 break;
917 case IES_INTEGER:
918 case IES_RPAREN:
919 State = IES_DIVIDE;
920 IC.pushOperator(IC_DIVIDE);
921 break;
922 }
923 }
924 void onMod() {
925 PrevState = State;
926 switch (State) {
927 default:
928 State = IES_ERROR;
929 break;
930 case IES_INTEGER:
931 case IES_RPAREN:
932 State = IES_MOD;
933 IC.pushOperator(IC_MOD);
934 break;
935 }
936 }
937 bool onLBrac() {
938 if (BracCount)
939 return true;
940 PrevState = State;
941 switch (State) {
942 default:
943 State = IES_ERROR;
944 break;
945 case IES_RBRAC:
946 case IES_INTEGER:
947 case IES_RPAREN:
948 State = IES_PLUS;
949 IC.pushOperator(IC_PLUS);
950 CurType.Length = 1;
951 CurType.Size = CurType.ElementSize;
952 break;
953 case IES_INIT:
954 case IES_CAST:
955 assert(!BracCount && "BracCount should be zero on parsing's start");
956 State = IES_LBRAC;
957 break;
958 }
959 MemExpr = true;
960 BracketUsed = true;
961 BracCount++;
962 return false;
963 }
964 bool onRBrac(StringRef &ErrMsg) {
965 IntelExprState CurrState = State;
966 switch (State) {
967 default:
968 State = IES_ERROR;
969 break;
970 case IES_INTEGER:
971 case IES_OFFSET:
972 case IES_REGISTER:
973 case IES_RPAREN:
974 if (BracCount-- != 1) {
975 ErrMsg = "unexpected bracket encountered";
976 return true;
977 }
978 State = IES_RBRAC;
979 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
980 // If we already have a BaseReg, then assume this is the IndexReg with
981 // no explicit scale.
982 if (!BaseReg) {
983 BaseReg = TmpReg;
984 } else {
985 if (IndexReg)
986 return regsUseUpError(ErrMsg);
987 IndexReg = TmpReg;
988 Scale = 0;
989 }
990 }
991 break;
992 }
993 PrevState = CurrState;
994 return false;
995 }
996 void onLParen() {
997 IntelExprState CurrState = State;
998 switch (State) {
999 default:
1000 State = IES_ERROR;
1001 break;
1002 case IES_PLUS:
1003 case IES_MINUS:
1004 case IES_NOT:
1005 case IES_OR:
1006 case IES_XOR:
1007 case IES_AND:
1008 case IES_EQ:
1009 case IES_NE:
1010 case IES_LT:
1011 case IES_LE:
1012 case IES_GT:
1013 case IES_GE:
1014 case IES_LSHIFT:
1015 case IES_RSHIFT:
1016 case IES_MULTIPLY:
1017 case IES_DIVIDE:
1018 case IES_MOD:
1019 case IES_LPAREN:
1020 case IES_INIT:
1021 case IES_LBRAC:
1022 State = IES_LPAREN;
1023 IC.pushOperator(IC_LPAREN);
1024 break;
1025 }
1026 PrevState = CurrState;
1027 }
1028 void onRParen() {
1029 PrevState = State;
1030 switch (State) {
1031 default:
1032 State = IES_ERROR;
1033 break;
1034 case IES_INTEGER:
1035 case IES_OFFSET:
1036 case IES_REGISTER:
1037 case IES_RBRAC:
1038 case IES_RPAREN:
1039 State = IES_RPAREN;
1040 IC.pushOperator(IC_RPAREN);
1041 break;
1042 }
1043 }
1044 bool onOffset(const MCExpr *Val, SMLoc OffsetLoc, StringRef ID,
1045 const InlineAsmIdentifierInfo &IDInfo,
1046 bool ParsingMSInlineAsm, StringRef &ErrMsg) {
1047 PrevState = State;
1048 switch (State) {
1049 default:
1050 ErrMsg = "unexpected offset operator expression";
1051 return true;
1052 case IES_PLUS:
1053 case IES_INIT:
1054 case IES_LBRAC:
1055 if (setSymRef(Val, ID, ErrMsg))
1056 return true;
1057 OffsetOperator = true;
1058 OffsetOperatorLoc = OffsetLoc;
1059 State = IES_OFFSET;
1060 // As we cannot yet resolve the actual value (offset), we retain
1061 // the requested semantics by pushing a '0' to the operands stack
1062 IC.pushOperand(IC_IMM);
1063 if (ParsingMSInlineAsm) {
1064 Info = IDInfo;
1065 }
1066 break;
1067 }
1068 return false;
1069 }
1070 void onCast(AsmTypeInfo Info) {
1071 PrevState = State;
1072 switch (State) {
1073 default:
1074 State = IES_ERROR;
1075 break;
1076 case IES_LPAREN:
1077 setTypeInfo(Info);
1078 State = IES_CAST;
1079 break;
1080 }
1081 }
1082 void setTypeInfo(AsmTypeInfo Type) { CurType = Type; }
1083 };
1084
1085 bool Error(SMLoc L, const Twine &Msg, SMRange Range = std::nullopt,
1086 bool MatchingInlineAsm = false) {
1087 MCAsmParser &Parser = getParser();
1088 if (MatchingInlineAsm) {
1089 if (!getLexer().isAtStartOfStatement())
1090 Parser.eatToEndOfStatement();
1091 return false;
1092 }
1093 return Parser.Error(L, Msg, Range);
1094 }
1095
1096 bool MatchRegisterByName(MCRegister &RegNo, StringRef RegName, SMLoc StartLoc,
1097 SMLoc EndLoc);
1098 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1099 bool RestoreOnFailure);
1100
1101 std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
1102 std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
1103 bool IsSIReg(unsigned Reg);
1104 unsigned GetSIDIForRegClass(unsigned RegClassID, unsigned Reg, bool IsSIReg);
1105 void
1106 AddDefaultSrcDestOperands(OperandVector &Operands,
1107 std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
1108 std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst);
1109 bool VerifyAndAdjustOperands(OperandVector &OrigOperands,
1110 OperandVector &FinalOperands);
1111 bool parseOperand(OperandVector &Operands, StringRef Name);
1112 bool parseATTOperand(OperandVector &Operands);
1113 bool parseIntelOperand(OperandVector &Operands, StringRef Name);
1114 bool ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID,
1116 bool ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End);
1117 unsigned IdentifyIntelInlineAsmOperator(StringRef Name);
1118 unsigned ParseIntelInlineAsmOperator(unsigned OpKind);
1119 unsigned IdentifyMasmOperator(StringRef Name);
1120 bool ParseMasmOperator(unsigned OpKind, int64_t &Val);
1121 bool ParseRoundingModeOp(SMLoc Start, OperandVector &Operands);
1122 bool ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM,
1123 bool &ParseError, SMLoc &End);
1124 bool ParseMasmNamedOperator(StringRef Name, IntelExprStateMachine &SM,
1125 bool &ParseError, SMLoc &End);
1126 void RewriteIntelExpression(IntelExprStateMachine &SM, SMLoc Start,
1127 SMLoc End);
1128 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
1129 bool ParseIntelInlineAsmIdentifier(const MCExpr *&Val, StringRef &Identifier,
1131 bool IsUnevaluatedOperand, SMLoc &End,
1132 bool IsParsingOffsetOperator = false);
1133 void tryParseOperandIdx(AsmToken::TokenKind PrevTK,
1134 IntelExprStateMachine &SM);
1135
1136 bool ParseMemOperand(unsigned SegReg, const MCExpr *Disp, SMLoc StartLoc,
1137 SMLoc EndLoc, OperandVector &Operands);
1138
1139 X86::CondCode ParseConditionCode(StringRef CCode);
1140
1141 bool ParseIntelMemoryOperandSize(unsigned &Size);
1142 bool CreateMemForMSInlineAsm(unsigned SegReg, const MCExpr *Disp,
1143 unsigned BaseReg, unsigned IndexReg,
1144 unsigned Scale, SMLoc Start, SMLoc End,
1145 unsigned Size, StringRef Identifier,
1146 const InlineAsmIdentifierInfo &Info,
1148
1149 bool parseDirectiveArch();
1150 bool parseDirectiveNops(SMLoc L);
1151 bool parseDirectiveEven(SMLoc L);
1152 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
1153
1154 /// CodeView FPO data directives.
1155 bool parseDirectiveFPOProc(SMLoc L);
1156 bool parseDirectiveFPOSetFrame(SMLoc L);
1157 bool parseDirectiveFPOPushReg(SMLoc L);
1158 bool parseDirectiveFPOStackAlloc(SMLoc L);
1159 bool parseDirectiveFPOStackAlign(SMLoc L);
1160 bool parseDirectiveFPOEndPrologue(SMLoc L);
1161 bool parseDirectiveFPOEndProc(SMLoc L);
1162
1163 /// SEH directives.
1164 bool parseSEHRegisterNumber(unsigned RegClassID, MCRegister &RegNo);
1165 bool parseDirectiveSEHPushReg(SMLoc);
1166 bool parseDirectiveSEHSetFrame(SMLoc);
1167 bool parseDirectiveSEHSaveReg(SMLoc);
1168 bool parseDirectiveSEHSaveXMM(SMLoc);
1169 bool parseDirectiveSEHPushFrame(SMLoc);
1170
1171 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1172
1173 bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
1174 bool processInstruction(MCInst &Inst, const OperandVector &Ops);
1175
1176 // Load Value Injection (LVI) Mitigations for machine code
1177 void emitWarningForSpecialLVIInstruction(SMLoc Loc);
1178 void applyLVICFIMitigation(MCInst &Inst, MCStreamer &Out);
1179 void applyLVILoadHardeningMitigation(MCInst &Inst, MCStreamer &Out);
1180
1181 /// Wrapper around MCStreamer::emitInstruction(). Possibly adds
1182 /// instrumentation around Inst.
1183 void emitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
1184
1185 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1188 bool MatchingInlineAsm) override;
1189
1190 void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
1191 MCStreamer &Out, bool MatchingInlineAsm);
1192
1193 bool ErrorMissingFeature(SMLoc IDLoc, const FeatureBitset &MissingFeatures,
1194 bool MatchingInlineAsm);
1195
1196 bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
1199 bool MatchingInlineAsm);
1200
1201 bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
1204 bool MatchingInlineAsm);
1205
1206 bool OmitRegisterFromClobberLists(unsigned RegNo) override;
1207
1208 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
1209 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
1210 /// return false if no parsing errors occurred, true otherwise.
1211 bool HandleAVX512Operand(OperandVector &Operands);
1212
1213 bool ParseZ(std::unique_ptr<X86Operand> &Z, const SMLoc &StartLoc);
1214
1215 bool is64BitMode() const {
1216 // FIXME: Can tablegen auto-generate this?
1217 return getSTI().hasFeature(X86::Is64Bit);
1218 }
1219 bool is32BitMode() const {
1220 // FIXME: Can tablegen auto-generate this?
1221 return getSTI().hasFeature(X86::Is32Bit);
1222 }
1223 bool is16BitMode() const {
1224 // FIXME: Can tablegen auto-generate this?
1225 return getSTI().hasFeature(X86::Is16Bit);
1226 }
  // Switch the parser to exactly one of the 16/32/64-bit modes; `mode` is one
  // of X86::Is16Bit / X86::Is32Bit / X86::Is64Bit.
  void SwitchMode(unsigned mode) {
    // Mutate a private copy of the subtarget info so other STI users are
    // unaffected.
    MCSubtargetInfo &STI = copySTI();
    FeatureBitset AllModes({X86::Is64Bit, X86::Is32Bit, X86::Is16Bit});
    FeatureBitset OldMode = STI.getFeatureBits() & AllModes;
    // Flipping `mode` in the old-mode mask yields a bitset that clears the
    // previous mode bit(s) and sets the requested one in a single toggle.
    FeatureBitset FB = ComputeAvailableFeatures(
        STI.ToggleFeature(OldMode.flip(mode)));
    // NOTE(review): FB is unused in the visible lines; a line appears to be
    // elided from this excerpt (presumably setAvailableFeatures(FB)) — verify
    // against the upstream file.

    // Exactly the requested mode bit must now be set.
    assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes));
  }
1237
1238 unsigned getPointerWidth() {
1239 if (is16BitMode()) return 16;
1240 if (is32BitMode()) return 32;
1241 if (is64BitMode()) return 64;
1242 llvm_unreachable("invalid mode");
1243 }
1244
1245 bool isParsingIntelSyntax() {
1246 return getParser().getAssemblerDialect();
1247 }
1248
1249 /// @name Auto-generated Matcher Functions
1250 /// {
1251
1252#define GET_ASSEMBLER_HEADER
1253#include "X86GenAsmMatcher.inc"
1254
1255 /// }
1256
1257public:
1258 enum X86MatchResultTy {
1259 Match_Unsupported = FIRST_TARGET_MATCH_RESULT_TY,
1260#define GET_OPERAND_DIAGNOSTIC_TYPES
1261#include "X86GenAsmMatcher.inc"
1262 };
1263
  // Construct the X86 target assembly parser for the given subtarget,
  // generic parser, instruction info, and target options.
  X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
               const MCInstrInfo &mii, const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, sti, mii), InstInfo(nullptr),
        Code16GCC(false) {

    // On x86, ".word" emits 2-byte values.
    Parser.addAliasForDirective(".word", ".2byte");

    // Initialize the set of available features.
    setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
  }
1274
1275 bool parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
1276 SMLoc &EndLoc) override;
1278 SMLoc &EndLoc) override;
1279
1280 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1281
1283 SMLoc NameLoc, OperandVector &Operands) override;
1284
1285 bool ParseDirective(AsmToken DirectiveID) override;
1286};
1287} // end anonymous namespace
1288
1289#define GET_REGISTER_MATCHER
1290#define GET_SUBTARGET_FEATURE_NAME
1291#include "X86GenAsmMatcher.inc"
1292
1293static bool CheckBaseRegAndIndexRegAndScale(unsigned BaseReg, unsigned IndexReg,
1294 unsigned Scale, bool Is64BitMode,
1295 StringRef &ErrMsg) {
1296 // If we have both a base register and an index register make sure they are
1297 // both 64-bit or 32-bit registers.
1298 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
1299
1300 if (BaseReg != 0 &&
1301 !(BaseReg == X86::RIP || BaseReg == X86::EIP ||
1302 X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) ||
1303 X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) ||
1304 X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg))) {
1305 ErrMsg = "invalid base+index expression";
1306 return true;
1307 }
1308
1309 if (IndexReg != 0 &&
1310 !(IndexReg == X86::EIZ || IndexReg == X86::RIZ ||
1311 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1312 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
1313 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
1314 X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) ||
1315 X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) ||
1316 X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg))) {
1317 ErrMsg = "invalid base+index expression";
1318 return true;
1319 }
1320
1321 if (((BaseReg == X86::RIP || BaseReg == X86::EIP) && IndexReg != 0) ||
1322 IndexReg == X86::EIP || IndexReg == X86::RIP ||
1323 IndexReg == X86::ESP || IndexReg == X86::RSP) {
1324 ErrMsg = "invalid base+index expression";
1325 return true;
1326 }
1327
1328 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
1329 // and then only in non-64-bit modes.
1330 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1331 (Is64BitMode || (BaseReg != X86::BX && BaseReg != X86::BP &&
1332 BaseReg != X86::SI && BaseReg != X86::DI))) {
1333 ErrMsg = "invalid 16-bit base register";
1334 return true;
1335 }
1336
1337 if (BaseReg == 0 &&
1338 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
1339 ErrMsg = "16-bit memory operand may not include only index register";
1340 return true;
1341 }
1342
1343 if (BaseReg != 0 && IndexReg != 0) {
1344 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
1345 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1346 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
1347 IndexReg == X86::EIZ)) {
1348 ErrMsg = "base register is 64-bit, but index register is not";
1349 return true;
1350 }
1351 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
1352 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1353 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
1354 IndexReg == X86::RIZ)) {
1355 ErrMsg = "base register is 32-bit, but index register is not";
1356 return true;
1357 }
1358 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
1359 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
1360 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
1361 ErrMsg = "base register is 16-bit, but index register is not";
1362 return true;
1363 }
1364 if ((BaseReg != X86::BX && BaseReg != X86::BP) ||
1365 (IndexReg != X86::SI && IndexReg != X86::DI)) {
1366 ErrMsg = "invalid 16-bit base/index register combination";
1367 return true;
1368 }
1369 }
1370 }
1371
1372 // RIP/EIP-relative addressing is only supported in 64-bit mode.
1373 if (!Is64BitMode && BaseReg != 0 &&
1374 (BaseReg == X86::RIP || BaseReg == X86::EIP)) {
1375 ErrMsg = "IP-relative addressing requires 64-bit mode";
1376 return true;
1377 }
1378
1379 return checkScale(Scale, ErrMsg);
1380}
1381
// Resolve a textual register name (with or without a leading '%') to an
// MCRegister. Returns true on error; in Intel syntax an unknown name is left
// undiagnosed so the caller can retry the token as an ordinary identifier.
bool X86AsmParser::MatchRegisterByName(MCRegister &RegNo, StringRef RegName,
                                       SMLoc StartLoc, SMLoc EndLoc) {
  // If we encounter a %, ignore it. This code handles registers with and
  // without the prefix, unprefixed registers can occur in cfi directives.
  RegName.consume_front("%");

  RegNo = MatchRegisterName(RegName);

  // If the match failed, try the register name as lowercase.
  if (RegNo == 0)
    RegNo = MatchRegisterName(RegName.lower());

  // The "flags" and "mxcsr" registers cannot be referenced directly.
  // Treat it as an identifier instead.
  if (isParsingMSInlineAsm() && isParsingIntelSyntax() &&
      (RegNo == X86::EFLAGS || RegNo == X86::MXCSR))
    RegNo = 0;

  if (!is64BitMode()) {
    // FIXME: This should be done using Requires<Not64BitMode> and
    // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
    // checked.
    if (RegNo == X86::RIZ || RegNo == X86::RIP ||
        X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
        // NOTE(review): additional 64-bit-only register predicates appear to
        // be elided from this excerpt — verify against the upstream file.
      return Error(StartLoc,
                   "register %" + RegName + " is only available in 64-bit mode",
                   SMRange(StartLoc, EndLoc));
    }
  }

  // If this is "db[0-15]", match it as an alias
  // for dr[0-15].
  if (RegNo == 0 && RegName.startswith("db")) {
    if (RegName.size() == 3) {
      // "db0".."db9"
      switch (RegName[2]) {
      case '0':
        RegNo = X86::DR0;
        break;
      case '1':
        RegNo = X86::DR1;
        break;
      case '2':
        RegNo = X86::DR2;
        break;
      case '3':
        RegNo = X86::DR3;
        break;
      case '4':
        RegNo = X86::DR4;
        break;
      case '5':
        RegNo = X86::DR5;
        break;
      case '6':
        RegNo = X86::DR6;
        break;
      case '7':
        RegNo = X86::DR7;
        break;
      case '8':
        RegNo = X86::DR8;
        break;
      case '9':
        RegNo = X86::DR9;
        break;
      }
    } else if (RegName.size() == 4 && RegName[2] == '1') {
      // "db10".."db15"
      switch (RegName[3]) {
      case '0':
        RegNo = X86::DR10;
        break;
      case '1':
        RegNo = X86::DR11;
        break;
      case '2':
        RegNo = X86::DR12;
        break;
      case '3':
        RegNo = X86::DR13;
        break;
      case '4':
        RegNo = X86::DR14;
        break;
      case '5':
        RegNo = X86::DR15;
        break;
      }
    }
  }

  if (RegNo == 0) {
    // In Intel syntax, fail silently so the token can be re-parsed as an
    // identifier.
    if (isParsingIntelSyntax())
      return true;
    return Error(StartLoc, "invalid register name", SMRange(StartLoc, EndLoc));
  }
  return false;
}
1481
// Parse a register operand from the token stream, including the multi-token
// "%st(N)" FPU stack form. When RestoreOnFailure is set, all tokens consumed
// before a failure are pushed back onto the lexer so the caller can retry.
bool X86AsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
                                 SMLoc &EndLoc, bool RestoreOnFailure) {
  MCAsmParser &Parser = getParser();
  MCAsmLexer &Lexer = getLexer();
  RegNo = 0;

  // NOTE(review): the declaration of `Tokens` (the consumed-token buffer the
  // lambda below unwinds) is elided from this excerpt.
  auto OnFailure = [RestoreOnFailure, &Lexer, &Tokens]() {
    if (RestoreOnFailure) {
      // Push the consumed tokens back in reverse so the stream is restored.
      while (!Tokens.empty()) {
        Lexer.UnLex(Tokens.pop_back_val());
      }
    }
  };

  const AsmToken &PercentTok = Parser.getTok();
  StartLoc = PercentTok.getLoc();

  // If we encounter a %, ignore it. This code handles registers with and
  // without the prefix, unprefixed registers can occur in cfi directives.
  if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent)) {
    Tokens.push_back(PercentTok);
    Parser.Lex(); // Eat percent token.
  }

  const AsmToken &Tok = Parser.getTok();
  EndLoc = Tok.getEndLoc();

  if (Tok.isNot(AsmToken::Identifier)) {
    OnFailure();
    // Intel syntax fails silently; the token may still be an identifier.
    if (isParsingIntelSyntax()) return true;
    return Error(StartLoc, "invalid register name",
                 SMRange(StartLoc, EndLoc));
  }

  if (MatchRegisterByName(RegNo, Tok.getString(), StartLoc, EndLoc)) {
    OnFailure();
    return true;
  }

  // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
  if (RegNo == X86::ST0) {
    Tokens.push_back(Tok);
    Parser.Lex(); // Eat 'st'

    // Check to see if we have '(4)' after %st.
    if (Lexer.isNot(AsmToken::LParen))
      return false;
    // Lex the paren.
    Tokens.push_back(Parser.getTok());
    Parser.Lex();

    const AsmToken &IntTok = Parser.getTok();
    if (IntTok.isNot(AsmToken::Integer)) {
      OnFailure();
      return Error(IntTok.getLoc(), "expected stack index");
    }
    // Map the literal stack index onto the corresponding ST register.
    switch (IntTok.getIntVal()) {
    case 0: RegNo = X86::ST0; break;
    case 1: RegNo = X86::ST1; break;
    case 2: RegNo = X86::ST2; break;
    case 3: RegNo = X86::ST3; break;
    case 4: RegNo = X86::ST4; break;
    case 5: RegNo = X86::ST5; break;
    case 6: RegNo = X86::ST6; break;
    case 7: RegNo = X86::ST7; break;
    default:
      OnFailure();
      return Error(IntTok.getLoc(), "invalid stack index");
    }

    // Lex IntTok
    Tokens.push_back(IntTok);
    Parser.Lex();
    if (Lexer.isNot(AsmToken::RParen)) {
      OnFailure();
      return Error(Parser.getTok().getLoc(), "expected ')'");
    }

    EndLoc = Parser.getTok().getEndLoc();
    Parser.Lex(); // Eat ')'
    return false;
  }

  EndLoc = Parser.getTok().getEndLoc();

  if (RegNo == 0) {
    OnFailure();
    if (isParsingIntelSyntax()) return true;
    return Error(StartLoc, "invalid register name",
                 SMRange(StartLoc, EndLoc));
  }

  Parser.Lex(); // Eat identifier token.
  return false;
}
1578
1579bool X86AsmParser::parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
1580 SMLoc &EndLoc) {
1581 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
1582}
1583
// Speculative register parse: on failure the token stream is restored and any
// pending diagnostics are cleared, so other operand forms can be attempted.
OperandMatchResultTy X86AsmParser::tryParseRegister(MCRegister &RegNo,
                                                    SMLoc &StartLoc,
                                                    SMLoc &EndLoc) {
  bool Result =
      ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
  bool PendingErrors = getParser().hasPendingError();
  getParser().clearPendingErrors();
  if (PendingErrors)
    // NOTE(review): the statement for this branch is elided from this excerpt
    // — verify against the upstream file.
  if (Result)
    return MatchOperand_NoMatch;
  return MatchOperand_Success;
}
1597
1598std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
1599 bool Parse32 = is32BitMode() || Code16GCC;
1600 unsigned Basereg = is64BitMode() ? X86::RSI : (Parse32 ? X86::ESI : X86::SI);
1601 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1602 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1603 /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
1604 Loc, Loc, 0);
1605}
1606
1607std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
1608 bool Parse32 = is32BitMode() || Code16GCC;
1609 unsigned Basereg = is64BitMode() ? X86::RDI : (Parse32 ? X86::EDI : X86::DI);
1610 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1611 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1612 /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
1613 Loc, Loc, 0);
1614}
1615
1616bool X86AsmParser::IsSIReg(unsigned Reg) {
1617 switch (Reg) {
1618 default: llvm_unreachable("Only (R|E)SI and (R|E)DI are expected!");
1619 case X86::RSI:
1620 case X86::ESI:
1621 case X86::SI:
1622 return true;
1623 case X86::RDI:
1624 case X86::EDI:
1625 case X86::DI:
1626 return false;
1627 }
1628}
1629
1630unsigned X86AsmParser::GetSIDIForRegClass(unsigned RegClassID, unsigned Reg,
1631 bool IsSIReg) {
1632 switch (RegClassID) {
1633 default: llvm_unreachable("Unexpected register class");
1634 case X86::GR64RegClassID:
1635 return IsSIReg ? X86::RSI : X86::RDI;
1636 case X86::GR32RegClassID:
1637 return IsSIReg ? X86::ESI : X86::EDI;
1638 case X86::GR16RegClassID:
1639 return IsSIReg ? X86::SI : X86::DI;
1640 }
1641}
1642
1643void X86AsmParser::AddDefaultSrcDestOperands(
1644 OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
1645 std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst) {
1646 if (isParsingIntelSyntax()) {
1647 Operands.push_back(std::move(Dst));
1648 Operands.push_back(std::move(Src));
1649 }
1650 else {
1651 Operands.push_back(std::move(Src));
1652 Operands.push_back(std::move(Dst));
1653 }
1654}
1655
// Reconcile explicitly written string-instruction operands (OrigOperands,
// which also holds the mnemonic at index 0) with the default-generated ones
// (FinalOperands), warning when the written memory operand only contributes
// size/segment information. On success, FinalOperands replaces the originals.
bool X86AsmParser::VerifyAndAdjustOperands(OperandVector &OrigOperands,
                                           OperandVector &FinalOperands) {

  if (OrigOperands.size() > 1) {
    // Check if sizes match, OrigOperands also contains the instruction name
    assert(OrigOperands.size() == FinalOperands.size() + 1 &&
           "Operand size mismatch");

    // NOTE(review): the declaration of `Warnings` (deferred warning list) is
    // elided from this excerpt.
    // Verify types match
    int RegClassID = -1;
    for (unsigned int i = 0; i < FinalOperands.size(); ++i) {
      X86Operand &OrigOp = static_cast<X86Operand &>(*OrigOperands[i + 1]);
      X86Operand &FinalOp = static_cast<X86Operand &>(*FinalOperands[i]);

      if (FinalOp.isReg() &&
          (!OrigOp.isReg() || FinalOp.getReg() != OrigOp.getReg()))
        // Return false and let a normal complaint about bogus operands happen
        return false;

      if (FinalOp.isMem()) {

        if (!OrigOp.isMem())
          // Return false and let a normal complaint about bogus operands happen
          return false;

        unsigned OrigReg = OrigOp.Mem.BaseReg;
        unsigned FinalReg = FinalOp.Mem.BaseReg;

        // If we've already encountered a register class, make sure all register
        // bases are of the same register class
        if (RegClassID != -1 &&
            !X86MCRegisterClasses[RegClassID].contains(OrigReg)) {
          return Error(OrigOp.getStartLoc(),
                       "mismatching source and destination index registers");
        }

        if (X86MCRegisterClasses[X86::GR64RegClassID].contains(OrigReg))
          RegClassID = X86::GR64RegClassID;
        else if (X86MCRegisterClasses[X86::GR32RegClassID].contains(OrigReg))
          RegClassID = X86::GR32RegClassID;
        else if (X86MCRegisterClasses[X86::GR16RegClassID].contains(OrigReg))
          RegClassID = X86::GR16RegClassID;
        else
          // Unexpected register class type
          // Return false and let a normal complaint about bogus operands happen
          return false;

        // Canonicalize the written base onto the SI/DI member of its class.
        bool IsSI = IsSIReg(FinalReg);
        FinalReg = GetSIDIForRegClass(RegClassID, FinalReg, IsSI);

        if (FinalReg != OrigReg) {
          std::string RegName = IsSI ? "ES:(R|E)SI" : "ES:(R|E)DI";
          Warnings.push_back(std::make_pair(
              OrigOp.getStartLoc(),
              "memory operand is only for determining the size, " + RegName +
                  " will be used for the location"));
        }

        // Propagate size and segment from the written operand.
        FinalOp.Mem.Size = OrigOp.Mem.Size;
        FinalOp.Mem.SegReg = OrigOp.Mem.SegReg;
        FinalOp.Mem.BaseReg = FinalReg;
      }
    }

    // Produce warnings only if all the operands passed the adjustment - prevent
    // legal cases like "movsd (%rax), %xmm0" mistakenly produce warnings
    for (auto &WarningMsg : Warnings) {
      Warning(WarningMsg.first, WarningMsg.second);
    }

    // Remove old operands
    for (unsigned int i = 0; i < FinalOperands.size(); ++i)
      OrigOperands.pop_back();
  }
  // OrigOperands.append(FinalOperands.begin(), FinalOperands.end());
  for (unsigned int i = 0; i < FinalOperands.size(); ++i)
    OrigOperands.push_back(std::move(FinalOperands[i]));

  return false;
}
1737
1738bool X86AsmParser::parseOperand(OperandVector &Operands, StringRef Name) {
1739 if (isParsingIntelSyntax())
1740 return parseIntelOperand(Operands, Name);
1741
1742 return parseATTOperand(Operands);
1743}
1744
// Create the memory operand form appropriate for an MS inline-asm identifier:
// a label reference, a global lvalue with registers, or a symbol/offset with a
// placeholder base so matching works before the address is known.
bool X86AsmParser::CreateMemForMSInlineAsm(
    unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
    unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
    // NOTE(review): the remainder of this signature is elided from this
    // excerpt — verify against the upstream file.
  // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
  // some other label reference.
  // NOTE(review): the guard for this branch is elided from this excerpt.
    // Create an absolute memory reference in order to match against
    // instructions taking a PC relative operand.
    Operands.push_back(X86Operand::CreateMem(getPointerWidth(), Disp, Start,
                                             End, Size, Identifier,
                                             Info.Label.Decl));
    return false;
  }
  // We either have a direct symbol reference, or an offset from a symbol. The
  // parser always puts the symbol on the LHS, so look there for size
  // calculation purposes.
  unsigned FrontendSize = 0;
  void *Decl = nullptr;
  bool IsGlobalLV = false;
  // NOTE(review): the guard for this branch is elided from this excerpt.
    // Size is in terms of bits in this context.
    FrontendSize = Info.Var.Type * 8;
    Decl = Info.Var.Decl;
    IsGlobalLV = Info.Var.IsGlobalLV;
  }
  // It is widely common for MS InlineAsm to use a global variable and one/two
  // registers in a memory expression, even though inaccessible via rip/eip.
  if (IsGlobalLV && (BaseReg || IndexReg)) {
    Operands.push_back(X86Operand::CreateMem(getPointerWidth(), Disp, Start,
                                             End, Size, Identifier, Decl, 0,
                                             BaseReg && IndexReg));
    return false;
  }
  // Otherwise, we set the base register to a non-zero value
  // if we don't know the actual value at this time. This is necessary to
  // get the matching correct in some cases.
  BaseReg = BaseReg ? BaseReg : 1;
  // NOTE(review): the Operands.push_back(X86Operand::CreateMem( line is
  // elided from this excerpt.
      getPointerWidth(), SegReg, Disp, BaseReg, IndexReg, Scale, Start, End,
      Size,
      /*DefaultBaseReg=*/X86::RIP, Identifier, Decl, FrontendSize));
  return false;
}
1789
// Some binary bitwise operators have named synonyms.
// Query a candidate string for being such a named operator
// and, if so, invoke the appropriate handler.
// Try to treat Name as a named operator ("not", "or", "shl", "offset", ...)
// and feed the corresponding event into the expression state machine.
// Returns true iff Name was consumed as an operator; ParseError reports
// whether handling it failed (set only on the "offset" path here).
bool X86AsmParser::ParseIntelNamedOperator(StringRef Name,
                                           IntelExprStateMachine &SM,
                                           bool &ParseError, SMLoc &End) {
  // A named operator should be either lower or upper case, but not a mix...
  // except in MASM, which uses full case-insensitivity.
  if (Name.compare(Name.lower()) && Name.compare(Name.upper()) &&
      !getParser().isParsingMasm())
    return false;
  if (Name.equals_insensitive("not")) {
    SM.onNot();
  } else if (Name.equals_insensitive("or")) {
    SM.onOr();
  } else if (Name.equals_insensitive("shl")) {
    SM.onLShift();
  } else if (Name.equals_insensitive("shr")) {
    SM.onRShift();
  } else if (Name.equals_insensitive("xor")) {
    SM.onXor();
  } else if (Name.equals_insensitive("and")) {
    SM.onAnd();
  } else if (Name.equals_insensitive("mod")) {
    SM.onMod();
  } else if (Name.equals_insensitive("offset")) {
    SMLoc OffsetLoc = getTok().getLoc();
    const MCExpr *Val = nullptr;
    StringRef ID;
    // NOTE(review): the declaration of `Info` used below is elided from this
    // excerpt.
    ParseError = ParseIntelOffsetOperator(Val, ID, Info, End);
    if (ParseError)
      return true;
    StringRef ErrMsg;
    ParseError =
        SM.onOffset(Val, OffsetLoc, ID, Info, isParsingMSInlineAsm(), ErrMsg);
    if (ParseError)
      return Error(SMLoc::getFromPointer(Name.data()), ErrMsg);
  } else {
    return false;
  }
  // "offset" consumes its own tokens; everything else consumes the operator
  // token here.
  if (!Name.equals_insensitive("offset"))
    End = consumeToken();
  return true;
}
1835bool X86AsmParser::ParseMasmNamedOperator(StringRef Name,
1836 IntelExprStateMachine &SM,
1837 bool &ParseError, SMLoc &End) {
1838 if (Name.equals_insensitive("eq")) {
1839 SM.onEq();
1840 } else if (Name.equals_insensitive("ne")) {
1841 SM.onNE();
1842 } else if (Name.equals_insensitive("lt")) {
1843 SM.onLT();
1844 } else if (Name.equals_insensitive("le")) {
1845 SM.onLE();
1846 } else if (Name.equals_insensitive("gt")) {
1847 SM.onGT();
1848 } else if (Name.equals_insensitive("ge")) {
1849 SM.onGE();
1850 } else {
1851 return false;
1852 }
1853 End = consumeToken();
1854 return true;
1855}
1856
// Check whether the current Intel expression is appended after an operand,
// e.g.: [Operand][Intel Expression]
1859void X86AsmParser::tryParseOperandIdx(AsmToken::TokenKind PrevTK,
1860 IntelExprStateMachine &SM) {
1861 if (PrevTK != AsmToken::RBrac)
1862 return;
1863
1864 SM.setAppendAfterOperand();
1865}
1866
// Drive the Intel-syntax expression state machine over the token stream,
// translating each token (integers, registers, identifiers, operators,
// brackets) into SM events until a valid end state is reached or an error is
// diagnosed. Returns true on error.
// NOTE(review): several local declaration lines (e.g. PrevTK, Reg, Info) are
// elided from this excerpt — verify against the upstream file.
bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
  MCAsmParser &Parser = getParser();
  StringRef ErrMsg;


  if (getContext().getObjectFileInfo()->isPositionIndependent())
    SM.setPIC();

  bool Done = false;
  while (!Done) {
    // Get a fresh reference on each loop iteration in case the previous
    // iteration moved the token storage during UnLex().
    const AsmToken &Tok = Parser.getTok();

    bool UpdateLocLex = true;
    AsmToken::TokenKind TK = getLexer().getKind();

    switch (TK) {
    default:
      if ((Done = SM.isValidEndState()))
        break;
      return Error(Tok.getLoc(), "unknown token in expression");
    case AsmToken::Error:
      return Error(getLexer().getErrLoc(), getLexer().getErr());
      break;
      Done = true;
      break;
    case AsmToken::Real:
      // DotOperator: [ebx].0
      UpdateLocLex = false;
      if (ParseIntelDotOperator(SM, End))
        return true;
      break;
    case AsmToken::Dot:
      if (!Parser.isParsingMasm()) {
        if ((Done = SM.isValidEndState()))
          break;
        return Error(Tok.getLoc(), "unknown token in expression");
      }
      // MASM allows spaces around the dot operator (e.g., "var . x")
      Lex();
      UpdateLocLex = false;
      if (ParseIntelDotOperator(SM, End))
        return true;
      break;
    case AsmToken::Dollar:
      if (!Parser.isParsingMasm()) {
        if ((Done = SM.isValidEndState()))
          break;
        return Error(Tok.getLoc(), "unknown token in expression");
      }
      [[fallthrough]];
    case AsmToken::String: {
      if (Parser.isParsingMasm()) {
        // MASM parsers handle strings in expressions as constants.
        SMLoc ValueLoc = Tok.getLoc();
        int64_t Res;
        const MCExpr *Val;
        if (Parser.parsePrimaryExpr(Val, End, nullptr))
          return true;
        UpdateLocLex = false;
        if (!Val->evaluateAsAbsolute(Res, getStreamer().getAssemblerPtr()))
          return Error(ValueLoc, "expected absolute value");
        if (SM.onInteger(Res, ErrMsg))
          return Error(ValueLoc, ErrMsg);
        break;
      }
      [[fallthrough]];
    }
    case AsmToken::At:
    case AsmToken::Identifier: {
      SMLoc IdentLoc = Tok.getLoc();
      UpdateLocLex = false;
      if (Parser.isParsingMasm()) {
        // Split "a.b" spelled as one identifier back into separate tokens so
        // the dot operator path below can handle it.
        size_t DotOffset = Identifier.find_first_of('.');
        if (DotOffset != StringRef::npos) {
          consumeToken();
          StringRef LHS = Identifier.slice(0, DotOffset);
          StringRef Dot = Identifier.slice(DotOffset, DotOffset + 1);
          StringRef RHS = Identifier.slice(DotOffset + 1, StringRef::npos);
          if (!RHS.empty()) {
            getLexer().UnLex(AsmToken(AsmToken::Identifier, RHS));
          }
          getLexer().UnLex(AsmToken(AsmToken::Dot, Dot));
          if (!LHS.empty()) {
            getLexer().UnLex(AsmToken(AsmToken::Identifier, LHS));
          }
          break;
        }
      }
      // (MASM only) <TYPE> PTR operator
      if (Parser.isParsingMasm()) {
        const AsmToken &NextTok = getLexer().peekTok();
        if (NextTok.is(AsmToken::Identifier) &&
            NextTok.getIdentifier().equals_insensitive("ptr")) {
          if (Parser.lookUpType(Identifier, Info))
            return Error(Tok.getLoc(), "unknown type");
          SM.onCast(Info);
          // Eat type and PTR.
          consumeToken();
          End = consumeToken();
          break;
        }
      }
      // Register, or (MASM only) <register>.<field>
      if (Tok.is(AsmToken::Identifier)) {
        if (!ParseRegister(Reg, IdentLoc, End, /*RestoreOnFailure=*/true)) {
          if (SM.onRegister(Reg, ErrMsg))
            return Error(IdentLoc, ErrMsg);
          break;
        }
        if (Parser.isParsingMasm()) {
          const std::pair<StringRef, StringRef> IDField =
              Tok.getString().split('.');
          const StringRef ID = IDField.first, Field = IDField.second;
          SMLoc IDEndLoc = SMLoc::getFromPointer(ID.data() + ID.size());
          if (!Field.empty() &&
              !MatchRegisterByName(Reg, ID, IdentLoc, IDEndLoc)) {
            if (SM.onRegister(Reg, ErrMsg))
              return Error(IdentLoc, ErrMsg);

            // Register field access: emit the field offset as an addend.
            SMLoc FieldStartLoc = SMLoc::getFromPointer(Field.data());
            if (Parser.lookUpField(Field, Info))
              return Error(FieldStartLoc, "unknown offset");
            else if (SM.onPlus(ErrMsg))
              return Error(getTok().getLoc(), ErrMsg);
            else if (SM.onInteger(Info.Offset, ErrMsg))
              return Error(IdentLoc, ErrMsg);
            SM.setTypeInfo(Info.Type);

            End = consumeToken();
            break;
          }
        }
      }
      // Operator synonymous ("not", "or" etc.)
      bool ParseError = false;
      if (ParseIntelNamedOperator(Identifier, SM, ParseError, End)) {
        if (ParseError)
          return true;
        break;
      }
      if (Parser.isParsingMasm() &&
          ParseMasmNamedOperator(Identifier, SM, ParseError, End)) {
        if (ParseError)
          return true;
        break;
      }
      // Symbol reference, when parsing assembly content
      AsmFieldInfo FieldInfo;
      const MCExpr *Val;
      if (isParsingMSInlineAsm() || Parser.isParsingMasm()) {
        // MS Dot Operator expression
        if (Identifier.count('.') &&
            (PrevTK == AsmToken::RBrac || PrevTK == AsmToken::RParen)) {
          if (ParseIntelDotOperator(SM, End))
            return true;
          break;
        }
      }
      if (isParsingMSInlineAsm()) {
        // MS InlineAsm operators (TYPE/LENGTH/SIZE)
        if (unsigned OpKind = IdentifyIntelInlineAsmOperator(Identifier)) {
          if (int64_t Val = ParseIntelInlineAsmOperator(OpKind)) {
            if (SM.onInteger(Val, ErrMsg))
              return Error(IdentLoc, ErrMsg);
          } else {
            return true;
          }
          break;
        }
        // MS InlineAsm identifier
        // Call parseIdentifier() to combine @ with the identifier behind it.
        if (TK == AsmToken::At && Parser.parseIdentifier(Identifier))
          return Error(IdentLoc, "expected identifier");
        if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info, false, End))
          return true;
        else if (SM.onIdentifierExpr(Val, Identifier, Info, FieldInfo.Type,
                                     true, ErrMsg))
          return Error(IdentLoc, ErrMsg);
        break;
      }
      if (Parser.isParsingMasm()) {
        if (unsigned OpKind = IdentifyMasmOperator(Identifier)) {
          int64_t Val;
          if (ParseMasmOperator(OpKind, Val))
            return true;
          if (SM.onInteger(Val, ErrMsg))
            return Error(IdentLoc, ErrMsg);
          break;
        }
        if (!getParser().lookUpType(Identifier, FieldInfo.Type)) {
          // Field offset immediate; <TYPE>.<field specification>
          Lex(); // eat type
          bool EndDot = parseOptionalToken(AsmToken::Dot);
          while (EndDot || (getTok().is(AsmToken::Identifier) &&
                            getTok().getString().startswith("."))) {
            getParser().parseIdentifier(Identifier);
            if (!EndDot)
              Identifier.consume_front(".");
            EndDot = Identifier.consume_back(".");
            if (getParser().lookUpField(FieldInfo.Type.Name, Identifier,
                                        FieldInfo)) {
              SMLoc IDEnd =
              return Error(IdentLoc, "Unable to lookup field reference!",
                           SMRange(IdentLoc, IDEnd));
            }
            if (!EndDot)
              EndDot = parseOptionalToken(AsmToken::Dot);
          }
          if (SM.onInteger(FieldInfo.Offset, ErrMsg))
            return Error(IdentLoc, ErrMsg);
          break;
        }
      }
      // Plain symbol reference.
      if (getParser().parsePrimaryExpr(Val, End, &FieldInfo.Type)) {
        return Error(Tok.getLoc(), "Unexpected identifier!");
      } else if (SM.onIdentifierExpr(Val, Identifier, Info, FieldInfo.Type,
                                     false, ErrMsg)) {
        return Error(IdentLoc, ErrMsg);
      }
      break;
    }
    case AsmToken::Integer: {
      // Look for 'b' or 'f' following an Integer as a directional label
      SMLoc Loc = getTok().getLoc();
      int64_t IntVal = getTok().getIntVal();
      End = consumeToken();
      UpdateLocLex = false;
      if (getLexer().getKind() == AsmToken::Identifier) {
        StringRef IDVal = getTok().getString();
        if (IDVal == "f" || IDVal == "b") {
          MCSymbol *Sym =
              getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b");
          const MCExpr *Val =
              MCSymbolRefExpr::create(Sym, Variant, getContext());
          if (IDVal == "b" && Sym->isUndefined())
            return Error(Loc, "invalid reference to undefined symbol");
          StringRef Identifier = Sym->getName();
          if (SM.onIdentifierExpr(Val, Identifier, Info, Type,
                                  isParsingMSInlineAsm(), ErrMsg))
            return Error(Loc, ErrMsg);
          End = consumeToken();
        } else {
          if (SM.onInteger(IntVal, ErrMsg))
            return Error(Loc, ErrMsg);
        }
      } else {
        if (SM.onInteger(IntVal, ErrMsg))
          return Error(Loc, ErrMsg);
      }
      break;
    }
    case AsmToken::Plus:
      if (SM.onPlus(ErrMsg))
        return Error(getTok().getLoc(), ErrMsg);
      break;
    case AsmToken::Minus:
      if (SM.onMinus(ErrMsg))
        return Error(getTok().getLoc(), ErrMsg);
      break;
    case AsmToken::Tilde: SM.onNot(); break;
    case AsmToken::Star: SM.onStar(); break;
    case AsmToken::Slash: SM.onDivide(); break;
    case AsmToken::Percent: SM.onMod(); break;
    case AsmToken::Pipe: SM.onOr(); break;
    case AsmToken::Caret: SM.onXor(); break;
    case AsmToken::Amp: SM.onAnd(); break;
    case AsmToken::LessLess:
      SM.onLShift(); break;
      SM.onRShift(); break;
    case AsmToken::LBrac:
      if (SM.onLBrac())
        return Error(Tok.getLoc(), "unexpected bracket encountered");
      tryParseOperandIdx(PrevTK, SM);
      break;
    case AsmToken::RBrac:
      if (SM.onRBrac(ErrMsg)) {
        return Error(Tok.getLoc(), ErrMsg);
      }
      break;
    case AsmToken::LParen: SM.onLParen(); break;
    case AsmToken::RParen: SM.onRParen(); break;
    }
    if (SM.hadError())
      return Error(Tok.getLoc(), "unknown token in expression");

    // Most cases leave UpdateLocLex set and let the shared epilogue consume
    // the token.
    if (!Done && UpdateLocLex)
      End = consumeToken();

    PrevTK = TK;
  }
  return false;
}
2173
2174void X86AsmParser::RewriteIntelExpression(IntelExprStateMachine &SM,
2175 SMLoc Start, SMLoc End) {
2176 SMLoc Loc = Start;
2177 unsigned ExprLen = End.getPointer() - Start.getPointer();
2178 // Skip everything before a symbol displacement (if we have one)
2179 if (SM.getSym() && !SM.isOffsetOperator()) {
2180 StringRef SymName = SM.getSymName();
2181 if (unsigned Len = SymName.data() - Start.getPointer())
2182 InstInfo->AsmRewrites->emplace_back(AOK_Skip, Start, Len);
2183 Loc = SMLoc::getFromPointer(SymName.data() + SymName.size());
2184 ExprLen = End.getPointer() - (SymName.data() + SymName.size());
2185 // If we have only a symbol than there's no need for complex rewrite,
2186 // simply skip everything after it
2187 if (!(SM.getBaseReg() || SM.getIndexReg() || SM.getImm())) {
2188 if (ExprLen)
2189 InstInfo->AsmRewrites->emplace_back(AOK_Skip, Loc, ExprLen);
2190 return;
2191 }
2192 }
2193 // Build an Intel Expression rewrite
2194 StringRef BaseRegStr;
2195 StringRef IndexRegStr;
2196 StringRef OffsetNameStr;
2197 if (SM.getBaseReg())
2198 BaseRegStr = X86IntelInstPrinter::getRegisterName(SM.getBaseReg());
2199 if (SM.getIndexReg())
2200 IndexRegStr = X86IntelInstPrinter::getRegisterName(SM.getIndexReg());
2201 if (SM.isOffsetOperator())
2202 OffsetNameStr = SM.getSymName();
2203 // Emit it
2204 IntelExpr Expr(BaseRegStr, IndexRegStr, SM.getScale(), OffsetNameStr,
2205 SM.getImm(), SM.isMemExpr());
2206 InstInfo->AsmRewrites->emplace_back(Loc, ExprLen, Expr);
2207}
2208
2209// Inline assembly may use variable names with namespace alias qualifiers.
2210bool X86AsmParser::ParseIntelInlineAsmIdentifier(
2211 const MCExpr *&Val, StringRef &Identifier, InlineAsmIdentifierInfo &Info,
2212 bool IsUnevaluatedOperand, SMLoc &End, bool IsParsingOffsetOperator) {
2213 MCAsmParser &Parser = getParser();
2214 assert(isParsingMSInlineAsm() && "Expected to be parsing inline assembly.");
2215 Val = nullptr;
2216
2217 StringRef LineBuf(Identifier.data());
2218 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
2219
2220 const AsmToken &Tok = Parser.getTok();
2221 SMLoc Loc = Tok.getLoc();
2222
2223 // Advance the token stream until the end of the current token is
2224 // after the end of what the frontend claimed.
2225 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
2226 do {
2227 End = Tok.getEndLoc();
2228 getLexer().Lex();
2229 } while (End.getPointer() < EndPtr);
2230 Identifier = LineBuf;
2231
2232 // The frontend should end parsing on an assembler token boundary, unless it
2233 // failed parsing.
2234 assert((End.getPointer() == EndPtr ||
2236 "frontend claimed part of a token?");
2237
2238 // If the identifier lookup was unsuccessful, assume that we are dealing with
2239 // a label.
2241 StringRef InternalName =
2242 SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(),
2243 Loc, false);
2244 assert(InternalName.size() && "We should have an internal name here.");
2245 // Push a rewrite for replacing the identifier name with the internal name,
2246 // unless we are parsing the operand of an offset operator
2247 if (!IsParsingOffsetOperator)
2248 InstInfo->AsmRewrites->emplace_back(AOK_Label, Loc, Identifier.size(),
2249 InternalName);
2250 else
2251 Identifier = InternalName;
2252 } else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
2253 return false;
2254 // Create the symbol reference.
2255 MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
2257 Val = MCSymbolRefExpr::create(Sym, Variant, getParser().getContext());
2258 return false;
2259}
2260
2261//ParseRoundingModeOp - Parse AVX-512 rounding mode operand
2262bool X86AsmParser::ParseRoundingModeOp(SMLoc Start, OperandVector &Operands) {
2263 MCAsmParser &Parser = getParser();
2264 const AsmToken &Tok = Parser.getTok();
2265 // Eat "{" and mark the current place.
2266 const SMLoc consumedToken = consumeToken();
2267 if (Tok.isNot(AsmToken::Identifier))
2268 return Error(Tok.getLoc(), "Expected an identifier after {");
2269 if (Tok.getIdentifier().startswith("r")){
2270 int rndMode = StringSwitch<int>(Tok.getIdentifier())
2271 .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT)
2272 .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF)
2273 .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF)
2274 .Case("rz", X86::STATIC_ROUNDING::TO_ZERO)
2275 .Default(-1);
2276 if (-1 == rndMode)
2277 return Error(Tok.getLoc(), "Invalid rounding mode.");
2278 Parser.Lex(); // Eat "r*" of r*-sae
2279 if (!getLexer().is(AsmToken::Minus))
2280 return Error(Tok.getLoc(), "Expected - at this point");
2281 Parser.Lex(); // Eat "-"
2282 Parser.Lex(); // Eat the sae
2283 if (!getLexer().is(AsmToken::RCurly))
2284 return Error(Tok.getLoc(), "Expected } at this point");
2285 SMLoc End = Tok.getEndLoc();
2286 Parser.Lex(); // Eat "}"
2287 const MCExpr *RndModeOp =
2288 MCConstantExpr::create(rndMode, Parser.getContext());
2289 Operands.push_back(X86Operand::CreateImm(RndModeOp, Start, End));
2290 return false;
2291 }
2292 if(Tok.getIdentifier().equals("sae")){
2293 Parser.Lex(); // Eat the sae
2294 if (!getLexer().is(AsmToken::RCurly))
2295 return Error(Tok.getLoc(), "Expected } at this point");
2296 Parser.Lex(); // Eat "}"
2297 Operands.push_back(X86Operand::CreateToken("{sae}", consumedToken));
2298 return false;
2299 }
2300 return Error(Tok.getLoc(), "unknown token in expression");
2301}
2302
2303/// Parse the '.' operator.
2304bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM,
2305 SMLoc &End) {
2306 const AsmToken &Tok = getTok();
2308
2309 // Drop the optional '.'.
2310 StringRef DotDispStr = Tok.getString();
2311 if (DotDispStr.startswith("."))
2312 DotDispStr = DotDispStr.drop_front(1);
2313 StringRef TrailingDot;
2314
2315 // .Imm gets lexed as a real.
2316 if (Tok.is(AsmToken::Real)) {
2317 APInt DotDisp;
2318 if (DotDispStr.getAsInteger(10, DotDisp))
2319 return Error(Tok.getLoc(), "Unexpected offset");
2320 Info.Offset = DotDisp.getZExtValue();
2321 } else if ((isParsingMSInlineAsm() || getParser().isParsingMasm()) &&
2322 Tok.is(AsmToken::Identifier)) {
2323 if (DotDispStr.endswith(".")) {
2324 TrailingDot = DotDispStr.substr(DotDispStr.size() - 1);
2325 DotDispStr = DotDispStr.drop_back(1);
2326 }
2327 const std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
2328 const StringRef Base = BaseMember.first, Member = BaseMember.second;
2329 if (getParser().lookUpField(SM.getType(), DotDispStr, Info) &&
2330 getParser().lookUpField(SM.getSymName(), DotDispStr, Info) &&
2331 getParser().lookUpField(DotDispStr, Info) &&
2332 (!SemaCallback ||
2333 SemaCallback->LookupInlineAsmField(Base, Member, Info.Offset)))
2334 return Error(Tok.getLoc(), "Unable to lookup field reference!");
2335 } else {
2336 return Error(Tok.getLoc(), "Unexpected token type!");
2337 }
2338
2339 // Eat the DotExpression and update End
2340 End = SMLoc::getFromPointer(DotDispStr.data());
2341 const char *DotExprEndLoc = DotDispStr.data() + DotDispStr.size();
2342 while (Tok.getLoc().getPointer() < DotExprEndLoc)
2343 Lex();
2344 if (!TrailingDot.empty())
2345 getLexer().UnLex(AsmToken(AsmToken::Dot, TrailingDot));
2346 SM.addImm(Info.Offset);
2347 SM.setTypeInfo(Info.Type);
2348 return false;
2349}
2350
2351/// Parse the 'offset' operator.
2352/// This operator is used to specify the location of a given operand
2353bool X86AsmParser::ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID,
2355 SMLoc &End) {
2356 // Eat offset, mark start of identifier.
2357 SMLoc Start = Lex().getLoc();
2358 ID = getTok().getString();
2359 if (!isParsingMSInlineAsm()) {
2360 if ((getTok().isNot(AsmToken::Identifier) &&
2361 getTok().isNot(AsmToken::String)) ||
2362 getParser().parsePrimaryExpr(Val, End, nullptr))
2363 return Error(Start, "unexpected token!");
2364 } else if (ParseIntelInlineAsmIdentifier(Val, ID, Info, false, End, true)) {
2365 return Error(Start, "unable to lookup expression");
2366 } else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal)) {
2367 return Error(Start, "offset operator cannot yet handle constants");
2368 }
2369 return false;
2370}
2371
2372// Query a candidate string for being an Intel assembly operator
2373// Report back its kind, or IOK_INVALID if does not evaluated as a known one
2374unsigned X86AsmParser::IdentifyIntelInlineAsmOperator(StringRef Name) {
2376 .Cases("TYPE","type",IOK_TYPE)
2377 .Cases("SIZE","size",IOK_SIZE)
2378 .Cases("LENGTH","length",IOK_LENGTH)
2379 .Default(IOK_INVALID);
2380}
2381
2382/// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
2383/// returns the number of elements in an array. It returns the value 1 for
2384/// non-array variables. The SIZE operator returns the size of a C or C++
2385/// variable. A variable's size is the product of its LENGTH and TYPE. The
2386/// TYPE operator returns the size of a C or C++ type or variable. If the
2387/// variable is an array, TYPE returns the size of a single element.
2388unsigned X86AsmParser::ParseIntelInlineAsmOperator(unsigned OpKind) {
2389 MCAsmParser &Parser = getParser();
2390 const AsmToken &Tok = Parser.getTok();
2391 Parser.Lex(); // Eat operator.
2392
2393 const MCExpr *Val = nullptr;
2395 SMLoc Start = Tok.getLoc(), End;
2397 if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info,
2398 /*IsUnevaluatedOperand=*/true, End))
2399 return 0;
2400
2402 Error(Start, "unable to lookup expression");
2403 return 0;
2404 }
2405
2406 unsigned CVal = 0;
2407 switch(OpKind) {
2408 default: llvm_unreachable("Unexpected operand kind!");
2409 case IOK_LENGTH: CVal = Info.Var.Length; break;
2410 case IOK_SIZE: CVal = Info.Var.Size; break;
2411 case IOK_TYPE: CVal = Info.Var.Type; break;
2412 }
2413
2414 return CVal;
2415}
2416
2417// Query a candidate string for being an Intel assembly operator
2418// Report back its kind, or IOK_INVALID if does not evaluated as a known one
2419unsigned X86AsmParser::IdentifyMasmOperator(StringRef Name) {
2420 return StringSwitch<unsigned>(Name.lower())
2421 .Case("type", MOK_TYPE)
2422 .Cases("size", "sizeof", MOK_SIZEOF)
2423 .Cases("length", "lengthof", MOK_LENGTHOF)
2424 .Default(MOK_INVALID);
2425}
2426
2427/// Parse the 'LENGTHOF', 'SIZEOF', and 'TYPE' operators. The LENGTHOF operator
2428/// returns the number of elements in an array. It returns the value 1 for
2429/// non-array variables. The SIZEOF operator returns the size of a type or
2430/// variable in bytes. A variable's size is the product of its LENGTH and TYPE.
2431/// The TYPE operator returns the size of a variable. If the variable is an
2432/// array, TYPE returns the size of a single element.
2433bool X86AsmParser::ParseMasmOperator(unsigned OpKind, int64_t &Val) {
2434 MCAsmParser &Parser = getParser();
2435 SMLoc OpLoc = Parser.getTok().getLoc();
2436 Parser.Lex(); // Eat operator.
2437
2438 Val = 0;
2439 if (OpKind == MOK_SIZEOF || OpKind == MOK_TYPE) {
2440 // Check for SIZEOF(<type>) and TYPE(<type>).
2441 bool InParens = Parser.getTok().is(AsmToken::LParen);
2442 const AsmToken &IDTok = InParens ? getLexer().peekTok() : Parser.getTok();
2444 if (IDTok.is(AsmToken::Identifier) &&
2445 !Parser.lookUpType(IDTok.getIdentifier(), Type)) {
2446 Val = Type.Size;
2447
2448 // Eat tokens.
2449 if (InParens)
2450 parseToken(AsmToken::LParen);
2451 parseToken(AsmToken::Identifier);
2452 if (InParens)
2453 parseToken(AsmToken::RParen);
2454 }
2455 }
2456
2457 if (!Val) {
2458 IntelExprStateMachine SM;
2459 SMLoc End, Start = Parser.getTok().getLoc();
2460 if (ParseIntelExpression(SM, End))
2461 return true;
2462
2463 switch (OpKind) {
2464 default:
2465 llvm_unreachable("Unexpected operand kind!");
2466 case MOK_SIZEOF:
2467 Val = SM.getSize();
2468 break;
2469 case MOK_LENGTHOF:
2470 Val = SM.getLength();
2471 break;
2472 case MOK_TYPE:
2473 Val = SM.getElementSize();
2474 break;
2475 }
2476
2477 if (!Val)
2478 return Error(OpLoc, "expression has unknown type", SMRange(Start, End));
2479 }
2480
2481 return false;
2482}
2483
2484bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size) {
2485 Size = StringSwitch<unsigned>(getTok().getString())
2486 .Cases("BYTE", "byte", 8)
2487 .Cases("WORD", "word", 16)
2488 .Cases("DWORD", "dword", 32)
2489 .Cases("FLOAT", "float", 32)
2490 .Cases("LONG", "long", 32)
2491 .Cases("FWORD", "fword", 48)
2492 .Cases("DOUBLE", "double", 64)
2493 .Cases("QWORD", "qword", 64)
2494 .Cases("MMWORD","mmword", 64)
2495 .Cases("XWORD", "xword", 80)
2496 .Cases("TBYTE", "tbyte", 80)
2497 .Cases("XMMWORD", "xmmword", 128)
2498 .Cases("YMMWORD", "ymmword", 256)
2499 .Cases("ZMMWORD", "zmmword", 512)
2500 .Default(0);
2501 if (Size) {
2502 const AsmToken &Tok = Lex(); // Eat operand size (e.g., byte, word).
2503 if (!(Tok.getString().equals("PTR") || Tok.getString().equals("ptr")))
2504 return Error(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
2505 Lex(); // Eat ptr.
2506 }
2507 return false;
2508}
2509
2510bool X86AsmParser::parseIntelOperand(OperandVector &Operands, StringRef Name) {
2511 MCAsmParser &Parser = getParser();
2512 const AsmToken &Tok = Parser.getTok();
2513 SMLoc Start, End;
2514
2515 // Parse optional Size directive.
2516 unsigned Size;
2517 if (ParseIntelMemoryOperandSize(Size))
2518 return true;
2519 bool PtrInOperand = bool(Size);
2520
2521 Start = Tok.getLoc();
2522
2523 // Rounding mode operand.
2524 if (getLexer().is(AsmToken::LCurly))
2525 return ParseRoundingModeOp(Start, Operands);
2526
2527 // Register operand.
2528 MCRegister RegNo;
2529 if (Tok.is(AsmToken::Identifier) && !parseRegister(RegNo, Start, End)) {
2530 if (RegNo == X86::RIP)
2531 return Error(Start, "rip can only be used as a base register");
2532 // A Register followed by ':' is considered a segment override
2533 if (Tok.isNot(AsmToken::Colon)) {
2534 if (PtrInOperand)
2535 return Error(Start, "expected memory operand after 'ptr', "
2536 "found register operand instead");
2537 Operands.push_back(X86Operand::CreateReg(RegNo, Start, End));
2538 return false;
2539 }
2540 // An alleged segment override. check if we have a valid segment register
2541 if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
2542 return Error(Start, "invalid segment register");
2543 // Eat ':' and update Start location
2544 Start = Lex().getLoc();
2545 }
2546
2547 // Immediates and Memory
2548 IntelExprStateMachine SM;
2549 if (ParseIntelExpression(SM, End))
2550 return true;
2551
2552 if (isParsingMSInlineAsm())
2553 RewriteIntelExpression(SM, Start, Tok.getLoc());
2554
2555 int64_t Imm = SM.getImm();
2556 const MCExpr *Disp = SM.getSym();
2557 const MCExpr *ImmDisp = MCConstantExpr::create(Imm, getContext());
2558 if (Disp && Imm)
2559 Disp = MCBinaryExpr::createAdd(Disp, ImmDisp, getContext());
2560 if (!Disp)
2561 Disp = ImmDisp;
2562
2563 // RegNo != 0 specifies a valid segment register,
2564 // and we are parsing a segment override
2565 if (!SM.isMemExpr() && !RegNo) {
2566 if (isParsingMSInlineAsm() && SM.isOffsetOperator()) {
2567 const InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
2569 // Disp includes the address of a variable; make sure this is recorded
2570 // for later handling.
2571 Operands.push_back(X86Operand::CreateImm(Disp, Start, End,
2572 SM.getSymName(), Info.Var.Decl,
2573 Info.Var.IsGlobalLV));
2574 return false;
2575 }
2576 }
2577
2578 Operands.push_back(X86Operand::CreateImm(Disp, Start, End));
2579 return false;
2580 }
2581
2582 StringRef ErrMsg;
2583 unsigned BaseReg = SM.getBaseReg();
2584 unsigned IndexReg = SM.getIndexReg();
2585 if (IndexReg && BaseReg == X86::RIP)
2586 BaseReg = 0;
2587 unsigned Scale = SM.getScale();
2588 if (!PtrInOperand)
2589 Size = SM.getElementSize() << 3;
2590
2591 if (Scale == 0 && BaseReg != X86::ESP && BaseReg != X86::RSP &&
2592 (IndexReg == X86::ESP || IndexReg == X86::RSP))
2593 std::swap(BaseReg, IndexReg);
2594
2595 // If BaseReg is a vector register and IndexReg is not, swap them unless
2596 // Scale was specified in which case it would be an error.
2597 if (Scale == 0 &&
2598 !(X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) ||
2599 X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) ||
2600 X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg)) &&
2601 (X86MCRegisterClasses[X86::VR128XRegClassID].contains(BaseReg) ||
2602 X86MCRegisterClasses[X86::VR256XRegClassID].contains(BaseReg) ||
2603 X86MCRegisterClasses[X86::VR512RegClassID].contains(BaseReg)))
2604 std::swap(BaseReg, IndexReg);
2605
2606 if (Scale != 0 &&
2607 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg))
2608 return Error(Start, "16-bit addresses cannot have a scale");
2609
2610 // If there was no explicit scale specified, change it to 1.
2611 if (Scale == 0)
2612 Scale = 1;
2613
2614 // If this is a 16-bit addressing mode with the base and index in the wrong
2615 // order, swap them so CheckBaseRegAndIndexRegAndScale doesn't fail. It is
2616 // shared with att syntax where order matters.
2617 if ((BaseReg == X86::SI || BaseReg == X86::DI) &&
2618 (IndexReg == X86::BX || IndexReg == X86::BP))
2619 std::swap(BaseReg, IndexReg);
2620
2621 if ((BaseReg || IndexReg) &&
2622 CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
2623 ErrMsg))
2624 return Error(Start, ErrMsg);
2625 if (isParsingMSInlineAsm())
2626 return CreateMemForMSInlineAsm(RegNo, Disp, BaseReg, IndexReg, Scale, Start,
2627 End, Size, SM.getSymName(),
2628 SM.getIdentifierInfo(), Operands);
2629
2630 // When parsing x64 MS-style assembly, all non-absolute references to a named
2631 // variable default to RIP-relative.
2632 unsigned DefaultBaseReg = X86::NoRegister;
2633 bool MaybeDirectBranchDest = true;
2634
2635 bool IsUnconditionalBranch =
2636 Name.equals_insensitive("jmp") || Name.equals_insensitive("call");
2637 if (Parser.isParsingMasm()) {
2638 if (is64BitMode() && SM.getElementSize() > 0) {
2639 DefaultBaseReg = X86::RIP;
2640 }
2641 if (IsUnconditionalBranch) {
2642 if (PtrInOperand) {
2643 MaybeDirectBranchDest = false;
2644 if (is64BitMode())
2645 DefaultBaseReg = X86::RIP;
2646 } else if (!BaseReg && !IndexReg && Disp &&
2647 Disp->getKind() == MCExpr::SymbolRef) {
2648 if (is64BitMode()) {
2649 if (SM.getSize() == 8) {
2650 MaybeDirectBranchDest = false;
2651 DefaultBaseReg = X86::RIP;
2652 }
2653 } else {
2654 if (SM.getSize() == 4 || SM.getSize() == 2)
2655 MaybeDirectBranchDest = false;
2656 }
2657 }
2658 }
2659 } else if (IsUnconditionalBranch) {
2660 // Treat `call [offset fn_ref]` (or `jmp`) syntax as an error.
2661 if (!PtrInOperand && SM.isOffsetOperator())
2662 return Error(
2663 Start, "`OFFSET` operator cannot be used in an unconditional branch");
2664 if (PtrInOperand || SM.isBracketUsed())
2665 MaybeDirectBranchDest = false;
2666 }
2667
2668 if ((BaseReg || IndexReg || RegNo || DefaultBaseReg != X86::NoRegister))
2670 getPointerWidth(), RegNo, Disp, BaseReg, IndexReg, Scale, Start, End,
2671 Size, DefaultBaseReg, /*SymName=*/StringRef(), /*OpDecl=*/nullptr,
2672 /*FrontendSize=*/0, /*UseUpRegs=*/false, MaybeDirectBranchDest));
2673 else
2675 getPointerWidth(), Disp, Start, End, Size, /*SymName=*/StringRef(),
2676 /*OpDecl=*/nullptr, /*FrontendSize=*/0, /*UseUpRegs=*/false,
2677 MaybeDirectBranchDest));
2678 return false;
2679}
2680
2681bool X86AsmParser::parseATTOperand(OperandVector &Operands) {
2682 MCAsmParser &Parser = getParser();
2683 switch (getLexer().getKind()) {
2684 case AsmToken::Dollar: {
2685 // $42 or $ID -> immediate.
2686 SMLoc Start = Parser.getTok().getLoc(), End;
2687 Parser.Lex();
2688 const MCExpr *Val;
2689 // This is an immediate, so we should not parse a register. Do a precheck
2690 // for '%' to supercede intra-register parse errors.
2691 SMLoc L = Parser.getTok().getLoc();
2692 if (check(getLexer().is(AsmToken::Percent), L,
2693 "expected immediate expression") ||
2694 getParser().parseExpression(Val, End) ||
2695 check(isa<X86MCExpr>(Val), L, "expected immediate expression"))
2696 return true;
2697 Operands.push_back(X86Operand::CreateImm(Val, Start, End));
2698 return false;
2699 }
2700 case AsmToken::LCurly: {
2701 SMLoc Start = Parser.getTok().getLoc();
2702 return ParseRoundingModeOp(Start, Operands);
2703 }
2704 default: {
2705 // This a memory operand or a register. We have some parsing complications
2706 // as a '(' may be part of an immediate expression or the addressing mode
2707 // block. This is complicated by the fact that an assembler-level variable
2708 // may refer either to a register or an immediate expression.
2709
2710 SMLoc Loc = Parser.getTok().getLoc(), EndLoc;
2711 const MCExpr *Expr = nullptr;
2712 unsigned Reg = 0;
2713 if (getLexer().isNot(AsmToken::LParen)) {
2714 // No '(' so this is either a displacement expression or a register.
2715 if (Parser.parseExpression(Expr, EndLoc))
2716 return true;
2717 if (auto *RE = dyn_cast<X86MCExpr>(Expr)) {
2718 // Segment Register. Reset Expr and copy value to register.
2719 Expr = nullptr;
2720 Reg = RE->getRegNo();
2721
2722 // Check the register.
2723 if (Reg == X86::EIZ || Reg == X86::RIZ)
2724 return Error(
2725 Loc, "%eiz and %riz can only be used as index registers",
2726 SMRange(Loc, EndLoc));
2727 if (Reg == X86::RIP)
2728 return Error(Loc, "%rip can only be used as a base register",
2729 SMRange(Loc, EndLoc));
2730 // Return register that are not segment prefixes immediately.
2731 if (!Parser.parseOptionalToken(AsmToken::Colon)) {
2732 Operands.push_back(X86Operand::CreateReg(Reg, Loc, EndLoc));
2733 return false;
2734 }
2735 if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(Reg))
2736 return Error(Loc, "invalid segment register");
2737 // Accept a '*' absolute memory reference after the segment. Place it
2738 // before the full memory operand.
2739 if (getLexer().is(AsmToken::Star))
2740 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
2741 }
2742 }
2743 // This is a Memory operand.
2744 return ParseMemOperand(Reg, Expr, Loc, EndLoc, Operands);
2745 }
2746 }
2747}
2748
2749// X86::COND_INVALID if not a recognized condition code or alternate mnemonic,
2750// otherwise the EFLAGS Condition Code enumerator.
2751X86::CondCode X86AsmParser::ParseConditionCode(StringRef CC) {
2753 .Case("o", X86::COND_O) // Overflow
2754 .Case("no", X86::COND_NO) // No Overflow
2755 .Cases("b", "nae", X86::COND_B) // Below/Neither Above nor Equal
2756 .Cases("ae", "nb", X86::COND_AE) // Above or Equal/Not Below
2757 .Cases("e", "z", X86::COND_E) // Equal/Zero
2758 .Cases("ne", "nz", X86::COND_NE) // Not Equal/Not Zero
2759 .Cases("be", "na", X86::COND_BE) // Below or Equal/Not Above
2760 .Cases("a", "nbe", X86::COND_A) // Above/Neither Below nor Equal
2761 .Case("s", X86::COND_S) // Sign
2762 .Case("ns", X86::COND_NS) // No Sign
2763 .Cases("p", "pe", X86::COND_P) // Parity/Parity Even
2764 .Cases("np", "po", X86::COND_NP) // No Parity/Parity Odd
2765 .Cases("l", "nge", X86::COND_L) // Less/Neither Greater nor Equal
2766 .Cases("ge", "nl", X86::COND_GE) // Greater or Equal/Not Less
2767 .Cases("le", "ng", X86::COND_LE) // Less or Equal/Not Greater
2768 .Cases("g", "nle", X86::COND_G) // Greater/Neither Less nor Equal
2770}
2771
2772// true on failure, false otherwise
2773// If no {z} mark was found - Parser doesn't advance
2774bool X86AsmParser::ParseZ(std::unique_ptr<X86Operand> &Z,
2775 const SMLoc &StartLoc) {
2776 MCAsmParser &Parser = getParser();
2777 // Assuming we are just pass the '{' mark, quering the next token
2778 // Searched for {z}, but none was found. Return false, as no parsing error was
2779 // encountered
2780 if (!(getLexer().is(AsmToken::Identifier) &&
2781 (getLexer().getTok().getIdentifier() == "z")))
2782 return false;
2783 Parser.Lex(); // Eat z
2784 // Query and eat the '}' mark
2785 if (!getLexer().is(AsmToken::RCurly))
2786 return Error(getLexer().getLoc(), "Expected } at this point");
2787 Parser.Lex(); // Eat '}'
2788 // Assign Z with the {z} mark operand
2789 Z = X86Operand::CreateToken("{z}", StartLoc);
2790 return false;
2791}
2792
// Parses AVX-512 operand suffixes following a normal operand: either a memory
// broadcast specifier "{1to<NUM>}" or a write-mask/zeroing combination drawn
// from {%k<NUM>}, {z}, in either order. Returns true on failure, false
// otherwise (including when no '{' suffix is present at all).
bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands) {
  MCAsmParser &Parser = getParser();
  if (getLexer().is(AsmToken::LCurly)) {
    // Eat "{" and mark the current place.
    const SMLoc consumedToken = consumeToken();
    // Distinguish {1to<NUM>} from {%k<NUM>}: only the broadcast form starts
    // with an integer token.
    if(getLexer().is(AsmToken::Integer)) {
      // Parse memory broadcasting ({1to<NUM>}).
      if (getLexer().getTok().getIntVal() != 1)
        return TokError("Expected 1to<NUM> at this point");
      StringRef Prefix = getLexer().getTok().getString();
      Parser.Lex(); // Eat the first token of "1toN" (the integer "1").
      if (!getLexer().is(AsmToken::Identifier))
        return TokError("Expected 1to<NUM> at this point");
      // Recognize only reasonable suffixes: reassemble "1" + "toN" and check
      // the combined spelling against the known broadcast widths.
      SmallVector<char, 5> BroadcastVector;
      StringRef BroadcastString = (Prefix + getLexer().getTok().getIdentifier())
                                      .toStringRef(BroadcastVector);
      if (!BroadcastString.startswith("1to"))
        return TokError("Expected 1to<NUM> at this point");
      const char *BroadcastPrimitive =
          StringSwitch<const char *>(BroadcastString)
              .Case("1to2", "{1to2}")
              .Case("1to4", "{1to4}")
              .Case("1to8", "{1to8}")
              .Case("1to16", "{1to16}")
              .Case("1to32", "{1to32}")
              .Default(nullptr);
      if (!BroadcastPrimitive)
        return TokError("Invalid memory broadcast primitive.");
      Parser.Lex(); // Eat the trailing token of "1toN".
      if (!getLexer().is(AsmToken::RCurly))
        return TokError("Expected } at this point");
      Parser.Lex();  // Eat "}"
      Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
                                                 consumedToken));
      // No AVX512 specific primitives can pass
      // after memory broadcasting, so return.
      return false;
    } else {
      // Parse either {k}{z}, {z}{k}, {k} or {z}; the last has no effect on
      // its own, but GCC accepts it.
      // At this point a '{' mark has just been consumed.
      std::unique_ptr<X86Operand> Z;
      if (ParseZ(Z, consumedToken))
        return true;
      // Parsing the allegedly present '{z}' mark yielded no errors. Check
      // whether a {%k<NUM>} mark still needs to be parsed (always when no {z}
      // was found; otherwise only if another '{' follows).
      if (!Z || getLexer().is(AsmToken::LCurly)) {
        SMLoc StartLoc = Z ? consumeToken() : consumedToken;
        // An op-mask register mark ({%k<NUM>}) is now expected.
        MCRegister RegNo;
        SMLoc RegLoc;
        if (!parseRegister(RegNo, RegLoc, StartLoc) &&
            X86MCRegisterClasses[X86::VK1RegClassID].contains(RegNo)) {
          if (RegNo == X86::K0)
            return Error(RegLoc, "Register k0 can't be used as write mask");
          if (!getLexer().is(AsmToken::RCurly))
            return Error(getLexer().getLoc(), "Expected } at this point");
          Operands.push_back(X86Operand::CreateToken("{", StartLoc));
          Operands.push_back(
              X86Operand::CreateReg(RegNo, StartLoc, StartLoc));
          Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
        } else
          return Error(getLexer().getLoc(),
                       "Expected an op-mask register at this point");
        // A {%k<NUM>} mark was found; a {z} mark may still follow it.
        if (getLexer().is(AsmToken::LCurly) && !Z) {
          // A '{' after the mask must introduce a {z} mark; anything else
          // (or a ParseZ failure) is an error.
          if (ParseZ(Z, consumeToken()) || !Z)
            return Error(getLexer().getLoc(),
                         "Expected a {z} mark at this point");

        }
        // '{z}' on its own is meaningless, hence should be ignored.
        // On the contrary - had it been accompanied by a K register,
        // allow it.
        if (Z)
          Operands.push_back(std::move(Z));
      }
    }
  }
  return false;
}
2881
2882/// ParseMemOperand: 'seg : disp(basereg, indexreg, scale)'. The '%ds:' prefix
2883/// has already been parsed if present. disp may be provided as well.
2884bool X86AsmParser::ParseMemOperand(unsigned SegReg, const MCExpr *Disp,
2885 SMLoc StartLoc, SMLoc EndLoc,
2887 MCAsmParser &Parser = getParser();
2888 SMLoc Loc;
2889 // Based on the initial passed values, we may be in any of these cases, we are
2890 // in one of these cases (with current position (*)):
2891
2892 // 1. seg : * disp (base-index-scale-expr)
2893 // 2. seg : *(disp) (base-index-scale-expr)
2894 // 3. seg : *(base-index-scale-expr)
2895 // 4. disp *(base-index-scale-expr)
2896 // 5. *(disp) (base-index-scale-expr)
2897 // 6. *(base-index-scale-expr)
2898 // 7. disp *
2899 // 8. *(disp)
2900
2901 // If we do not have an displacement yet, check if we're in cases 4 or 6 by
2902 // checking if the first object after the parenthesis is a register (or an
2903 // identifier referring to a register) and parse the displacement or default
2904 // to 0 as appropriate.
2905 auto isAtMemOperand = [this]() {
2906 if (this->getLexer().isNot(AsmToken::LParen))
2907 return false;
2908 AsmToken Buf[2];
2909 StringRef Id;
2910 auto TokCount = this->getLexer().peekTokens(Buf, true);
2911 if (TokCount == 0)
2912 return false;
2913 switch (Buf[0].getKind()) {
2914 case AsmToken::Percent:
2915 case AsmToken::Comma:
2916 return true;
2917 // These lower cases are doing a peekIdentifier.
2918 case AsmToken::At:
2919 case AsmToken::Dollar:
2920 if ((TokCount > 1) &&
2921 (Buf[1].is(AsmToken::Identifier) || Buf[1].is(AsmToken::String)) &&
2922 (Buf[0].getLoc().getPointer() + 1 == Buf[1].getLoc().getPointer()))
2923 Id = StringRef(Buf[0].getLoc().getPointer(),
2924 Buf[1].getIdentifier().size() + 1);
2925 break;
2927 case AsmToken::String:
2928 Id = Buf[0].getIdentifier();
2929 break;
2930 default:
2931 return false;
2932 }
2933 // We have an ID. Check if it is bound to a register.
2934 if (!Id.empty()) {
2935 MCSymbol *Sym = this->getContext().getOrCreateSymbol(Id);
2936 if (Sym->isVariable()) {
2937 auto V = Sym->getVariableValue(/*SetUsed*/ false);
2938 return isa<X86MCExpr>(V);
2939 }
2940 }
2941 return false;
2942 };
2943
2944 if (!Disp) {
2945 // Parse immediate if we're not at a mem operand yet.
2946 if (!isAtMemOperand()) {
2947 if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(Disp, EndLoc))
2948 return true;
2949 assert(!isa<X86MCExpr>(Disp) && "Expected non-register here.");
2950 } else {
2951 // Disp is implicitly zero if we haven't parsed it yet.
2952 Disp = MCConstantExpr::create(0, Parser.getContext());
2953 }
2954 }
2955
2956 // We are now either at the end of the operand or at the '(' at the start of a
2957 // base-index-scale-expr.
2958
2959 if (!parseOptionalToken(AsmToken::LParen)) {
2960 if (SegReg == 0)
2961 Operands.push_back(
2962 X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc));
2963 else
2964 Operands.push_back(X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
2965 0, 0, 1, StartLoc, EndLoc));
2966 return false;
2967 }
2968
2969 // If we reached here, then eat the '(' and Process
2970 // the rest of the memory operand.
2971 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
2972 SMLoc BaseLoc = getLexer().getLoc();
2973 const MCExpr *E;
2974 StringRef ErrMsg;
2975
2976 // Parse BaseReg if one is provided.
2977 if (getLexer().isNot(AsmToken::Comma) && getLexer().isNot(AsmToken::RParen)) {
2978 if (Parser.parseExpression(E, EndLoc) ||
2979 check(!isa<X86MCExpr>(E), BaseLoc, "expected register here"))
2980 return true;
2981
2982 // Check the register.
2983 BaseReg = cast<X86MCExpr>(E)->getRegNo();
2984 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ)
2985 return Error(BaseLoc, "eiz and riz can only be used as index registers",
2986 SMRange(BaseLoc, EndLoc));
2987 }
2988
2989 if (parseOptionalToken(AsmToken::Comma)) {
2990 // Following the comma we should have either an index register, or a scale
2991 // value. We don't support the later form, but we want to parse it
2992 // correctly.
2993 //
2994 // Even though it would be completely consistent to support syntax like
2995 // "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
2996 if (getLexer().isNot(AsmToken::RParen)) {
2997 if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(E, EndLoc))
2998 return true;
2999
3000 if (!isa<X86MCExpr>(E)) {
3001 // We've parsed an unexpected Scale Value instead of an index
3002 // register. Interpret it as an absolute.
3003 int64_t ScaleVal;
3004 if (!E->evaluateAsAbsolute(ScaleVal, getStreamer().getAssemblerPtr()))
3005 return Error(Loc, "expected absolute expression");
3006 if (ScaleVal != 1)
3007 Warning(Loc, "scale factor without index register is ignored");
3008 Scale = 1;
3009 } else { // IndexReg Found.
3010 IndexReg = cast<X86MCExpr>(E)->getRegNo();
3011
3012 if (BaseReg == X86::RIP)
3013 return Error(Loc,
3014 "%rip as base register can not have an index register");
3015 if (IndexReg == X86::RIP)
3016 return Error(Loc, "%rip is not allowed as an index register");
3017
3018 if (parseOptionalToken(AsmToken::Comma)) {
3019 // Parse the scale amount:
3020 // ::= ',' [scale-expression]
3021
3022 // A scale amount without an index is ignored.
3023 if (getLexer().isNot(AsmToken::RParen)) {
3024 int64_t ScaleVal;
3025 if (Parser.parseTokenLoc(Loc) ||
3026 Parser.parseAbsoluteExpression(ScaleVal))
3027 return Error(Loc, "expected scale expression");
3028 Scale = (unsigned)ScaleVal;
3029 // Validate the scale amount.
3030 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
3031 Scale != 1)
3032 return Error(Loc, "scale factor in 16-bit address must be 1");
3033 if (checkScale(Scale, ErrMsg))
3034 return Error(Loc, ErrMsg);
3035 }
3036 }
3037 }
3038 }
3039 }
3040
3041 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
3042 if (parseToken(AsmToken::RParen, "unexpected token in memory operand"))
3043 return true;
3044
3045 // This is to support otherwise illegal operand (%dx) found in various
3046 // unofficial manuals examples (e.g. "out[s]?[bwl]? %al, (%dx)") and must now
3047 // be supported. Mark such DX variants separately fix only in special cases.
3048 if (BaseReg == X86::DX && IndexReg == 0 && Scale == 1 && SegReg == 0 &&
3049 isa<MCConstantExpr>(Disp) &&
3050 cast<MCConstantExpr>(Disp)->getValue() == 0) {
3051 Operands.push_back(X86Operand::CreateDXReg(BaseLoc, BaseLoc));
3052 return false;
3053 }
3054
3055 if (CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
3056 ErrMsg))
3057 return Error(BaseLoc, ErrMsg);
3058
3059 if (SegReg || BaseReg || IndexReg)
3060 Operands.push_back(X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
3061 BaseReg, IndexReg, Scale, StartLoc,
3062 EndLoc));
3063 else
3064 Operands.push_back(
3065 X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc));
3066 return false;
3067}
3068
3069// Parse either a standard primary expression or a register.
3070bool X86AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
3071 MCAsmParser &Parser = getParser();
3072 // See if this is a register first.
3073 if (getTok().is(AsmToken::Percent) ||
3074 (isParsingIntelSyntax() && getTok().is(AsmToken::Identifier) &&
3075 MatchRegisterName(Parser.getTok().getString()))) {
3076 SMLoc StartLoc = Parser.getTok().getLoc();
3077 MCRegister RegNo;
3078 if (parseRegister(RegNo, StartLoc, EndLoc))
3079 return true;
3080 Res = X86MCExpr::create(RegNo, Parser.getContext());
3081 return false;
3082 }
3083 return Parser.parsePrimaryExpr(Res, EndLoc, nullptr);
3084}
3085
// Parse one instruction statement: pseudo prefixes ({vex}/{disp8}/... and
// MASM-style keywords), the mnemonic (with the various mnemonic-patching
// hacks below: cmpCC/vpcmpCC/vpcomCC, set*b, data16/data32, string
// instruction defaults, out/in (%dx), xlat), and the operand list.
// Populates Operands and returns true on error.
bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                                    SMLoc NameLoc, OperandVector &Operands) {
  MCAsmParser &Parser = getParser();
  InstInfo = &Info;

  // Reset the forced VEX encoding.
  ForcedVEXEncoding = VEXEncoding_Default;
  ForcedDispEncoding = DispEncoding_Default;

  // Parse pseudo prefixes.
  while (true) {
    if (Name == "{") {
      // Curly-brace pseudo prefix, e.g. "{vex3} vpaddd ...".
      if (getLexer().isNot(AsmToken::Identifier))
        return Error(Parser.getTok().getLoc(), "Unexpected token after '{'");
      std::string Prefix = Parser.getTok().getString().lower();
      Parser.Lex(); // Eat identifier.
      if (getLexer().isNot(AsmToken::RCurly))
        return Error(Parser.getTok().getLoc(), "Expected '}'");
      Parser.Lex(); // Eat curly.

      if (Prefix == "vex")
        ForcedVEXEncoding = VEXEncoding_VEX;
      else if (Prefix == "vex2")
        ForcedVEXEncoding = VEXEncoding_VEX2;
      else if (Prefix == "vex3")
        ForcedVEXEncoding = VEXEncoding_VEX3;
      else if (Prefix == "evex")
        ForcedVEXEncoding = VEXEncoding_EVEX;
      else if (Prefix == "disp8")
        ForcedDispEncoding = DispEncoding_Disp8;
      else if (Prefix == "disp32")
        ForcedDispEncoding = DispEncoding_Disp32;
      else
        return Error(NameLoc, "unknown prefix");

      NameLoc = Parser.getTok().getLoc();
      if (getLexer().is(AsmToken::LCurly)) {
        // Another "{...}" pseudo prefix follows; loop again to consume it.
        Parser.Lex();
        Name = "{";
      } else {
        if (getLexer().isNot(AsmToken::Identifier))
          return Error(Parser.getTok().getLoc(), "Expected identifier");
        // FIXME: The mnemonic won't match correctly if its not in lower case.
        Name = Parser.getTok().getString();
        Parser.Lex();
      }
      continue;
    }
    // Parse MASM style pseudo prefixes.
    if (isParsingMSInlineAsm()) {
      if (Name.equals_insensitive("vex"))
        ForcedVEXEncoding = VEXEncoding_VEX;
      else if (Name.equals_insensitive("vex2"))
        ForcedVEXEncoding = VEXEncoding_VEX2;
      else if (Name.equals_insensitive("vex3"))
        ForcedVEXEncoding = VEXEncoding_VEX3;
      else if (Name.equals_insensitive("evex"))
        ForcedVEXEncoding = VEXEncoding_EVEX;

      if (ForcedVEXEncoding != VEXEncoding_Default) {
        if (getLexer().isNot(AsmToken::Identifier))
          return Error(Parser.getTok().getLoc(), "Expected identifier");
        // FIXME: The mnemonic won't match correctly if its not in lower case.
        Name = Parser.getTok().getString();
        NameLoc = Parser.getTok().getLoc();
        Parser.Lex();
      }
    }
    break;
  }

  // Support the suffix syntax for overriding displacement size as well.
  if (Name.consume_back(".d32")) {
    ForcedDispEncoding = DispEncoding_Disp32;
  } else if (Name.consume_back(".d8")) {
    ForcedDispEncoding = DispEncoding_Disp8;
  }

  StringRef PatchedName = Name;

  // Hack to skip "short" following Jcc.
  if (isParsingIntelSyntax() &&
      (PatchedName == "jmp" || PatchedName == "jc" || PatchedName == "jnc" ||
       PatchedName == "jcxz" || PatchedName == "jecxz" ||
       (PatchedName.startswith("j") &&
        ParseConditionCode(PatchedName.substr(1)) != X86::COND_INVALID))) {
    StringRef NextTok = Parser.getTok().getString();
    if (Parser.isParsingMasm() ? NextTok.equals_insensitive("short")
                               : NextTok == "short") {
      SMLoc NameEndLoc =
          NameLoc.getFromPointer(NameLoc.getPointer() + Name.size());
      // Eat the short keyword.
      Parser.Lex();
      // MS and GAS ignore the short keyword; they both determine the jmp type
      // based on the distance of the label. (NASM does emit different code with
      // and without "short," though.)
      InstInfo->AsmRewrites->emplace_back(AOK_Skip, NameEndLoc,
                                          NextTok.size() + 1);
    }
  }

  // FIXME: Hack to recognize setneb as setne.
  if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
      PatchedName != "setb" && PatchedName != "setnb")
    PatchedName = PatchedName.substr(0, Name.size()-1);

  // ~0U means "no comparison immediate extracted from the mnemonic".
  unsigned ComparisonPredicate = ~0U;

  // FIXME: Hack to recognize cmp<comparison code>{sh,ss,sd,ph,ps,pd}.
  if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
      (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
       PatchedName.endswith("sh") || PatchedName.endswith("ph") ||
       PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
    bool IsVCMP = PatchedName[0] == 'v';
    unsigned CCIdx = IsVCMP ? 4 : 3;
    // Map the condition-code substring to its immediate encoding.
    unsigned CC = StringSwitch<unsigned>(
      PatchedName.slice(CCIdx, PatchedName.size() - 2))
                      .Case("eq", 0x00)
                      .Case("eq_oq", 0x00)
                      .Case("lt", 0x01)
                      .Case("lt_os", 0x01)
                      .Case("le", 0x02)
                      .Case("le_os", 0x02)
                      .Case("unord", 0x03)
                      .Case("unord_q", 0x03)
                      .Case("neq", 0x04)
                      .Case("neq_uq", 0x04)
                      .Case("nlt", 0x05)
                      .Case("nlt_us", 0x05)
                      .Case("nle", 0x06)
                      .Case("nle_us", 0x06)
                      .Case("ord", 0x07)
                      .Case("ord_q", 0x07)
                      /* AVX only from here */
                      .Case("eq_uq", 0x08)
                      .Case("nge", 0x09)
                      .Case("nge_us", 0x09)
                      .Case("ngt", 0x0A)
                      .Case("ngt_us", 0x0A)
                      .Case("false", 0x0B)
                      .Case("false_oq", 0x0B)
                      .Case("neq_oq", 0x0C)
                      .Case("ge", 0x0D)
                      .Case("ge_os", 0x0D)
                      .Case("gt", 0x0E)
                      .Case("gt_os", 0x0E)
                      .Case("true", 0x0F)
                      .Case("true_uq", 0x0F)
                      .Case("eq_os", 0x10)
                      .Case("lt_oq", 0x11)
                      .Case("le_oq", 0x12)
                      .Case("unord_s", 0x13)
                      .Case("neq_us", 0x14)
                      .Case("nlt_uq", 0x15)
                      .Case("nle_uq", 0x16)
                      .Case("ord_s", 0x17)
                      .Case("eq_us", 0x18)
                      .Case("nge_uq", 0x19)
                      .Case("ngt_uq", 0x1A)
                      .Case("false_os", 0x1B)
                      .Case("neq_os", 0x1C)
                      .Case("ge_oq", 0x1D)
                      .Case("gt_oq", 0x1E)
                      .Case("true_us", 0x1F)
                      .Default(~0U);
    // Codes >= 8 require the VEX (vcmp*) forms; 'h' suffixes are vcmp-only.
    if (CC != ~0U && (IsVCMP || CC < 8) &&
        (IsVCMP || PatchedName.back() != 'h')) {
      if (PatchedName.endswith("ss"))
        PatchedName = IsVCMP ? "vcmpss" : "cmpss";
      else if (PatchedName.endswith("sd"))
        PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
      else if (PatchedName.endswith("ps"))
        PatchedName = IsVCMP ? "vcmpps" : "cmpps";
      else if (PatchedName.endswith("pd"))
        PatchedName = IsVCMP ? "vcmppd" : "cmppd";
      else if (PatchedName.endswith("sh"))
        PatchedName = "vcmpsh";
      else if (PatchedName.endswith("ph"))
        PatchedName = "vcmpph";
      else
        llvm_unreachable("Unexpected suffix!");

      ComparisonPredicate = CC;
    }
  }

  // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
  if (PatchedName.startswith("vpcmp") &&
      (PatchedName.back() == 'b' || PatchedName.back() == 'w' ||
       PatchedName.back() == 'd' || PatchedName.back() == 'q')) {
    unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1;
    unsigned CC = StringSwitch<unsigned>(
      PatchedName.slice(5, PatchedName.size() - SuffixSize))
                      .Case("eq", 0x0) // Only allowed on unsigned. Checked below.
                      .Case("lt", 0x1)
                      .Case("le", 0x2)
                      //.Case("false", 0x3) // Not a documented alias.
                      .Case("neq", 0x4)
                      .Case("nlt", 0x5)
                      .Case("nle", 0x6)
                      //.Case("true", 0x7) // Not a documented alias.
                      .Default(~0U);
    if (CC != ~0U && (CC != 0 || SuffixSize == 2)) {
      switch (PatchedName.back()) {
      default: llvm_unreachable("Unexpected character!");
      case 'b': PatchedName = SuffixSize == 2 ? "vpcmpub" : "vpcmpb"; break;
      case 'w': PatchedName = SuffixSize == 2 ? "vpcmpuw" : "vpcmpw"; break;
      case 'd': PatchedName = SuffixSize == 2 ? "vpcmpud" : "vpcmpd"; break;
      case 'q': PatchedName = SuffixSize == 2 ? "vpcmpuq" : "vpcmpq"; break;
      }
      // Set up the immediate to push into the operands later.
      ComparisonPredicate = CC;
    }
  }

  // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
  if (PatchedName.startswith("vpcom") &&
      (PatchedName.back() == 'b' || PatchedName.back() == 'w' ||
       PatchedName.back() == 'd' || PatchedName.back() == 'q')) {
    unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1;
    unsigned CC = StringSwitch<unsigned>(
      PatchedName.slice(5, PatchedName.size() - SuffixSize))
                      .Case("lt", 0x0)
                      .Case("le", 0x1)
                      .Case("gt", 0x2)
                      .Case("ge", 0x3)
                      .Case("eq", 0x4)
                      .Case("neq", 0x5)
                      .Case("false", 0x6)
                      .Case("true", 0x7)
                      .Default(~0U);
    if (CC != ~0U) {
      switch (PatchedName.back()) {
      default: llvm_unreachable("Unexpected character!");
      case 'b': PatchedName = SuffixSize == 2 ? "vpcomub" : "vpcomb"; break;
      case 'w': PatchedName = SuffixSize == 2 ? "vpcomuw" : "vpcomw"; break;
      case 'd': PatchedName = SuffixSize == 2 ? "vpcomud" : "vpcomd"; break;
      case 'q': PatchedName = SuffixSize == 2 ? "vpcomuq" : "vpcomq"; break;
      }
      // Set up the immediate to push into the operands later.
      ComparisonPredicate = CC;
    }
  }


  // Determine whether this is an instruction prefix.
  // FIXME:
  // Enhance prefixes integrity robustness. for example, following forms
  // are currently tolerated:
  // repz repnz <insn>    ; GAS errors for the use of two similar prefixes
  // lock addq %rax, %rbx ; Destination operand must be of memory type
  // xacquire <insn>      ; xacquire must be accompanied by 'lock'
  bool IsPrefix =
      // NOTE(review): the StringSwitch subject line is missing from this
      // extraction (presumably StringSwitch<bool>(Name)); confirm upstream.
          .Cases("cs", "ds", "es", "fs", "gs", "ss", true)
          .Cases("rex64", "data32", "data16", "addr32", "addr16", true)
          .Cases("xacquire", "xrelease", true)
          .Cases("acquire", "release", isParsingIntelSyntax())
          .Default(false);

  // Prefixes that may be juxtaposed with a following instruction on one line.
  auto isLockRepeatNtPrefix = [](StringRef N) {
    return StringSwitch<bool>(N)
        .Cases("lock", "rep", "repe", "repz", "repne", "repnz", "notrack", true)
        .Default(false);
  };

  bool CurlyAsEndOfStatement = false;

  unsigned Flags = X86::IP_NO_PREFIX;
  while (isLockRepeatNtPrefix(Name.lower())) {
    unsigned Prefix =
        // NOTE(review): the StringSwitch subject line is missing from this
        // extraction; confirm upstream.
            .Cases("lock", "lock", X86::IP_HAS_LOCK)
            .Cases("rep", "repe", "repz", X86::IP_HAS_REPEAT)
            .Cases("repne", "repnz", X86::IP_HAS_REPEAT_NE)
            .Cases("notrack", "notrack", X86::IP_HAS_NOTRACK)
            .Default(X86::IP_NO_PREFIX); // Invalid prefix (impossible)
    Flags |= Prefix;
    if (getLexer().is(AsmToken::EndOfStatement)) {
      // We don't have real instr with the given prefix
      //  let's use the prefix as the instr.
      // TODO: there could be several prefixes one after another
      // NOTE(review): one statement is missing here in this extraction;
      // confirm upstream.
      break;
    }
    // FIXME: The mnemonic won't match correctly if its not in lower case.
    Name = Parser.getTok().getString();
    Parser.Lex(); // eat the prefix
    // Hack: we could have something like "rep # some comment" or
    //    "lock; cmpxchg16b $1" or "lock\0A\09incl" or "lock/incl"
    while (Name.startswith(";") || Name.startswith("\n") ||
           Name.startswith("#") || Name.startswith("\t") ||
           Name.startswith("/")) {
      // FIXME: The mnemonic won't match correctly if its not in lower case.
      Name = Parser.getTok().getString();
      Parser.Lex(); // go to next prefix or instr
    }
  }

  if (Flags)
    PatchedName = Name;

  // Hacks to handle 'data16' and 'data32'
  if (PatchedName == "data16" && is16BitMode()) {
    return Error(NameLoc, "redundant data16 prefix");
  }
  if (PatchedName == "data32") {
    if (is32BitMode())
      return Error(NameLoc, "redundant data32 prefix");
    if (is64BitMode())
      return Error(NameLoc, "'data32' is not supported in 64-bit mode");
    // Hack to 'data16' for the table lookup.
    PatchedName = "data16";

    if (getLexer().isNot(AsmToken::EndOfStatement)) {
      StringRef Next = Parser.getTok().getString();
      getLexer().Lex();
      // data32 effectively changes the instruction suffix.
      // TODO Generalize.
      if (Next == "callw")
        Next = "calll";
      if (Next == "ljmpw")
        Next = "ljmpl";

      Name = Next;
      PatchedName = Name;
      ForcedDataPrefix = X86::Is32Bit;
      IsPrefix = false;
    }
  }

  Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));

  // Push the immediate if we extracted one from the mnemonic.
  // (AT&T syntax puts the immediate first.)
  if (ComparisonPredicate != ~0U && !isParsingIntelSyntax()) {
    const MCExpr *ImmOp = MCConstantExpr::create(ComparisonPredicate,
                                                 getParser().getContext());
    Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
  }

  // This does the actual operand parsing.  Don't parse any more if we have a
  // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
  // just want to parse the "lock" as the first instruction and the "incl" as
  // the next one.
  if (getLexer().isNot(AsmToken::EndOfStatement) && !IsPrefix) {
    // Parse '*' modifier.
    if (getLexer().is(AsmToken::Star))
      Operands.push_back(X86Operand::CreateToken("*", consumeToken()));

    // Read the operands.
    while (true) {
      if (parseOperand(Operands, Name))
        return true;
      if (HandleAVX512Operand(Operands))
        return true;

      // check for comma and eat it
      if (getLexer().is(AsmToken::Comma))
        Parser.Lex();
      else
        break;
    }

    // In MS inline asm curly braces mark the beginning/end of a block,
    // therefore they should be interepreted as end of statement
    CurlyAsEndOfStatement =
        isParsingIntelSyntax() && isParsingMSInlineAsm() &&
        (getLexer().is(AsmToken::LCurly) || getLexer().is(AsmToken::RCurly));
    if (getLexer().isNot(AsmToken::EndOfStatement) && !CurlyAsEndOfStatement)
      return TokError("unexpected token in argument list");
  }

  // Push the immediate if we extracted one from the mnemonic.
  // (Intel syntax puts the immediate last.)
  if (ComparisonPredicate != ~0U && isParsingIntelSyntax()) {
    const MCExpr *ImmOp = MCConstantExpr::create(ComparisonPredicate,
                                                 getParser().getContext());
    Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
  }

  // Consume the EndOfStatement or the prefix separator Slash
  if (getLexer().is(AsmToken::EndOfStatement) ||
      (IsPrefix && getLexer().is(AsmToken::Slash)))
    Parser.Lex();
  else if (CurlyAsEndOfStatement)
    // Add an actual EndOfStatement before the curly brace
    Info.AsmRewrites->emplace_back(AOK_EndOfStatement,
                                   getLexer().getTok().getLoc(), 0);

  // This is for gas compatibility and cannot be done in td.
  // Adding "p" for some floating point with no argument.
  // For example: fsub --> fsubp
  bool IsFp =
      Name == "fsub" || Name == "fdiv" || Name == "fsubr" || Name == "fdivr";
  if (IsFp && Operands.size() == 1) {
    const char *Repl = StringSwitch<const char *>(Name)
                           .Case("fsub", "fsubp")
                           .Case("fdiv", "fdivp")
                           .Case("fsubr", "fsubrp")
                           .Case("fdivr", "fdivrp");
    static_cast<X86Operand &>(*Operands[0]).setTokenValue(Repl);
  }

  if ((Name == "mov" || Name == "movw" || Name == "movl") &&
      (Operands.size() == 3)) {
    X86Operand &Op1 = (X86Operand &)*Operands[1];
    X86Operand &Op2 = (X86Operand &)*Operands[2];
    SMLoc Loc = Op1.getEndLoc();
    // Moving a 32 or 16 bit value into a segment register has the same
    // behavior. Modify such instructions to always take shorter form.
    if (Op1.isReg() && Op2.isReg() &&
        X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(
            Op2.getReg()) &&
        (X86MCRegisterClasses[X86::GR16RegClassID].contains(Op1.getReg()) ||
         X86MCRegisterClasses[X86::GR32RegClassID].contains(Op1.getReg()))) {
      // Change instruction name to match new instruction.
      if (Name != "mov" && Name[3] == (is16BitMode() ? 'l' : 'w')) {
        Name = is16BitMode() ? "movw" : "movl";
        Operands[0] = X86Operand::CreateToken(Name, NameLoc);
      }
      // Select the correct equivalent 16-/32-bit source register.
      MCRegister Reg =
          getX86SubSuperRegister(Op1.getReg(), is16BitMode() ? 16 : 32);
      Operands[1] = X86Operand::CreateReg(Reg, Loc, Loc);
    }
  }

  // This is a terrible hack to handle "out[s]?[bwl]? %al, (%dx)" ->
  // "outb %al, %dx".  Out doesn't take a memory form, but this is a widely
  // documented form in various unofficial manuals, so a lot of code uses it.
  if ((Name == "outb" || Name == "outsb" || Name == "outw" || Name == "outsw" ||
       Name == "outl" || Name == "outsl" || Name == "out" || Name == "outs") &&
      Operands.size() == 3) {
    X86Operand &Op = (X86Operand &)*Operands.back();
    if (Op.isDXReg())
      Operands.back() = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
                                              Op.getEndLoc());
  }
  // Same hack for "in[s]?[bwl]? (%dx), %al" -> "inb %dx, %al".
  if ((Name == "inb" || Name == "insb" || Name == "inw" || Name == "insw" ||
       Name == "inl" || Name == "insl" || Name == "in" || Name == "ins") &&
      Operands.size() == 3) {
    X86Operand &Op = (X86Operand &)*Operands[1];
    if (Op.isDXReg())
      Operands[1] = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
                                          Op.getEndLoc());
  }

  // NOTE(review): the declaration of TmpOperands (the scratch operand vector
  // used by the string-instruction fixups below) is missing from this
  // extraction; confirm upstream.
  bool HadVerifyError = false;

  // Append default arguments to "ins[bwld]"
  if (Name.startswith("ins") &&
      (Operands.size() == 1 || Operands.size() == 3) &&
      (Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd" ||
       Name == "ins")) {

    AddDefaultSrcDestOperands(TmpOperands,
                              X86Operand::CreateReg(X86::DX, NameLoc, NameLoc),
                              DefaultMemDIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Append default arguments to "outs[bwld]"
  if (Name.startswith("outs") &&
      (Operands.size() == 1 || Operands.size() == 3) &&
      (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
       Name == "outsd" || Name == "outs")) {
    AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
                              X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
  // values of $SIREG according to the mode. It would be nice if this
  // could be achieved with InstAlias in the tables.
  if (Name.startswith("lods") &&
      (Operands.size() == 1 || Operands.size() == 2) &&
      (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
       Name == "lodsl" || Name == "lodsd" || Name == "lodsq")) {
    TmpOperands.push_back(DefaultMemSIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
  // values of $DIREG according to the mode. It would be nice if this
  // could be achieved with InstAlias in the tables.
  if (Name.startswith("stos") &&
      (Operands.size() == 1 || Operands.size() == 2) &&
      (Name == "stos" || Name == "stosb" || Name == "stosw" ||
       Name == "stosl" || Name == "stosd" || Name == "stosq")) {
    TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
  // values of $DIREG according to the mode. It would be nice if this
  // could be achieved with InstAlias in the tables.
  if (Name.startswith("scas") &&
      (Operands.size() == 1 || Operands.size() == 2) &&
      (Name == "scas" || Name == "scasb" || Name == "scasw" ||
       Name == "scasl" || Name == "scasd" || Name == "scasq")) {
    TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Add default SI and DI operands to "cmps[bwlq]".
  if (Name.startswith("cmps") &&
      (Operands.size() == 1 || Operands.size() == 3) &&
      (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
       Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
    AddDefaultSrcDestOperands(TmpOperands, DefaultMemDIOperand(NameLoc),
                              DefaultMemSIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Add default SI and DI operands to "movs[bwlq]".
  if (((Name.startswith("movs") &&
        (Name == "movs" || Name == "movsb" || Name == "movsw" ||
         Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
       (Name.startswith("smov") &&
        (Name == "smov" || Name == "smovb" || Name == "smovw" ||
         Name == "smovl" || Name == "smovd" || Name == "smovq"))) &&
      (Operands.size() == 1 || Operands.size() == 3)) {
    // Bare AT&T "movsd" is the string instruction, not the SSE one.
    if (Name == "movsd" && Operands.size() == 1 && !isParsingIntelSyntax())
      Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
    AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
                              DefaultMemDIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Check if we encountered an error for one the string insturctions
  if (HadVerifyError) {
    return HadVerifyError;
  }

  // Transforms "xlat mem8" into "xlatb"
  if ((Name == "xlat" || Name == "xlatb") && Operands.size() == 2) {
    X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
    if (Op1.isMem8()) {
      Warning(Op1.getStartLoc(), "memory operand is only for determining the "
                                 "size, (R|E)BX will be used for the location");
      Operands.pop_back();
      static_cast<X86Operand &>(*Operands[0]).setTokenValue("xlatb");
    }
  }

  if (Flags)
    Operands.push_back(X86Operand::CreatePrefix(Flags, NameLoc, NameLoc));
  return false;
}
3636
// Target hook: post-process a freshly matched instruction. Returns true when
// the instruction was rewritten (the caller then re-processes it), false when
// it is left unchanged.
bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
  // Shrink VEX3-encoded instructions to the 2-byte VEX prefix unless the user
  // explicitly requested {vex3}.
  if (ForcedVEXEncoding != VEXEncoding_VEX3 &&
      X86::optimizeInstFromVEX3ToVEX2(Inst, MII.get(Inst.getOpcode())))
    return true;

  // NOTE(review): the condition guarding this 'return true' is missing from
  // this extraction (an X86::optimize* call upstream); confirm against the
  // original source.
    return true;

  switch (Inst.getOpcode()) {
  default: return false;
  case X86::JMP_1:
    // {disp32} forces a larger displacement as if the instruction was relaxed.
    // NOTE: 16-bit mode uses 16-bit displacement even though it says {disp32}.
    // This matches GNU assembler.
    if (ForcedDispEncoding == DispEncoding_Disp32) {
      Inst.setOpcode(is16BitMode() ? X86::JMP_2 : X86::JMP_4);
      return true;
    }

    return false;
  case X86::JCC_1:
    // {disp32} forces a larger displacement as if the instruction was relaxed.
    // NOTE: 16-bit mode uses 16-bit displacement even though it says {disp32}.
    // This matches GNU assembler.
    if (ForcedDispEncoding == DispEncoding_Disp32) {
      Inst.setOpcode(is16BitMode() ? X86::JCC_2 : X86::JCC_4);
      return true;
    }

    return false;
  case X86::INT: {
    // Transforms "int $3" into "int3" as a size optimization.
    // We can't write this as an InstAlias.
    if (!Inst.getOperand(0).isImm() || Inst.getOperand(0).getImm() != 3)
      return false;
    Inst.clear();
    Inst.setOpcode(X86::INT3);
    return true;
  }
  }
}
3678
// Target hook: semantic checks on a matched instruction that cannot be
// expressed in the .td files. Emits Warning/Error diagnostics; returns true
// only for hard errors (the REX + high-byte-register clash).
bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
  using namespace X86;
  const MCRegisterInfo *MRI = getContext().getRegisterInfo();
  unsigned Opcode = Inst.getOpcode();
  uint64_t TSFlags = MII.get(Opcode).TSFlags;
  // Complex FP16 multiply-add forms: warn when the destination register also
  // appears as a source.
  if (isVFCMADDCPH(Opcode) || isVFCMADDCSH(Opcode) || isVFMADDCPH(Opcode) ||
      isVFMADDCSH(Opcode)) {
    unsigned Dest = Inst.getOperand(0).getReg();
    for (unsigned i = 2; i < Inst.getNumOperands(); i++)
      if (Inst.getOperand(i).isReg() && Dest == Inst.getOperand(i).getReg())
        return Warning(Ops[0]->getStartLoc(), "Destination register should be "
                                              "distinct from source registers");
  } else if (isVFCMULCPH(Opcode) || isVFCMULCSH(Opcode) || isVFMULCPH(Opcode) ||
             isVFMULCSH(Opcode)) {
    unsigned Dest = Inst.getOperand(0).getReg();
    // The mask variants have different operand list. Scan from the third
    // operand to avoid emitting incorrect warning.
    //     VFMULCPHZrr   Dest, Src1, Src2
    //     VFMULCPHZrrk  Dest, Dest, Mask, Src1, Src2
    //     VFMULCPHZrrkz Dest, Mask, Src1, Src2
    for (unsigned i = TSFlags & X86II::EVEX_K ? 2 : 1;
         i < Inst.getNumOperands(); i++)
      if (Inst.getOperand(i).isReg() && Dest == Inst.getOperand(i).getReg())
        return Warning(Ops[0]->getStartLoc(), "Destination register should be "
                                              "distinct from source registers");
  } else if (isV4FMADDPS(Opcode) || isV4FMADDSS(Opcode) ||
             isV4FNMADDPS(Opcode) || isV4FNMADDSS(Opcode) ||
             isVP4DPWSSDS(Opcode) || isVP4DPWSSD(Opcode)) {
    // 4-register-block instructions: Src2 implicitly names an aligned group
    // of four registers; warn when it is not group-aligned (encoding % 4).
    unsigned Src2 = Inst.getOperand(Inst.getNumOperands() -
    // NOTE(review): the continuation of this expression is missing from this
    // extraction; confirm against the original source.
    unsigned Src2Enc = MRI->getEncodingValue(Src2);
    if (Src2Enc % 4 != 0) {
      // NOTE(review): the declaration of RegName (printable name of Src2) is
      // missing from this extraction; confirm against the original source.
      unsigned GroupStart = (Src2Enc / 4) * 4;
      unsigned GroupEnd = GroupStart + 3;
      return Warning(Ops[0]->getStartLoc(),
                     "source register '" + RegName + "' implicitly denotes '" +
                         RegName.take_front(3) + Twine(GroupStart) + "' to '" +
                         RegName.take_front(3) + Twine(GroupEnd) +
                         "' source group");
    }
  } else if (isVGATHERDPD(Opcode) || isVGATHERDPS(Opcode) ||
             isVGATHERQPD(Opcode) || isVGATHERQPS(Opcode) ||
             isVPGATHERDD(Opcode) || isVPGATHERDQ(Opcode) ||
             isVPGATHERQD(Opcode) || isVPGATHERQQ(Opcode)) {
    // Gathers: destination/mask/index registers must all be distinct. The
    // EVEX forms carry the mask separately, so only dest vs. index applies.
    bool HasEVEX = (TSFlags & X86II::EncodingMask) == X86II::EVEX;
    if (HasEVEX) {
      unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
      unsigned Index = MRI->getEncodingValue(
          Inst.getOperand(4 + X86::AddrIndexReg).getReg());
      if (Dest == Index)
        return Warning(Ops[0]->getStartLoc(), "index and destination registers "
                                              "should be distinct");
    } else {
      unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
      unsigned Mask = MRI->getEncodingValue(Inst.getOperand(1).getReg());
      unsigned Index = MRI->getEncodingValue(
          Inst.getOperand(3 + X86::AddrIndexReg).getReg());
      if (Dest == Mask || Dest == Index || Mask == Index)
        return Warning(Ops[0]->getStartLoc(), "mask, index, and destination "
                                              "registers should be distinct");
    }
  }

  // Check that we aren't mixing AH/BH/CH/DH with REX prefix. We only need to
  // check this with the legacy encoding, VEX/EVEX/XOP don't use REX.
  if ((TSFlags & X86II::EncodingMask) == 0) {
    MCPhysReg HReg = X86::NoRegister;
    bool UsesRex = TSFlags & X86II::REX_W;
    unsigned NumOps = Inst.getNumOperands();
    for (unsigned i = 0; i != NumOps; ++i) {
      const MCOperand &MO = Inst.getOperand(i);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (Reg == X86::AH || Reg == X86::BH || Reg == X86::CH || Reg == X86::DH)
        HReg = Reg;
      // NOTE(review): the condition for the REX-requiring register check is
      // missing from this extraction; confirm against the original source.
        UsesRex = true;
    }

    if (UsesRex && HReg != X86::NoRegister) {
      // NOTE(review): the declaration of RegName (printable name of HReg) is
      // missing from this extraction; confirm against the original source.
      return Error(Ops[0]->getStartLoc(),
                   "can't encode '" + RegName + "' in an instruction requiring "
                   "REX prefix");
    }
  }

  // prefetchit0/1 only have a RIP-relative memory form; anything else is
  // accepted but behaves as a NOP, so warn.
  if ((Opcode == X86::PREFETCHIT0 || Opcode == X86::PREFETCHIT1)) {
    const MCOperand &MO = Inst.getOperand(X86::AddrBaseReg);
    if (!MO.isReg() || MO.getReg() != X86::RIP)
      return Warning(
          Ops[0]->getStartLoc(),
          Twine((Inst.getOpcode() == X86::PREFETCHIT0 ? "'prefetchit0'"
                                                      : "'prefetchit1'")) +
              " only supports RIP-relative address");
  }
  return false;
}
3780
3781void X86AsmParser::emitWarningForSpecialLVIInstruction(SMLoc Loc) {
3782 Warning(Loc, "Instruction may be vulnerable to LVI and "
3783 "requires manual mitigation");
3784 Note(SMLoc(), "See https://software.intel.com/"
3785 "security-software-guidance/insights/"
3786 "deep-dive-load-value-injection#specialinstructions"
3787 " for more information");
3788}
3789
/// RET instructions and also instructions that indirect calls/jumps from memory
/// combine a load and a branch within a single instruction. To mitigate these
/// instructions against LVI, they must be decomposed into separate load and
/// branch instructions, with an LFENCE in between. For more details, see:
/// - X86LoadValueInjectionRetHardening.cpp
/// - X86LoadValueInjectionIndirectThunks.cpp
/// - https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection
///
/// For RET-family opcodes this emits a SHL+LFENCE sequence before the
/// instruction; for memory-indirect JMP/CALL it only emits a warning, since
/// those require manual mitigation. (The "Returns `true`" wording of the old
/// comment was stale: this function returns void.)
void X86AsmParser::applyLVICFIMitigation(MCInst &Inst, MCStreamer &Out) {
  // Information on control-flow instructions that require manual mitigation can
  // be found here:
  // https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
  switch (Inst.getOpcode()) {
  case X86::RET16:
  case X86::RET32:
  case X86::RET64:
  case X86::RETI16:
  case X86::RETI32:
  case X86::RETI64: {
    // Emit "shl $0, (%sp|%esp|%rsp)" followed by "lfence": the shift-by-zero
    // touches the return-address slot without changing it, and the fence
    // squashes mis-speculation before the RET consumes the loaded address.
    MCInst ShlInst, FenceInst;
    bool Parse32 = is32BitMode() || Code16GCC;
    unsigned Basereg =
        is64BitMode() ? X86::RSP : (Parse32 ? X86::ESP : X86::SP);
    const MCExpr *Disp = MCConstantExpr::create(0, getContext());
    auto ShlMemOp = X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
                                          /*BaseReg=*/Basereg, /*IndexReg=*/0,
                                          /*Scale=*/1, SMLoc{}, SMLoc{}, 0);
    ShlInst.setOpcode(X86::SHL64mi);
    ShlMemOp->addMemOperands(ShlInst, 5); // 5 = number of memory sub-operands.
    ShlInst.addOperand(MCOperand::createImm(0));
    FenceInst.setOpcode(X86::LFENCE);
    Out.emitInstruction(ShlInst, getSTI());
    Out.emitInstruction(FenceInst, getSTI());
    return;
  }
  case X86::JMP16m:
  case X86::JMP32m:
  case X86::JMP64m:
  case X86::CALL16m:
  case X86::CALL32m:
  case X86::CALL64m:
    // Load+branch in one instruction cannot be decomposed here; point the
    // user at the manual-mitigation guidance instead.
    emitWarningForSpecialLVIInstruction(Inst.getLoc());
    return;
  }
}
3836
3837/// To mitigate LVI, every instruction that performs a load can be followed by
3838/// an LFENCE instruction to squash any potential mis-speculation. There are
3839/// some instructions that require additional considerations, and may requre
3840/// manual mitigation. For more details, see:
3841/// https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection
3842///
3843/// Returns `true` if a mitigation was applied or warning was emitted.
3844void X86AsmParser::applyLVILoadHardeningMitigation(MCInst &Inst,
3845 MCStreamer &Out) {
3846 auto Opcode = Inst.getOpcode();
3847 auto Flags = Inst.getFlags();
3848 if ((Flags & X86::IP_HAS_REPEAT) || (Flags & X86::IP_HAS_REPEAT_NE)) {
3849 // Information on REP string instructions that require manual mitigation can
3850 // be found here:
3851 // https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
3852 switch (Opcode) {
3853 case X86::CMPSB:
3854 case X86::CMPSW:
3855 case X86::CMPSL:
3856 case X86::CMPSQ:
3857 case X86::SCASB:
3858 case X86::SCASW:
3859 case X86::SCASL:
3860 case X86::SCASQ:
3861 emitWarningForSpecialLVIInstruction(Inst.getLoc());
3862 return;
3863 }
3864 } else if (Opcode == X86::REP_PREFIX || Opcode == X86::REPNE_PREFIX) {
3865 // If a REP instruction is found on its own line, it may or may not be
3866 // followed by a vulnerable instruction. Emit a warning just in case.
3867 emitWarningForSpecialLVIInstruction(Inst.getLoc());
3868 return;
3869 }
3870
3871 const MCInstrDesc &MCID = MII.get(Inst.getOpcode());
3872
3873 // Can't mitigate after terminators or calls. A control flow change may have
3874 // already occurred.
3875 if (MCID.isTerminator() || MCID.isCall())
3876 return;
3877
3878 // LFENCE has the mayLoad property, don't double fence.
3879 if (MCID.mayLoad() && Inst.getOpcode() != X86::LFENCE) {
3881 FenceInst.setOpcode(X86::LFENCE);
3882 Out.emitInstruction(FenceInst, getSTI());
3883 }
3884}
3885
3886void X86AsmParser::emitInstruction(MCInst &Inst, OperandVector &Operands,
3887 MCStreamer &Out) {
3889 getSTI().hasFeature(X86::FeatureLVIControlFlowIntegrity))
3890 applyLVICFIMitigation(Inst, Out);
3891
3892 Out.emitInstruction(Inst, getSTI());
3893
3895 getSTI().hasFeature(X86::FeatureLVILoadHardening))
3896 applyLVILoadHardeningMitigation(Inst, Out);
3897}
3898
3899bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3902 bool MatchingInlineAsm) {
3903 if (isParsingIntelSyntax())
3904 return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
3905 MatchingInlineAsm);
3906 return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
3907 MatchingInlineAsm);
3908}
3909
3910void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
3912 bool MatchingInlineAsm) {
3913 // FIXME: This should be replaced with a real .td file alias mechanism.
3914 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
3915 // call.
3916 const char *Repl = StringSwitch<const char *>(Op.getToken())
3917 .Case("finit", "fninit")
3918 .Case("fsave", "fnsave")
3919 .Case("fstcw", "fnstcw")
3920 .Case("fstcww", "fnstcw")
3921 .Case("fstenv", "fnstenv")
3922 .Case("fstsw", "fnstsw")
3923 .Case("fstsww", "fnstsw")
3924 .Case("fclex", "fnclex")
3925 .Default(nullptr);
3926 if (Repl) {
3927 MCInst Inst;
3928 Inst.setOpcode(X86::WAIT);
3929 Inst.setLoc(IDLoc);
3930 if (!MatchingInlineAsm)
3931 emitInstruction(Inst, Operands, Out);
3932 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
3933 }
3934}
3935
3936bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc,
3937 const FeatureBitset &MissingFeatures,
3938 bool MatchingInlineAsm) {
3939 assert(MissingFeatures.any() && "Unknown missing feature!");
3942 OS << "instruction requires:";
3943 for (unsigned i = 0, e = MissingFeatures.size(); i != e; ++i) {
3944 if (MissingFeatures[i])
3945 OS << ' ' << getSubtargetFeatureName(i);
3946 }
3947 return Error(IDLoc, OS.str(), SMRange(), MatchingInlineAsm);
3948}
3949
3951 unsigned Result = 0;
3952 X86Operand &Prefix = static_cast<X86Operand &>(*Operands.back());
3953 if (Prefix.isPrefix()) {
3954 Result = Prefix.getPrefix();
3955 Operands.pop_back();
3956 }
3957 return Result;
3958}
3959
3960unsigned X86AsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3961 unsigned Opc = Inst.getOpcode();
3962 const MCInstrDesc &MCID = MII.get(Opc);
3963
3964 if (ForcedVEXEncoding == VEXEncoding_EVEX &&
3966 return Match_Unsupported;
3967
3968 if ((ForcedVEXEncoding == VEXEncoding_VEX ||
3969 ForcedVEXEncoding == VEXEncoding_VEX2 ||
3970 ForcedVEXEncoding == VEXEncoding_VEX3) &&
3972 return Match_Unsupported;
3973
3974 // These instructions are only available with {vex}, {vex2} or {vex3} prefix
3975 if (MCID.TSFlags & X86II::ExplicitVEXPrefix &&
3976 (ForcedVEXEncoding != VEXEncoding_VEX &&
3977 ForcedVEXEncoding != VEXEncoding_VEX2 &&
3978 ForcedVEXEncoding != VEXEncoding_VEX3))
3979 return Match_Unsupported;
3980
3981 return Match_Success;
3982}
3983
3984bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
3986 MCStreamer &Out,
3988 bool MatchingInlineAsm) {
3989 assert(!Operands.empty() && "Unexpect empty operand list!");
3990 assert((*Operands[0]).isToken() && "Leading operand should always be a mnemonic!");
3991 SMRange EmptyRange = std::nullopt;
3992
3993 // First, handle aliases that expand to multiple instructions.
3994 MatchFPUWaitAlias(IDLoc, static_cast<X86Operand &>(*Operands[0]), Operands,
3995 Out, MatchingInlineAsm);
3996 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
3997 unsigned Prefixes = getPrefixes(Operands);
3998
3999 MCInst Inst;
4000
4001 // If VEX/EVEX encoding is forced, we need to pass the USE_* flag to the
4002 // encoder and printer.
4003 if (ForcedVEXEncoding == VEXEncoding_VEX)
4004 Prefixes |= X86::IP_USE_VEX;
4005 else if (ForcedVEXEncoding == VEXEncoding_VEX2)
4006 Prefixes |= X86::IP_USE_VEX2;
4007 else if (ForcedVEXEncoding == VEXEncoding_VEX3)
4008 Prefixes |= X86::IP_USE_VEX3;
4009 else if (ForcedVEXEncoding == VEXEncoding_EVEX)
4010 Prefixes |= X86::IP_USE_EVEX;
4011
4012 // Set encoded flags for {disp8} and {disp32}.
4013 if (ForcedDispEncoding == DispEncoding_Disp8)
4014 Prefixes |= X86::IP_USE_DISP8;
4015 else if (ForcedDispEncoding == DispEncoding_Disp32)
4016 Prefixes |= X86::IP_USE_DISP32;
4017
4018 if (Prefixes)
4019 Inst.setFlags(Prefixes);
4020
4021 // In 16-bit mode, if data32 is specified, temporarily switch to 32-bit mode
4022 // when matching the instruction.
4023 if (ForcedDataPrefix == X86::Is32Bit)
4024 SwitchMode(X86::Is32Bit);
4025 // First, try a direct match.
4026 FeatureBitset MissingFeatures;
4027 unsigned OriginalError = MatchInstruction(Operands, Inst, ErrorInfo,
4028 MissingFeatures, MatchingInlineAsm,
4029 isParsingIntelSyntax());
4030 if (ForcedDataPrefix == X86::Is32Bit) {
4031 SwitchMode(X86::Is16Bit);
4032 ForcedDataPrefix = 0;
4033 }
4034 switch (OriginalError) {
4035 default: llvm_unreachable("Unexpected match result!");
4036 case Match_Success:
4037 if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
4038 return true;
4039 // Some instructions need post-processing to, for example, tweak which
4040 // encoding is selected. Loop on it while changes happen so the
4041 // individual transformations can chain off each other.
4042 if (!MatchingInlineAsm)
4043 while (processInstruction(Inst, Operands))
4044 ;
4045
4046 Inst.setLoc(IDLoc);
4047 if (!MatchingInlineAsm)
4048 emitInstruction(Inst, Operands, Out);
4049 Opcode = Inst.getOpcode();
4050 return false;
4051 case Match_InvalidImmUnsignedi4: {
4052 SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc();
4053 if (ErrorLoc == SMLoc())
4054 ErrorLoc = IDLoc;
4055 return Error(ErrorLoc, "immediate must be an integer in range [0, 15]",
4056 EmptyRange, MatchingInlineAsm);
4057 }
4058 case Match_MissingFeature:
4059 return ErrorMissingFeature(IDLoc, MissingFeatures, MatchingInlineAsm);
4060 case Match_InvalidOperand:
4061 case Match_MnemonicFail:
4062 case Match_Unsupported:
4063 break;
4064 }
4065 if (Op.getToken().empty()) {
4066 Error(IDLoc, "instruction must have size higher than 0", EmptyRange,
4067 MatchingInlineAsm);
4068 return true;
4069 }
4070
4071 // FIXME: Ideally, we would only attempt suffix matches for things which are
4072 // valid prefixes, and we could just infer the right unambiguous
4073 // type. However, that requires substantially more matcher support than the
4074 // following hack.
4075
4076 // Change the operand to point to a temporary token.
4077 StringRef Base = Op.getToken();
4078 SmallString<16> Tmp;
4079 Tmp += Base;
4080 Tmp += ' ';
4081 Op.setTokenValue(Tmp);
4082
4083 // If this instruction starts with an 'f', then it is a floating point stack
4084 // instruction. These come in up to three forms for 32-bit, 64-bit, and
4085 // 80-bit floating point, which use the suffixes s,l,t respectively.
4086 //
4087 // Otherwise, we assume that this may be an integer instruction, which comes
4088 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
4089 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
4090 // MemSize corresponding to Suffixes. { 8, 16, 32, 64 } { 32, 64, 80, 0 }
4091 const char *MemSize = Base[0] != 'f' ? "\x08\x10\x20\x40" : "\x20\x40\x50\0";
4092
4093 // Check for the various suffix matches.
4094 uint64_t ErrorInfoIgnore;
4095 FeatureBitset ErrorInfoMissingFeatures; // Init suppresses compiler warnings.
4096 unsigned Match[4];
4097
4098 // Some instruction like VPMULDQ is NOT the variant of VPMULD but a new one.
4099 // So we should make sure the suffix matcher only works for memory variant
4100 // that has the same size with the suffix.
4101 // FIXME: This flag is a workaround for legacy instructions that didn't
4102 // declare non suffix variant assembly.
4103 bool HasVectorReg = false;
4104 X86Operand *MemOp = nullptr;
4105 for (const auto &Op : Operands) {
4106 X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
4107 if (X86Op->isVectorReg())
4108 HasVectorReg = true;
4109 else if (X86Op->isMem()) {
4110 MemOp = X86Op;
4111 assert(MemOp->Mem.Size == 0 && "Memory size always 0 under ATT syntax");
4112 // Have we found an unqualified memory operand,
4113 // break. IA allows only one memory operand.
4114 break;
4115 }
4116 }
4117
4118 for (unsigned I = 0, E = std::size(Match); I != E; ++I) {
4119 Tmp.back() = Suffixes[I];
4120 if (MemOp && HasVectorReg)
4121 MemOp->Mem.Size = MemSize[I];
4122 Match[I] = Match_MnemonicFail;
4123 if (MemOp || !HasVectorReg) {
4124 Match[I] =
4125 MatchInstruction(Operands, Inst, ErrorInfoIgnore, MissingFeatures,
4126 MatchingInlineAsm, isParsingIntelSyntax());
4127 // If this returned as a missing feature failure, remember that.
4128 if (Match[I] == Match_MissingFeature)
4129 ErrorInfoMissingFeatures = MissingFeatures;
4130 }
4131 }
4132
4133 // Restore the old token.
4134 Op.setTokenValue(Base);
4135
4136 // If exactly one matched, then we treat that as a successful match (and the
4137 // instruction will already have been filled in correctly, since the failing
4138 // matches won't have modified it).
4139 unsigned NumSuccessfulMatches = llvm::count(Match, Match_Success);
4140 if (NumSuccessfulMatches == 1) {
4141 if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
4142 return true;
4143 // Some instructions need post-processing to, for example, tweak which
4144 // encoding is selected. Loop on it while changes happen so the
4145 // individual transformations can chain off each other.
4146 if (!MatchingInlineAsm)
4147 while (processInstruction(Inst, Operands))
4148 ;
4149
4150 Inst.setLoc(IDLoc);
4151 if (!MatchingInlineAsm)
4152 emitInstruction(Inst, Operands, Out);
4153 Opcode = Inst.getOpcode();
4154 return false;
4155 }
4156
4157 // Otherwise, the match failed, try to produce a decent error message.
4158
4159 // If we had multiple suffix matches, then identify this as an ambiguous
4160 // match.
4161 if (NumSuccessfulMatches > 1) {
4162 char MatchChars[4];
4163 unsigned NumMatches = 0;
4164 for (unsigned I = 0, E = std::size(Match); I != E; ++I)
4165 if (Match[I] == Match_Success)
4166 MatchChars[NumMatches++] = Suffixes[I];
4167
4170 OS << "ambiguous instructions require an explicit suffix (could be ";
4171 for (unsigned i = 0; i != NumMatches; ++i) {
4172 if (i != 0)
4173 OS << ", ";
4174 if (i + 1 == NumMatches)
4175 OS << "or ";
4176 OS << "'" << Base << MatchChars[i] << "'";
4177 }
4178 OS << ")";
4179 Error(IDLoc, OS.str(), EmptyRange, MatchingInlineAsm);
4180 return true;
4181 }
4182
4183 // Okay, we know that none of the variants matched successfully.
4184
4185 // If all of the instructions reported an invalid mnemonic, then the original
4186 // mnemonic was invalid.
4187 if (llvm::count(Match, Match_MnemonicFail) == 4) {
4188 if (OriginalError == Match_MnemonicFail)
4189 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
4190 Op.getLocRange(), MatchingInlineAsm);
4191
4192 if (OriginalError == Match_Unsupported)
4193 return Error(IDLoc, "unsupported instruction", EmptyRange,
4194 MatchingInlineAsm);
4195
4196 assert(OriginalError == Match_InvalidOperand && "Unexpected error");
4197 // Recover location info for the operand if we know which was the problem.
4198 if (ErrorInfo != ~0ULL) {
4199 if (ErrorInfo >= Operands.size())
4200 return Error(IDLoc, "too few operands for instruction", EmptyRange,
4201 MatchingInlineAsm);
4202
4203 X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
4204 if (Operand.getStartLoc().isValid()) {
4205 SMRange OperandRange = Operand.getLocRange();
4206 return Error(Operand.getStartLoc(), "invalid operand for instruction",
4207 OperandRange, MatchingInlineAsm);
4208 }
4209 }
4210
4211 return Error(IDLoc, "invalid operand for instruction", EmptyRange,
4212 MatchingInlineAsm);
4213 }
4214
4215 // If one instruction matched as unsupported, report this as unsupported.
4216 if (llvm::count(Match, Match_Unsupported) == 1) {
4217 return Error(IDLoc, "unsupported instruction", EmptyRange,
4218 MatchingInlineAsm);
4219 }
4220
4221 // If one instruction matched with a missing feature, report this as a
4222 // missing feature.
4223 if (llvm::count(Match, Match_MissingFeature) == 1) {
4224 ErrorInfo = Match_MissingFeature;
4225 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures,
4226 MatchingInlineAsm);
4227 }
4228
4229 // If one instruction matched with an invalid operand, report this as an
4230 // operand failure.
4231 if (llvm::count(Match, Match_InvalidOperand) == 1) {
4232 return Error(IDLoc, "invalid operand for instruction", EmptyRange,
4233 MatchingInlineAsm);
4234 }
4235
4236 // If all of these were an outright failure, report it in a useless way.
4237 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
4238 EmptyRange, MatchingInlineAsm);
4239 return true;
4240}
4241
4242bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
4244 MCStreamer &Out,
4246 bool MatchingInlineAsm) {
4247 assert(!Operands.empty() && "Unexpect empty operand list!");
4248 assert((*Operands[0]).isToken() && "Leading operand should always be a mnemonic!");
4249 StringRef Mnemonic = (static_cast<X86Operand &>(*Operands[0])).getToken();
4250 SMRange EmptyRange = std::nullopt;
4251 StringRef Base = (static_cast<X86Operand &>(*Operands[0])).getToken();
4252 unsigned Prefixes = getPrefixes(Operands);
4253
4254 // First, handle aliases that expand to multiple instructions.
4255 MatchFPUWaitAlias(IDLoc, static_cast<X86Operand &>(*Operands[0]), Operands, Out, MatchingInlineAsm);
4256 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
4257
4258 MCInst Inst;
4259
4260 // If VEX/EVEX encoding is forced, we need to pass the USE_* flag to the
4261 // encoder and printer.
4262 if (ForcedVEXEncoding == VEXEncoding_VEX)
4263 Prefixes |= X86::IP_USE_VEX;
4264 else if (ForcedVEXEncoding == VEXEncoding_VEX2)
4265 Prefixes |= X86::IP_USE_VEX2;
4266 else if (ForcedVEXEncoding == VEXEncoding_VEX3)
4267 Prefixes |= X86::IP_USE_VEX3;
4268 else if (ForcedVEXEncoding == VEXEncoding_EVEX)
4269 Prefixes |= X86::IP_USE_EVEX;
4270
4271 // Set encoded flags for {disp8} and {disp32}.
4272 if (ForcedDispEncoding == DispEncoding_Disp8)
4273 Prefixes |= X86::IP_USE_DISP8;
4274 else if (ForcedDispEncoding == DispEncoding_Disp32)
4275 Prefixes |= X86::IP_USE_DISP32;
4276
4277 if (Prefixes)
4278 Inst.setFlags(Prefixes);
4279
4280 // Find one unsized memory operand, if present.
4281 X86Operand *UnsizedMemOp = nullptr;
4282 for (const auto &Op : Operands) {
4283 X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
4284 if (X86Op->isMemUnsized()) {
4285 UnsizedMemOp = X86Op;
4286 // Have we found an unqualified memory operand,
4287 // break. IA allows only one memory operand.
4288 break;
4289 }
4290 }
4291
4292 // Allow some instructions to have implicitly pointer-sized operands. This is
4293 // compatible with gas.
4294 if (UnsizedMemOp) {
4295 static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
4296 for (const char *Instr : PtrSizedInstrs) {
4297 if (Mnemonic == Instr) {
4298 UnsizedMemOp->Mem.Size = getPointerWidth();
4299 break;
4300 }
4301 }
4302 }
4303
4305 FeatureBitset ErrorInfoMissingFeatures;
4306 FeatureBitset MissingFeatures;
4307
4308 // If unsized push has immediate operand we should default the default pointer
4309 // size for the size.
4310 if (Mnemonic == "push" && Operands.size() == 2) {
4311 auto *X86Op = static_cast<X86Operand *>(Operands[1].get());
4312 if (X86Op->isImm()) {
4313 // If it's not a constant fall through and let remainder take care of it.
4314 const auto *CE = dyn_cast<MCConstantExpr>(X86Op->getImm());
4315 unsigned Size = getPointerWidth();
4316 if (CE &&
4317 (isIntN(Size, CE->getValue()) || isUIntN(Size, CE->getValue()))) {
4318 SmallString<16> Tmp;
4319 Tmp += Base;
4320 Tmp += (is64BitMode())
4321 ? "q"
4322 : (is32BitMode()) ? "l" : (is16BitMode()) ? "w" : " ";
4323 Op.setTokenValue(Tmp);
4324 // Do match in ATT mode to allow explicit suffix usage.
4325 Match.push_back(MatchInstruction(Operands, Inst, ErrorInfo,
4326 MissingFeatures, MatchingInlineAsm,
4327 false /*isParsingIntelSyntax()*/));
4328 Op.setTokenValue(Base);
4329 }
4330 }
4331 }
4332
4333 // If an unsized memory operand is present, try to match with each memory
4334 // operand size. In Intel assembly, the size is not part of the instruction
4335 // mnemonic.
4336 if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
4337 static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
4338 for (unsigned Size : MopSizes) {
4339 UnsizedMemOp->Mem.Size = Size;
4340 uint64_t ErrorInfoIgnore;
4341 unsigned LastOpcode = Inst.getOpcode();
4342 unsigned M = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
4343 MissingFeatures, MatchingInlineAsm,
4344 isParsingIntelSyntax());
4345 if (Match.empty() || LastOpcode != Inst.getOpcode())
4346 Match.push_back(M);
4347
4348 // If this returned as a missing feature failure, remember that.
4349 if (Match.back() == Match_MissingFeature)
4350 ErrorInfoMissingFeatures = MissingFeatures;
4351 }
4352
4353 // Restore the size of the unsized memory operand if we modified it.
4354 UnsizedMemOp->Mem.Size = 0;
4355 }
4356
4357 // If we haven't matched anything yet, this is not a basic integer or FPU
4358 // operation. There shouldn't be any ambiguity in our mnemonic table, so try
4359 // matching with the unsized operand.
4360 if (Match.empty()) {
4361 Match.push_back(MatchInstruction(
4362 Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm,
4363 isParsingIntelSyntax()));
4364 // If this returned as a missing feature failure, remember that.
4365 if (Match.back() == Match_MissingFeature)
4366 ErrorInfoMissingFeatures = MissingFeatures;
4367 }
4368
4369 // Restore the size of the unsized memory operand if we modified it.
4370 if (UnsizedMemOp)
4371 UnsizedMemOp->Mem.Size = 0;
4372
4373 // If it's a bad mnemonic, all results will be the same.
4374 if (Match.back() == Match_MnemonicFail) {
4375 return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
4376 Op.getLocRange(), MatchingInlineAsm);
4377 }
4378
4379 unsigned NumSuccessfulMatches = llvm::count(Match, Match_Success);
4380
4381 // If matching was ambiguous and we had size information from the frontend,
4382 // try again with that. This handles cases like "movxz eax, m8/m16".
4383 if (UnsizedMemOp && NumSuccessfulMatches > 1 &&
4384 UnsizedMemOp->getMemFrontendSize()) {
4385 UnsizedMemOp->Mem.Size = UnsizedMemOp->getMemFrontendSize();
4386 unsigned M = MatchInstruction(
4387 Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm,
4388 isParsingIntelSyntax());
4389 if (M == Match_Success)
4390 NumSuccessfulMatches = 1;
4391
4392 // Add a rewrite that encodes the size information we used from the
4393 // frontend.
4394 InstInfo->AsmRewrites->emplace_back(
4395 AOK_SizeDirective, UnsizedMemOp->getStartLoc(),
4396 /*Len=*/0, UnsizedMemOp->getMemFrontendSize());
4397 }
4398
4399 // If exactly one matched, then we treat that as a successful match (and the
4400 // instruction will already have been filled in correctly, since the failing
4401 // matches won't have modified it).
4402 if (NumSuccessfulMatches == 1) {
4403 if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
4404 return true;
4405 // Some instructions need post-processing to, for example, tweak which
4406 // encoding is selected. Loop on it while changes happen so the individual
4407 // transformations can chain off each other.
4408 if (!MatchingInlineAsm)
4409 while (processInstruction(Inst, Operands))
4410 ;
4411 Inst.setLoc(IDLoc);
4412 if (!MatchingInlineAsm)
4413 emitInstruction(Inst, Operands, Out);
4414 Opcode = Inst.getOpcode();
4415 return false;
4416 } else if (NumSuccessfulMatches > 1) {
4417 assert(UnsizedMemOp &&
4418 "multiple matches only possible with unsized memory operands");
4419 return Error(UnsizedMemOp->getStartLoc(),
4420 "ambiguous operand size for instruction '" + Mnemonic + "\'",
4421 UnsizedMemOp->getLocRange());
4422 }
4423
4424 // If one instruction matched as unsupported, report this as unsupported.
4425 if (llvm::count(Match, Match_Unsupported) == 1) {
4426 return Error(IDLoc, "unsupported instruction", EmptyRange,
4427 MatchingInlineAsm);
4428 }
4429
4430 // If one instruction matched with a missing feature, report this as a
4431 // missing feature.
4432 if (llvm::count(Match, Match_MissingFeature) == 1) {
4433 ErrorInfo = Match_MissingFeature;
4434 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures,
4435 MatchingInlineAsm);
4436 }
4437
4438 // If one instruction matched with an invalid operand, report this as an
4439 // operand failure.
4440 if (llvm::count(Match, Match_InvalidOperand) == 1) {
4441 return Error(IDLoc, "invalid operand for instruction", EmptyRange,
4442 MatchingInlineAsm);
4443 }
4444
4445 if (llvm::count(Match, Match_InvalidImmUnsignedi4) == 1) {
4446 SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc();
4447 if (ErrorLoc == SMLoc())
4448 ErrorLoc = IDLoc;
4449 return Error(ErrorLoc, "immediate must be an integer in range [0, 15]",
4450 EmptyRange, MatchingInlineAsm);
4451 }
4452
4453 // If all of these were an outright failure, report it in a useless way.
4454 return Error(IDLoc, "unknown instruction mnemonic", EmptyRange,
4455 MatchingInlineAsm);
4456}
4457
4458bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
4459 return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
4460}
4461
/// ParseDirective - Handle X86-specific assembler directives: .arch, .code*,
/// the syntax-switching directives, .nops, .even, the CodeView frame
/// (.cv_fpo_*) family, and the Win64 SEH (.seh_*) family plus their MASM
/// spellings. Unrecognized names fall through to 'return true'.
/// NOTE(review): the true/false return convention here is inferred from that
/// fall-through — confirm against MCTargetAsmParser::ParseDirective.
bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
  MCAsmParser &Parser = getParser();
  StringRef IDVal = DirectiveID.getIdentifier();
  if (IDVal.startswith(".arch"))
    return parseDirectiveArch();
  if (IDVal.startswith(".code"))
    return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
  else if (IDVal.startswith(".att_syntax")) {
    // ".att_syntax prefix" is the (default) supported form; "noprefix" is
    // rejected because registers must carry '%' in AT&T syntax.
    if (getLexer().isNot(AsmToken::EndOfStatement)) {
      if (Parser.getTok().getString() == "prefix")
        Parser.Lex();
      else if (Parser.getTok().getString() == "noprefix")
        return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
                                           "supported: registers must have a "
                                           "'%' prefix in .att_syntax");
    }
    getParser().setAssemblerDialect(0);
    return false;
  } else if (IDVal.startswith(".intel_syntax")) {
    getParser().setAssemblerDialect(1);
    if (getLexer().isNot(AsmToken::EndOfStatement)) {
      if (Parser.getTok().getString() == "noprefix")
        Parser.Lex();
      else if (Parser.getTok().getString() == "prefix")
        return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
                                           "supported: registers must not have "
                                           "a '%' prefix in .intel_syntax");
    }
    return false;
  } else if (IDVal == ".nops")
    return parseDirectiveNops(DirectiveID.getLoc());
  else if (IDVal == ".even")
    return parseDirectiveEven(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_proc")
    return parseDirectiveFPOProc(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_setframe")
    return parseDirectiveFPOSetFrame(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_pushreg")
    return parseDirectiveFPOPushReg(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_stackalloc")
    return parseDirectiveFPOStackAlloc(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_stackalign")
    return parseDirectiveFPOStackAlign(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_endprologue")
    return parseDirectiveFPOEndPrologue(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_endproc")
    return parseDirectiveFPOEndProc(DirectiveID.getLoc());
  else if (IDVal == ".seh_pushreg" ||
           (Parser.isParsingMasm() && IDVal.equals_insensitive(".pushreg")))
    return parseDirectiveSEHPushReg(DirectiveID.getLoc());
  else if (IDVal == ".seh_setframe" ||
           (Parser.isParsingMasm() && IDVal.equals_insensitive(".setframe")))
    return parseDirectiveSEHSetFrame(DirectiveID.getLoc());
  else if (IDVal == ".seh_savereg" ||
           (Parser.isParsingMasm() && IDVal.equals_insensitive(".savereg")))
    return parseDirectiveSEHSaveReg(DirectiveID.getLoc());
  else if (IDVal == ".seh_savexmm" ||
           (Parser.isParsingMasm() && IDVal.equals_insensitive(".savexmm128")))
    return parseDirectiveSEHSaveXMM(DirectiveID.getLoc());
  else if (IDVal == ".seh_pushframe" ||
           (Parser.isParsingMasm() && IDVal.equals_insensitive(".pushframe")))
    return parseDirectiveSEHPushFrame(DirectiveID.getLoc());

  return true;
}
4527
/// parseDirectiveArch
///  ::= .arch <text>
/// The argument is currently ignored; the rest of the statement is consumed
/// so that input written for other assemblers still parses.
bool X86AsmParser::parseDirectiveArch() {
  // Ignore .arch for now.
  getParser().parseStringToEndOfStatement();
  return false;
}
4533
/// parseDirectiveNops
/// ::= .nops size[, control]
/// Emits 'size' bytes of NOPs; 'control' optionally bounds the size of each
/// individual NOP instruction.
bool X86AsmParser::parseDirectiveNops(SMLoc L) {
  int64_t NumBytes = 0, Control = 0;
  SMLoc NumBytesLoc, ControlLoc;
  const MCSubtargetInfo& STI = getSTI();
  NumBytesLoc = getTok().getLoc();
  if (getParser().checkForValidSection() ||
      getParser().parseAbsoluteExpression(NumBytes))
    return true;

  if (parseOptionalToken(AsmToken::Comma)) {
    ControlLoc = getTok().getLoc();
    if (getParser().parseAbsoluteExpression(Control))
      return true;
  }
  if (getParser().parseEOL())
    return true;

  // Semantic checks: the diagnostic is emitted here, so 'false' is returned
  // to keep the caller from reporting a second error.
  if (NumBytes <= 0) {
    Error(NumBytesLoc, "'.nops' directive with non-positive size");
    return false;
  }

  if (Control < 0) {
    Error(ControlLoc, "'.nops' directive with negative NOP size");
    return false;
  }

  /// Emit nops
  getParser().getStreamer().emitNops(NumBytes, Control, L, STI);

  return false;
}
4568
/// parseDirectiveEven
/// ::= .even
/// Aligns the current location to 2 bytes: NOP padding in code sections,
/// zero padding otherwise.
bool X86AsmParser::parseDirectiveEven(SMLoc L) {
  // NOTE(review): returns false even when parseEOL() fails (the diagnostic
  // has already been emitted) — confirm this asymmetry is intended.
  if (parseEOL())
    return false;

  // If no section has been set up yet, initialize the default sections so
  // there is somewhere to emit the alignment.
  const MCSection *Section = getStreamer().getCurrentSectionOnly();
  if (!Section) {
    getStreamer().initSections(false, getSTI());
    Section = getStreamer().getCurrentSectionOnly();
  }
  if (Section->useCodeAlign())
    getStreamer().emitCodeAlignment(Align(2), &getSTI(), 0);
  else
    getStreamer().emitValueToAlignment(Align(2), 0, 1, 0);
  return false;
}
4586
/// ParseDirectiveCode
/// ::= .code16 | .code32 | .code64
/// Switches the parser/streamer between 16/32/64-bit modes. ".code16gcc"
/// parses as 32-bit but emits 16-bit code.
bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
  MCAsmParser &Parser = getParser();
  // Any .code* directive leaves .code16gcc mode; it is re-enabled below only
  // for ".code16gcc" itself.
  Code16GCC = false;
  if (IDVal == ".code16") {
    Parser.Lex();
    if (!is16BitMode()) {
      SwitchMode(X86::Is16Bit);
      getParser().getStreamer().emitAssemblerFlag(MCAF_Code16);
    }
  } else if (IDVal == ".code16gcc") {
    // .code16gcc parses as if in 32-bit mode, but emits code in 16-bit mode.
    Parser.Lex();
    Code16GCC = true;
    if (!is16BitMode()) {
      SwitchMode(X86::Is16Bit);
      getParser().getStreamer().emitAssemblerFlag(MCAF_Code16);
    }
  } else if (IDVal == ".code32") {
    Parser.Lex();
    if (!is32BitMode()) {
      SwitchMode(X86::Is32Bit);
      getParser().getStreamer().emitAssemblerFlag(MCAF_Code32);
    }
  } else if (IDVal == ".code64") {
    Parser.Lex();
    if (!is64BitMode()) {
      SwitchMode(X86::Is64Bit);
      getParser().getStreamer().emitAssemblerFlag(MCAF_Code64);
    }
  } else {
    // The diagnostic is emitted here, so return false rather than letting the
    // caller report the directive again.
    Error(L, "unknown directive " + IDVal);
    return false;
  }

  return false;
}
4625
4626// .cv_fpo_proc foo
4627bool X86AsmParser::parseDirectiveFPOProc(SMLoc L) {
4628 MCAsmParser &Parser = getParser();
4629 StringRef ProcName;
4630 int64_t ParamsSize;
4631 if (Parser.parseIdentifier(ProcName))
4632 return Parser.TokError("expected symbol name");
4633 if (Parser.parseIntToken(ParamsSize, "expected parameter byte count"))
4634 return true;
4635 if (!isUIntN(32, ParamsSize))
4636 return Parser.TokError("parameters size out of range");
4637 if (parseEOL())
4638 return true;
4639 MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName);
4640 return getTargetStreamer().emitFPOProc(ProcSym, ParamsSize, L);
4641}
4642
4643// .cv_fpo_setframe ebp
4644bool X86AsmParser::parseDirectiveFPOSetFrame(SMLoc L) {
4646 SMLoc DummyLoc;
4647 if (parseRegister(Reg, DummyLoc, DummyLoc) || parseEOL())
4648 return true;
4649 return getTargetStreamer().emitFPOSetFrame(Reg, L);
4650}
4651
4652// .cv_fpo_pushreg ebx
4653bool X86AsmParser::parseDirectiveFPOPushReg(SMLoc L) {
4655 SMLoc DummyLoc;
4656 if (parseRegister(Reg, DummyLoc, DummyLoc) || parseEOL())
4657 return true;
4658 return getTargetStreamer().emitFPOPushReg(Reg, L);
4659}
4660
4661// .cv_fpo_stackalloc 20
4662bool X86AsmParser::parseDirectiveFPOStackAlloc(SMLoc L) {
4663 MCAsmParser &Parser = getParser();
4664 int64_t Offset;
4665 if (Parser.parseIntToken(Offset, "expected offset") || parseEOL())
4666 return true;
4667 return getTargetStreamer().emitFPOStackAlloc(Offset, L);
4668}
4669
4670// .cv_fpo_stackalign 8
4671bool X86AsmParser::parseDirectiveFPOStackAlign(SMLoc L) {
4672 MCAsmParser &Parser = getParser();
4673 int64_t Offset;
4674 if (Parser.parseIntToken(Offset, "expected offset") || parseEOL())
4675 return true;
4676 return getTargetStreamer().emitFPOStackAlign(Offset, L);
4677}
4678
4679// .cv_fpo_endprologue
4680bool X86AsmParser::parseDirectiveFPOEndPrologue(SMLoc L) {
4681 MCAsmParser &Parser = getParser();
4682 if (Parser.parseEOL())
4683 return true;
4684 return getTargetStreamer().emitFPOEndPrologue(L);
4685}
4686
4687// .cv_fpo_endproc
4688bool X86AsmParser::parseDirectiveFPOEndProc(SMLoc L) {
4689 MCAsmParser &Parser = getParser();
4690 if (Parser.parseEOL())
4691 return true;
4692 return getTargetStreamer().emitFPOEndProc(L);
4693}
4694
4695bool X86AsmParser::parseSEHRegisterNumber(unsigned RegClassID,
4696 MCRegister &RegNo) {
4697 SMLoc startLoc = getLexer().getLoc();
4698 const MCRegisterInfo *MRI = getContext().getRegisterInfo();
4699
4700 // Try parsing the argument as a register first.
4701 if (getLexer().getTok().isNot(AsmToken::Integer)) {
4702 SMLoc endLoc;
4703 if (parseRegister(RegNo, startLoc, endLoc))
4704 return true;
4705
4706 if (!X86MCRegisterClasses[RegClassID].contains(RegNo)) {
4707 return Error(startLoc,
4708 "register is not supported for use with this directive");
4709 }
4710 } else {
4711 // Otherwise, an integer number matching the encoding of the desired
4712 // register may appear.
4713 int64_t EncodedReg;
4714 if (getParser().parseAbsoluteExpression(EncodedReg))
4715 return true;
4716
4717 // The SEH register number is the same as the encoding register number. Map
4718 // from the encoding back to the LLVM register number.
4719 RegNo = 0;
4720 for (MCPhysReg Reg : X86MCRegisterClasses[RegClassID]) {
4721 if (MRI->getEncodingValue(Reg) == EncodedReg) {
4722 RegNo = Reg;
4723 break;
4724 }
4725 }
4726 if (RegNo == 0) {
4727 return Error(startLoc,
4728 "incorrect register number for use with this directive");
4729 }
4730 }
4731
4732 return false;
4733}
4734
4735bool X86AsmParser::parseDirectiveSEHPushReg(SMLoc Loc) {
4737 if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg))
4738 return true;
4739
4740 if (getLexer().isNot(AsmToken::EndOfStatement))
4741 return TokError("expected end of directive");
4742
4743 getParser().Lex();
4744 getStreamer().emitWinCFIPushReg(Reg, Loc);
4745 return false;
4746}
4747
4748bool X86AsmParser::parseDirectiveSEHSetFrame(SMLoc Loc) {
4750 int64_t Off;
4751 if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg))
4752 return true;
4753 if (getLexer().isNot(AsmToken::Comma))
4754 return TokError("you must specify a stack pointer offset");
4755
4756 getParser().Lex();
4757 if (getParser().parseAbsoluteExpression(Off))
4758 return true;
4759
4760 if (getLexer().isNot(AsmToken::EndOfStatement))
4761 return TokError("expected end of directive");
4762
4763 getParser().Lex();
4764 getStreamer().emitWinCFISetFrame(Reg, Off, Loc);
4765 return false;
4766}
4767
4768bool X86AsmParser::parseDirectiveSEHSaveReg(SMLoc Loc) {
4770 int64_t Off;
4771 if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg))
4772 return true;
4773 if (getLexer().isNot(AsmToken::Comma))
4774 return TokError("you must specify an offset on the stack");
4775
4776 getParser().Lex();
4777 if (getParser().parseAbsoluteExpression(Off))
4778 return true;
4779
4780 if (getLexer().isNot(AsmToken::EndOfStatement))
4781 return TokError("expected end of directive");
4782
4783 getParser().Lex();
4784 getStreamer().emitWinCFISaveReg(Reg, Off, Loc);
4785 return false;
4786}
4787
4788bool X86AsmParser::parseDirectiveSEHSaveXMM(SMLoc Loc) {
4790 int64_t Off;
4791 if (parseSEHRegisterNumber(X86::VR128XRegClassID, Reg))
4792 return true;
4793 if (getLexer().isNot(AsmToken::Comma))
4794 return TokError("you must specify an offset on the stack");
4795
4796 getParser().Lex();
4797 if (getParser().parseAbsoluteExpression(Off))
4798 return true;
4799
4800 if (getLexer().isNot(AsmToken::EndOfStatement))
4801 return TokError("expected end of directive");
4802
4803 getParser().Lex();
4804 getStreamer().emitWinCFISaveXMM(Reg, Off, Loc);
4805 return false;
4806}
4807
4808bool X86AsmParser::parseDirectiveSEHPushFrame(SMLoc Loc) {
4809 bool Code = false;
4810 StringRef CodeID;
4811 if (getLexer().is(AsmToken::At)) {
4812 SMLoc startLoc = getLexer().getLoc();
4813 getParser().Lex();
4814 if (!getParser().parseIdentifier(CodeID)) {
4815 if (CodeID != "code")
4816 return Error(startLoc, "expected @code");
4817 Code = true;
4818 }
4819 }
4820
4821 if (getLexer().isNot(AsmToken::EndOfStatement))
4822 return TokError("expected end of directive");
4823
4824 getParser().Lex();
4825 getStreamer().emitWinCFIPushFrame(Code, Loc);
4826 return false;
4827}
4828
4829// Force static initialization.
4833}
4834
4835#define GET_MATCHER_IMPLEMENTATION
4836#include "X86GenAsmMatcher.inc"
unsigned const MachineRegisterInfo * MRI
static const char * getSubtargetFeatureName(uint64_t Val)
static unsigned MatchRegisterName(StringRef Name)
static bool isNot(const MachineRegisterInfo &MRI, const MachineInstr &MI)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
#define LLVM_EXTERNAL_VISIBILITY
Definition: Compiler.h:127
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
std::string Name
uint64_t Size
bool End
Definition: ELF_riscv.cpp:464
Symbol * Sym
Definition: ELF_riscv.cpp:463
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
#define check(cond)
amode Optimize addressing mode
static ModuleSymbolTable::Symbol getSym(DataRefImpl &Symb)
#define RegName(no)
static LVOptions Options
Definition: LVOptions.cpp:25
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
static bool IsVCMP(unsigned Opcode)
static bool startswith(StringRef Magic, const char(&S)[N])
Definition: Magic.cpp:28
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
uint64_t TSFlags
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
This file defines the SmallString class.
This file defines the SmallVector class.
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:40
@ Flags
Definition: TextStubV5.cpp:93
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:470
static cl::opt< bool > LVIInlineAsmHardening("x86-experimental-lvi-inline-asm-hardening", cl::desc("Harden inline assembly code that may be vulnerable to Load Value" " Injection (LVI). This feature is experimental."), cl::Hidden)
static bool checkScale(unsigned Scale, StringRef &ErrMsg)
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86AsmParser()
static unsigned getPrefixes(OperandVector &Operands)
static bool CheckBaseRegAndIndexRegAndScale(unsigned BaseReg, unsigned IndexReg, unsigned Scale, bool Is64BitMode, StringRef &ErrMsg)
Value * RHS
Value * LHS
static unsigned getSize(unsigned Kind)
Class for arbitrary precision integers.
Definition: APInt.h:75
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1498
Target independent representation for an assembler token.
Definition: MCAsmMacro.h:21
SMLoc getLoc() const
Definition: MCAsmLexer.cpp:26
int64_t getIntVal() const
Definition: MCAsmMacro.h:115
bool isNot(TokenKind K) const
Definition: MCAsmMacro.h:83
StringRef getString() const
Get the string for the current token, this includes all characters (for example, the quotes on string...
Definition: MCAsmMacro.h:110
bool is(TokenKind K) const
Definition: MCAsmMacro.h:82
TokenKind getKind() const
Definition: MCAsmMacro.h:81
SMLoc getEndLoc() const
Definition: MCAsmLexer.cpp:30
StringRef getIdentifier() const
Get the identifier string for the current token, which should be an identifier or a string.
Definition: MCAsmMacro.h:99
Base class for user error types.
Definition: Error.h:348
Lightweight error class with error context and mandatory checking.
Definition: Error.h:156
Container class for subtarget features.
constexpr size_t size() const
An instruction for ordering other memory operations.
Definition: Instructions.h:436
Generic assembler lexer interface, for use by target specific assembly lexers.
Definition: MCAsmLexer.h:37
void UnLex(AsmToken const &Token)
Definition: MCAsmLexer.h:93
bool isNot(AsmToken::TokenKind K) const
Check if the current token has kind K.
Definition: MCAsmLexer.h:144
Generic assembler parser interface, for use by target specific assembly parsers.
Definition: MCAsmParser.h:123
virtual void eatToEndOfStatement()=0
Skip to the end of the current statement, for error recovery.
virtual MCStreamer & getStreamer()=0
Return the output streamer for the assembler.
virtual bool parseExpression(const MCExpr *&Res, SMLoc &EndLoc)=0
Parse an arbitrary expression.
virtual bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc, AsmTypeInfo *TypeInfo)=0
Parse a primary expression.
const AsmToken & getTok() const
Get the current AsmToken from the stream.
Definition: MCAsmParser.cpp:40
virtual bool isParsingMasm() const
Definition: MCAsmParser.h:187
virtual bool parseIdentifier(StringRef &Res)=0
Parse an identifier or string (as a quoted identifier) and set Res to the identifier contents.
bool parseOptionalToken(AsmToken::TokenKind T)
Attempt to parse and consume token, returning true on success.
Definition: MCAsmParser.cpp:80
bool parseIntToken(int64_t &V, const Twine &ErrMsg)
Definition: MCAsmParser.cpp:72
virtual const AsmToken & Lex()=0
Get the next AsmToken in the stream, possibly handling file inclusion first.
virtual unsigned getAssemblerDialect()
Definition: MCAsmParser.h:173
virtual void addAliasForDirective(StringRef Directive, StringRef Alias)=0
virtual bool lookUpType(StringRef Name, AsmTypeInfo &Info) const
Definition: MCAsmParser.h:199
bool TokError(const Twine &Msg, SMRange Range=std::nullopt)
Report an error at the current lexer location.
Definition: MCAsmParser.cpp:97
virtual bool parseAbsoluteExpression(int64_t &Res)=0
Parse an expression which must evaluate to an absolute value.
virtual bool lookUpField(StringRef Name, AsmFieldInfo &Info) const
Definition: MCAsmParser.h:191
bool parseTokenLoc(SMLoc &Loc)
Definition: MCAsmParser.cpp:44
virtual MCContext & getContext()=0
bool Error(SMLoc L, const Twine &Msg, SMRange Range=std::nullopt)
Return an error at the location L, with the message Msg.
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:525
static const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition: MCExpr.cpp:194
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
@ SymbolRef
References to labels and assigned expressions.
Definition: MCExpr.h:40
ExprKind getKind() const
Definition: MCExpr.h:81
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:184