X86AsmParser.cpp
1//===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
16#include "X86AsmParserCommon.h"
17#include "X86Operand.h"
18#include "llvm/ADT/STLExtras.h"
22#include "llvm/ADT/Twine.h"
23#include "llvm/MC/MCContext.h"
24#include "llvm/MC/MCExpr.h"
25#include "llvm/MC/MCInst.h"
26#include "llvm/MC/MCInstrInfo.h"
32#include "llvm/MC/MCSection.h"
33#include "llvm/MC/MCStreamer.h"
35#include "llvm/MC/MCSymbol.h"
41#include <algorithm>
42#include <memory>
43
44using namespace llvm;
45
47 "x86-experimental-lvi-inline-asm-hardening",
48 cl::desc("Harden inline assembly code that may be vulnerable to Load Value"
49 " Injection (LVI). This feature is experimental."), cl::Hidden);
50
51static bool checkScale(unsigned Scale, StringRef &ErrMsg) {
52 if (Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
53 ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
54 return true;
55 }
56 return false;
57}
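// Example: for the AT&T memory operand "(%rax,%rbx,4)" the parsed scale is 4
// and checkScale() accepts it, while a scale of 3 as in "(%rax,%rbx,3)" makes
// it return true and the caller reports ErrMsg as a parse error.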
58
59namespace {
60
61// Including the generated SSE2AVX compression tables.
62#define GET_X86_SSE2AVX_TABLE
63#include "X86GenInstrMapping.inc"
64
65static const char OpPrecedence[] = {
66 0, // IC_OR
67 1, // IC_XOR
68 2, // IC_AND
69 4, // IC_LSHIFT
70 4, // IC_RSHIFT
71 5, // IC_PLUS
72 5, // IC_MINUS
73 6, // IC_MULTIPLY
74 6, // IC_DIVIDE
75 6, // IC_MOD
76 7, // IC_NOT
77 8, // IC_NEG
78 9, // IC_RPAREN
79 10, // IC_LPAREN
80 0, // IC_IMM
81 0, // IC_REGISTER
82 3, // IC_EQ
83 3, // IC_NE
84 3, // IC_LT
85 3, // IC_LE
86 3, // IC_GT
87 3 // IC_GE
88};
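// Higher values bind tighter. Note that '+'/'-' (5) rank above SHL/SHR (4),
// so an Intel-syntax immediate such as "1 shl 2 + 3" is evaluated by the
// InfixCalculator below as 1 shl (2 + 3) = 32.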
89
90class X86AsmParser : public MCTargetAsmParser {
91 ParseInstructionInfo *InstInfo;
92 bool Code16GCC;
93 unsigned ForcedDataPrefix = 0;
94
95 enum OpcodePrefix {
96 OpcodePrefix_Default,
97 OpcodePrefix_REX,
98 OpcodePrefix_REX2,
99 OpcodePrefix_VEX,
100 OpcodePrefix_VEX2,
101 OpcodePrefix_VEX3,
102 OpcodePrefix_EVEX,
103 };
104
105 OpcodePrefix ForcedOpcodePrefix = OpcodePrefix_Default;
106
107 enum DispEncoding {
108 DispEncoding_Default,
109 DispEncoding_Disp8,
110 DispEncoding_Disp32,
111 };
112
113 DispEncoding ForcedDispEncoding = DispEncoding_Default;
114
115 // Does this instruction use an APX extended register?
116 bool UseApxExtendedReg = false;
117 // Is this instruction explicitly required not to update flags?
118 bool ForcedNoFlag = false;
119
120private:
121 SMLoc consumeToken() {
122 MCAsmParser &Parser = getParser();
123 SMLoc Result = Parser.getTok().getLoc();
124 Parser.Lex();
125 return Result;
126 }
127
128 X86TargetStreamer &getTargetStreamer() {
129 assert(getParser().getStreamer().getTargetStreamer() &&
130 "do not have a target streamer");
131 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
132 return static_cast<X86TargetStreamer &>(TS);
133 }
134
135 unsigned MatchInstruction(const OperandVector &Operands, MCInst &Inst,
136 uint64_t &ErrorInfo, FeatureBitset &MissingFeatures,
137 bool matchingInlineAsm, unsigned VariantID = 0) {
138 // In Code16GCC mode, match as 32-bit.
139 if (Code16GCC)
140 SwitchMode(X86::Is32Bit);
141 unsigned rv = MatchInstructionImpl(Operands, Inst, ErrorInfo,
142 MissingFeatures, matchingInlineAsm,
143 VariantID);
144 if (Code16GCC)
145 SwitchMode(X86::Is16Bit);
146 return rv;
147 }
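// Note: ".code16gcc" (Code16GCC == true) is used for 32-bit assembly source
// that must run in 16-bit mode, so matching temporarily switches to the
// 32-bit feature set and restores the 16-bit mode bit afterwards.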
148
149 enum InfixCalculatorTok {
150 IC_OR = 0,
151 IC_XOR,
152 IC_AND,
153 IC_LSHIFT,
154 IC_RSHIFT,
155 IC_PLUS,
156 IC_MINUS,
157 IC_MULTIPLY,
158 IC_DIVIDE,
159 IC_MOD,
160 IC_NOT,
161 IC_NEG,
162 IC_RPAREN,
163 IC_LPAREN,
164 IC_IMM,
165 IC_REGISTER,
166 IC_EQ,
167 IC_NE,
168 IC_LT,
169 IC_LE,
170 IC_GT,
171 IC_GE
172 };
173
174 enum IntelOperatorKind {
175 IOK_INVALID = 0,
176 IOK_LENGTH,
177 IOK_SIZE,
178 IOK_TYPE,
179 };
180
181 enum MasmOperatorKind {
182 MOK_INVALID = 0,
183 MOK_LENGTHOF,
184 MOK_SIZEOF,
185 MOK_TYPE,
186 };
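// These map the MS inline-asm / MASM operators onto frontend type queries:
// e.g. for a C array "int arr[10]" (4-byte int), TYPE arr is 4 (element size),
// LENGTH/LENGTHOF arr is 10 (element count) and SIZE/SIZEOF arr is 40 (bytes).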
187
188 class InfixCalculator {
189 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
190 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
191 SmallVector<ICToken, 4> PostfixStack;
192
193 bool isUnaryOperator(InfixCalculatorTok Op) const {
194 return Op == IC_NEG || Op == IC_NOT;
195 }
196
197 public:
198 int64_t popOperand() {
199 assert (!PostfixStack.empty() && "Popped an empty stack!");
200 ICToken Op = PostfixStack.pop_back_val();
201 if (!(Op.first == IC_IMM || Op.first == IC_REGISTER))
202 return -1; // The invalid Scale value will be caught later by checkScale
203 return Op.second;
204 }
205 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
206 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
207 "Unexpected operand!");
208 PostfixStack.push_back(std::make_pair(Op, Val));
209 }
210
211 void popOperator() { InfixOperatorStack.pop_back(); }
212 void pushOperator(InfixCalculatorTok Op) {
213 // Push the new operator if the stack is empty.
214 if (InfixOperatorStack.empty()) {
215 InfixOperatorStack.push_back(Op);
216 return;
217 }
218
219 // Push the new operator if it has a higher precedence than the operator
220 // on the top of the stack or the operator on the top of the stack is a
221 // left parenthesis.
222 unsigned Idx = InfixOperatorStack.size() - 1;
223 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
224 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
225 InfixOperatorStack.push_back(Op);
226 return;
227 }
228
229 // The operator on the top of the stack has higher precedence than the
230 // new operator.
231 unsigned ParenCount = 0;
232 while (true) {
233 // Nothing to process.
234 if (InfixOperatorStack.empty())
235 break;
236
237 Idx = InfixOperatorStack.size() - 1;
238 StackOp = InfixOperatorStack[Idx];
239 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
240 break;
241
242 // If the parenthesis count is zero and we see a left parenthesis,
243 // then stop processing.
244 if (!ParenCount && StackOp == IC_LPAREN)
245 break;
246
247 if (StackOp == IC_RPAREN) {
248 ++ParenCount;
249 InfixOperatorStack.pop_back();
250 } else if (StackOp == IC_LPAREN) {
251 --ParenCount;
252 InfixOperatorStack.pop_back();
253 } else {
254 InfixOperatorStack.pop_back();
255 PostfixStack.push_back(std::make_pair(StackOp, 0));
256 }
257 }
258 // Push the new operator.
259 InfixOperatorStack.push_back(Op);
260 }
261
262 int64_t execute() {
263 // Push any remaining operators onto the postfix stack.
264 while (!InfixOperatorStack.empty()) {
265 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
266 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
267 PostfixStack.push_back(std::make_pair(StackOp, 0));
268 }
269
270 if (PostfixStack.empty())
271 return 0;
272
273 SmallVector<ICToken, 16> OperandStack;
274 for (const ICToken &Op : PostfixStack) {
275 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
276 OperandStack.push_back(Op);
277 } else if (isUnaryOperator(Op.first)) {
278 assert (OperandStack.size() > 0 && "Too few operands.");
279 ICToken Operand = OperandStack.pop_back_val();
280 assert (Operand.first == IC_IMM &&
281 "Unary operation with a register!");
282 switch (Op.first) {
283 default:
284 report_fatal_error("Unexpected operator!");
285 break;
286 case IC_NEG:
287 OperandStack.push_back(std::make_pair(IC_IMM, -Operand.second));
288 break;
289 case IC_NOT:
290 OperandStack.push_back(std::make_pair(IC_IMM, ~Operand.second));
291 break;
292 }
293 } else {
294 assert (OperandStack.size() > 1 && "Too few operands.");
295 int64_t Val;
296 ICToken Op2 = OperandStack.pop_back_val();
297 ICToken Op1 = OperandStack.pop_back_val();
298 switch (Op.first) {
299 default:
300 report_fatal_error("Unexpected operator!");
301 break;
302 case IC_PLUS:
303 Val = Op1.second + Op2.second;
304 OperandStack.push_back(std::make_pair(IC_IMM, Val));
305 break;
306 case IC_MINUS:
307 Val = Op1.second - Op2.second;
308 OperandStack.push_back(std::make_pair(IC_IMM, Val));
309 break;
310 case IC_MULTIPLY:
311 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
312 "Multiply operation with an immediate and a register!");
313 Val = Op1.second * Op2.second;
314 OperandStack.push_back(std::make_pair(IC_IMM, Val));
315 break;
316 case IC_DIVIDE:
317 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
318 "Divide operation with an immediate and a register!");
319 assert (Op2.second != 0 && "Division by zero!");
320 Val = Op1.second / Op2.second;
321 OperandStack.push_back(std::make_pair(IC_IMM, Val));
322 break;
323 case IC_MOD:
324 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
325 "Modulo operation with an immediate and a register!");
326 Val = Op1.second % Op2.second;
327 OperandStack.push_back(std::make_pair(IC_IMM, Val));
328 break;
329 case IC_OR:
330 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
331 "Or operation with an immediate and a register!");
332 Val = Op1.second | Op2.second;
333 OperandStack.push_back(std::make_pair(IC_IMM, Val));
334 break;
335 case IC_XOR:
336 assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
337 "Xor operation with an immediate and a register!");
338 Val = Op1.second ^ Op2.second;
339 OperandStack.push_back(std::make_pair(IC_IMM, Val));
340 break;
341 case IC_AND:
342 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
343 "And operation with an immediate and a register!");
344 Val = Op1.second & Op2.second;
345 OperandStack.push_back(std::make_pair(IC_IMM, Val));
346 break;
347 case IC_LSHIFT:
348 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
349 "Left shift operation with an immediate and a register!");
350 Val = Op1.second << Op2.second;
351 OperandStack.push_back(std::make_pair(IC_IMM, Val));
352 break;
353 case IC_RSHIFT:
354 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
355 "Right shift operation with an immediate and a register!");
356 Val = Op1.second >> Op2.second;
357 OperandStack.push_back(std::make_pair(IC_IMM, Val));
358 break;
359 case IC_EQ:
360 assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
361 "Equals operation with an immediate and a register!");
362 Val = (Op1.second == Op2.second) ? -1 : 0;
363 OperandStack.push_back(std::make_pair(IC_IMM, Val));
364 break;
365 case IC_NE:
366 assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
367 "Not-equals operation with an immediate and a register!");
368 Val = (Op1.second != Op2.second) ? -1 : 0;
369 OperandStack.push_back(std::make_pair(IC_IMM, Val));
370 break;
371 case IC_LT:
372 assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
373 "Less-than operation with an immediate and a register!");
374 Val = (Op1.second < Op2.second) ? -1 : 0;
375 OperandStack.push_back(std::make_pair(IC_IMM, Val));
376 break;
377 case IC_LE:
378 assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
379 "Less-than-or-equal operation with an immediate and a "
380 "register!");
381 Val = (Op1.second <= Op2.second) ? -1 : 0;
382 OperandStack.push_back(std::make_pair(IC_IMM, Val));
383 break;
384 case IC_GT:
385 assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
386 "Greater-than operation with an immediate and a register!");
387 Val = (Op1.second > Op2.second) ? -1 : 0;
388 OperandStack.push_back(std::make_pair(IC_IMM, Val));
389 break;
390 case IC_GE:
391 assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
392 "Greater-than-or-equal operation with an immediate and a "
393 "register!");
394 Val = (Op1.second >= Op2.second) ? -1 : 0;
395 OperandStack.push_back(std::make_pair(IC_IMM, Val));
396 break;
397 }
398 }
399 }
400 assert (OperandStack.size() == 1 && "Expected a single result.");
401 return OperandStack.pop_back_val().second;
402 }
403 };
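// Worked example for the calculator above, evaluating "2 + 3 * 4":
//   pushOperand(IC_IMM, 2); pushOperator(IC_PLUS);
//   pushOperand(IC_IMM, 3); pushOperator(IC_MULTIPLY);
//   pushOperand(IC_IMM, 4);
//   execute();  // postfix "2 3 4 * +" evaluates to 14
// IC_MULTIPLY is stacked above IC_PLUS because of its higher precedence, so
// the multiplication is applied first.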
404
405 enum IntelExprState {
406 IES_INIT,
407 IES_OR,
408 IES_XOR,
409 IES_AND,
410 IES_EQ,
411 IES_NE,
412 IES_LT,
413 IES_LE,
414 IES_GT,
415 IES_GE,
416 IES_LSHIFT,
417 IES_RSHIFT,
418 IES_PLUS,
419 IES_MINUS,
420 IES_OFFSET,
421 IES_CAST,
422 IES_NOT,
423 IES_MULTIPLY,
424 IES_DIVIDE,
425 IES_MOD,
426 IES_LBRAC,
427 IES_RBRAC,
428 IES_LPAREN,
429 IES_RPAREN,
430 IES_REGISTER,
431 IES_INTEGER,
432 IES_ERROR
433 };
434
435 class IntelExprStateMachine {
436 IntelExprState State = IES_INIT, PrevState = IES_ERROR;
437 unsigned BaseReg = 0, IndexReg = 0, TmpReg = 0, Scale = 0;
438 int64_t Imm = 0;
439 const MCExpr *Sym = nullptr;
440 StringRef SymName;
441 InfixCalculator IC;
442 InlineAsmIdentifierInfo Info;
443 short BracCount = 0;
444 bool MemExpr = false;
445 bool BracketUsed = false;
446 bool OffsetOperator = false;
447 bool AttachToOperandIdx = false;
448 bool IsPIC = false;
449 SMLoc OffsetOperatorLoc;
450 AsmTypeInfo CurType;
451
452 bool setSymRef(const MCExpr *Val, StringRef ID, StringRef &ErrMsg) {
453 if (Sym) {
454 ErrMsg = "cannot use more than one symbol in memory operand";
455 return true;
456 }
457 Sym = Val;
458 SymName = ID;
459 return false;
460 }
461
462 public:
463 IntelExprStateMachine() = default;
464
465 void addImm(int64_t imm) { Imm += imm; }
466 short getBracCount() const { return BracCount; }
467 bool isMemExpr() const { return MemExpr; }
468 bool isBracketUsed() const { return BracketUsed; }
469 bool isOffsetOperator() const { return OffsetOperator; }
470 SMLoc getOffsetLoc() const { return OffsetOperatorLoc; }
471 unsigned getBaseReg() const { return BaseReg; }
472 unsigned getIndexReg() const { return IndexReg; }
473 unsigned getScale() const { return Scale; }
474 const MCExpr *getSym() const { return Sym; }
475 StringRef getSymName() const { return SymName; }
476 StringRef getType() const { return CurType.Name; }
477 unsigned getSize() const { return CurType.Size; }
478 unsigned getElementSize() const { return CurType.ElementSize; }
479 unsigned getLength() const { return CurType.Length; }
480 int64_t getImm() { return Imm + IC.execute(); }
481 bool isValidEndState() const {
482 return State == IES_RBRAC || State == IES_RPAREN ||
483 State == IES_INTEGER || State == IES_REGISTER ||
484 State == IES_OFFSET;
485 }
486
487 // Is the Intel expression appended after an operand index?
488 // [OperandIdx][Intel Expression]
489 // This is necessary for checking whether it is an independent
490 // Intel expression in the backend when parsing inline asm.
491 void setAppendAfterOperand() { AttachToOperandIdx = true; }
492
493 bool isPIC() const { return IsPIC; }
494 void setPIC() { IsPIC = true; }
495
496 bool hadError() const { return State == IES_ERROR; }
497 const InlineAsmIdentifierInfo &getIdentifierInfo() const { return Info; }
498
499 bool regsUseUpError(StringRef &ErrMsg) {
500 // This case mostly happens in inline asm, e.g. Arr[BaseReg + IndexReg];
501 // we cannot introduce an additional register in inline asm in the PIC model.
502 if (IsPIC && AttachToOperandIdx)
503 ErrMsg = "Don't use 2 or more regs for mem offset in PIC model!";
504 else
505 ErrMsg = "BaseReg/IndexReg already set!";
506 return true;
507 }
508
509 void onOr() {
510 IntelExprState CurrState = State;
511 switch (State) {
512 default:
513 State = IES_ERROR;
514 break;
515 case IES_INTEGER:
516 case IES_RPAREN:
517 case IES_REGISTER:
518 State = IES_OR;
519 IC.pushOperator(IC_OR);
520 break;
521 }
522 PrevState = CurrState;
523 }
524 void onXor() {
525 IntelExprState CurrState = State;
526 switch (State) {
527 default:
528 State = IES_ERROR;
529 break;
530 case IES_INTEGER:
531 case IES_RPAREN:
532 case IES_REGISTER:
533 State = IES_XOR;
534 IC.pushOperator(IC_XOR);
535 break;
536 }
537 PrevState = CurrState;
538 }
539 void onAnd() {
540 IntelExprState CurrState = State;
541 switch (State) {
542 default:
543 State = IES_ERROR;
544 break;
545 case IES_INTEGER:
546 case IES_RPAREN:
547 case IES_REGISTER:
548 State = IES_AND;
549 IC.pushOperator(IC_AND);
550 break;
551 }
552 PrevState = CurrState;
553 }
554 void onEq() {
555 IntelExprState CurrState = State;
556 switch (State) {
557 default:
558 State = IES_ERROR;
559 break;
560 case IES_INTEGER:
561 case IES_RPAREN:
562 case IES_REGISTER:
563 State = IES_EQ;
564 IC.pushOperator(IC_EQ);
565 break;
566 }
567 PrevState = CurrState;
568 }
569 void onNE() {
570 IntelExprState CurrState = State;
571 switch (State) {
572 default:
573 State = IES_ERROR;
574 break;
575 case IES_INTEGER:
576 case IES_RPAREN:
577 case IES_REGISTER:
578 State = IES_NE;
579 IC.pushOperator(IC_NE);
580 break;
581 }
582 PrevState = CurrState;
583 }
584 void onLT() {
585 IntelExprState CurrState = State;
586 switch (State) {
587 default:
588 State = IES_ERROR;
589 break;
590 case IES_INTEGER:
591 case IES_RPAREN:
592 case IES_REGISTER:
593 State = IES_LT;
594 IC.pushOperator(IC_LT);
595 break;
596 }
597 PrevState = CurrState;
598 }
599 void onLE() {
600 IntelExprState CurrState = State;
601 switch (State) {
602 default:
603 State = IES_ERROR;
604 break;
605 case IES_INTEGER:
606 case IES_RPAREN:
607 case IES_REGISTER:
608 State = IES_LE;
609 IC.pushOperator(IC_LE);
610 break;
611 }
612 PrevState = CurrState;
613 }
614 void onGT() {
615 IntelExprState CurrState = State;
616 switch (State) {
617 default:
618 State = IES_ERROR;
619 break;
620 case IES_INTEGER:
621 case IES_RPAREN:
622 case IES_REGISTER:
623 State = IES_GT;
624 IC.pushOperator(IC_GT);
625 break;
626 }
627 PrevState = CurrState;
628 }
629 void onGE() {
630 IntelExprState CurrState = State;
631 switch (State) {
632 default:
633 State = IES_ERROR;
634 break;
635 case IES_INTEGER:
636 case IES_RPAREN:
637 case IES_REGISTER:
638 State = IES_GE;
639 IC.pushOperator(IC_GE);
640 break;
641 }
642 PrevState = CurrState;
643 }
644 void onLShift() {
645 IntelExprState CurrState = State;
646 switch (State) {
647 default:
648 State = IES_ERROR;
649 break;
650 case IES_INTEGER:
651 case IES_RPAREN:
652 case IES_REGISTER:
653 State = IES_LSHIFT;
654 IC.pushOperator(IC_LSHIFT);
655 break;
656 }
657 PrevState = CurrState;
658 }
659 void onRShift() {
660 IntelExprState CurrState = State;
661 switch (State) {
662 default:
663 State = IES_ERROR;
664 break;
665 case IES_INTEGER:
666 case IES_RPAREN:
667 case IES_REGISTER:
668 State = IES_RSHIFT;
669 IC.pushOperator(IC_RSHIFT);
670 break;
671 }
672 PrevState = CurrState;
673 }
674 bool onPlus(StringRef &ErrMsg) {
675 IntelExprState CurrState = State;
676 switch (State) {
677 default:
678 State = IES_ERROR;
679 break;
680 case IES_INTEGER:
681 case IES_RPAREN:
682 case IES_REGISTER:
683 case IES_OFFSET:
684 State = IES_PLUS;
685 IC.pushOperator(IC_PLUS);
686 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
687 // If we already have a BaseReg, then assume this is the IndexReg with
688 // no explicit scale.
689 if (!BaseReg) {
690 BaseReg = TmpReg;
691 } else {
692 if (IndexReg)
693 return regsUseUpError(ErrMsg);
694 IndexReg = TmpReg;
695 Scale = 0;
696 }
697 }
698 break;
699 }
700 PrevState = CurrState;
701 return false;
702 }
703 bool onMinus(StringRef &ErrMsg) {
704 IntelExprState CurrState = State;
705 switch (State) {
706 default:
707 State = IES_ERROR;
708 break;
709 case IES_OR:
710 case IES_XOR:
711 case IES_AND:
712 case IES_EQ:
713 case IES_NE:
714 case IES_LT:
715 case IES_LE:
716 case IES_GT:
717 case IES_GE:
718 case IES_LSHIFT:
719 case IES_RSHIFT:
720 case IES_PLUS:
721 case IES_NOT:
722 case IES_MULTIPLY:
723 case IES_DIVIDE:
724 case IES_MOD:
725 case IES_LPAREN:
726 case IES_RPAREN:
727 case IES_LBRAC:
728 case IES_RBRAC:
729 case IES_INTEGER:
730 case IES_REGISTER:
731 case IES_INIT:
732 case IES_OFFSET:
733 State = IES_MINUS;
734 // push minus operator if it is not a negate operator
735 if (CurrState == IES_REGISTER || CurrState == IES_RPAREN ||
736 CurrState == IES_INTEGER || CurrState == IES_RBRAC ||
737 CurrState == IES_OFFSET)
738 IC.pushOperator(IC_MINUS);
739 else if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
740 // We have a negate operator for Scale: that's illegal
741 ErrMsg = "Scale can't be negative";
742 return true;
743 } else
744 IC.pushOperator(IC_NEG);
745 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
746 // If we already have a BaseReg, then assume this is the IndexReg with
747 // no explicit scale.
748 if (!BaseReg) {
749 BaseReg = TmpReg;
750 } else {
751 if (IndexReg)
752 return regsUseUpError(ErrMsg);
753 IndexReg = TmpReg;
754 Scale = 0;
755 }
756 }
757 break;
758 }
759 PrevState = CurrState;
760 return false;
761 }
762 void onNot() {
763 IntelExprState CurrState = State;
764 switch (State) {
765 default:
766 State = IES_ERROR;
767 break;
768 case IES_OR:
769 case IES_XOR:
770 case IES_AND:
771 case IES_EQ:
772 case IES_NE:
773 case IES_LT:
774 case IES_LE:
775 case IES_GT:
776 case IES_GE:
777 case IES_LSHIFT:
778 case IES_RSHIFT:
779 case IES_PLUS:
780 case IES_MINUS:
781 case IES_NOT:
782 case IES_MULTIPLY:
783 case IES_DIVIDE:
784 case IES_MOD:
785 case IES_LPAREN:
786 case IES_LBRAC:
787 case IES_INIT:
788 State = IES_NOT;
789 IC.pushOperator(IC_NOT);
790 break;
791 }
792 PrevState = CurrState;
793 }
794 bool onRegister(unsigned Reg, StringRef &ErrMsg) {
795 IntelExprState CurrState = State;
796 switch (State) {
797 default:
798 State = IES_ERROR;
799 break;
800 case IES_PLUS:
801 case IES_LPAREN:
802 case IES_LBRAC:
803 State = IES_REGISTER;
804 TmpReg = Reg;
805 IC.pushOperand(IC_REGISTER);
806 break;
807 case IES_MULTIPLY:
808 // Index Register - Scale * Register
809 if (PrevState == IES_INTEGER) {
810 if (IndexReg)
811 return regsUseUpError(ErrMsg);
812 State = IES_REGISTER;
813 IndexReg = Reg;
814 // Get the scale and replace the 'Scale * Register' with '0'.
815 Scale = IC.popOperand();
816 if (checkScale(Scale, ErrMsg))
817 return true;
818 IC.pushOperand(IC_IMM);
819 IC.popOperator();
820 } else {
821 State = IES_ERROR;
822 }
823 break;
824 }
825 PrevState = CurrState;
826 return false;
827 }
828 bool onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName,
829 const InlineAsmIdentifierInfo &IDInfo,
830 const AsmTypeInfo &Type, bool ParsingMSInlineAsm,
831 StringRef &ErrMsg) {
832 // InlineAsm: Treat an enum value as an integer
833 if (ParsingMSInlineAsm)
834 if (IDInfo.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
835 return onInteger(IDInfo.Enum.EnumVal, ErrMsg);
836 // Treat a symbolic constant like an integer
837 if (auto *CE = dyn_cast<MCConstantExpr>(SymRef))
838 return onInteger(CE->getValue(), ErrMsg);
839 PrevState = State;
840 switch (State) {
841 default:
842 State = IES_ERROR;
843 break;
844 case IES_CAST:
845 case IES_PLUS:
846 case IES_MINUS:
847 case IES_NOT:
848 case IES_INIT:
849 case IES_LBRAC:
850 case IES_LPAREN:
851 if (setSymRef(SymRef, SymRefName, ErrMsg))
852 return true;
853 MemExpr = true;
854 State = IES_INTEGER;
855 IC.pushOperand(IC_IMM);
856 if (ParsingMSInlineAsm)
857 Info = IDInfo;
858 setTypeInfo(Type);
859 break;
860 }
861 return false;
862 }
863 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
864 IntelExprState CurrState = State;
865 switch (State) {
866 default:
867 State = IES_ERROR;
868 break;
869 case IES_PLUS:
870 case IES_MINUS:
871 case IES_NOT:
872 case IES_OR:
873 case IES_XOR:
874 case IES_AND:
875 case IES_EQ:
876 case IES_NE:
877 case IES_LT:
878 case IES_LE:
879 case IES_GT:
880 case IES_GE:
881 case IES_LSHIFT:
882 case IES_RSHIFT:
883 case IES_DIVIDE:
884 case IES_MOD:
885 case IES_MULTIPLY:
886 case IES_LPAREN:
887 case IES_INIT:
888 case IES_LBRAC:
889 State = IES_INTEGER;
890 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
891 // Index Register - Register * Scale
892 if (IndexReg)
893 return regsUseUpError(ErrMsg);
894 IndexReg = TmpReg;
895 Scale = TmpInt;
896 if (checkScale(Scale, ErrMsg))
897 return true;
898 // Get the scale and replace the 'Register * Scale' with '0'.
899 IC.popOperator();
900 } else {
901 IC.pushOperand(IC_IMM, TmpInt);
902 }
903 break;
904 }
905 PrevState = CurrState;
906 return false;
907 }
908 void onStar() {
909 PrevState = State;
910 switch (State) {
911 default:
912 State = IES_ERROR;
913 break;
914 case IES_INTEGER:
915 case IES_REGISTER:
916 case IES_RPAREN:
917 State = IES_MULTIPLY;
918 IC.pushOperator(IC_MULTIPLY);
919 break;
920 }
921 }
922 void onDivide() {
923 PrevState = State;
924 switch (State) {
925 default:
926 State = IES_ERROR;
927 break;
928 case IES_INTEGER:
929 case IES_RPAREN:
930 State = IES_DIVIDE;
931 IC.pushOperator(IC_DIVIDE);
932 break;
933 }
934 }
935 void onMod() {
936 PrevState = State;
937 switch (State) {
938 default:
939 State = IES_ERROR;
940 break;
941 case IES_INTEGER:
942 case IES_RPAREN:
943 State = IES_MOD;
944 IC.pushOperator(IC_MOD);
945 break;
946 }
947 }
948 bool onLBrac() {
949 if (BracCount)
950 return true;
951 PrevState = State;
952 switch (State) {
953 default:
954 State = IES_ERROR;
955 break;
956 case IES_RBRAC:
957 case IES_INTEGER:
958 case IES_RPAREN:
959 State = IES_PLUS;
960 IC.pushOperator(IC_PLUS);
961 CurType.Length = 1;
962 CurType.Size = CurType.ElementSize;
963 break;
964 case IES_INIT:
965 case IES_CAST:
966 assert(!BracCount && "BracCount should be zero on parsing's start");
967 State = IES_LBRAC;
968 break;
969 }
970 MemExpr = true;
971 BracketUsed = true;
972 BracCount++;
973 return false;
974 }
975 bool onRBrac(StringRef &ErrMsg) {
976 IntelExprState CurrState = State;
977 switch (State) {
978 default:
979 State = IES_ERROR;
980 break;
981 case IES_INTEGER:
982 case IES_OFFSET:
983 case IES_REGISTER:
984 case IES_RPAREN:
985 if (BracCount-- != 1) {
986 ErrMsg = "unexpected bracket encountered";
987 return true;
988 }
989 State = IES_RBRAC;
990 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
991 // If we already have a BaseReg, then assume this is the IndexReg with
992 // no explicit scale.
993 if (!BaseReg) {
994 BaseReg = TmpReg;
995 } else {
996 if (IndexReg)
997 return regsUseUpError(ErrMsg);
998 IndexReg = TmpReg;
999 Scale = 0;
1000 }
1001 }
1002 break;
1003 }
1004 PrevState = CurrState;
1005 return false;
1006 }
1007 void onLParen() {
1008 IntelExprState CurrState = State;
1009 switch (State) {
1010 default:
1011 State = IES_ERROR;
1012 break;
1013 case IES_PLUS:
1014 case IES_MINUS:
1015 case IES_NOT:
1016 case IES_OR:
1017 case IES_XOR:
1018 case IES_AND:
1019 case IES_EQ:
1020 case IES_NE:
1021 case IES_LT:
1022 case IES_LE:
1023 case IES_GT:
1024 case IES_GE:
1025 case IES_LSHIFT:
1026 case IES_RSHIFT:
1027 case IES_MULTIPLY:
1028 case IES_DIVIDE:
1029 case IES_MOD:
1030 case IES_LPAREN:
1031 case IES_INIT:
1032 case IES_LBRAC:
1033 State = IES_LPAREN;
1034 IC.pushOperator(IC_LPAREN);
1035 break;
1036 }
1037 PrevState = CurrState;
1038 }
1039 void onRParen() {
1040 PrevState = State;
1041 switch (State) {
1042 default:
1043 State = IES_ERROR;
1044 break;
1045 case IES_INTEGER:
1046 case IES_OFFSET:
1047 case IES_REGISTER:
1048 case IES_RBRAC:
1049 case IES_RPAREN:
1050 State = IES_RPAREN;
1051 IC.pushOperator(IC_RPAREN);
1052 break;
1053 }
1054 }
1055 bool onOffset(const MCExpr *Val, SMLoc OffsetLoc, StringRef ID,
1056 const InlineAsmIdentifierInfo &IDInfo,
1057 bool ParsingMSInlineAsm, StringRef &ErrMsg) {
1058 PrevState = State;
1059 switch (State) {
1060 default:
1061 ErrMsg = "unexpected offset operator expression";
1062 return true;
1063 case IES_PLUS:
1064 case IES_INIT:
1065 case IES_LBRAC:
1066 if (setSymRef(Val, ID, ErrMsg))
1067 return true;
1068 OffsetOperator = true;
1069 OffsetOperatorLoc = OffsetLoc;
1070 State = IES_OFFSET;
1071 // As we cannot yet resolve the actual value (offset), we retain
1072 // the requested semantics by pushing a '0' to the operands stack
1073 IC.pushOperand(IC_IMM);
1074 if (ParsingMSInlineAsm) {
1075 Info = IDInfo;
1076 }
1077 break;
1078 }
1079 return false;
1080 }
1081 void onCast(AsmTypeInfo Info) {
1082 PrevState = State;
1083 switch (State) {
1084 default:
1085 State = IES_ERROR;
1086 break;
1087 case IES_LPAREN:
1088 setTypeInfo(Info);
1089 State = IES_CAST;
1090 break;
1091 }
1092 }
1093 void setTypeInfo(AsmTypeInfo Type) { CurType = Type; }
1094 };
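// Example: the Intel operand "[eax + ebx*4 + 16]" drives the state machine
// through onLBrac(), onRegister(EAX), onPlus(), onRegister(EBX), onStar(),
// onInteger(4), onPlus(), onInteger(16), onRBrac(), ending in a valid end
// state with BaseReg = EAX, IndexReg = EBX, Scale = 4 and getImm() == 16.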
1095
1096 bool Error(SMLoc L, const Twine &Msg, SMRange Range = std::nullopt,
1097 bool MatchingInlineAsm = false) {
1098 MCAsmParser &Parser = getParser();
1099 if (MatchingInlineAsm) {
1100 if (!getLexer().isAtStartOfStatement())
1101 Parser.eatToEndOfStatement();
1102 return false;
1103 }
1104 return Parser.Error(L, Msg, Range);
1105 }
1106
1107 bool MatchRegisterByName(MCRegister &RegNo, StringRef RegName, SMLoc StartLoc,
1108 SMLoc EndLoc);
1109 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1110 bool RestoreOnFailure);
1111
1112 std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
1113 std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
1114 bool IsSIReg(unsigned Reg);
1115 unsigned GetSIDIForRegClass(unsigned RegClassID, unsigned Reg, bool IsSIReg);
1116 void
1117 AddDefaultSrcDestOperands(OperandVector &Operands,
1118 std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
1119 std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst);
1120 bool VerifyAndAdjustOperands(OperandVector &OrigOperands,
1121 OperandVector &FinalOperands);
1122 bool parseOperand(OperandVector &Operands, StringRef Name);
1123 bool parseATTOperand(OperandVector &Operands);
1124 bool parseIntelOperand(OperandVector &Operands, StringRef Name);
1125 bool ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID,
1126 InlineAsmIdentifierInfo &Info, SMLoc &End);
1127 bool ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End);
1128 unsigned IdentifyIntelInlineAsmOperator(StringRef Name);
1129 unsigned ParseIntelInlineAsmOperator(unsigned OpKind);
1130 unsigned IdentifyMasmOperator(StringRef Name);
1131 bool ParseMasmOperator(unsigned OpKind, int64_t &Val);
1132 bool ParseRoundingModeOp(SMLoc Start, OperandVector &Operands);
1133 bool parseCFlagsOp(OperandVector &Operands);
1134 bool ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM,
1135 bool &ParseError, SMLoc &End);
1136 bool ParseMasmNamedOperator(StringRef Name, IntelExprStateMachine &SM,
1137 bool &ParseError, SMLoc &End);
1138 void RewriteIntelExpression(IntelExprStateMachine &SM, SMLoc Start,
1139 SMLoc End);
1140 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
1141 bool ParseIntelInlineAsmIdentifier(const MCExpr *&Val, StringRef &Identifier,
1142 InlineAsmIdentifierInfo &Info,
1143 bool IsUnevaluatedOperand, SMLoc &End,
1144 bool IsParsingOffsetOperator = false);
1145 void tryParseOperandIdx(AsmToken::TokenKind PrevTK,
1146 IntelExprStateMachine &SM);
1147
1148 bool ParseMemOperand(unsigned SegReg, const MCExpr *Disp, SMLoc StartLoc,
1149 SMLoc EndLoc, OperandVector &Operands);
1150
1151 X86::CondCode ParseConditionCode(StringRef CCode);
1152
1153 bool ParseIntelMemoryOperandSize(unsigned &Size);
1154 bool CreateMemForMSInlineAsm(unsigned SegReg, const MCExpr *Disp,
1155 unsigned BaseReg, unsigned IndexReg,
1156 unsigned Scale, bool NonAbsMem, SMLoc Start,
1157 SMLoc End, unsigned Size, StringRef Identifier,
1158 const InlineAsmIdentifierInfo &Info,
1159 OperandVector &Operands);
1160
1161 bool parseDirectiveArch();
1162 bool parseDirectiveNops(SMLoc L);
1163 bool parseDirectiveEven(SMLoc L);
1164 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
1165
1166 /// CodeView FPO data directives.
1167 bool parseDirectiveFPOProc(SMLoc L);
1168 bool parseDirectiveFPOSetFrame(SMLoc L);
1169 bool parseDirectiveFPOPushReg(SMLoc L);
1170 bool parseDirectiveFPOStackAlloc(SMLoc L);
1171 bool parseDirectiveFPOStackAlign(SMLoc L);
1172 bool parseDirectiveFPOEndPrologue(SMLoc L);
1173 bool parseDirectiveFPOEndProc(SMLoc L);
1174
1175 /// SEH directives.
1176 bool parseSEHRegisterNumber(unsigned RegClassID, MCRegister &RegNo);
1177 bool parseDirectiveSEHPushReg(SMLoc);
1178 bool parseDirectiveSEHSetFrame(SMLoc);
1179 bool parseDirectiveSEHSaveReg(SMLoc);
1180 bool parseDirectiveSEHSaveXMM(SMLoc);
1181 bool parseDirectiveSEHPushFrame(SMLoc);
1182
1183 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1184
1185 bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
1186 bool processInstruction(MCInst &Inst, const OperandVector &Ops);
1187
1188 // Load Value Injection (LVI) Mitigations for machine code
1189 void emitWarningForSpecialLVIInstruction(SMLoc Loc);
1190 void applyLVICFIMitigation(MCInst &Inst, MCStreamer &Out);
1191 void applyLVILoadHardeningMitigation(MCInst &Inst, MCStreamer &Out);
1192
1193 /// Wrapper around MCStreamer::emitInstruction(). Possibly adds
1194 /// instrumentation around Inst.
1195 void emitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
1196
1197 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1198 OperandVector &Operands, MCStreamer &Out,
1199 uint64_t &ErrorInfo,
1200 bool MatchingInlineAsm) override;
1201
1202 void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
1203 MCStreamer &Out, bool MatchingInlineAsm);
1204
1205 bool ErrorMissingFeature(SMLoc IDLoc, const FeatureBitset &MissingFeatures,
1206 bool MatchingInlineAsm);
1207
1208 bool matchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode, MCInst &Inst,
1209 OperandVector &Operands, MCStreamer &Out,
1210 uint64_t &ErrorInfo, bool MatchingInlineAsm);
1211
1212 bool matchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode, MCInst &Inst,
1213 OperandVector &Operands, MCStreamer &Out,
1214 uint64_t &ErrorInfo,
1215 bool MatchingInlineAsm);
1216
1217 bool OmitRegisterFromClobberLists(unsigned RegNo) override;
1218
1219 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
1220 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
1221 /// Returns false if no parsing errors occurred, true otherwise.
1222 bool HandleAVX512Operand(OperandVector &Operands);
1223
1224 bool ParseZ(std::unique_ptr<X86Operand> &Z, const SMLoc &StartLoc);
1225
1226 bool is64BitMode() const {
1227 // FIXME: Can tablegen auto-generate this?
1228 return getSTI().hasFeature(X86::Is64Bit);
1229 }
1230 bool is32BitMode() const {
1231 // FIXME: Can tablegen auto-generate this?
1232 return getSTI().hasFeature(X86::Is32Bit);
1233 }
1234 bool is16BitMode() const {
1235 // FIXME: Can tablegen auto-generate this?
1236 return getSTI().hasFeature(X86::Is16Bit);
1237 }
1238 void SwitchMode(unsigned mode) {
1239 MCSubtargetInfo &STI = copySTI();
1240 FeatureBitset AllModes({X86::Is64Bit, X86::Is32Bit, X86::Is16Bit});
1241 FeatureBitset OldMode = STI.getFeatureBits() & AllModes;
1242 FeatureBitset FB = ComputeAvailableFeatures(
1243 STI.ToggleFeature(OldMode.flip(mode)));
1244 setAvailableFeatures(FB);
1245
1246 assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes));
1247 }
1248
1249 unsigned getPointerWidth() {
1250 if (is16BitMode()) return 16;
1251 if (is32BitMode()) return 32;
1252 if (is64BitMode()) return 64;
1253 llvm_unreachable("invalid mode");
1254 }
1255
1256 bool isParsingIntelSyntax() {
1257 return getParser().getAssemblerDialect();
1258 }
1259
1260 /// @name Auto-generated Matcher Functions
1261 /// {
1262
1263#define GET_ASSEMBLER_HEADER
1264#include "X86GenAsmMatcher.inc"
1265
1266 /// }
1267
1268public:
1269 enum X86MatchResultTy {
1270 Match_Unsupported = FIRST_TARGET_MATCH_RESULT_TY,
1271#define GET_OPERAND_DIAGNOSTIC_TYPES
1272#include "X86GenAsmMatcher.inc"
1273 };
1274
1275 X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
1276 const MCInstrInfo &mii, const MCTargetOptions &Options)
1277 : MCTargetAsmParser(Options, sti, mii), InstInfo(nullptr),
1278 Code16GCC(false) {
1279
1280 Parser.addAliasForDirective(".word", ".2byte");
1281
1282 // Initialize the set of available features.
1283 setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
1284 }
1285
1286 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1287 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1288 SMLoc &EndLoc) override;
1289
1290 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1291
1292 bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
1293 SMLoc NameLoc, OperandVector &Operands) override;
1294
1295 bool ParseDirective(AsmToken DirectiveID) override;
1296};
1297} // end anonymous namespace
1298
1299#define GET_REGISTER_MATCHER
1300#define GET_SUBTARGET_FEATURE_NAME
1301#include "X86GenAsmMatcher.inc"
1302
1303static bool CheckBaseRegAndIndexRegAndScale(unsigned BaseReg, unsigned IndexReg,
1304 unsigned Scale, bool Is64BitMode,
1305 StringRef &ErrMsg) {
1306 // If we have both a base register and an index register make sure they are
1307 // both 64-bit or 32-bit registers.
1308 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
1309
1310 if (BaseReg != 0 &&
1311 !(BaseReg == X86::RIP || BaseReg == X86::EIP ||
1312 X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) ||
1313 X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) ||
1314 X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg))) {
1315 ErrMsg = "invalid base+index expression";
1316 return true;
1317 }
1318
1319 if (IndexReg != 0 &&
1320 !(IndexReg == X86::EIZ || IndexReg == X86::RIZ ||
1321 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1322 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
1323 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
1324 X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) ||
1325 X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) ||
1326 X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg))) {
1327 ErrMsg = "invalid base+index expression";
1328 return true;
1329 }
1330
1331 if (((BaseReg == X86::RIP || BaseReg == X86::EIP) && IndexReg != 0) ||
1332 IndexReg == X86::EIP || IndexReg == X86::RIP ||
1333 IndexReg == X86::ESP || IndexReg == X86::RSP) {
1334 ErrMsg = "invalid base+index expression";
1335 return true;
1336 }
1337
1338 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
1339 // and then only in non-64-bit modes.
1340 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1341 (Is64BitMode || (BaseReg != X86::BX && BaseReg != X86::BP &&
1342 BaseReg != X86::SI && BaseReg != X86::DI))) {
1343 ErrMsg = "invalid 16-bit base register";
1344 return true;
1345 }
1346
1347 if (BaseReg == 0 &&
1348 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
1349 ErrMsg = "16-bit memory operand may not include only index register";
1350 return true;
1351 }
1352
1353 if (BaseReg != 0 && IndexReg != 0) {
1354 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
1355 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1356 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
1357 IndexReg == X86::EIZ)) {
1358 ErrMsg = "base register is 64-bit, but index register is not";
1359 return true;
1360 }
1361 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
1362 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1363 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
1364 IndexReg == X86::RIZ)) {
1365 ErrMsg = "base register is 32-bit, but index register is not";
1366 return true;
1367 }
1368 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
1369 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
1370 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
1371 ErrMsg = "base register is 16-bit, but index register is not";
1372 return true;
1373 }
1374 if ((BaseReg != X86::BX && BaseReg != X86::BP) ||
1375 (IndexReg != X86::SI && IndexReg != X86::DI)) {
1376 ErrMsg = "invalid 16-bit base/index register combination";
1377 return true;
1378 }
1379 }
1380 }
1381
1382 // RIP/EIP-relative addressing is only supported in 64-bit mode.
1383 if (!Is64BitMode && BaseReg != 0 &&
1384 (BaseReg == X86::RIP || BaseReg == X86::EIP)) {
1385 ErrMsg = "IP-relative addressing requires 64-bit mode";
1386 return true;
1387 }
1388
1389 return checkScale(Scale, ErrMsg);
1390}
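// For example, "(%rbx,%ecx,2)" is rejected above with "base register is
// 64-bit, but index register is not", while "(%rbx,%rcx,2)" and the VSIB form
// "(%rax,%xmm1,4)" pass these register-class checks.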
1391
1392bool X86AsmParser::MatchRegisterByName(MCRegister &RegNo, StringRef RegName,
1393 SMLoc StartLoc, SMLoc EndLoc) {
1394 // If we encounter a %, ignore it. This code handles registers with and
1395 // without the prefix; unprefixed registers can occur in CFI directives.
1396 RegName.consume_front("%");
1397
1398 RegNo = MatchRegisterName(RegName);
1399
1400 // If the match failed, try the register name as lowercase.
1401 if (RegNo == 0)
1402 RegNo = MatchRegisterName(RegName.lower());
1403
1404 // The "flags" and "mxcsr" registers cannot be referenced directly.
1405 // Treat them as identifiers instead.
1406 if (isParsingMSInlineAsm() && isParsingIntelSyntax() &&
1407 (RegNo == X86::EFLAGS || RegNo == X86::MXCSR))
1408 RegNo = 0;
1409
1410 if (!is64BitMode()) {
1411 // FIXME: This should be done using Requires<Not64BitMode> and
1412 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
1413 // checked.
1414 if (RegNo == X86::RIZ || RegNo == X86::RIP ||
1415 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
1416 X86II::isX86_64NonExtLowByteReg(RegNo) ||
1417 X86II::isX86_64ExtendedReg(RegNo)) {
1418 return Error(StartLoc,
1419 "register %" + RegName + " is only available in 64-bit mode",
1420 SMRange(StartLoc, EndLoc));
1421 }
1422 }
1423
1424 if (X86II::isApxExtendedReg(RegNo))
1425 UseApxExtendedReg = true;
1426
1427 // If this is "db[0-15]", match it as an alias
1428 // for dr[0-15].
1429 if (RegNo == 0 && RegName.starts_with("db")) {
1430 if (RegName.size() == 3) {
1431 switch (RegName[2]) {
1432 case '0':
1433 RegNo = X86::DR0;
1434 break;
1435 case '1':
1436 RegNo = X86::DR1;
1437 break;
1438 case '2':
1439 RegNo = X86::DR2;
1440 break;
1441 case '3':
1442 RegNo = X86::DR3;
1443 break;
1444 case '4':
1445 RegNo = X86::DR4;
1446 break;
1447 case '5':
1448 RegNo = X86::DR5;
1449 break;
1450 case '6':
1451 RegNo = X86::DR6;
1452 break;
1453 case '7':
1454 RegNo = X86::DR7;
1455 break;
1456 case '8':
1457 RegNo = X86::DR8;
1458 break;
1459 case '9':
1460 RegNo = X86::DR9;
1461 break;
1462 }
1463 } else if (RegName.size() == 4 && RegName[2] == '1') {
1464 switch (RegName[3]) {
1465 case '0':
1466 RegNo = X86::DR10;
1467 break;
1468 case '1':
1469 RegNo = X86::DR11;
1470 break;
1471 case '2':
1472 RegNo = X86::DR12;
1473 break;
1474 case '3':
1475 RegNo = X86::DR13;
1476 break;
1477 case '4':
1478 RegNo = X86::DR14;
1479 break;
1480 case '5':
1481 RegNo = X86::DR15;
1482 break;
1483 }
1484 }
1485 }
1486
1487 if (RegNo == 0) {
1488 if (isParsingIntelSyntax())
1489 return true;
1490 return Error(StartLoc, "invalid register name", SMRange(StartLoc, EndLoc));
1491 }
1492 return false;
1493}
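// Note: besides the tablegen'd register names, the code above accepts
// "db0".."db15" as aliases for DR0..DR15, and deliberately rejects
// "flags"/"mxcsr" when parsing MS inline asm so they can still be used as
// C/C++ identifiers.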
1494
1495bool X86AsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
1496 SMLoc &EndLoc, bool RestoreOnFailure) {
1497 MCAsmParser &Parser = getParser();
1498 MCAsmLexer &Lexer = getLexer();
1499 RegNo = 0;
1500
1501 SmallVector<AsmToken, 5> Tokens;
1502 auto OnFailure = [RestoreOnFailure, &Lexer, &Tokens]() {
1503 if (RestoreOnFailure) {
1504 while (!Tokens.empty()) {
1505 Lexer.UnLex(Tokens.pop_back_val());
1506 }
1507 }
1508 };
1509
1510 const AsmToken &PercentTok = Parser.getTok();
1511 StartLoc = PercentTok.getLoc();
1512
1513 // If we encounter a %, ignore it. This code handles registers with and
1514 // without the prefix; unprefixed registers can occur in CFI directives.
1515 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent)) {
1516 Tokens.push_back(PercentTok);
1517 Parser.Lex(); // Eat percent token.
1518 }
1519
1520 const AsmToken &Tok = Parser.getTok();
1521 EndLoc = Tok.getEndLoc();
1522
1523 if (Tok.isNot(AsmToken::Identifier)) {
1524 OnFailure();
1525 if (isParsingIntelSyntax()) return true;
1526 return Error(StartLoc, "invalid register name",
1527 SMRange(StartLoc, EndLoc));
1528 }
1529
1530 if (MatchRegisterByName(RegNo, Tok.getString(), StartLoc, EndLoc)) {
1531 OnFailure();
1532 return true;
1533 }
1534
1535 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
1536 if (RegNo == X86::ST0) {
1537 Tokens.push_back(Tok);
1538 Parser.Lex(); // Eat 'st'
1539
1540 // Check to see if we have '(4)' after %st.
1541 if (Lexer.isNot(AsmToken::LParen))
1542 return false;
1543 // Lex the paren.
1544 Tokens.push_back(Parser.getTok());
1545 Parser.Lex();
1546
1547 const AsmToken &IntTok = Parser.getTok();
1548 if (IntTok.isNot(AsmToken::Integer)) {
1549 OnFailure();
1550 return Error(IntTok.getLoc(), "expected stack index");
1551 }
1552 switch (IntTok.getIntVal()) {
1553 case 0: RegNo = X86::ST0; break;
1554 case 1: RegNo = X86::ST1; break;
1555 case 2: RegNo = X86::ST2; break;
1556 case 3: RegNo = X86::ST3; break;
1557 case 4: RegNo = X86::ST4; break;
1558 case 5: RegNo = X86::ST5; break;
1559 case 6: RegNo = X86::ST6; break;
1560 case 7: RegNo = X86::ST7; break;
1561 default:
1562 OnFailure();
1563 return Error(IntTok.getLoc(), "invalid stack index");
1564 }
1565
1566 // Lex IntTok
1567 Tokens.push_back(IntTok);
1568 Parser.Lex();
1569 if (Lexer.isNot(AsmToken::RParen)) {
1570 OnFailure();
1571 return Error(Parser.getTok().getLoc(), "expected ')'");
1572 }
1573
1574 EndLoc = Parser.getTok().getEndLoc();
1575 Parser.Lex(); // Eat ')'
1576 return false;
1577 }
1578
1579 EndLoc = Parser.getTok().getEndLoc();
1580
1581 if (RegNo == 0) {
1582 OnFailure();
1583 if (isParsingIntelSyntax()) return true;
1584 return Error(StartLoc, "invalid register name",
1585 SMRange(StartLoc, EndLoc));
1586 }
1587
1588 Parser.Lex(); // Eat identifier token.
1589 return false;
1590}
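// Note: a bare "%st" is returned as X86::ST0, while "%st(3)" additionally
// consumes the '(' '3' ')' tokens above and yields X86::ST3.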
1591
1592bool X86AsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
1593 SMLoc &EndLoc) {
1594 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
1595}
1596
1597ParseStatus X86AsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1598 SMLoc &EndLoc) {
1599 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
1600 bool PendingErrors = getParser().hasPendingError();
1601 getParser().clearPendingErrors();
1602 if (PendingErrors)
1603 return ParseStatus::Failure;
1604 if (Result)
1605 return ParseStatus::NoMatch;
1606 return ParseStatus::Success;
1607}
1608
1609std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
1610 bool Parse32 = is32BitMode() || Code16GCC;
1611 unsigned Basereg = is64BitMode() ? X86::RSI : (Parse32 ? X86::ESI : X86::SI);
1612 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1613 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1614 /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
1615 Loc, Loc, 0);
1616}
1617
1618std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
1619 bool Parse32 = is32BitMode() || Code16GCC;
1620 unsigned Basereg = is64BitMode() ? X86::RDI : (Parse32 ? X86::EDI : X86::DI);
1621 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1622 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1623 /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
1624 Loc, Loc, 0);
1625}
1626
1627bool X86AsmParser::IsSIReg(unsigned Reg) {
1628 switch (Reg) {
1629 default: llvm_unreachable("Only (R|E)SI and (R|E)DI are expected!");
1630 case X86::RSI:
1631 case X86::ESI:
1632 case X86::SI:
1633 return true;
1634 case X86::RDI:
1635 case X86::EDI:
1636 case X86::DI:
1637 return false;
1638 }
1639}
1640
1641unsigned X86AsmParser::GetSIDIForRegClass(unsigned RegClassID, unsigned Reg,
1642 bool IsSIReg) {
1643 switch (RegClassID) {
1644 default: llvm_unreachable("Unexpected register class");
1645 case X86::GR64RegClassID:
1646 return IsSIReg ? X86::RSI : X86::RDI;
1647 case X86::GR32RegClassID:
1648 return IsSIReg ? X86::ESI : X86::EDI;
1649 case X86::GR16RegClassID:
1650 return IsSIReg ? X86::SI : X86::DI;
1651 }
1652}
1653
1654void X86AsmParser::AddDefaultSrcDestOperands(
1655 OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
1656 std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst) {
1657 if (isParsingIntelSyntax()) {
1658 Operands.push_back(std::move(Dst));
1659 Operands.push_back(std::move(Src));
1660 }
1661 else {
1662 Operands.push_back(std::move(Src));
1663 Operands.push_back(std::move(Dst));
1664 }
1665}
1666
1667bool X86AsmParser::VerifyAndAdjustOperands(OperandVector &OrigOperands,
1668 OperandVector &FinalOperands) {
1669
1670 if (OrigOperands.size() > 1) {
1671 // Check if sizes match; OrigOperands also contains the instruction name
1672 assert(OrigOperands.size() == FinalOperands.size() + 1 &&
1673 "Operand size mismatch");
1674
1675 SmallVector<std::pair<SMLoc, std::string>, 2> Warnings;
1676 // Verify types match
1677 int RegClassID = -1;
1678 for (unsigned int i = 0; i < FinalOperands.size(); ++i) {
1679 X86Operand &OrigOp = static_cast<X86Operand &>(*OrigOperands[i + 1]);
1680 X86Operand &FinalOp = static_cast<X86Operand &>(*FinalOperands[i]);
1681
1682 if (FinalOp.isReg() &&
1683 (!OrigOp.isReg() || FinalOp.getReg() != OrigOp.getReg()))
1684 // Return false and let a normal complaint about bogus operands happen
1685 return false;
1686
1687 if (FinalOp.isMem()) {
1688
1689 if (!OrigOp.isMem())
1690 // Return false and let a normal complaint about bogus operands happen
1691 return false;
1692
1693 unsigned OrigReg = OrigOp.Mem.BaseReg;
1694 unsigned FinalReg = FinalOp.Mem.BaseReg;
1695
1696 // If we've already encountered a register class, make sure all register
1697 // bases are of the same register class
1698 if (RegClassID != -1 &&
1699 !X86MCRegisterClasses[RegClassID].contains(OrigReg)) {
1700 return Error(OrigOp.getStartLoc(),
1701 "mismatching source and destination index registers");
1702 }
1703
1704 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(OrigReg))
1705 RegClassID = X86::GR64RegClassID;
1706 else if (X86MCRegisterClasses[X86::GR32RegClassID].contains(OrigReg))
1707 RegClassID = X86::GR32RegClassID;
1708 else if (X86MCRegisterClasses[X86::GR16RegClassID].contains(OrigReg))
1709 RegClassID = X86::GR16RegClassID;
1710 else
1711 // Unexpected register class type
1712 // Return false and let a normal complaint about bogus operands happen
1713 return false;
1714
1715 bool IsSI = IsSIReg(FinalReg);
1716 FinalReg = GetSIDIForRegClass(RegClassID, FinalReg, IsSI);
1717
1718 if (FinalReg != OrigReg) {
1719 std::string RegName = IsSI ? "ES:(R|E)SI" : "ES:(R|E)DI";
1720 Warnings.push_back(std::make_pair(
1721 OrigOp.getStartLoc(),
1722 "memory operand is only for determining the size, " + RegName +
1723 " will be used for the location"));
1724 }
1725
1726 FinalOp.Mem.Size = OrigOp.Mem.Size;
1727 FinalOp.Mem.SegReg = OrigOp.Mem.SegReg;
1728 FinalOp.Mem.BaseReg = FinalReg;
1729 }
1730 }
1731
1732 // Produce warnings only if all the operands passed the adjustment - prevent
1733 // legal cases like "movsd (%rax), %xmm0" from mistakenly producing warnings
1734 for (auto &WarningMsg : Warnings) {
1735 Warning(WarningMsg.first, WarningMsg.second);
1736 }
1737
1738 // Remove old operands
1739 for (unsigned int i = 0; i < FinalOperands.size(); ++i)
1740 OrigOperands.pop_back();
1741 }
1742 // OrigOperands.append(FinalOperands.begin(), FinalOperands.end());
1743 for (auto &Op : FinalOperands)
1744 OrigOperands.push_back(std::move(Op));
1745
1746 return false;
1747}
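// Example: for "movsb (%edx), %es:(%edi)" the explicit memory operands only
// determine the operand size; the base register is rewritten to the default
// (R|E)SI/(R|E)DI pair and a "memory operand is only for determining the
// size" warning is emitted for the mismatched register.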
1748
1749bool X86AsmParser::parseOperand(OperandVector &Operands, StringRef Name) {
1750 if (isParsingIntelSyntax())
1751 return parseIntelOperand(Operands, Name);
1752
1753 return parseATTOperand(Operands);
1754}
1755
1756bool X86AsmParser::CreateMemForMSInlineAsm(unsigned SegReg, const MCExpr *Disp,
1757 unsigned BaseReg, unsigned IndexReg,
1758 unsigned Scale, bool NonAbsMem,
1759 SMLoc Start, SMLoc End,
1760 unsigned Size, StringRef Identifier,
1761 const InlineAsmIdentifierInfo &Info,
1762 OperandVector &Operands) {
1763 // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
1764 // some other label reference.
1765 if (Info.isKind(InlineAsmIdentifierInfo::IK_Label)) {
1766 // Create an absolute memory reference in order to match against
1767 // instructions taking a PC relative operand.
1768 Operands.push_back(X86Operand::CreateMem(getPointerWidth(), Disp, Start,
1769 End, Size, Identifier,
1770 Info.Label.Decl));
1771 return false;
1772 }
1773 // We either have a direct symbol reference, or an offset from a symbol. The
1774 // parser always puts the symbol on the LHS, so look there for size
1775 // calculation purposes.
1776 unsigned FrontendSize = 0;
1777 void *Decl = nullptr;
1778 bool IsGlobalLV = false;
1779 if (Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
1780 // Size is in terms of bits in this context.
1781 FrontendSize = Info.Var.Type * 8;
1782 Decl = Info.Var.Decl;
1783 IsGlobalLV = Info.Var.IsGlobalLV;
1784 }
1785 // It is very common for MS InlineAsm to use a global variable and one/two
1786 // registers in a memory expression, which is then not addressable via rip/eip.
1787 if (IsGlobalLV) {
1788 if (BaseReg || IndexReg) {
1789 Operands.push_back(X86Operand::CreateMem(getPointerWidth(), Disp, Start,
1790 End, Size, Identifier, Decl, 0,
1791 BaseReg && IndexReg));
1792 return false;
1793 }
1794 if (NonAbsMem)
1795 BaseReg = 1; // Make isAbsMem() false
1796 }
1797 Operands.push_back(X86Operand::CreateMem(
1798 getPointerWidth(), SegReg, Disp, BaseReg, IndexReg, Scale, Start, End,
1799 Size,
1800 /*DefaultBaseReg=*/X86::RIP, Identifier, Decl, FrontendSize));
1801 return false;
1802}
1803
1804 // Some binary bitwise operators have named synonyms.
1805 // Query a candidate string for being such a named operator
1806 // and, if so, invoke the appropriate handler.
1807bool X86AsmParser::ParseIntelNamedOperator(StringRef Name,
1808 IntelExprStateMachine &SM,
1809 bool &ParseError, SMLoc &End) {
1810 // A named operator should be either lower or upper case, but not a mix...
1811 // except in MASM, which uses full case-insensitivity.
1812 if (Name != Name.lower() && Name != Name.upper() &&
1813 !getParser().isParsingMasm())
1814 return false;
1815 if (Name.equals_insensitive("not")) {
1816 SM.onNot();
1817 } else if (Name.equals_insensitive("or")) {
1818 SM.onOr();
1819 } else if (Name.equals_insensitive("shl")) {
1820 SM.onLShift();
1821 } else if (Name.equals_insensitive("shr")) {
1822 SM.onRShift();
1823 } else if (Name.equals_insensitive("xor")) {
1824 SM.onXor();
1825 } else if (Name.equals_insensitive("and")) {
1826 SM.onAnd();
1827 } else if (Name.equals_insensitive("mod")) {
1828 SM.onMod();
1829 } else if (Name.equals_insensitive("offset")) {
1830 SMLoc OffsetLoc = getTok().getLoc();
1831 const MCExpr *Val = nullptr;
1832 StringRef ID;
1833 InlineAsmIdentifierInfo Info;
1834 ParseError = ParseIntelOffsetOperator(Val, ID, Info, End);
1835 if (ParseError)
1836 return true;
1837 StringRef ErrMsg;
1838 ParseError =
1839 SM.onOffset(Val, OffsetLoc, ID, Info, isParsingMSInlineAsm(), ErrMsg);
1840 if (ParseError)
1841 return Error(SMLoc::getFromPointer(Name.data()), ErrMsg);
1842 } else {
1843 return false;
1844 }
1845 if (!Name.equals_insensitive("offset"))
1846 End = consumeToken();
1847 return true;
1848}
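// Example: in "mov eax, 8 shl 2 and 15" the identifiers "shl" and "and" are
// recognized here and routed to SM.onLShift()/SM.onAnd() instead of being
// treated as symbol references.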
1849bool X86AsmParser::ParseMasmNamedOperator(StringRef Name,
1850 IntelExprStateMachine &SM,
1851 bool &ParseError, SMLoc &End) {
1852 if (Name.equals_insensitive("eq")) {
1853 SM.onEq();
1854 } else if (Name.equals_insensitive("ne")) {
1855 SM.onNE();
1856 } else if (Name.equals_insensitive("lt")) {
1857 SM.onLT();
1858 } else if (Name.equals_insensitive("le")) {
1859 SM.onLE();
1860 } else if (Name.equals_insensitive("gt")) {
1861 SM.onGT();
1862 } else if (Name.equals_insensitive("ge")) {
1863 SM.onGE();
1864 } else {
1865 return false;
1866 }
1867 End = consumeToken();
1868 return true;
1869}
1870
1871 // Check if the current Intel expression is appended after an operand.
1872// Like: [Operand][Intel Expression]
1873void X86AsmParser::tryParseOperandIdx(AsmToken::TokenKind PrevTK,
1874 IntelExprStateMachine &SM) {
1875 if (PrevTK != AsmToken::RBrac)
1876 return;
1877
1878 SM.setAppendAfterOperand();
1879}
1880
1881bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1882 MCAsmParser &Parser = getParser();
1883 StringRef ErrMsg;
1884
1885 AsmToken::TokenKind PrevTK = AsmToken::Error;
1886
1887 if (getContext().getObjectFileInfo()->isPositionIndependent())
1888 SM.setPIC();
1889
1890 bool Done = false;
1891 while (!Done) {
1892 // Get a fresh reference on each loop iteration in case the previous
1893 // iteration moved the token storage during UnLex().
1894 const AsmToken &Tok = Parser.getTok();
1895
1896 bool UpdateLocLex = true;
1897 AsmToken::TokenKind TK = getLexer().getKind();
1898
1899 switch (TK) {
1900 default:
1901 if ((Done = SM.isValidEndState()))
1902 break;
1903 return Error(Tok.getLoc(), "unknown token in expression");
1904 case AsmToken::Error:
1905 return Error(getLexer().getErrLoc(), getLexer().getErr());
1906 break;
1907 case AsmToken::Real:
1908 // DotOperator: [ebx].0
1909 UpdateLocLex = false;
1910 if (ParseIntelDotOperator(SM, End))
1911 return true;
1912 break;
1913 case AsmToken::Dot:
1914 if (!Parser.isParsingMasm()) {
1915 if ((Done = SM.isValidEndState()))
1916 break;
1917 return Error(Tok.getLoc(), "unknown token in expression");
1918 }
1919 // MASM allows spaces around the dot operator (e.g., "var . x")
1920 Lex();
1921 UpdateLocLex = false;
1922 if (ParseIntelDotOperator(SM, End))
1923 return true;
1924 break;
1925 case AsmToken::Dollar:
1926 if (!Parser.isParsingMasm()) {
1927 if ((Done = SM.isValidEndState()))
1928 break;
1929 return Error(Tok.getLoc(), "unknown token in expression");
1930 }
1931 [[fallthrough]];
1932 case AsmToken::String: {
1933 if (Parser.isParsingMasm()) {
1934 // MASM parsers handle strings in expressions as constants.
1935 SMLoc ValueLoc = Tok.getLoc();
1936 int64_t Res;
1937 const MCExpr *Val;
1938 if (Parser.parsePrimaryExpr(Val, End, nullptr))
1939 return true;
1940 UpdateLocLex = false;
1941 if (!Val->evaluateAsAbsolute(Res, getStreamer().getAssemblerPtr()))
1942 return Error(ValueLoc, "expected absolute value");
1943 if (SM.onInteger(Res, ErrMsg))
1944 return Error(ValueLoc, ErrMsg);
1945 break;
1946 }
1947 [[fallthrough]];
1948 }
1949 case AsmToken::At:
1950 case AsmToken::Identifier: {
1951 SMLoc IdentLoc = Tok.getLoc();
1952 StringRef Identifier = Tok.getString();
1953 UpdateLocLex = false;
1954 if (Parser.isParsingMasm()) {
1955 size_t DotOffset = Identifier.find_first_of('.');
1956 if (DotOffset != StringRef::npos) {
1957 consumeToken();
1958 StringRef LHS = Identifier.slice(0, DotOffset);
1959 StringRef Dot = Identifier.slice(DotOffset, DotOffset + 1);
1960 StringRef RHS = Identifier.slice(DotOffset + 1, StringRef::npos);
1961 if (!RHS.empty()) {
1962 getLexer().UnLex(AsmToken(AsmToken::Identifier, RHS));
1963 }
1964 getLexer().UnLex(AsmToken(AsmToken::Dot, Dot));
1965 if (!LHS.empty()) {
1966 getLexer().UnLex(AsmToken(AsmToken::Identifier, LHS));
1967 }
1968 break;
1969 }
1970 }
1971 // (MASM only) <TYPE> PTR operator
1972 if (Parser.isParsingMasm()) {
1973 const AsmToken &NextTok = getLexer().peekTok();
1974 if (NextTok.is(AsmToken::Identifier) &&
1975 NextTok.getIdentifier().equals_insensitive("ptr")) {
1976 AsmTypeInfo Info;
1977 if (Parser.lookUpType(Identifier, Info))
1978 return Error(Tok.getLoc(), "unknown type");
1979 SM.onCast(Info);
1980 // Eat type and PTR.
1981 consumeToken();
1982 End = consumeToken();
1983 break;
1984 }
1985 }
1986 // Register, or (MASM only) <register>.<field>
1987 MCRegister Reg;
1988 if (Tok.is(AsmToken::Identifier)) {
1989 if (!ParseRegister(Reg, IdentLoc, End, /*RestoreOnFailure=*/true)) {
1990 if (SM.onRegister(Reg, ErrMsg))
1991 return Error(IdentLoc, ErrMsg);
1992 break;
1993 }
1994 if (Parser.isParsingMasm()) {
1995 const std::pair<StringRef, StringRef> IDField =
1996 Tok.getString().split('.');
1997 const StringRef ID = IDField.first, Field = IDField.second;
1998 SMLoc IDEndLoc = SMLoc::getFromPointer(ID.data() + ID.size());
1999 if (!Field.empty() &&
2000 !MatchRegisterByName(Reg, ID, IdentLoc, IDEndLoc)) {
2001 if (SM.onRegister(Reg, ErrMsg))
2002 return Error(IdentLoc, ErrMsg);
2003
2004 AsmFieldInfo Info;
2005 SMLoc FieldStartLoc = SMLoc::getFromPointer(Field.data());
2006 if (Parser.lookUpField(Field, Info))
2007 return Error(FieldStartLoc, "unknown offset");
2008 else if (SM.onPlus(ErrMsg))
2009 return Error(getTok().getLoc(), ErrMsg);
2010 else if (SM.onInteger(Info.Offset, ErrMsg))
2011 return Error(IdentLoc, ErrMsg);
2012 SM.setTypeInfo(Info.Type);
2013
2014 End = consumeToken();
2015 break;
2016 }
2017 }
2018 }
2019 // Operator synonyms ("not", "or", etc.)
2020 bool ParseError = false;
2021 if (ParseIntelNamedOperator(Identifier, SM, ParseError, End)) {
2022 if (ParseError)
2023 return true;
2024 break;
2025 }
2026 if (Parser.isParsingMasm() &&
2027 ParseMasmNamedOperator(Identifier, SM, ParseError, End)) {
2028 if (ParseError)
2029 return true;
2030 break;
2031 }
2032 // Symbol reference, when parsing assembly content
2033 InlineAsmIdentifierInfo Info;
2034 AsmFieldInfo FieldInfo;
2035 const MCExpr *Val;
2036 if (isParsingMSInlineAsm() || Parser.isParsingMasm()) {
2037 // MS Dot Operator expression
2038 if (Identifier.count('.') &&
2039 (PrevTK == AsmToken::RBrac || PrevTK == AsmToken::RParen)) {
2040 if (ParseIntelDotOperator(SM, End))
2041 return true;
2042 break;
2043 }
2044 }
2045 if (isParsingMSInlineAsm()) {
2046 // MS InlineAsm operators (TYPE/LENGTH/SIZE)
2047 if (unsigned OpKind = IdentifyIntelInlineAsmOperator(Identifier)) {
2048 if (int64_t Val = ParseIntelInlineAsmOperator(OpKind)) {
2049 if (SM.onInteger(Val, ErrMsg))
2050 return Error(IdentLoc, ErrMsg);
2051 } else {
2052 return true;
2053 }
2054 break;
2055 }
2056 // MS InlineAsm identifier
2057 // Call parseIdentifier() to combine @ with the identifier behind it.
2058 if (TK == AsmToken::At && Parser.parseIdentifier(Identifier))
2059 return Error(IdentLoc, "expected identifier");
2060 if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info, false, End))
2061 return true;
2062 else if (SM.onIdentifierExpr(Val, Identifier, Info, FieldInfo.Type,
2063 true, ErrMsg))
2064 return Error(IdentLoc, ErrMsg);
2065 break;
2066 }
2067 if (Parser.isParsingMasm()) {
2068 if (unsigned OpKind = IdentifyMasmOperator(Identifier)) {
2069 int64_t Val;
2070 if (ParseMasmOperator(OpKind, Val))
2071 return true;
2072 if (SM.onInteger(Val, ErrMsg))
2073 return Error(IdentLoc, ErrMsg);
2074 break;
2075 }
2076 if (!getParser().lookUpType(Identifier, FieldInfo.Type)) {
2077 // Field offset immediate; <TYPE>.<field specification>
2078 Lex(); // eat type
2079 bool EndDot = parseOptionalToken(AsmToken::Dot);
2080 while (EndDot || (getTok().is(AsmToken::Identifier) &&
2081 getTok().getString().starts_with("."))) {
2082 getParser().parseIdentifier(Identifier);
2083 if (!EndDot)
2084 Identifier.consume_front(".");
2085 EndDot = Identifier.consume_back(".");
2086 if (getParser().lookUpField(FieldInfo.Type.Name, Identifier,
2087 FieldInfo)) {
2088 SMLoc IDEnd =
2089 SMLoc::getFromPointer(Identifier.data() + Identifier.size());
2090 return Error(IdentLoc, "Unable to lookup field reference!",
2091 SMRange(IdentLoc, IDEnd));
2092 }
2093 if (!EndDot)
2094 EndDot = parseOptionalToken(AsmToken::Dot);
2095 }
2096 if (SM.onInteger(FieldInfo.Offset, ErrMsg))
2097 return Error(IdentLoc, ErrMsg);
2098 break;
2099 }
2100 }
2101 if (getParser().parsePrimaryExpr(Val, End, &FieldInfo.Type)) {
2102 return Error(Tok.getLoc(), "Unexpected identifier!");
2103 } else if (SM.onIdentifierExpr(Val, Identifier, Info, FieldInfo.Type,
2104 false, ErrMsg)) {
2105 return Error(IdentLoc, ErrMsg);
2106 }
2107 break;
2108 }
2109 case AsmToken::Integer: {
2110 // Look for 'b' or 'f' following an Integer as a directional label
2111 SMLoc Loc = getTok().getLoc();
2112 int64_t IntVal = getTok().getIntVal();
2113 End = consumeToken();
2114 UpdateLocLex = false;
2115 if (getLexer().getKind() == AsmToken::Identifier) {
2116 StringRef IDVal = getTok().getString();
2117 if (IDVal == "f" || IDVal == "b") {
2118 MCSymbol *Sym =
2119 getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b");
2120 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
2121 const MCExpr *Val =
2122 MCSymbolRefExpr::create(Sym, Variant, getContext());
2123 if (IDVal == "b" && Sym->isUndefined())
2124 return Error(Loc, "invalid reference to undefined symbol");
2125 StringRef Identifier = Sym->getName();
2126 InlineAsmIdentifierInfo Info;
2127 AsmTypeInfo Type;
2128 if (SM.onIdentifierExpr(Val, Identifier, Info, Type,
2129 isParsingMSInlineAsm(), ErrMsg))
2130 return Error(Loc, ErrMsg);
2131 End = consumeToken();
2132 } else {
2133 if (SM.onInteger(IntVal, ErrMsg))
2134 return Error(Loc, ErrMsg);
2135 }
2136 } else {
2137 if (SM.onInteger(IntVal, ErrMsg))
2138 return Error(Loc, ErrMsg);
2139 }
2140 break;
2141 }
2142 case AsmToken::Plus:
2143 if (SM.onPlus(ErrMsg))
2144 return Error(getTok().getLoc(), ErrMsg);
2145 break;
2146 case AsmToken::Minus:
2147 if (SM.onMinus(ErrMsg))
2148 return Error(getTok().getLoc(), ErrMsg);
2149 break;
2150 case AsmToken::Tilde: SM.onNot(); break;
2151 case AsmToken::Star: SM.onStar(); break;
2152 case AsmToken::Slash: SM.onDivide(); break;
2153 case AsmToken::Percent: SM.onMod(); break;
2154 case AsmToken::Pipe: SM.onOr(); break;
2155 case AsmToken::Caret: SM.onXor(); break;
2156 case AsmToken::Amp: SM.onAnd(); break;
2157 case AsmToken::LessLess:
2158 SM.onLShift(); break;
2159 case AsmToken::GreaterGreater:
2160 SM.onRShift(); break;
2161 case AsmToken::LBrac:
2162 if (SM.onLBrac())
2163 return Error(Tok.getLoc(), "unexpected bracket encountered");
2164 tryParseOperandIdx(PrevTK, SM);
2165 break;
2166 case AsmToken::RBrac:
2167 if (SM.onRBrac(ErrMsg)) {
2168 return Error(Tok.getLoc(), ErrMsg);
2169 }
2170 break;
2171 case AsmToken::LParen: SM.onLParen(); break;
2172 case AsmToken::RParen: SM.onRParen(); break;
2173 }
2174 if (SM.hadError())
2175 return Error(Tok.getLoc(), "unknown token in expression");
2176
2177 if (!Done && UpdateLocLex)
2178 End = consumeToken();
2179
2180 PrevTK = TK;
2181 }
2182 return false;
2183}
2184
2185void X86AsmParser::RewriteIntelExpression(IntelExprStateMachine &SM,
2186 SMLoc Start, SMLoc End) {
2187 SMLoc Loc = Start;
2188 unsigned ExprLen = End.getPointer() - Start.getPointer();
2189 // Skip everything before a symbol displacement (if we have one)
2190 if (SM.getSym() && !SM.isOffsetOperator()) {
2191 StringRef SymName = SM.getSymName();
2192 if (unsigned Len = SymName.data() - Start.getPointer())
2193 InstInfo->AsmRewrites->emplace_back(AOK_Skip, Start, Len);
2194 Loc = SMLoc::getFromPointer(SymName.data() + SymName.size());
2195 ExprLen = End.getPointer() - (SymName.data() + SymName.size());
2196 // If we have only a symbol then there's no need for a complex rewrite;
2197 // simply skip everything after it.
2198 if (!(SM.getBaseReg() || SM.getIndexReg() || SM.getImm())) {
2199 if (ExprLen)
2200 InstInfo->AsmRewrites->emplace_back(AOK_Skip, Loc, ExprLen);
2201 return;
2202 }
2203 }
2204 // Build an Intel Expression rewrite
2205 StringRef BaseRegStr;
2206 StringRef IndexRegStr;
2207 StringRef OffsetNameStr;
2208 if (SM.getBaseReg())
2209 BaseRegStr = X86IntelInstPrinter::getRegisterName(SM.getBaseReg());
2210 if (SM.getIndexReg())
2211 IndexRegStr = X86IntelInstPrinter::getRegisterName(SM.getIndexReg());
2212 if (SM.isOffsetOperator())
2213 OffsetNameStr = SM.getSymName();
2214 // Emit it
2215 IntelExpr Expr(BaseRegStr, IndexRegStr, SM.getScale(), OffsetNameStr,
2216 SM.getImm(), SM.isMemExpr());
2217 InstInfo->AsmRewrites->emplace_back(Loc, ExprLen, Expr);
2218}
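// Sketch of the rewrite performed above (MS inline asm only, illustrative
// operand): for "[rsi + rax*4 + MyVar + 8]" the text before the symbol "MyVar"
// is skipped, and everything after it is re-emitted as an Intel-expression
// rewrite carrying the base register, index register, scale and immediate, so
// the frontend can substitute the symbol's real address later.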
2219
2220// Inline assembly may use variable names with namespace alias qualifiers.
2221bool X86AsmParser::ParseIntelInlineAsmIdentifier(
2222 const MCExpr *&Val, StringRef &Identifier, InlineAsmIdentifierInfo &Info,
2223 bool IsUnevaluatedOperand, SMLoc &End, bool IsParsingOffsetOperator) {
2224 MCAsmParser &Parser = getParser();
2225 assert(isParsingMSInlineAsm() && "Expected to be parsing inline assembly.");
2226 Val = nullptr;
2227
2228 StringRef LineBuf(Identifier.data());
2229 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
2230
2231 const AsmToken &Tok = Parser.getTok();
2232 SMLoc Loc = Tok.getLoc();
2233
2234 // Advance the token stream until the end of the current token is
2235 // after the end of what the frontend claimed.
2236 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
2237 do {
2238 End = Tok.getEndLoc();
2239 getLexer().Lex();
2240 } while (End.getPointer() < EndPtr);
2241 Identifier = LineBuf;
2242
2243 // The frontend should end parsing on an assembler token boundary, unless it
2244 // failed parsing.
2245 assert((End.getPointer() == EndPtr ||
2246 Info.isKind(InlineAsmIdentifierInfo::IK_Invalid)) &&
2247 "frontend claimed part of a token?");
2248
2249 // If the identifier lookup was unsuccessful, assume that we are dealing with
2250 // a label.
2251 if (Info.isKind(InlineAsmIdentifierInfo::IK_Invalid)) {
2252 StringRef InternalName =
2253 SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(),
2254 Loc, false);
2255 assert(InternalName.size() && "We should have an internal name here.");
2256 // Push a rewrite for replacing the identifier name with the internal name,
2257 // unless we are parsing the operand of an offset operator
2258 if (!IsParsingOffsetOperator)
2259 InstInfo->AsmRewrites->emplace_back(AOK_Label, Loc, Identifier.size(),
2260 InternalName);
2261 else
2262 Identifier = InternalName;
2263 } else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
2264 return false;
2265 // Create the symbol reference.
2266 MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
2267 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
2268 Val = MCSymbolRefExpr::create(Sym, Variant, getParser().getContext());
2269 return false;
2270}
2271
2272// ParseRoundingModeOp - Parse an AVX-512 rounding mode operand.
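// Accepted forms, roughly (AT&T spellings shown as examples):
//   {rn-sae}, {rd-sae}, {ru-sae}, {rz-sae}  -> emitted as a rounding-mode immediate
//   {sae}                                   -> kept as the literal token "{sae}"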
2273bool X86AsmParser::ParseRoundingModeOp(SMLoc Start, OperandVector &Operands) {
2274 MCAsmParser &Parser = getParser();
2275 const AsmToken &Tok = Parser.getTok();
2276 // Eat "{" and mark the current place.
2277 const SMLoc consumedToken = consumeToken();
2278 if (Tok.isNot(AsmToken::Identifier))
2279 return Error(Tok.getLoc(), "Expected an identifier after {");
2280 if (Tok.getIdentifier().starts_with("r")) {
2281 int rndMode = StringSwitch<int>(Tok.getIdentifier())
2282 .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT)
2283 .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF)
2284 .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF)
2285 .Case("rz", X86::STATIC_ROUNDING::TO_ZERO)
2286 .Default(-1);
2287 if (-1 == rndMode)
2288 return Error(Tok.getLoc(), "Invalid rounding mode.");
2289 Parser.Lex(); // Eat "r*" of r*-sae
2290 if (!getLexer().is(AsmToken::Minus))
2291 return Error(Tok.getLoc(), "Expected - at this point");
2292 Parser.Lex(); // Eat "-"
2293 Parser.Lex(); // Eat the sae
2294 if (!getLexer().is(AsmToken::RCurly))
2295 return Error(Tok.getLoc(), "Expected } at this point");
2296 SMLoc End = Tok.getEndLoc();
2297 Parser.Lex(); // Eat "}"
2298 const MCExpr *RndModeOp =
2299 MCConstantExpr::create(rndMode, Parser.getContext());
2300 Operands.push_back(X86Operand::CreateImm(RndModeOp, Start, End));
2301 return false;
2302 }
2303 if (Tok.getIdentifier() == "sae") {
2304 Parser.Lex(); // Eat the sae
2305 if (!getLexer().is(AsmToken::RCurly))
2306 return Error(Tok.getLoc(), "Expected } at this point");
2307 Parser.Lex(); // Eat "}"
2308 Operands.push_back(X86Operand::CreateToken("{sae}", consumedToken));
2309 return false;
2310 }
2311 return Error(Tok.getLoc(), "unknown token in expression");
2312}
2313
2314/// Parse conditional flags for CCMP/CTEST, e.g. {dfv=of,sf,zf,cf}, right after
2315/// the mnemonic.
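/// As implemented below, the default-flags value is a 4-bit immediate with
/// OF=bit 3, SF=bit 2, ZF=bit 1 and CF=bit 0 (derived from the code below);
/// e.g. {dfv=of,cf} encodes 0x9 and an empty {dfv=} encodes 0.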
2316bool X86AsmParser::parseCFlagsOp(OperandVector &Operands) {
2317 MCAsmParser &Parser = getParser();
2318 AsmToken Tok = Parser.getTok();
2319 const SMLoc Start = Tok.getLoc();
2320 if (!Tok.is(AsmToken::LCurly))
2321 return Error(Tok.getLoc(), "Expected { at this point");
2322 Parser.Lex(); // Eat "{"
2323 Tok = Parser.getTok();
2324 if (Tok.getIdentifier().lower() != "dfv")
2325 return Error(Tok.getLoc(), "Expected dfv at this point");
2326 Parser.Lex(); // Eat "dfv"
2327 Tok = Parser.getTok();
2328 if (!Tok.is(AsmToken::Equal))
2329 return Error(Tok.getLoc(), "Expected = at this point");
2330 Parser.Lex(); // Eat "="
2331
2332 Tok = Parser.getTok();
2333 SMLoc End;
2334 if (Tok.is(AsmToken::RCurly)) {
2335 End = Tok.getEndLoc();
2336 Operands.push_back(X86Operand::CreateImm(
2337 MCConstantExpr::create(0, Parser.getContext()), Start, End));
2338 Parser.Lex(); // Eat "}"
2339 return false;
2340 }
2341 unsigned CFlags = 0;
2342 for (unsigned I = 0; I < 4; ++I) {
2343 Tok = Parser.getTok();
2344 unsigned CFlag = StringSwitch<unsigned>(Tok.getIdentifier().lower())
2345 .Case("of", 0x8)
2346 .Case("sf", 0x4)
2347 .Case("zf", 0x2)
2348 .Case("cf", 0x1)
2349 .Default(~0U);
2350 if (CFlag == ~0U)
2351 return Error(Tok.getLoc(), "Invalid conditional flags");
2352
2353 if (CFlags & CFlag)
2354 return Error(Tok.getLoc(), "Duplicated conditional flag");
2355 CFlags |= CFlag;
2356
2357 Parser.Lex(); // Eat one conditional flag
2358 Tok = Parser.getTok();
2359 if (Tok.is(AsmToken::RCurly)) {
2360 End = Tok.getEndLoc();
2361 Operands.push_back(X86Operand::CreateImm(
2362 MCConstantExpr::create(CFlags, Parser.getContext()), Start, End));
2363 Parser.Lex(); // Eat "}"
2364 return false;
2365 } else if (I == 3) {
2366 return Error(Tok.getLoc(), "Expected } at this point");
2367 } else if (Tok.isNot(AsmToken::Comma)) {
2368 return Error(Tok.getLoc(), "Expected } or , at this point");
2369 }
2370 Parser.Lex(); // Eat ","
2371 }
2372 llvm_unreachable("Unexpected control flow");
2373}
2374
2375/// Parse the '.' operator.
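/// For example (Intel/MASM syntax, illustrative names): "[ebx].4" simply adds
/// 4 to the displacement, while "[ebx].Field" or "Var.Field" resolves the
/// field's offset through the lookups below and adds that instead.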
2376bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM,
2377 SMLoc &End) {
2378 const AsmToken &Tok = getTok();
2379 AsmFieldInfo Info;
2380
2381 // Drop the optional '.'.
2382 StringRef DotDispStr = Tok.getString();
2383 DotDispStr.consume_front(".");
2384 StringRef TrailingDot;
2385
2386 // .Imm gets lexed as a real.
2387 if (Tok.is(AsmToken::Real)) {
2388 APInt DotDisp;
2389 if (DotDispStr.getAsInteger(10, DotDisp))
2390 return Error(Tok.getLoc(), "Unexpected offset");
2391 Info.Offset = DotDisp.getZExtValue();
2392 } else if ((isParsingMSInlineAsm() || getParser().isParsingMasm()) &&
2393 Tok.is(AsmToken::Identifier)) {
2394 if (DotDispStr.ends_with(".")) {
2395 TrailingDot = DotDispStr.substr(DotDispStr.size() - 1);
2396 DotDispStr = DotDispStr.drop_back(1);
2397 }
2398 const std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
2399 const StringRef Base = BaseMember.first, Member = BaseMember.second;
2400 if (getParser().lookUpField(SM.getType(), DotDispStr, Info) &&
2401 getParser().lookUpField(SM.getSymName(), DotDispStr, Info) &&
2402 getParser().lookUpField(DotDispStr, Info) &&
2403 (!SemaCallback ||
2404 SemaCallback->LookupInlineAsmField(Base, Member, Info.Offset)))
2405 return Error(Tok.getLoc(), "Unable to lookup field reference!");
2406 } else {
2407 return Error(Tok.getLoc(), "Unexpected token type!");
2408 }
2409
2410 // Eat the DotExpression and update End
2411 End = SMLoc::getFromPointer(DotDispStr.data());
2412 const char *DotExprEndLoc = DotDispStr.data() + DotDispStr.size();
2413 while (Tok.getLoc().getPointer() < DotExprEndLoc)
2414 Lex();
2415 if (!TrailingDot.empty())
2416 getLexer().UnLex(AsmToken(AsmToken::Dot, TrailingDot));
2417 SM.addImm(Info.Offset);
2418 SM.setTypeInfo(Info.Type);
2419 return false;
2420}
2421
2422/// Parse the 'offset' operator.
2423/// This operator is used to specify the location of a given operand
2424bool X86AsmParser::ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID,
2425 InlineAsmIdentifierInfo &Info,
2426 SMLoc &End) {
2427 // Eat offset, mark start of identifier.
2428 SMLoc Start = Lex().getLoc();
2429 ID = getTok().getString();
2430 if (!isParsingMSInlineAsm()) {
2431 if ((getTok().isNot(AsmToken::Identifier) &&
2432 getTok().isNot(AsmToken::String)) ||
2433 getParser().parsePrimaryExpr(Val, End, nullptr))
2434 return Error(Start, "unexpected token!");
2435 } else if (ParseIntelInlineAsmIdentifier(Val, ID, Info, false, End, true)) {
2436 return Error(Start, "unable to lookup expression");
2437 } else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal)) {
2438 return Error(Start, "offset operator cannot yet handle constants");
2439 }
2440 return false;
2441}
2442
2443// Query a candidate string for being an Intel assembly operator.
2444// Report back its kind, or IOK_INVALID if it does not evaluate to a known one.
2445unsigned X86AsmParser::IdentifyIntelInlineAsmOperator(StringRef Name) {
2446 return StringSwitch<unsigned>(Name)
2447 .Cases("TYPE","type",IOK_TYPE)
2448 .Cases("SIZE","size",IOK_SIZE)
2449 .Cases("LENGTH","length",IOK_LENGTH)
2450 .Default(IOK_INVALID);
2451}
2452
2453/// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
2454/// returns the number of elements in an array. It returns the value 1 for
2455/// non-array variables. The SIZE operator returns the size of a C or C++
2456/// variable. A variable's size is the product of its LENGTH and TYPE. The
2457/// TYPE operator returns the size of a C or C++ type or variable. If the
2458/// variable is an array, TYPE returns the size of a single element.
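/// Worked example of the relation stated above (illustrative declaration): for
/// "int arr[10]", LENGTH arr == 10, TYPE arr == 4, and SIZE arr == 10 * 4 == 40.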
2459unsigned X86AsmParser::ParseIntelInlineAsmOperator(unsigned OpKind) {
2460 MCAsmParser &Parser = getParser();
2461 const AsmToken &Tok = Parser.getTok();
2462 Parser.Lex(); // Eat operator.
2463
2464 const MCExpr *Val = nullptr;
2465 InlineAsmIdentifierInfo Info;
2466 SMLoc Start = Tok.getLoc(), End;
2467 StringRef Identifier = Tok.getString();
2468 if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info,
2469 /*IsUnevaluatedOperand=*/true, End))
2470 return 0;
2471
2472 if (!Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
2473 Error(Start, "unable to lookup expression");
2474 return 0;
2475 }
2476
2477 unsigned CVal = 0;
2478 switch(OpKind) {
2479 default: llvm_unreachable("Unexpected operand kind!");
2480 case IOK_LENGTH: CVal = Info.Var.Length; break;
2481 case IOK_SIZE: CVal = Info.Var.Size; break;
2482 case IOK_TYPE: CVal = Info.Var.Type; break;
2483 }
2484
2485 return CVal;
2486}
2487
2488// Query a candidate string for being a MASM operator.
2489// Report back its kind, or MOK_INVALID if it does not evaluate to a known one.
2490unsigned X86AsmParser::IdentifyMasmOperator(StringRef Name) {
2491 return StringSwitch<unsigned>(Name.lower())
2492 .Case("type", MOK_TYPE)
2493 .Cases("size", "sizeof", MOK_SIZEOF)
2494 .Cases("length", "lengthof", MOK_LENGTHOF)
2495 .Default(MOK_INVALID);
2496}
2497
2498/// Parse the 'LENGTHOF', 'SIZEOF', and 'TYPE' operators. The LENGTHOF operator
2499/// returns the number of elements in an array. It returns the value 1 for
2500/// non-array variables. The SIZEOF operator returns the size of a type or
2501/// variable in bytes. A variable's size is the product of its LENGTH and TYPE.
2502/// The TYPE operator returns the size of a variable. If the variable is an
2503/// array, TYPE returns the size of a single element.
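/// Worked example for the MASM spellings (illustrative declaration): for
/// "arr DWORD 10 DUP (?)", LENGTHOF arr == 10, TYPE arr == 4, and
/// SIZEOF arr == 40.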
2504bool X86AsmParser::ParseMasmOperator(unsigned OpKind, int64_t &Val) {
2505 MCAsmParser &Parser = getParser();
2506 SMLoc OpLoc = Parser.getTok().getLoc();
2507 Parser.Lex(); // Eat operator.
2508
2509 Val = 0;
2510 if (OpKind == MOK_SIZEOF || OpKind == MOK_TYPE) {
2511 // Check for SIZEOF(<type>) and TYPE(<type>).
2512 bool InParens = Parser.getTok().is(AsmToken::LParen);
2513 const AsmToken &IDTok = InParens ? getLexer().peekTok() : Parser.getTok();
2514 AsmTypeInfo Type;
2515 if (IDTok.is(AsmToken::Identifier) &&
2516 !Parser.lookUpType(IDTok.getIdentifier(), Type)) {
2517 Val = Type.Size;
2518
2519 // Eat tokens.
2520 if (InParens)
2521 parseToken(AsmToken::LParen);
2522 parseToken(AsmToken::Identifier);
2523 if (InParens)
2524 parseToken(AsmToken::RParen);
2525 }
2526 }
2527
2528 if (!Val) {
2529 IntelExprStateMachine SM;
2530 SMLoc End, Start = Parser.getTok().getLoc();
2531 if (ParseIntelExpression(SM, End))
2532 return true;
2533
2534 switch (OpKind) {
2535 default:
2536 llvm_unreachable("Unexpected operand kind!");
2537 case MOK_SIZEOF:
2538 Val = SM.getSize();
2539 break;
2540 case MOK_LENGTHOF:
2541 Val = SM.getLength();
2542 break;
2543 case MOK_TYPE:
2544 Val = SM.getElementSize();
2545 break;
2546 }
2547
2548 if (!Val)
2549 return Error(OpLoc, "expression has unknown type", SMRange(Start, End));
2550 }
2551
2552 return false;
2553}
2554
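// Maps an Intel/MASM operand-size keyword to a width in bits and eats the
// keyword plus the following "ptr", e.g. "byte ptr [eax]" yields Size == 8 and
// "xmmword ptr [rax]" yields Size == 128 (illustrative operands).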
2555bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size) {
2556 Size = StringSwitch<unsigned>(getTok().getString())
2557 .Cases("BYTE", "byte", 8)
2558 .Cases("WORD", "word", 16)
2559 .Cases("DWORD", "dword", 32)
2560 .Cases("FLOAT", "float", 32)
2561 .Cases("LONG", "long", 32)
2562 .Cases("FWORD", "fword", 48)
2563 .Cases("DOUBLE", "double", 64)
2564 .Cases("QWORD", "qword", 64)
2565 .Cases("MMWORD","mmword", 64)
2566 .Cases("XWORD", "xword", 80)
2567 .Cases("TBYTE", "tbyte", 80)
2568 .Cases("XMMWORD", "xmmword", 128)
2569 .Cases("YMMWORD", "ymmword", 256)
2570 .Cases("ZMMWORD", "zmmword", 512)
2571 .Default(0);
2572 if (Size) {
2573 const AsmToken &Tok = Lex(); // Eat operand size (e.g., byte, word).
2574 if (!(Tok.getString() == "PTR" || Tok.getString() == "ptr"))
2575 return Error(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
2576 Lex(); // Eat ptr.
2577 }
2578 return false;
2579}
2580
2581bool X86AsmParser::parseIntelOperand(OperandVector &Operands, StringRef Name) {
2582 MCAsmParser &Parser = getParser();
2583 const AsmToken &Tok = Parser.getTok();
2584 SMLoc Start, End;
2585
2586 // Parse optional Size directive.
2587 unsigned Size;
2588 if (ParseIntelMemoryOperandSize(Size))
2589 return true;
2590 bool PtrInOperand = bool(Size);
2591
2592 Start = Tok.getLoc();
2593
2594 // Rounding mode operand.
2595 if (getLexer().is(AsmToken::LCurly))
2596 return ParseRoundingModeOp(Start, Operands);
2597
2598 // Register operand.
2599 MCRegister RegNo;
2600 if (Tok.is(AsmToken::Identifier) && !parseRegister(RegNo, Start, End)) {
2601 if (RegNo == X86::RIP)
2602 return Error(Start, "rip can only be used as a base register");
2603 // A Register followed by ':' is considered a segment override
2604 if (Tok.isNot(AsmToken::Colon)) {
2605 if (PtrInOperand)
2606 return Error(Start, "expected memory operand after 'ptr', "
2607 "found register operand instead");
2608 Operands.push_back(X86Operand::CreateReg(RegNo, Start, End));
2609 return false;
2610 }
2611 // An alleged segment override; check if we have a valid segment register.
2612 if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
2613 return Error(Start, "invalid segment register");
2614 // Eat ':' and update Start location
2615 Start = Lex().getLoc();
2616 }
2617
2618 // Immediates and Memory
2619 IntelExprStateMachine SM;
2620 if (ParseIntelExpression(SM, End))
2621 return true;
2622
2623 if (isParsingMSInlineAsm())
2624 RewriteIntelExpression(SM, Start, Tok.getLoc());
2625
2626 int64_t Imm = SM.getImm();
2627 const MCExpr *Disp = SM.getSym();
2628 const MCExpr *ImmDisp = MCConstantExpr::create(Imm, getContext());
2629 if (Disp && Imm)
2630 Disp = MCBinaryExpr::createAdd(Disp, ImmDisp, getContext());
2631 if (!Disp)
2632 Disp = ImmDisp;
2633
2634 // RegNo != 0 specifies a valid segment register,
2635 // and we are parsing a segment override
2636 if (!SM.isMemExpr() && !RegNo) {
2637 if (isParsingMSInlineAsm() && SM.isOffsetOperator()) {
2638 const InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
2639 if (Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
2640 // Disp includes the address of a variable; make sure this is recorded
2641 // for later handling.
2642 Operands.push_back(X86Operand::CreateImm(Disp, Start, End,
2643 SM.getSymName(), Info.Var.Decl,
2644 Info.Var.IsGlobalLV));
2645 return false;
2646 }
2647 }
2648
2649 Operands.push_back(X86Operand::CreateImm(Disp, Start, End));
2650 return false;
2651 }
2652
2653 StringRef ErrMsg;
2654 unsigned BaseReg = SM.getBaseReg();
2655 unsigned IndexReg = SM.getIndexReg();
2656 if (IndexReg && BaseReg == X86::RIP)
2657 BaseReg = 0;
2658 unsigned Scale = SM.getScale();
2659 if (!PtrInOperand)
2660 Size = SM.getElementSize() << 3;
2661
2662 if (Scale == 0 && BaseReg != X86::ESP && BaseReg != X86::RSP &&
2663 (IndexReg == X86::ESP || IndexReg == X86::RSP))
2664 std::swap(BaseReg, IndexReg);
2665
2666 // If BaseReg is a vector register and IndexReg is not, swap them unless
2667 // Scale was specified in which case it would be an error.
2668 if (Scale == 0 &&
2669 !(X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) ||
2670 X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) ||
2671 X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg)) &&
2672 (X86MCRegisterClasses[X86::VR128XRegClassID].contains(BaseReg) ||
2673 X86MCRegisterClasses[X86::VR256XRegClassID].contains(BaseReg) ||
2674 X86MCRegisterClasses[X86::VR512RegClassID].contains(BaseReg)))
2675 std::swap(BaseReg, IndexReg);
2676
2677 if (Scale != 0 &&
2678 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg))
2679 return Error(Start, "16-bit addresses cannot have a scale");
2680
2681 // If there was no explicit scale specified, change it to 1.
2682 if (Scale == 0)
2683 Scale = 1;
2684
2685 // If this is a 16-bit addressing mode with the base and index in the wrong
2686 // order, swap them so CheckBaseRegAndIndexRegAndScale doesn't fail. It is
2687 // shared with AT&T syntax where order matters.
2688 if ((BaseReg == X86::SI || BaseReg == X86::DI) &&
2689 (IndexReg == X86::BX || IndexReg == X86::BP))
2690 std::swap(BaseReg, IndexReg);
2691
2692 if ((BaseReg || IndexReg) &&
2693 CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
2694 ErrMsg))
2695 return Error(Start, ErrMsg);
2696 bool IsUnconditionalBranch =
2697 Name.equals_insensitive("jmp") || Name.equals_insensitive("call");
2698 if (isParsingMSInlineAsm())
2699 return CreateMemForMSInlineAsm(RegNo, Disp, BaseReg, IndexReg, Scale,
2700 IsUnconditionalBranch && is64BitMode(),
2701 Start, End, Size, SM.getSymName(),
2702 SM.getIdentifierInfo(), Operands);
2703
2704 // When parsing x64 MS-style assembly, all non-absolute references to a named
2705 // variable default to RIP-relative.
2706 unsigned DefaultBaseReg = X86::NoRegister;
2707 bool MaybeDirectBranchDest = true;
2708
2709 if (Parser.isParsingMasm()) {
2710 if (is64BitMode() && SM.getElementSize() > 0) {
2711 DefaultBaseReg = X86::RIP;
2712 }
2713 if (IsUnconditionalBranch) {
2714 if (PtrInOperand) {
2715 MaybeDirectBranchDest = false;
2716 if (is64BitMode())
2717 DefaultBaseReg = X86::RIP;
2718 } else if (!BaseReg && !IndexReg && Disp &&
2719 Disp->getKind() == MCExpr::SymbolRef) {
2720 if (is64BitMode()) {
2721 if (SM.getSize() == 8) {
2722 MaybeDirectBranchDest = false;
2723 DefaultBaseReg = X86::RIP;
2724 }
2725 } else {
2726 if (SM.getSize() == 4 || SM.getSize() == 2)
2727 MaybeDirectBranchDest = false;
2728 }
2729 }
2730 }
2731 } else if (IsUnconditionalBranch) {
2732 // Treat `call [offset fn_ref]` (or `jmp`) syntax as an error.
2733 if (!PtrInOperand && SM.isOffsetOperator())
2734 return Error(
2735 Start, "`OFFSET` operator cannot be used in an unconditional branch");
2736 if (PtrInOperand || SM.isBracketUsed())
2737 MaybeDirectBranchDest = false;
2738 }
2739
2740 if ((BaseReg || IndexReg || RegNo || DefaultBaseReg != X86::NoRegister))
2741 Operands.push_back(X86Operand::CreateMem(
2742 getPointerWidth(), RegNo, Disp, BaseReg, IndexReg, Scale, Start, End,
2743 Size, DefaultBaseReg, /*SymName=*/StringRef(), /*OpDecl=*/nullptr,
2744 /*FrontendSize=*/0, /*UseUpRegs=*/false, MaybeDirectBranchDest));
2745 else
2746 Operands.push_back(X86Operand::CreateMem(
2747 getPointerWidth(), Disp, Start, End, Size, /*SymName=*/StringRef(),
2748 /*OpDecl=*/nullptr, /*FrontendSize=*/0, /*UseUpRegs=*/false,
2749 MaybeDirectBranchDest));
2750 return false;
2751}
2752
2753bool X86AsmParser::parseATTOperand(OperandVector &Operands) {
2754 MCAsmParser &Parser = getParser();
2755 switch (getLexer().getKind()) {
2756 case AsmToken::Dollar: {
2757 // $42 or $ID -> immediate.
2758 SMLoc Start = Parser.getTok().getLoc(), End;
2759 Parser.Lex();
2760 const MCExpr *Val;
2761 // This is an immediate, so we should not parse a register. Do a precheck
2762 // for '%' to supersede intra-register parse errors.
2763 SMLoc L = Parser.getTok().getLoc();
2764 if (check(getLexer().is(AsmToken::Percent), L,
2765 "expected immediate expression") ||
2766 getParser().parseExpression(Val, End) ||
2767 check(isa<X86MCExpr>(Val), L, "expected immediate expression"))
2768 return true;
2769 Operands.push_back(X86Operand::CreateImm(Val, Start, End));
2770 return false;
2771 }
2772 case AsmToken::LCurly: {
2773 SMLoc Start = Parser.getTok().getLoc();
2774 return ParseRoundingModeOp(Start, Operands);
2775 }
2776 default: {
2777 // This a memory operand or a register. We have some parsing complications
2778 // as a '(' may be part of an immediate expression or the addressing mode
2779 // block. This is complicated by the fact that an assembler-level variable
2780 // may refer either to a register or an immediate expression.
2781
2782 SMLoc Loc = Parser.getTok().getLoc(), EndLoc;
2783 const MCExpr *Expr = nullptr;
2784 unsigned Reg = 0;
2785 if (getLexer().isNot(AsmToken::LParen)) {
2786 // No '(' so this is either a displacement expression or a register.
2787 if (Parser.parseExpression(Expr, EndLoc))
2788 return true;
2789 if (auto *RE = dyn_cast<X86MCExpr>(Expr)) {
2790 // Segment Register. Reset Expr and copy value to register.
2791 Expr = nullptr;
2792 Reg = RE->getRegNo();
2793
2794 // Check the register.
2795 if (Reg == X86::EIZ || Reg == X86::RIZ)
2796 return Error(
2797 Loc, "%eiz and %riz can only be used as index registers",
2798 SMRange(Loc, EndLoc));
2799 if (Reg == X86::RIP)
2800 return Error(Loc, "%rip can only be used as a base register",
2801 SMRange(Loc, EndLoc));
2802 // Return registers that are not segment prefixes immediately.
2803 if (!Parser.parseOptionalToken(AsmToken::Colon)) {
2804 Operands.push_back(X86Operand::CreateReg(Reg, Loc, EndLoc));
2805 return false;
2806 }
2807 if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(Reg))
2808 return Error(Loc, "invalid segment register");
2809 // Accept a '*' absolute memory reference after the segment. Place it
2810 // before the full memory operand.
2811 if (getLexer().is(AsmToken::Star))
2812 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
2813 }
2814 }
2815 // This is a Memory operand.
2816 return ParseMemOperand(Reg, Expr, Loc, EndLoc, Operands);
2817 }
2818 }
2819}
2820
2821// X86::COND_INVALID if not a recognized condition code or alternate mnemonic,
2822// otherwise the EFLAGS Condition Code enumerator.
2823X86::CondCode X86AsmParser::ParseConditionCode(StringRef CC) {
2824 return StringSwitch<X86::CondCode>(CC)
2825 .Case("o", X86::COND_O) // Overflow
2826 .Case("no", X86::COND_NO) // No Overflow
2827 .Cases("b", "nae", X86::COND_B) // Below/Neither Above nor Equal
2828 .Cases("ae", "nb", X86::COND_AE) // Above or Equal/Not Below
2829 .Cases("e", "z", X86::COND_E) // Equal/Zero
2830 .Cases("ne", "nz", X86::COND_NE) // Not Equal/Not Zero
2831 .Cases("be", "na", X86::COND_BE) // Below or Equal/Not Above
2832 .Cases("a", "nbe", X86::COND_A) // Above/Neither Below nor Equal
2833 .Case("s", X86::COND_S) // Sign
2834 .Case("ns", X86::COND_NS) // No Sign
2835 .Cases("p", "pe", X86::COND_P) // Parity/Parity Even
2836 .Cases("np", "po", X86::COND_NP) // No Parity/Parity Odd
2837 .Cases("l", "nge", X86::COND_L) // Less/Neither Greater nor Equal
2838 .Cases("ge", "nl", X86::COND_GE) // Greater or Equal/Not Less
2839 .Cases("le", "ng", X86::COND_LE) // Less or Equal/Not Greater
2840 .Cases("g", "nle", X86::COND_G) // Greater/Neither Less nor Equal
2841 .Default(X86::COND_INVALID);
2842}
2843
2844// Returns true on failure, false otherwise.
2845// If no {z} mark was found, the parser doesn't advance.
2846bool X86AsmParser::ParseZ(std::unique_ptr<X86Operand> &Z,
2847 const SMLoc &StartLoc) {
2848 MCAsmParser &Parser = getParser();
2849 // Assuming we are just past the '{' mark, query the next token.
2850 // If we search for {z} but none is found, return false, as no parsing error
2851 // was encountered.
2852 if (!(getLexer().is(AsmToken::Identifier) &&
2853 (getLexer().getTok().getIdentifier() == "z")))
2854 return false;
2855 Parser.Lex(); // Eat z
2856 // Query and eat the '}' mark
2857 if (!getLexer().is(AsmToken::RCurly))
2858 return Error(getLexer().getLoc(), "Expected } at this point");
2859 Parser.Lex(); // Eat '}'
2860 // Assign Z with the {z} mark operand
2861 Z = X86Operand::CreateToken("{z}", StartLoc);
2862 return false;
2863}
2864
2865// true on failure, false otherwise
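// Handles the AVX-512 decorations that may follow an operand: "{1to<NUM>}"
// memory broadcast, "{%k<NUM>}" op-mask and an optional "{z}" zeroing marker,
// e.g. "vaddps (%rax){1to16}, %zmm1, %zmm2" or "vmovaps %zmm0, %zmm1 {%k1}{z}"
// (illustrative AT&T-syntax instructions).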
2866bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands) {
2867 MCAsmParser &Parser = getParser();
2868 if (getLexer().is(AsmToken::LCurly)) {
2869 // Eat "{" and mark the current place.
2870 const SMLoc consumedToken = consumeToken();
2871 // Distinguish {1to<NUM>} from {%k<NUM>}.
2872 if(getLexer().is(AsmToken::Integer)) {
2873 // Parse memory broadcasting ({1to<NUM>}).
2874 if (getLexer().getTok().getIntVal() != 1)
2875 return TokError("Expected 1to<NUM> at this point");
2876 StringRef Prefix = getLexer().getTok().getString();
2877 Parser.Lex(); // Eat first token of 1to8
2878 if (!getLexer().is(AsmToken::Identifier))
2879 return TokError("Expected 1to<NUM> at this point");
2880 // Recognize only reasonable suffixes.
2881 SmallVector<char, 5> BroadcastVector;
2882 StringRef BroadcastString = (Prefix + getLexer().getTok().getIdentifier())
2883 .toStringRef(BroadcastVector);
2884 if (!BroadcastString.starts_with("1to"))
2885 return TokError("Expected 1to<NUM> at this point");
2886 const char *BroadcastPrimitive =
2887 StringSwitch<const char *>(BroadcastString)
2888 .Case("1to2", "{1to2}")
2889 .Case("1to4", "{1to4}")
2890 .Case("1to8", "{1to8}")
2891 .Case("1to16", "{1to16}")
2892 .Case("1to32", "{1to32}")
2893 .Default(nullptr);
2894 if (!BroadcastPrimitive)
2895 return TokError("Invalid memory broadcast primitive.");
2896 Parser.Lex(); // Eat trailing token of 1toN
2897 if (!getLexer().is(AsmToken::RCurly))
2898 return TokError("Expected } at this point");
2899 Parser.Lex(); // Eat "}"
2900 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
2901 consumedToken));
2902 // No AVX512-specific primitives can follow
2903 // memory broadcasting, so return.
2904 return false;
2905 } else {
2906 // Parse either {k}{z}, {z}{k}, {k} or {z};
2907 // the last one has no meaning, but GCC accepts it.
2908 // Currently, we're just past a '{' mark.
2909 std::unique_ptr<X86Operand> Z;
2910 if (ParseZ(Z, consumedToken))
2911 return true;
2912 // Reaching here means that parsing of the alleged '{z}' mark yielded
2913 // no errors.
2914 // Check whether a {%k<NUM>} mark still needs to be parsed.
2915 if (!Z || getLexer().is(AsmToken::LCurly)) {
2916 SMLoc StartLoc = Z ? consumeToken() : consumedToken;
2917 // Parse an op-mask register mark ({%k<NUM>}), which is now to be
2918 // expected
2919 MCRegister RegNo;
2920 SMLoc RegLoc;
2921 if (!parseRegister(RegNo, RegLoc, StartLoc) &&
2922 X86MCRegisterClasses[X86::VK1RegClassID].contains(RegNo)) {
2923 if (RegNo == X86::K0)
2924 return Error(RegLoc, "Register k0 can't be used as write mask");
2925 if (!getLexer().is(AsmToken::RCurly))
2926 return Error(getLexer().getLoc(), "Expected } at this point");
2927 Operands.push_back(X86Operand::CreateToken("{", StartLoc));
2928 Operands.push_back(
2929 X86Operand::CreateReg(RegNo, StartLoc, StartLoc));
2930 Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
2931 } else
2932 return Error(getLexer().getLoc(),
2933 "Expected an op-mask register at this point");
2934 // {%k<NUM>} mark is found, inquire for {z}
2935 if (getLexer().is(AsmToken::LCurly) && !Z) {
2936 // If we found a parsing error, or found no (expected) {z} mark,
2937 // report an error.
2938 if (ParseZ(Z, consumeToken()) || !Z)
2939 return Error(getLexer().getLoc(),
2940 "Expected a {z} mark at this point");
2941
2942 }
2943 // '{z}' on its own is meaningless, hence should be ignored.
2944 // On the contrary, if it is accompanied by a K register,
2945 // allow it.
2946 if (Z)
2947 Operands.push_back(std::move(Z));
2948 }
2949 }
2950 }
2951 return false;
2952}
2953
2954/// ParseMemOperand: 'seg : disp(basereg, indexreg, scale)'. The '%ds:' prefix
2955/// has already been parsed if present. disp may be provided as well.
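/// For example (AT&T syntax, illustrative operand): "%gs:8(%ebx,%ecx,4)"
/// reaches this point with "%gs:" already consumed, leaving the displacement 8
/// and the (base, index, scale) tuple to be parsed below.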
2956bool X86AsmParser::ParseMemOperand(unsigned SegReg, const MCExpr *Disp,
2957 SMLoc StartLoc, SMLoc EndLoc,
2958 OperandVector &Operands) {
2959 MCAsmParser &Parser = getParser();
2960 SMLoc Loc;
2961 // Based on the initial values passed in, we are in one of these cases
2962 // (with the current position marked by (*)):
2963
2964 // 1. seg : * disp (base-index-scale-expr)
2965 // 2. seg : *(disp) (base-index-scale-expr)
2966 // 3. seg : *(base-index-scale-expr)
2967 // 4. disp *(base-index-scale-expr)
2968 // 5. *(disp) (base-index-scale-expr)
2969 // 6. *(base-index-scale-expr)
2970 // 7. disp *
2971 // 8. *(disp)
2972
2973 // If we do not have a displacement yet, check if we're in cases 4 or 6 by
2974 // checking if the first object after the parenthesis is a register (or an
2975 // identifier referring to a register) and parse the displacement or default
2976 // to 0 as appropriate.
2977 auto isAtMemOperand = [this]() {
2978 if (this->getLexer().isNot(AsmToken::LParen))
2979 return false;
2980 AsmToken Buf[2];
2981 StringRef Id;
2982 auto TokCount = this->getLexer().peekTokens(Buf, true);
2983 if (TokCount == 0)
2984 return false;
2985 switch (Buf[0].getKind()) {
2986 case AsmToken::Percent:
2987 case AsmToken::Comma:
2988 return true;
2989 // The cases below are effectively doing a peekIdentifier.
2990 case AsmToken::At:
2991 case AsmToken::Dollar:
2992 if ((TokCount > 1) &&
2993 (Buf[1].is(AsmToken::Identifier) || Buf[1].is(AsmToken::String)) &&
2994 (Buf[0].getLoc().getPointer() + 1 == Buf[1].getLoc().getPointer()))
2995 Id = StringRef(Buf[0].getLoc().getPointer(),
2996 Buf[1].getIdentifier().size() + 1);
2997 break;
2998 case AsmToken::Identifier:
2999 case AsmToken::String:
3000 Id = Buf[0].getIdentifier();
3001 break;
3002 default:
3003 return false;
3004 }
3005 // We have an ID. Check if it is bound to a register.
3006 if (!Id.empty()) {
3007 MCSymbol *Sym = this->getContext().getOrCreateSymbol(Id);
3008 if (Sym->isVariable()) {
3009 auto V = Sym->getVariableValue(/*SetUsed*/ false);
3010 return isa<X86MCExpr>(V);
3011 }
3012 }
3013 return false;
3014 };
3015
3016 if (!Disp) {
3017 // Parse immediate if we're not at a mem operand yet.
3018 if (!isAtMemOperand()) {
3019 if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(Disp, EndLoc))
3020 return true;
3021 assert(!isa<X86MCExpr>(Disp) && "Expected non-register here.");
3022 } else {
3023 // Disp is implicitly zero if we haven't parsed it yet.
3024 Disp = MCConstantExpr::create(0, Parser.getContext());
3025 }
3026 }
3027
3028 // We are now either at the end of the operand or at the '(' at the start of a
3029 // base-index-scale-expr.
3030
3031 if (!parseOptionalToken(AsmToken::LParen)) {
3032 if (SegReg == 0)
3033 Operands.push_back(
3034 X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc));
3035 else
3036 Operands.push_back(X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
3037 0, 0, 1, StartLoc, EndLoc));
3038 return false;
3039 }
3040
3041 // If we reached here, then eat the '(' and Process
3042 // the rest of the memory operand.
3043 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
3044 SMLoc BaseLoc = getLexer().getLoc();
3045 const MCExpr *E;
3046 StringRef ErrMsg;
3047
3048 // Parse BaseReg if one is provided.
3049 if (getLexer().isNot(AsmToken::Comma) && getLexer().isNot(AsmToken::RParen)) {
3050 if (Parser.parseExpression(E, EndLoc) ||
3051 check(!isa<X86MCExpr>(E), BaseLoc, "expected register here"))
3052 return true;
3053
3054 // Check the register.
3055 BaseReg = cast<X86MCExpr>(E)->getRegNo();
3056 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ)
3057 return Error(BaseLoc, "eiz and riz can only be used as index registers",
3058 SMRange(BaseLoc, EndLoc));
3059 }
3060
3061 if (parseOptionalToken(AsmToken::Comma)) {
3062 // Following the comma we should have either an index register, or a scale
3063 // value. We don't support the latter form, but we want to parse it
3064 // correctly.
3065 //
3066 // Even though it would be completely consistent to support syntax like
3067 // "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
3068 if (getLexer().isNot(AsmToken::RParen)) {
3069 if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(E, EndLoc))
3070 return true;
3071
3072 if (!isa<X86MCExpr>(E)) {
3073 // We've parsed an unexpected Scale Value instead of an index
3074 // register. Interpret it as an absolute.
3075 int64_t ScaleVal;
3076 if (!E->evaluateAsAbsolute(ScaleVal, getStreamer().getAssemblerPtr()))
3077 return Error(Loc, "expected absolute expression");
3078 if (ScaleVal != 1)
3079 Warning(Loc, "scale factor without index register is ignored");
3080 Scale = 1;
3081 } else { // IndexReg Found.
3082 IndexReg = cast<X86MCExpr>(E)->getRegNo();
3083
3084 if (BaseReg == X86::RIP)
3085 return Error(Loc,
3086 "%rip as base register can not have an index register");
3087 if (IndexReg == X86::RIP)
3088 return Error(Loc, "%rip is not allowed as an index register");
3089
3090 if (parseOptionalToken(AsmToken::Comma)) {
3091 // Parse the scale amount:
3092 // ::= ',' [scale-expression]
3093
3094 // A scale amount without an index is ignored.
3095 if (getLexer().isNot(AsmToken::RParen)) {
3096 int64_t ScaleVal;
3097 if (Parser.parseTokenLoc(Loc) ||
3098 Parser.parseAbsoluteExpression(ScaleVal))
3099 return Error(Loc, "expected scale expression");
3100 Scale = (unsigned)ScaleVal;
3101 // Validate the scale amount.
3102 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
3103 Scale != 1)
3104 return Error(Loc, "scale factor in 16-bit address must be 1");
3105 if (checkScale(Scale, ErrMsg))
3106 return Error(Loc, ErrMsg);
3107 }
3108 }
3109 }
3110 }
3111 }
3112
3113 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
3114 if (parseToken(AsmToken::RParen, "unexpected token in memory operand"))
3115 return true;
3116
3117 // This is to support the otherwise illegal operand (%dx) found in various
3118 // unofficial manual examples (e.g. "out[s]?[bwl]? %al, (%dx)"), which must now
3119 // be supported. Mark such DX variants separately; fix them only in special cases.
3120 if (BaseReg == X86::DX && IndexReg == 0 && Scale == 1 && SegReg == 0 &&
3121 isa<MCConstantExpr>(Disp) &&
3122 cast<MCConstantExpr>(Disp)->getValue() == 0) {
3123 Operands.push_back(X86Operand::CreateDXReg(BaseLoc, BaseLoc));
3124 return false;
3125 }
3126
3127 if (CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
3128 ErrMsg))
3129 return Error(BaseLoc, ErrMsg);
3130
3131 // If the displacement is a constant, check overflows. For 64-bit addressing,
3132 // gas requires isInt<32> and otherwise reports an error. For others, gas
3133 // reports a warning and allows a wider range. E.g. gas allows
3134 // [-0xffffffff,0xffffffff] for 32-bit addressing (e.g. Linux kernel uses
3135 // `leal -__PAGE_OFFSET(%ecx),%esp` where __PAGE_OFFSET is 0xc0000000).
3136 if (BaseReg || IndexReg) {
3137 if (auto CE = dyn_cast<MCConstantExpr>(Disp)) {
3138 auto Imm = CE->getValue();
3139 bool Is64 = X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) ||
3140 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg);
3141 bool Is16 = X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg);
3142 if (Is64) {
3143 if (!isInt<32>(Imm))
3144 return Error(BaseLoc, "displacement " + Twine(Imm) +
3145 " is not within [-2147483648, 2147483647]");
3146 } else if (!Is16) {
3147 if (!isUInt<32>(Imm < 0 ? -uint64_t(Imm) : uint64_t(Imm))) {
3148 Warning(BaseLoc, "displacement " + Twine(Imm) +
3149 " shortened to 32-bit signed " +
3150 Twine(static_cast<int32_t>(Imm)));
3151 }
3152 } else if (!isUInt<16>(Imm < 0 ? -uint64_t(Imm) : uint64_t(Imm))) {
3153 Warning(BaseLoc, "displacement " + Twine(Imm) +
3154 " shortened to 16-bit signed " +
3155 Twine(static_cast<int16_t>(Imm)));
3156 }
3157 }
3158 }
3159
3160 if (SegReg || BaseReg || IndexReg)
3161 Operands.push_back(X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
3162 BaseReg, IndexReg, Scale, StartLoc,
3163 EndLoc));
3164 else
3165 Operands.push_back(
3166 X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc));
3167 return false;
3168}
3169
3170// Parse either a standard primary expression or a register.
3171bool X86AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
3172 MCAsmParser &Parser = getParser();
3173 // See if this is a register first.
3174 if (getTok().is(AsmToken::Percent) ||
3175 (isParsingIntelSyntax() && getTok().is(AsmToken::Identifier) &&
3176 MatchRegisterName(Parser.getTok().getString()))) {
3177 SMLoc StartLoc = Parser.getTok().getLoc();
3178 MCRegister RegNo;
3179 if (parseRegister(RegNo, StartLoc, EndLoc))
3180 return true;
3181 Res = X86MCExpr::create(RegNo, Parser.getContext());
3182 return false;
3183 }
3184 return Parser.parsePrimaryExpr(Res, EndLoc, nullptr);
3185}
3186
3187bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
3188 SMLoc NameLoc, OperandVector &Operands) {
3189 MCAsmParser &Parser = getParser();
3190 InstInfo = &Info;
3191
3192 // Reset the forced VEX encoding.
3193 ForcedOpcodePrefix = OpcodePrefix_Default;
3194 ForcedDispEncoding = DispEncoding_Default;
3195 UseApxExtendedReg = false;
3196 ForcedNoFlag = false;
3197
3198 // Parse pseudo prefixes.
3199 while (true) {
3200 if (Name == "{") {
3201 if (getLexer().isNot(AsmToken::Identifier))
3202 return Error(Parser.getTok().getLoc(), "Unexpected token after '{'");
3203 std::string Prefix = Parser.getTok().getString().lower();
3204 Parser.Lex(); // Eat identifier.
3205 if (getLexer().isNot(AsmToken::RCurly))
3206 return Error(Parser.getTok().getLoc(), "Expected '}'");
3207 Parser.Lex(); // Eat curly.
3208
3209 if (Prefix == "rex")
3210 ForcedOpcodePrefix = OpcodePrefix_REX;
3211 else if (Prefix == "rex2")
3212 ForcedOpcodePrefix = OpcodePrefix_REX2;
3213 else if (Prefix == "vex")
3214 ForcedOpcodePrefix = OpcodePrefix_VEX;
3215 else if (Prefix == "vex2")
3216 ForcedOpcodePrefix = OpcodePrefix_VEX2;
3217 else if (Prefix == "vex3")
3218 ForcedOpcodePrefix = OpcodePrefix_VEX3;
3219 else if (Prefix == "evex")
3220 ForcedOpcodePrefix = OpcodePrefix_EVEX;
3221 else if (Prefix == "disp8")
3222 ForcedDispEncoding = DispEncoding_Disp8;
3223 else if (Prefix == "disp32")
3224 ForcedDispEncoding = DispEncoding_Disp32;
3225 else if (Prefix == "nf")
3226 ForcedNoFlag = true;
3227 else
3228 return Error(NameLoc, "unknown prefix");
3229
3230 NameLoc = Parser.getTok().getLoc();
3231 if (getLexer().is(AsmToken::LCurly)) {
3232 Parser.Lex();
3233 Name = "{";
3234 } else {
3235 if (getLexer().isNot(AsmToken::Identifier))
3236 return Error(Parser.getTok().getLoc(), "Expected identifier");
3237 // FIXME: The mnemonic won't match correctly if it's not in lower case.
3238 Name = Parser.getTok().getString();
3239 Parser.Lex();
3240 }
3241 continue;
3242 }
3243 // Parse MASM style pseudo prefixes.
3244 if (isParsingMSInlineAsm()) {
3245 if (Name.equals_insensitive("vex"))
3246 ForcedOpcodePrefix = OpcodePrefix_VEX;
3247 else if (Name.equals_insensitive("vex2"))
3248 ForcedOpcodePrefix = OpcodePrefix_VEX2;
3249 else if (Name.equals_insensitive("vex3"))
3250 ForcedOpcodePrefix = OpcodePrefix_VEX3;
3251 else if (Name.equals_insensitive("evex"))
3252 ForcedOpcodePrefix = OpcodePrefix_EVEX;
3253
3254 if (ForcedOpcodePrefix != OpcodePrefix_Default) {
3255 if (getLexer().isNot(AsmToken::Identifier))
3256 return Error(Parser.getTok().getLoc(), "Expected identifier");
3257 // FIXME: The mnemonic won't match correctly if it's not in lower case.
3258 Name = Parser.getTok().getString();
3259 NameLoc = Parser.getTok().getLoc();
3260 Parser.Lex();
3261 }
3262 }
3263 break;
3264 }
3265
3266 // Support the suffix syntax for overriding displacement size as well.
3267 if (Name.consume_back(".d32")) {
3268 ForcedDispEncoding = DispEncoding_Disp32;
3269 } else if (Name.consume_back(".d8")) {
3270 ForcedDispEncoding = DispEncoding_Disp8;
3271 }
3272
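// At this point any pseudo prefixes are folded into the Forced* state, e.g.
// (illustrative spellings): "{vex} vpdpbusd", "{disp32} movl", "{nf} add",
// or the equivalent mnemonic suffix forms such as "mov.d32".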
3273 StringRef PatchedName = Name;
3274
3275 // Hack to skip "short" following Jcc.
3276 if (isParsingIntelSyntax() &&
3277 (PatchedName == "jmp" || PatchedName == "jc" || PatchedName == "jnc" ||
3278 PatchedName == "jcxz" || PatchedName == "jecxz" ||
3279 (PatchedName.starts_with("j") &&
3280 ParseConditionCode(PatchedName.substr(1)) != X86::COND_INVALID))) {
3281 StringRef NextTok = Parser.getTok().getString();
3282 if (Parser.isParsingMasm() ? NextTok.equals_insensitive("short")
3283 : NextTok == "short") {
3284 SMLoc NameEndLoc =
3285 NameLoc.getFromPointer(NameLoc.getPointer() + Name.size());
3286 // Eat the short keyword.
3287 Parser.Lex();
3288 // MS and GAS ignore the short keyword; they both determine the jmp type
3289 // based on the distance of the label. (NASM does emit different code with
3290 // and without "short," though.)
3291 InstInfo->AsmRewrites->emplace_back(AOK_Skip, NameEndLoc,
3292 NextTok.size() + 1);
3293 }
3294 }
3295
3296 // FIXME: Hack to recognize setneb as setne.
3297 if (PatchedName.starts_with("set") && PatchedName.ends_with("b") &&
3298 PatchedName != "setzub" && PatchedName != "setzunb" &&
3299 PatchedName != "setb" && PatchedName != "setnb")
3300 PatchedName = PatchedName.substr(0, Name.size()-1);
3301
3302 unsigned ComparisonPredicate = ~0U;
3303
3304 // FIXME: Hack to recognize cmp<comparison code>{sh,ss,sd,ph,ps,pd}.
3305 if ((PatchedName.starts_with("cmp") || PatchedName.starts_with("vcmp")) &&
3306 (PatchedName.ends_with("ss") || PatchedName.ends_with("sd") ||
3307 PatchedName.ends_with("sh") || PatchedName.ends_with("ph") ||
3308 PatchedName.ends_with("ps") || PatchedName.ends_with("pd"))) {
3309 bool IsVCMP = PatchedName[0] == 'v';
3310 unsigned CCIdx = IsVCMP ? 4 : 3;
3311 unsigned CC = StringSwitch<unsigned>(
3312 PatchedName.slice(CCIdx, PatchedName.size() - 2))
3313 .Case("eq", 0x00)
3314 .Case("eq_oq", 0x00)
3315 .Case("lt", 0x01)
3316 .Case("lt_os", 0x01)
3317 .Case("le", 0x02)
3318 .Case("le_os", 0x02)
3319 .Case("unord", 0x03)
3320 .Case("unord_q", 0x03)
3321 .Case("neq", 0x04)
3322 .Case("neq_uq", 0x04)
3323 .Case("nlt", 0x05)
3324 .Case("nlt_us", 0x05)
3325 .Case("nle", 0x06)
3326 .Case("nle_us", 0x06)
3327 .Case("ord", 0x07)
3328 .Case("ord_q", 0x07)
3329 /* AVX only from here */
3330 .Case("eq_uq", 0x08)
3331 .Case("nge", 0x09)
3332 .Case("nge_us", 0x09)
3333 .Case("ngt", 0x0A)
3334 .Case("ngt_us", 0x0A)
3335 .Case("false", 0x0B)
3336 .Case("false_oq", 0x0B)
3337 .Case("neq_oq", 0x0C)
3338 .Case("ge", 0x0D)
3339 .Case("ge_os", 0x0D)
3340 .Case("gt", 0x0E)
3341 .Case("gt_os", 0x0E)
3342 .Case("true", 0x0F)
3343 .Case("true_uq", 0x0F)
3344 .Case("eq_os", 0x10)
3345 .Case("lt_oq", 0x11)
3346 .Case("le_oq", 0x12)
3347 .Case("unord_s", 0x13)
3348 .Case("neq_us", 0x14)
3349 .Case("nlt_uq", 0x15)
3350 .Case("nle_uq", 0x16)
3351 .Case("ord_s", 0x17)
3352 .Case("eq_us", 0x18)
3353 .Case("nge_uq", 0x19)
3354 .Case("ngt_uq", 0x1A)
3355 .Case("false_os", 0x1B)
3356 .Case("neq_os", 0x1C)
3357 .Case("ge_oq", 0x1D)
3358 .Case("gt_oq", 0x1E)
3359 .Case("true_us", 0x1F)
3360 .Default(~0U);
3361 if (CC != ~0U && (IsVCMP || CC < 8) &&
3362 (IsVCMP || PatchedName.back() != 'h')) {
3363 if (PatchedName.ends_with("ss"))
3364 PatchedName = IsVCMP ? "vcmpss" : "cmpss";
3365 else if (PatchedName.ends_with("sd"))
3366 PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
3367 else if (PatchedName.ends_with("ps"))
3368 PatchedName = IsVCMP ? "vcmpps" : "cmpps";
3369 else if (PatchedName.ends_with("pd"))
3370 PatchedName = IsVCMP ? "vcmppd" : "cmppd";
3371 else if (PatchedName.ends_with("sh"))
3372 PatchedName = "vcmpsh";
3373 else if (PatchedName.ends_with("ph"))
3374 PatchedName = "vcmpph";
3375 else
3376 llvm_unreachable("Unexpected suffix!");
3377
3378 ComparisonPredicate = CC;
3379 }
3380 }
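// Net effect of the hack above (illustrative): "vcmpltps" is rewritten to the
// base mnemonic "vcmpps" and the predicate 0x01 is pushed later as an extra
// immediate operand via ComparisonPredicate.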
3381
3382 // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
3383 if (PatchedName.starts_with("vpcmp") &&
3384 (PatchedName.back() == 'b' || PatchedName.back() == 'w' ||
3385 PatchedName.back() == 'd' || PatchedName.back() == 'q')) {
3386 unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1;
3387 unsigned CC = StringSwitch<unsigned>(
3388 PatchedName.slice(5, PatchedName.size() - SuffixSize))
3389 .Case("eq", 0x0) // Only allowed on unsigned. Checked below.
3390 .Case("lt", 0x1)
3391 .Case("le", 0x2)
3392 //.Case("false", 0x3) // Not a documented alias.
3393 .Case("neq", 0x4)
3394 .Case("nlt", 0x5)
3395 .Case("nle", 0x6)
3396 //.Case("true", 0x7) // Not a documented alias.
3397 .Default(~0U);
3398 if (CC != ~0U && (CC != 0 || SuffixSize == 2)) {
3399 switch (PatchedName.back()) {
3400 default: llvm_unreachable("Unexpected character!");
3401 case 'b': PatchedName = SuffixSize == 2 ? "vpcmpub" : "vpcmpb"; break;
3402 case 'w': PatchedName = SuffixSize == 2 ? "vpcmpuw" : "vpcmpw"; break;
3403 case 'd': PatchedName = SuffixSize == 2 ? "vpcmpud" : "vpcmpd"; break;
3404 case 'q': PatchedName = SuffixSize == 2 ? "vpcmpuq" : "vpcmpq"; break;
3405 }
3406 // Set up the immediate to push into the operands later.
3407 ComparisonPredicate = CC;
3408 }
3409 }
3410
3411 // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
3412 if (PatchedName.starts_with("vpcom") &&
3413 (PatchedName.back() == 'b' || PatchedName.back() == 'w' ||
3414 PatchedName.back() == 'd' || PatchedName.back() == 'q')) {
3415 unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1;
3416 unsigned CC = StringSwitch<unsigned>(
3417 PatchedName.slice(5, PatchedName.size() - SuffixSize))
3418 .Case("lt", 0x0)
3419 .Case("le", 0x1)
3420 .Case("gt", 0x2)
3421 .Case("ge", 0x3)
3422 .Case("eq", 0x4)
3423 .Case("neq", 0x5)
3424 .Case("false", 0x6)
3425 .Case("true", 0x7)
3426 .Default(~0U);
3427 if (CC != ~0U) {
3428 switch (PatchedName.back()) {
3429 default: llvm_unreachable("Unexpected character!");
3430 case 'b': PatchedName = SuffixSize == 2 ? "vpcomub" : "vpcomb"; break;
3431 case 'w': PatchedName = SuffixSize == 2 ? "vpcomuw" : "vpcomw"; break;
3432 case 'd': PatchedName = SuffixSize == 2 ? "vpcomud" : "vpcomd"; break;
3433 case 'q': PatchedName = SuffixSize == 2 ? "vpcomuq" : "vpcomq"; break;
3434 }
3435 // Set up the immediate to push into the operands later.
3436 ComparisonPredicate = CC;
3437 }
3438 }
3439
3440 // Determine whether this is an instruction prefix.
3441 // FIXME:
3442 // Enhance prefix integrity robustness. For example, the following forms
3443 // are currently tolerated:
3444 // repz repnz <insn> ; GAS errors for the use of two similar prefixes
3445 // lock addq %rax, %rbx ; Destination operand must be of memory type
3446 // xacquire <insn> ; xacquire must be accompanied by 'lock'
3447 bool IsPrefix =
3448 StringSwitch<bool>(Name)
3449 .Cases("cs", "ds", "es", "fs", "gs", "ss", true)
3450 .Cases("rex64", "data32", "data16", "addr32", "addr16", true)
3451 .Cases("xacquire", "xrelease", true)
3452 .Cases("acquire", "release", isParsingIntelSyntax())
3453 .Default(false);
3454
3455 auto isLockRepeatNtPrefix = [](StringRef N) {
3456 return StringSwitch<bool>(N)
3457 .Cases("lock", "rep", "repe", "repz", "repne", "repnz", "notrack", true)
3458 .Default(false);
3459 };
3460
3461 bool CurlyAsEndOfStatement = false;
3462
3463 unsigned Flags = X86::IP_NO_PREFIX;
3464 while (isLockRepeatNtPrefix(Name.lower())) {
3465 unsigned Prefix =
3466 StringSwitch<unsigned>(Name)
3467 .Cases("lock", "lock", X86::IP_HAS_LOCK)
3468 .Cases("rep", "repe", "repz", X86::IP_HAS_REPEAT)
3469 .Cases("repne", "repnz", X86::IP_HAS_REPEAT_NE)
3470 .Cases("notrack", "notrack", X86::IP_HAS_NOTRACK)
3471 .Default(X86::IP_NO_PREFIX); // Invalid prefix (impossible)
3472 Flags |= Prefix;
3473 if (getLexer().is(AsmToken::EndOfStatement)) {
3474 // We don't have a real instruction with the given prefix;
3475 // let's use the prefix as the instruction.
3476 // TODO: there could be several prefixes one after another.
3477 Operands.push_back(X86Operand::CreateToken(Name, NameLoc));
3478 break;
3479 }
3480 // FIXME: The mnemonic won't match correctly if it's not in lower case.
3481 Name = Parser.getTok().getString();
3482 Parser.Lex(); // eat the prefix
3483 // Hack: we could have something like "rep # some comment" or
3484 // "lock; cmpxchg16b $1" or "lock\0A\09incl" or "lock/incl"
3485 while (Name.starts_with(";") || Name.starts_with("\n") ||
3486 Name.starts_with("#") || Name.starts_with("\t") ||
3487 Name.starts_with("/")) {
 3488 // FIXME: The mnemonic won't match correctly if it's not in lower case.
3489 Name = Parser.getTok().getString();
3490 Parser.Lex(); // go to next prefix or instr
3491 }
3492 }
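  // For example, "lock incl (%rax)" sets IP_HAS_LOCK and continues parsing with
  // Name == "incl", while a bare "lock" on its own line breaks out above and the
  // prefix itself is matched as the instruction.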
3493
3494 if (Flags)
3495 PatchedName = Name;
3496
3497 // Hacks to handle 'data16' and 'data32'
3498 if (PatchedName == "data16" && is16BitMode()) {
3499 return Error(NameLoc, "redundant data16 prefix");
3500 }
3501 if (PatchedName == "data32") {
3502 if (is32BitMode())
3503 return Error(NameLoc, "redundant data32 prefix");
3504 if (is64BitMode())
3505 return Error(NameLoc, "'data32' is not supported in 64-bit mode");
 3506 // Hack: rewrite to 'data16' for the table lookup.
3507 PatchedName = "data16";
3508
3509 if (getLexer().isNot(AsmToken::EndOfStatement)) {
3510 StringRef Next = Parser.getTok().getString();
3511 getLexer().Lex();
3512 // data32 effectively changes the instruction suffix.
3513 // TODO Generalize.
3514 if (Next == "callw")
3515 Next = "calll";
3516 if (Next == "ljmpw")
3517 Next = "ljmpl";
3518
3519 Name = Next;
3520 PatchedName = Name;
3521 ForcedDataPrefix = X86::Is32Bit;
3522 IsPrefix = false;
3523 }
3524 }
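  // For example, in 16-bit mode "data32 callw foo" is rewritten above to
  // "calll foo", and ForcedDataPrefix makes the matcher temporarily switch to
  // 32-bit mode (see matchAndEmitATTInstruction).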
3525
3526 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
3527
3528 // Push the immediate if we extracted one from the mnemonic.
3529 if (ComparisonPredicate != ~0U && !isParsingIntelSyntax()) {
3530 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonPredicate,
3531 getParser().getContext());
3532 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
3533 }
3534
 3535 // Parse conditional flags after the mnemonic.
3536 if ((Name.starts_with("ccmp") || Name.starts_with("ctest")) &&
3537 parseCFlagsOp(Operands))
3538 return true;
3539
3540 // This does the actual operand parsing. Don't parse any more if we have a
3541 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
3542 // just want to parse the "lock" as the first instruction and the "incl" as
3543 // the next one.
3544 if (getLexer().isNot(AsmToken::EndOfStatement) && !IsPrefix) {
3545 // Parse '*' modifier.
3546 if (getLexer().is(AsmToken::Star))
3547 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
3548
3549 // Read the operands.
3550 while (true) {
3551 if (parseOperand(Operands, Name))
3552 return true;
3553 if (HandleAVX512Operand(Operands))
3554 return true;
3555
3556 // check for comma and eat it
3557 if (getLexer().is(AsmToken::Comma))
3558 Parser.Lex();
3559 else
3560 break;
3561 }
3562
3563 // In MS inline asm curly braces mark the beginning/end of a block,
 3564 // therefore they should be interpreted as end of statement
3565 CurlyAsEndOfStatement =
3566 isParsingIntelSyntax() && isParsingMSInlineAsm() &&
3567 (getLexer().is(AsmToken::LCurly) || getLexer().is(AsmToken::RCurly));
3568 if (getLexer().isNot(AsmToken::EndOfStatement) && !CurlyAsEndOfStatement)
3569 return TokError("unexpected token in argument list");
3570 }
3571
3572 // Push the immediate if we extracted one from the mnemonic.
3573 if (ComparisonPredicate != ~0U && isParsingIntelSyntax()) {
3574 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonPredicate,
3575 getParser().getContext());
3576 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
3577 }
3578
3579 // Consume the EndOfStatement or the prefix separator Slash
3580 if (getLexer().is(AsmToken::EndOfStatement) ||
3581 (IsPrefix && getLexer().is(AsmToken::Slash)))
3582 Parser.Lex();
3583 else if (CurlyAsEndOfStatement)
3584 // Add an actual EndOfStatement before the curly brace
3585 Info.AsmRewrites->emplace_back(AOK_EndOfStatement,
3586 getLexer().getTok().getLoc(), 0);
3587
 3588 // This is for gas compatibility and cannot be done in the .td files.
 3589 // Append "p" to certain floating-point instructions that have no argument.
3590 // For example: fsub --> fsubp
3591 bool IsFp =
3592 Name == "fsub" || Name == "fdiv" || Name == "fsubr" || Name == "fdivr";
3593 if (IsFp && Operands.size() == 1) {
3594 const char *Repl = StringSwitch<const char *>(Name)
3595 .Case("fsub", "fsubp")
3596 .Case("fdiv", "fdivp")
3597 .Case("fsubr", "fsubrp")
3598 .Case("fdivr", "fdivrp");
3599 static_cast<X86Operand &>(*Operands[0]).setTokenValue(Repl);
3600 }
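  // For example, a bare "fsub" with no operands is emitted as "fsubp",
  // matching gas behavior.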
3601
3602 if ((Name == "mov" || Name == "movw" || Name == "movl") &&
3603 (Operands.size() == 3)) {
3604 X86Operand &Op1 = (X86Operand &)*Operands[1];
3605 X86Operand &Op2 = (X86Operand &)*Operands[2];
3606 SMLoc Loc = Op1.getEndLoc();
 3607 // Moving a 32- or 16-bit value into a segment register has the same
 3608 // behavior. Modify such instructions to always take the shorter form.
3609 if (Op1.isReg() && Op2.isReg() &&
3610 X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(
3611 Op2.getReg()) &&
3612 (X86MCRegisterClasses[X86::GR16RegClassID].contains(Op1.getReg()) ||
3613 X86MCRegisterClasses[X86::GR32RegClassID].contains(Op1.getReg()))) {
3614 // Change instruction name to match new instruction.
3615 if (Name != "mov" && Name[3] == (is16BitMode() ? 'l' : 'w')) {
3616 Name = is16BitMode() ? "movw" : "movl";
3617 Operands[0] = X86Operand::CreateToken(Name, NameLoc);
3618 }
3619 // Select the correct equivalent 16-/32-bit source register.
3620 MCRegister Reg =
3621 getX86SubSuperRegister(Op1.getReg(), is16BitMode() ? 16 : 32);
3622 Operands[1] = X86Operand::CreateReg(Reg, Loc, Loc);
3623 }
3624 }
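  // For example, in 32-bit mode "movw %ax, %ds" is canonicalized to
  // "movl %eax, %ds", which encodes without the operand-size prefix.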
3625
3626 // This is a terrible hack to handle "out[s]?[bwl]? %al, (%dx)" ->
3627 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
3628 // documented form in various unofficial manuals, so a lot of code uses it.
3629 if ((Name == "outb" || Name == "outsb" || Name == "outw" || Name == "outsw" ||
3630 Name == "outl" || Name == "outsl" || Name == "out" || Name == "outs") &&
3631 Operands.size() == 3) {
3632 X86Operand &Op = (X86Operand &)*Operands.back();
3633 if (Op.isDXReg())
3634 Operands.back() = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
3635 Op.getEndLoc());
3636 }
3637 // Same hack for "in[s]?[bwl]? (%dx), %al" -> "inb %dx, %al".
3638 if ((Name == "inb" || Name == "insb" || Name == "inw" || Name == "insw" ||
3639 Name == "inl" || Name == "insl" || Name == "in" || Name == "ins") &&
3640 Operands.size() == 3) {
3641 X86Operand &Op = (X86Operand &)*Operands[1];
3642 if (Op.isDXReg())
3643 Operands[1] = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
3644 Op.getEndLoc());
3645 }
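  // For example, "outb %al, (%dx)" is accepted and rewritten to "outb %al, %dx",
  // and "inb (%dx), %al" becomes "inb %dx, %al".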
3646
 3647 SmallVector<std::unique_ptr<MCParsedAsmOperand>, 4> TmpOperands;
 3648 bool HadVerifyError = false;
3649
3650 // Append default arguments to "ins[bwld]"
3651 if (Name.starts_with("ins") &&
3652 (Operands.size() == 1 || Operands.size() == 3) &&
3653 (Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd" ||
3654 Name == "ins")) {
3655
3656 AddDefaultSrcDestOperands(TmpOperands,
3657 X86Operand::CreateReg(X86::DX, NameLoc, NameLoc),
3658 DefaultMemDIOperand(NameLoc));
3659 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3660 }
3661
3662 // Append default arguments to "outs[bwld]"
3663 if (Name.starts_with("outs") &&
3664 (Operands.size() == 1 || Operands.size() == 3) &&
3665 (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
3666 Name == "outsd" || Name == "outs")) {
3667 AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
3668 X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
3669 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3670 }
3671
3672 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
3673 // values of $SIREG according to the mode. It would be nice if this
3674 // could be achieved with InstAlias in the tables.
3675 if (Name.starts_with("lods") &&
3676 (Operands.size() == 1 || Operands.size() == 2) &&
3677 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
3678 Name == "lodsl" || Name == "lodsd" || Name == "lodsq")) {
3679 TmpOperands.push_back(DefaultMemSIOperand(NameLoc));
3680 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3681 }
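  // For example, a bare "lodsb" gains the default source operand and is matched
  // as "lodsb (%rsi)" in 64-bit mode (with the matching (%esi)/(%si) form used
  // in the other modes).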
3682
3683 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
3684 // values of $DIREG according to the mode. It would be nice if this
3685 // could be achieved with InstAlias in the tables.
3686 if (Name.starts_with("stos") &&
3687 (Operands.size() == 1 || Operands.size() == 2) &&
3688 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
3689 Name == "stosl" || Name == "stosd" || Name == "stosq")) {
3690 TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
3691 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3692 }
3693
3694 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
3695 // values of $DIREG according to the mode. It would be nice if this
3696 // could be achieved with InstAlias in the tables.
3697 if (Name.starts_with("scas") &&
3698 (Operands.size() == 1 || Operands.size() == 2) &&
3699 (Name == "scas" || Name == "scasb" || Name == "scasw" ||
3700 Name == "scasl" || Name == "scasd" || Name == "scasq")) {
3701 TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
3702 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3703 }
3704
3705 // Add default SI and DI operands to "cmps[bwlq]".
3706 if (Name.starts_with("cmps") &&
3707 (Operands.size() == 1 || Operands.size() == 3) &&
3708 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
3709 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
3710 AddDefaultSrcDestOperands(TmpOperands, DefaultMemDIOperand(NameLoc),
3711 DefaultMemSIOperand(NameLoc));
3712 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3713 }
3714
3715 // Add default SI and DI operands to "movs[bwlq]".
3716 if (((Name.starts_with("movs") &&
3717 (Name == "movs" || Name == "movsb" || Name == "movsw" ||
3718 Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
3719 (Name.starts_with("smov") &&
3720 (Name == "smov" || Name == "smovb" || Name == "smovw" ||
3721 Name == "smovl" || Name == "smovd" || Name == "smovq"))) &&
3722 (Operands.size() == 1 || Operands.size() == 3)) {
3723 if (Name == "movsd" && Operands.size() == 1 && !isParsingIntelSyntax())
3724 Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
3725 AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
3726 DefaultMemDIOperand(NameLoc));
3727 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3728 }
3729
 3730 // Check if we encountered an error for one of the string instructions.
3731 if (HadVerifyError) {
3732 return HadVerifyError;
3733 }
3734
3735 // Transforms "xlat mem8" into "xlatb"
3736 if ((Name == "xlat" || Name == "xlatb") && Operands.size() == 2) {
3737 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
3738 if (Op1.isMem8()) {
3739 Warning(Op1.getStartLoc(), "memory operand is only for determining the "
3740 "size, (R|E)BX will be used for the location");
3741 Operands.pop_back();
3742 static_cast<X86Operand &>(*Operands[0]).setTokenValue("xlatb");
3743 }
3744 }
3745
3746 if (Flags)
3747 Operands.push_back(X86Operand::CreatePrefix(Flags, NameLoc, NameLoc));
3748 return false;
3749}
3750
3751static bool convertSSEToAVX(MCInst &Inst) {
3752 ArrayRef<X86TableEntry> Table{X86SSE2AVXTable};
3753 unsigned Opcode = Inst.getOpcode();
3754 const auto I = llvm::lower_bound(Table, Opcode);
3755 if (I == Table.end() || I->OldOpc != Opcode)
3756 return false;
3757
3758 Inst.setOpcode(I->NewOpc);
 3759 // The AVX variants of BLENDVPD/BLENDVPS/PBLENDVB take one more operand
 3760 // than the SSE variants; the extra operand is added below.
3761 if (X86::isBLENDVPD(Opcode) || X86::isBLENDVPS(Opcode) ||
3762 X86::isPBLENDVB(Opcode))
3763 Inst.addOperand(Inst.getOperand(2));
3764
3765 return true;
3766}
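// For example, with MCOptions.X86Sse2Avx set, convertSSEToAVX() rewrites the
// SSE form "addps %xmm1, %xmm0" into the VEX form "vaddps %xmm1, %xmm0, %xmm0";
// the lookup relies on X86SSE2AVXTable being sorted by the old (SSE) opcode.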
3767
3768bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
3769 if (MCOptions.X86Sse2Avx && convertSSEToAVX(Inst))
3770 return true;
3771
3772 if (ForcedOpcodePrefix != OpcodePrefix_VEX3 &&
3773 X86::optimizeInstFromVEX3ToVEX2(Inst, MII.get(Inst.getOpcode())))
3774 return true;
3775
3777 return true;
3778
3779 auto replaceWithCCMPCTEST = [&](unsigned Opcode) -> bool {
3780 if (ForcedOpcodePrefix == OpcodePrefix_EVEX) {
3781 Inst.setFlags(~(X86::IP_USE_EVEX)&Inst.getFlags());
3782 Inst.setOpcode(Opcode);
3785 return true;
3786 }
3787 return false;
3788 };
3789
3790 switch (Inst.getOpcode()) {
3791 default: return false;
3792 case X86::JMP_1:
3793 // {disp32} forces a larger displacement as if the instruction was relaxed.
3794 // NOTE: 16-bit mode uses 16-bit displacement even though it says {disp32}.
3795 // This matches GNU assembler.
3796 if (ForcedDispEncoding == DispEncoding_Disp32) {
3797 Inst.setOpcode(is16BitMode() ? X86::JMP_2 : X86::JMP_4);
3798 return true;
3799 }
3800
3801 return false;
3802 case X86::JCC_1:
3803 // {disp32} forces a larger displacement as if the instruction was relaxed.
3804 // NOTE: 16-bit mode uses 16-bit displacement even though it says {disp32}.
3805 // This matches GNU assembler.
3806 if (ForcedDispEncoding == DispEncoding_Disp32) {
3807 Inst.setOpcode(is16BitMode() ? X86::JCC_2 : X86::JCC_4);
3808 return true;
3809 }
3810
3811 return false;
3812 case X86::INT: {
3813 // Transforms "int $3" into "int3" as a size optimization.
3814 // We can't write this as an InstAlias.
3815 if (!Inst.getOperand(0).isImm() || Inst.getOperand(0).getImm() != 3)
3816 return false;
3817 Inst.clear();
3818 Inst.setOpcode(X86::INT3);
3819 return true;
3820 }
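  // For example, "int $3" is emitted as the one-byte "int3" (0xCC) instead of
  // the two-byte "int $3" (0xCD 0x03).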
3821 // `{evex} cmp <>, <>` is alias of `ccmpt {dfv=} <>, <>`, and
3822 // `{evex} test <>, <>` is alias of `ctest {dfv=} <>, <>`
3823#define FROM_TO(FROM, TO) \
3824 case X86::FROM: \
3825 return replaceWithCCMPCTEST(X86::TO);
3826 FROM_TO(CMP64rr, CCMP64rr)
3827 FROM_TO(CMP64mi32, CCMP64mi32)
3828 FROM_TO(CMP64mi8, CCMP64mi8)
3829 FROM_TO(CMP64mr, CCMP64mr)
3830 FROM_TO(CMP64ri32, CCMP64ri32)
3831 FROM_TO(CMP64ri8, CCMP64ri8)
3832 FROM_TO(CMP64rm, CCMP64rm)
3833
3834 FROM_TO(CMP32rr, CCMP32rr)
3835 FROM_TO(CMP32mi, CCMP32mi)
3836 FROM_TO(CMP32mi8, CCMP32mi8)
3837 FROM_TO(CMP32mr, CCMP32mr)
3838 FROM_TO(CMP32ri, CCMP32ri)
3839 FROM_TO(CMP32ri8, CCMP32ri8)
3840 FROM_TO(CMP32rm, CCMP32rm)
3841
3842 FROM_TO(CMP16rr, CCMP16rr)
3843 FROM_TO(CMP16mi, CCMP16mi)
3844 FROM_TO(CMP16mi8, CCMP16mi8)
3845 FROM_TO(CMP16mr, CCMP16mr)
3846 FROM_TO(CMP16ri, CCMP16ri)
3847 FROM_TO(CMP16ri8, CCMP16ri8)
3848 FROM_TO(CMP16rm, CCMP16rm)
3849
3850 FROM_TO(CMP8rr, CCMP8rr)
3851 FROM_TO(CMP8mi, CCMP8mi)
3852 FROM_TO(CMP8mr, CCMP8mr)
3853 FROM_TO(CMP8ri, CCMP8ri)
3854 FROM_TO(CMP8rm, CCMP8rm)
3855
3856 FROM_TO(TEST64rr, CTEST64rr)
3857 FROM_TO(TEST64mi32, CTEST64mi32)
3858 FROM_TO(TEST64mr, CTEST64mr)
3859 FROM_TO(TEST64ri32, CTEST64ri32)
3860
3861 FROM_TO(TEST32rr, CTEST32rr)
3862 FROM_TO(TEST32mi, CTEST32mi)
3863 FROM_TO(TEST32mr, CTEST32mr)
3864 FROM_TO(TEST32ri, CTEST32ri)
3865
3866 FROM_TO(TEST16rr, CTEST16rr)
3867 FROM_TO(TEST16mi, CTEST16mi)
3868 FROM_TO(TEST16mr, CTEST16mr)
3869 FROM_TO(TEST16ri, CTEST16ri)
3870
3871 FROM_TO(TEST8rr, CTEST8rr)
3872 FROM_TO(TEST8mi, CTEST8mi)
3873 FROM_TO(TEST8mr, CTEST8mr)
3874 FROM_TO(TEST8ri, CTEST8ri)
3875#undef FROM_TO
3876 }
3877}
3878
3879bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
3880 using namespace X86;
3881 const MCRegisterInfo *MRI = getContext().getRegisterInfo();
3882 unsigned Opcode = Inst.getOpcode();
3883 uint64_t TSFlags = MII.get(Opcode).TSFlags;
3884 if (isVFCMADDCPH(Opcode) || isVFCMADDCSH(Opcode) || isVFMADDCPH(Opcode) ||
3885 isVFMADDCSH(Opcode)) {
3886 unsigned Dest = Inst.getOperand(0).getReg();
3887 for (unsigned i = 2; i < Inst.getNumOperands(); i++)
3888 if (Inst.getOperand(i).isReg() && Dest == Inst.getOperand(i).getReg())
3889 return Warning(Ops[0]->getStartLoc(), "Destination register should be "
3890 "distinct from source registers");
3891 } else if (isVFCMULCPH(Opcode) || isVFCMULCSH(Opcode) || isVFMULCPH(Opcode) ||
3892 isVFMULCSH(Opcode)) {
3893 unsigned Dest = Inst.getOperand(0).getReg();
 3894 // The mask variants have a different operand list. Scan from the third
 3895 // operand to avoid emitting an incorrect warning.
3896 // VFMULCPHZrr Dest, Src1, Src2
3897 // VFMULCPHZrrk Dest, Dest, Mask, Src1, Src2
3898 // VFMULCPHZrrkz Dest, Mask, Src1, Src2
3899 for (unsigned i = ((TSFlags & X86II::EVEX_K) ? 2 : 1);
3900 i < Inst.getNumOperands(); i++)
3901 if (Inst.getOperand(i).isReg() && Dest == Inst.getOperand(i).getReg())
3902 return Warning(Ops[0]->getStartLoc(), "Destination register should be "
3903 "distinct from source registers");
3904 } else if (isV4FMADDPS(Opcode) || isV4FMADDSS(Opcode) ||
3905 isV4FNMADDPS(Opcode) || isV4FNMADDSS(Opcode) ||
3906 isVP4DPWSSDS(Opcode) || isVP4DPWSSD(Opcode)) {
3907 unsigned Src2 = Inst.getOperand(Inst.getNumOperands() -
 3908 X86::AddrNumOperands - 1).getReg();
 3909 unsigned Src2Enc = MRI->getEncodingValue(Src2);
3910 if (Src2Enc % 4 != 0) {
3912 unsigned GroupStart = (Src2Enc / 4) * 4;
3913 unsigned GroupEnd = GroupStart + 3;
3914 return Warning(Ops[0]->getStartLoc(),
3915 "source register '" + RegName + "' implicitly denotes '" +
3916 RegName.take_front(3) + Twine(GroupStart) + "' to '" +
3917 RegName.take_front(3) + Twine(GroupEnd) +
3918 "' source group");
3919 }
3920 } else if (isVGATHERDPD(Opcode) || isVGATHERDPS(Opcode) ||
3921 isVGATHERQPD(Opcode) || isVGATHERQPS(Opcode) ||
3922 isVPGATHERDD(Opcode) || isVPGATHERDQ(Opcode) ||
3923 isVPGATHERQD(Opcode) || isVPGATHERQQ(Opcode)) {
3924 bool HasEVEX = (TSFlags & X86II::EncodingMask) == X86II::EVEX;
3925 if (HasEVEX) {
3926 unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
3927 unsigned Index = MRI->getEncodingValue(
3928 Inst.getOperand(4 + X86::AddrIndexReg).getReg());
3929 if (Dest == Index)
3930 return Warning(Ops[0]->getStartLoc(), "index and destination registers "
3931 "should be distinct");
3932 } else {
3933 unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
3934 unsigned Mask = MRI->getEncodingValue(Inst.getOperand(1).getReg());
3935 unsigned Index = MRI->getEncodingValue(
3936 Inst.getOperand(3 + X86::AddrIndexReg).getReg());
3937 if (Dest == Mask || Dest == Index || Mask == Index)
3938 return Warning(Ops[0]->getStartLoc(), "mask, index, and destination "
3939 "registers should be distinct");
3940 }
3941 } else if (isTCMMIMFP16PS(Opcode) || isTCMMRLFP16PS(Opcode) ||
3942 isTDPBF16PS(Opcode) || isTDPFP16PS(Opcode) || isTDPBSSD(Opcode) ||
3943 isTDPBSUD(Opcode) || isTDPBUSD(Opcode) || isTDPBUUD(Opcode)) {
3944 unsigned SrcDest = Inst.getOperand(0).getReg();
3945 unsigned Src1 = Inst.getOperand(2).getReg();
3946 unsigned Src2 = Inst.getOperand(3).getReg();
3947 if (SrcDest == Src1 || SrcDest == Src2 || Src1 == Src2)
3948 return Error(Ops[0]->getStartLoc(), "all tmm registers must be distinct");
3949 }
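  // For example, reusing the destination register as the gather index, or
  // reusing a tile register in "tdpbssd", is diagnosed by the checks above.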
3950
3951 // Check that we aren't mixing AH/BH/CH/DH with REX prefix. We only need to
3952 // check this with the legacy encoding, VEX/EVEX/XOP don't use REX.
3953 if ((TSFlags & X86II::EncodingMask) == 0) {
3954 MCPhysReg HReg = X86::NoRegister;
3955 bool UsesRex = TSFlags & X86II::REX_W;
3956 unsigned NumOps = Inst.getNumOperands();
3957 for (unsigned i = 0; i != NumOps; ++i) {
3958 const MCOperand &MO = Inst.getOperand(i);
3959 if (!MO.isReg())
3960 continue;
3961 unsigned Reg = MO.getReg();
3962 if (Reg == X86::AH || Reg == X86::BH || Reg == X86::CH || Reg == X86::DH)
3963 HReg = Reg;
 3964 if (X86II::isX86_64NonExtLowByteReg(Reg) ||
 3965 X86II::isX86_64ExtendedReg(Reg))
 3966 UsesRex = true;
3967 }
3968
3969 if (UsesRex && HReg != X86::NoRegister) {
3971 return Error(Ops[0]->getStartLoc(),
3972 "can't encode '" + RegName + "' in an instruction requiring "
3973 "REX prefix");
3974 }
3975 }
3976
3977 if ((Opcode == X86::PREFETCHIT0 || Opcode == X86::PREFETCHIT1)) {
3978 const MCOperand &MO = Inst.getOperand(X86::AddrBaseReg);
3979 if (!MO.isReg() || MO.getReg() != X86::RIP)
3980 return Warning(
3981 Ops[0]->getStartLoc(),
3982 Twine((Inst.getOpcode() == X86::PREFETCHIT0 ? "'prefetchit0'"
3983 : "'prefetchit1'")) +
3984 " only supports RIP-relative address");
3985 }
3986 return false;
3987}
3988
3989void X86AsmParser::emitWarningForSpecialLVIInstruction(SMLoc Loc) {
3990 Warning(Loc, "Instruction may be vulnerable to LVI and "
3991 "requires manual mitigation");
3992 Note(SMLoc(), "See https://software.intel.com/"
3993 "security-software-guidance/insights/"
3994 "deep-dive-load-value-injection#specialinstructions"
3995 " for more information");
3996}
3997
 3998/// RET instructions, and instructions that indirectly call or jump through memory,
3999/// combine a load and a branch within a single instruction. To mitigate these
4000/// instructions against LVI, they must be decomposed into separate load and
4001/// branch instructions, with an LFENCE in between. For more details, see:
4002/// - X86LoadValueInjectionRetHardening.cpp
4003/// - X86LoadValueInjectionIndirectThunks.cpp
4004/// - https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection
4005///
 4006/// Emits the decomposed mitigation sequence, or a warning when manual mitigation is required.
4007void X86AsmParser::applyLVICFIMitigation(MCInst &Inst, MCStreamer &Out) {
4008 // Information on control-flow instructions that require manual mitigation can
4009 // be found here:
4010 // https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
4011 switch (Inst.getOpcode()) {
4012 case X86::RET16:
4013 case X86::RET32:
4014 case X86::RET64:
4015 case X86::RETI16:
4016 case X86::RETI32:
4017 case X86::RETI64: {
4018 MCInst ShlInst, FenceInst;
4019 bool Parse32 = is32BitMode() || Code16GCC;
4020 unsigned Basereg =
4021 is64BitMode() ? X86::RSP : (Parse32 ? X86::ESP : X86::SP);
4022 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
4023 auto ShlMemOp = X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
4024 /*BaseReg=*/Basereg, /*IndexReg=*/0,
4025 /*Scale=*/1, SMLoc{}, SMLoc{}, 0);
4026 ShlInst.setOpcode(X86::SHL64mi);
4027 ShlMemOp->addMemOperands(ShlInst, 5);
4028 ShlInst.addOperand(MCOperand::createImm(0));
4029 FenceInst.setOpcode(X86::LFENCE);
4030 Out.emitInstruction(ShlInst, getSTI());
4031 Out.emitInstruction(FenceInst, getSTI());
4032 return;
4033 }
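  // For example, in 64-bit mode a "ret" under LVI control-flow hardening is
  // preceded by a dummy "shlq $0, (%rsp)" and an "lfence"; both are emitted
  // here before the caller emits the return itself.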
4034 case X86::JMP16m:
4035 case X86::JMP32m:
4036 case X86::JMP64m:
4037 case X86::CALL16m:
4038 case X86::CALL32m:
4039 case X86::CALL64m:
4040 emitWarningForSpecialLVIInstruction(Inst.getLoc());
4041 return;
4042 }
4043}
4044
4045/// To mitigate LVI, every instruction that performs a load can be followed by
4046/// an LFENCE instruction to squash any potential mis-speculation. There are
 4047/// some instructions that require additional consideration and may require
4048/// manual mitigation. For more details, see:
4049/// https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection
4050///
 4051/// Emits an LFENCE after the instruction, or a warning when manual mitigation is required.
4052void X86AsmParser::applyLVILoadHardeningMitigation(MCInst &Inst,
4053 MCStreamer &Out) {
4054 auto Opcode = Inst.getOpcode();
4055 auto Flags = Inst.getFlags();
4056 if ((Flags & X86::IP_HAS_REPEAT) || (Flags & X86::IP_HAS_REPEAT_NE)) {
4057 // Information on REP string instructions that require manual mitigation can
4058 // be found here:
4059 // https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
4060 switch (Opcode) {
4061 case X86::CMPSB:
4062 case X86::CMPSW:
4063 case X86::CMPSL:
4064 case X86::CMPSQ:
4065 case X86::SCASB:
4066 case X86::SCASW:
4067 case X86::SCASL:
4068 case X86::SCASQ:
4069 emitWarningForSpecialLVIInstruction(Inst.getLoc());
4070 return;
4071 }
4072 } else if (Opcode == X86::REP_PREFIX || Opcode == X86::REPNE_PREFIX) {
4073 // If a REP instruction is found on its own line, it may or may not be
4074 // followed by a vulnerable instruction. Emit a warning just in case.
4075 emitWarningForSpecialLVIInstruction(Inst.getLoc());
4076 return;
4077 }
4078
4079 const MCInstrDesc &MCID = MII.get(Inst.getOpcode());
4080
4081 // Can't mitigate after terminators or calls. A control flow change may have
4082 // already occurred.
4083 if (MCID.isTerminator() || MCID.isCall())
4084 return;
4085
4086 // LFENCE has the mayLoad property, don't double fence.
4087 if (MCID.mayLoad() && Inst.getOpcode() != X86::LFENCE) {
 4088 MCInst FenceInst;
 4089 FenceInst.setOpcode(X86::LFENCE);
4090 Out.emitInstruction(FenceInst, getSTI());
4091 }
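  // For example, "movq (%rdi), %rax" under LVI load hardening is followed by an
  // automatically emitted "lfence".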
4092}
4093
4094void X86AsmParser::emitInstruction(MCInst &Inst, OperandVector &Operands,
4095 MCStreamer &Out) {
 4096 if (LVIInlineAsmHardening &&
 4097 getSTI().hasFeature(X86::FeatureLVIControlFlowIntegrity))
4098 applyLVICFIMitigation(Inst, Out);
4099
4100 Out.emitInstruction(Inst, getSTI());
4101
 4102 if (LVIInlineAsmHardening &&
 4103 getSTI().hasFeature(X86::FeatureLVILoadHardening))
4104 applyLVILoadHardeningMitigation(Inst, Out);
4105}
4106
 4107static unsigned getPrefixes(OperandVector &Operands) {
 4108 unsigned Result = 0;
4109 X86Operand &Prefix = static_cast<X86Operand &>(*Operands.back());
4110 if (Prefix.isPrefix()) {
4111 Result = Prefix.getPrefix();
4112 Operands.pop_back();
4113 }
4114 return Result;
4115}
4116
4117bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
 4118 OperandVector &Operands,
 4119 MCStreamer &Out, uint64_t &ErrorInfo,
 4120 bool MatchingInlineAsm) {
 4121 assert(!Operands.empty() && "Unexpected empty operand list!");
4122 assert((*Operands[0]).isToken() && "Leading operand should always be a mnemonic!");
4123
4124 // First, handle aliases that expand to multiple instructions.
4125 MatchFPUWaitAlias(IDLoc, static_cast<X86Operand &>(*Operands[0]), Operands,
4126 Out, MatchingInlineAsm);
4127 unsigned Prefixes = getPrefixes(Operands);
4128
4129 MCInst Inst;
4130
4131 // If REX/REX2/VEX/EVEX encoding is forced, we need to pass the USE_* flag to
4132 // the encoder and printer.
4133 if (ForcedOpcodePrefix == OpcodePrefix_REX)
4134 Prefixes |= X86::IP_USE_REX;
4135 else if (ForcedOpcodePrefix == OpcodePrefix_REX2)
4136 Prefixes |= X86::IP_USE_REX2;
4137 else if (ForcedOpcodePrefix == OpcodePrefix_VEX)
4138 Prefixes |= X86::IP_USE_VEX;
4139 else if (ForcedOpcodePrefix == OpcodePrefix_VEX2)
4140 Prefixes |= X86::IP_USE_VEX2;
4141 else if (ForcedOpcodePrefix == OpcodePrefix_VEX3)
4142 Prefixes |= X86::IP_USE_VEX3;
4143 else if (ForcedOpcodePrefix == OpcodePrefix_EVEX)
4144 Prefixes |= X86::IP_USE_EVEX;
4145
4146 // Set encoded flags for {disp8} and {disp32}.
4147 if (ForcedDispEncoding == DispEncoding_Disp8)
4148 Prefixes |= X86::IP_USE_DISP8;
4149 else if (ForcedDispEncoding == DispEncoding_Disp32)
4150 Prefixes |= X86::IP_USE_DISP32;
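  // For example, the pseudo-prefixes "{evex}" and "{disp32}" parsed earlier set
  // ForcedOpcodePrefix / ForcedDispEncoding, which are translated into the
  // IP_USE_EVEX / IP_USE_DISP32 flags above.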
4151
4152 if (Prefixes)
4153 Inst.setFlags(Prefixes);
4154
4155 return isParsingIntelSyntax()
4156 ? matchAndEmitIntelInstruction(IDLoc, Opcode, Inst, Operands, Out,
4157 ErrorInfo, MatchingInlineAsm)
4158 : matchAndEmitATTInstruction(IDLoc, Opcode, Inst, Operands, Out,
4159 ErrorInfo, MatchingInlineAsm);
4160}
4161
4162void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
 4163 OperandVector &Operands, MCStreamer &Out,
 4164 bool MatchingInlineAsm) {
4165 // FIXME: This should be replaced with a real .td file alias mechanism.
4166 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
4167 // call.
4168 const char *Repl = StringSwitch<const char *>(Op.getToken())
4169 .Case("finit", "fninit")
4170 .Case("fsave", "fnsave")
4171 .Case("fstcw", "fnstcw")
4172 .Case("fstcww", "fnstcw")
4173 .Case("fstenv", "fnstenv")
4174 .Case("fstsw", "fnstsw")
4175 .Case("fstsww", "fnstsw")
4176 .Case("fclex", "fnclex")
4177 .Default(nullptr);
4178 if (Repl) {
4179 MCInst Inst;
4180 Inst.setOpcode(X86::WAIT);
4181 Inst.setLoc(IDLoc);
4182 if (!MatchingInlineAsm)
4183 emitInstruction(Inst, Operands, Out);
4184 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
4185 }
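  // For example, "fstsw %ax" is expanded here into a standalone "wait"
  // instruction followed by "fnstsw %ax".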
4186}
4187
4188bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc,
4189 const FeatureBitset &MissingFeatures,
4190 bool MatchingInlineAsm) {
4191 assert(MissingFeatures.any() && "Unknown missing feature!");
4192 SmallString<126> Msg;
4194 OS << "instruction requires:";
4195 for (unsigned i = 0, e = MissingFeatures.size(); i != e; ++i) {
4196 if (MissingFeatures[i])
4197 OS << ' ' << getSubtargetFeatureName(i);
4198 }
4199 return Error(IDLoc, OS.str(), SMRange(), MatchingInlineAsm);
4200}
4201
4202unsigned X86AsmParser::checkTargetMatchPredicate(MCInst &Inst) {
4203 unsigned Opc = Inst.getOpcode();
4204 const MCInstrDesc &MCID = MII.get(Opc);
4205 uint64_t TSFlags = MCID.TSFlags;
4206
4207 if (UseApxExtendedReg && !X86II::canUseApxExtendedReg(MCID))
4208 return Match_Unsupported;
4209 if (ForcedNoFlag == !(TSFlags & X86II::EVEX_NF) && !X86::isCFCMOVCC(Opc))
4210 return Match_Unsupported;
4211
4212 switch (ForcedOpcodePrefix) {
4213 case OpcodePrefix_Default:
4214 break;
4215 case OpcodePrefix_REX:
4216 case OpcodePrefix_REX2:
4217 if (TSFlags & X86II::EncodingMask)
4218 return Match_Unsupported;
4219 break;
4220 case OpcodePrefix_VEX:
4221 case OpcodePrefix_VEX2:
4222 case OpcodePrefix_VEX3:
4223 if ((TSFlags & X86II::EncodingMask) != X86II::VEX)
4224 return Match_Unsupported;
4225 break;
4226 case OpcodePrefix_EVEX:
4227 if (is64BitMode() && (TSFlags & X86II::EncodingMask) != X86II::EVEX &&
4228 !X86::isCMP(Opc) && !X86::isTEST(Opc))
4229 return Match_Unsupported;
4230 if (!is64BitMode() && (TSFlags & X86II::EncodingMask) != X86II::EVEX)
4231 return Match_Unsupported;
4232 break;
4233 }
4234
4236 (ForcedOpcodePrefix != OpcodePrefix_VEX &&
4237 ForcedOpcodePrefix != OpcodePrefix_VEX2 &&
4238 ForcedOpcodePrefix != OpcodePrefix_VEX3))
4239 return Match_Unsupported;
4240
4241 return Match_Success;
4242}
4243
4244bool X86AsmParser::matchAndEmitATTInstruction(
4245 SMLoc IDLoc, unsigned &Opcode, MCInst &Inst, OperandVector &Operands,
4246 MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) {
4247 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
4248 SMRange EmptyRange = std::nullopt;
4249 // In 16-bit mode, if data32 is specified, temporarily switch to 32-bit mode
4250 // when matching the instruction.
4251 if (ForcedDataPrefix == X86::Is32Bit)
4252 SwitchMode(X86::Is32Bit);
4253 // First, try a direct match.
4254 FeatureBitset MissingFeatures;
4255 unsigned OriginalError = MatchInstruction(Operands, Inst, ErrorInfo,
4256 MissingFeatures, MatchingInlineAsm,
4257 isParsingIntelSyntax());
4258 if (ForcedDataPrefix == X86::Is32Bit) {
4259 SwitchMode(X86::Is16Bit);
4260 ForcedDataPrefix = 0;
4261 }
4262 switch (OriginalError) {
4263 default: llvm_unreachable("Unexpected match result!");
4264 case Match_Success:
4265 if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
4266 return true;
4267 // Some instructions need post-processing to, for example, tweak which
4268 // encoding is selected. Loop on it while changes happen so the
4269 // individual transformations can chain off each other.
4270 if (!MatchingInlineAsm)
4271 while (processInstruction(Inst, Operands))
4272 ;
4273
4274 Inst.setLoc(IDLoc);
4275 if (!MatchingInlineAsm)
4276 emitInstruction(Inst, Operands, Out);
4277 Opcode = Inst.getOpcode();
4278 return false;
4279 case Match_InvalidImmUnsignedi4: {
4280 SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc();
4281 if (ErrorLoc == SMLoc())
4282 ErrorLoc = IDLoc;
4283 return Error(ErrorLoc, "immediate must be an integer in range [0, 15]",
4284 EmptyRange, MatchingInlineAsm);
4285 }
4286 case Match_MissingFeature:
4287 return ErrorMissingFeature(IDLoc, MissingFeatures, MatchingInlineAsm);
4288 case Match_InvalidOperand:
4289 case Match_MnemonicFail:
4290 case Match_Unsupported:
4291 break;
4292 }
4293 if (Op.getToken().empty()) {
4294 Error(IDLoc, "instruction must have size higher than 0", EmptyRange,
4295 MatchingInlineAsm);
4296 return true;
4297 }
4298
4299 // FIXME: Ideally, we would only attempt suffix matches for things which are
4300 // valid prefixes, and we could just infer the right unambiguous
4301 // type. However, that requires substantially more matcher support than the
4302 // following hack.
4303
4304 // Change the operand to point to a temporary token.
4305 StringRef Base = Op.getToken();
4306 SmallString<16> Tmp;
4307 Tmp += Base;
4308 Tmp += ' ';
4309 Op.setTokenValue(Tmp);
4310
4311 // If this instruction starts with an 'f', then it is a floating point stack
4312 // instruction. These come in up to three forms for 32-bit, 64-bit, and
4313 // 80-bit floating point, which use the suffixes s,l,t respectively.
4314 //
4315 // Otherwise, we assume that this may be an integer instruction, which comes
4316 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
4317 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
4318 // MemSize corresponding to Suffixes. { 8, 16, 32, 64 } { 32, 64, 80, 0 }
4319 const char *MemSize = Base[0] != 'f' ? "\x08\x10\x20\x40" : "\x20\x40\x50\0";
4320
4321 // Check for the various suffix matches.
4322 uint64_t ErrorInfoIgnore;
4323 FeatureBitset ErrorInfoMissingFeatures; // Init suppresses compiler warnings.
4324 unsigned Match[4];
4325
 4326 // Some instructions, like VPMULDQ, are NOT suffixed variants of another
 4327 // mnemonic (there is no VPMULD) but distinct instructions. So make sure the
 4328 // suffix matcher only applies to memory variants whose size matches the suffix.
 4329 // FIXME: This flag is a workaround for legacy instructions that didn't
 4330 // declare a non-suffixed variant of their assembly syntax.
4331 bool HasVectorReg = false;
4332 X86Operand *MemOp = nullptr;
4333 for (const auto &Op : Operands) {
4334 X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
4335 if (X86Op->isVectorReg())
4336 HasVectorReg = true;
4337 else if (X86Op->isMem()) {
4338 MemOp = X86Op;
4339 assert(MemOp->Mem.Size == 0 && "Memory size always 0 under ATT syntax");
 4340 // We have found an unqualified memory operand; break,
 4341 // since IA allows only one memory operand.
4342 break;
4343 }
4344 }
4345
4346 for (unsigned I = 0, E = std::size(Match); I != E; ++I) {
4347 Tmp.back() = Suffixes[I];
4348 if (MemOp && HasVectorReg)
4349 MemOp->Mem.Size = MemSize[I];
4350 Match[I] = Match_MnemonicFail;
4351 if (MemOp || !HasVectorReg) {
4352 Match[I] =
4353 MatchInstruction(Operands, Inst, ErrorInfoIgnore, MissingFeatures,
4354 MatchingInlineAsm, isParsingIntelSyntax());
4355 // If this returned as a missing feature failure, remember that.
4356 if (Match[I] == Match_MissingFeature)
4357 ErrorInfoMissingFeatures = MissingFeatures;
4358 }
4359 }
4360
4361 // Restore the old token.
4362 Op.setTokenValue(Base);
4363
4364 // If exactly one matched, then we treat that as a successful match (and the
4365 // instruction will already have been filled in correctly, since the failing
4366 // matches won't have modified it).
4367 unsigned NumSuccessfulMatches = llvm::count(Match, Match_Success);
4368 if (NumSuccessfulMatches == 1) {
4369 if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
4370 return true;
4371 // Some instructions need post-processing to, for example, tweak which
4372 // encoding is selected. Loop on it while changes happen so the
4373 // individual transformations can chain off each other.
4374 if (!MatchingInlineAsm)
4375 while (processInstruction(Inst, Operands))
4376 ;
4377
4378 Inst.setLoc(IDLoc);
4379 if (!MatchingInlineAsm)
4380 emitInstruction(Inst, Operands, Out);
4381 Opcode = Inst.getOpcode();
4382 return false;
4383 }
4384
4385 // Otherwise, the match failed, try to produce a decent error message.
4386
4387 // If we had multiple suffix matches, then identify this as an ambiguous
4388 // match.
4389 if (NumSuccessfulMatches > 1) {
4390 char MatchChars[4];
4391 unsigned NumMatches = 0;
4392 for (unsigned I = 0, E = std::size(Match); I != E; ++I)
4393 if (Match[I] == Match_Success)
4394 MatchChars[NumMatches++] = Suffixes[I];
4395
4396 SmallString<126> Msg;
4398 OS << "ambiguous instructions require an explicit suffix (could be ";
4399 for (unsigned i = 0; i != NumMatches; ++i) {
4400 if (i != 0)
4401 OS << ", ";
4402 if (i + 1 == NumMatches)
4403 OS << "or ";
4404 OS << "'" << Base << MatchChars[i] << "'";
4405 }
4406 OS << ")";
4407 Error(IDLoc, OS.str(), EmptyRange, MatchingInlineAsm);
4408 return true;
4409 }
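  // For example, "add $1, (%rax)" matches with all four of the b/w/l/q suffixes
  // and is rejected above as "ambiguous instructions require an explicit suffix
  // (could be 'addb', 'addw', 'addl', or 'addq')".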
4410
4411 // Okay, we know that none of the variants matched successfully.
4412
4413 // If all of the instructions reported an invalid mnemonic, then the original
4414 // mnemonic was invalid.
4415 if (llvm::count(Match, Match_MnemonicFail) == 4) {
4416 if (OriginalError == Match_MnemonicFail)
4417 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
4418 Op.getLocRange(), MatchingInlineAsm);
4419
4420 if (OriginalError == Match_Unsupported)
4421 return Error(IDLoc, "unsupported instruction", EmptyRange,
4422 MatchingInlineAsm);
4423
4424 assert(OriginalError == Match_InvalidOperand && "Unexpected error");
4425 // Recover location info for the operand if we know which was the problem.
4426 if (ErrorInfo != ~0ULL) {
4427 if (ErrorInfo >= Operands.size())
4428 return Error(IDLoc, "too few operands for instruction", EmptyRange,
4429 MatchingInlineAsm);
4430
4431 X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
4432 if (Operand.getStartLoc().isValid()) {
4433 SMRange OperandRange = Operand.getLocRange();
4434 return Error(Operand.getStartLoc(), "invalid operand for instruction",
4435 OperandRange, MatchingInlineAsm);
4436 }
4437 }
4438
4439 return Error(IDLoc, "invalid operand for instruction", EmptyRange,
4440 MatchingInlineAsm);
4441 }
4442
4443 // If one instruction matched as unsupported, report this as unsupported.
4444 if (llvm::count(Match, Match_Unsupported) == 1) {
4445 return Error(IDLoc, "unsupported instruction", EmptyRange,
4446 MatchingInlineAsm);
4447 }
4448
4449 // If one instruction matched with a missing feature, report this as a
4450 // missing feature.
4451 if (llvm::count(Match, Match_MissingFeature) == 1) {
4452 ErrorInfo = Match_MissingFeature;
4453 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures,
4454 MatchingInlineAsm);
4455 }
4456
4457 // If one instruction matched with an invalid operand, report this as an
4458 // operand failure.
4459 if (llvm::count(Match, Match_InvalidOperand) == 1) {
4460 return Error(IDLoc, "invalid operand for instruction", EmptyRange,
4461 MatchingInlineAsm);
4462 }
4463
4464 // If all of these were an outright failure, report it in a useless way.
4465 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
4466 EmptyRange, MatchingInlineAsm);
4467 return true;
4468}
4469
4470bool X86AsmParser::matchAndEmitIntelInstruction(
4471 SMLoc IDLoc, unsigned &Opcode, MCInst &Inst, OperandVector &Operands,
4472 MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) {
4473 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
4474 SMRange EmptyRange = std::nullopt;
4475 // Find one unsized memory operand, if present.
4476 X86Operand *UnsizedMemOp = nullptr;
4477 for (const auto &Op : Operands) {
4478 X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
4479 if (X86Op->isMemUnsized()) {
4480 UnsizedMemOp = X86Op;
 4481 // We have found an unqualified memory operand; break,
 4482 // since IA allows only one memory operand.
4483 break;
4484 }
4485 }
4486
4487 // Allow some instructions to have implicitly pointer-sized operands. This is
4488 // compatible with gas.
4489 StringRef Mnemonic = (static_cast<X86Operand &>(*Operands[0])).getToken();
4490 if (UnsizedMemOp) {
4491 static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
4492 for (const char *Instr : PtrSizedInstrs) {
4493 if (Mnemonic == Instr) {
4494 UnsizedMemOp->Mem.Size = getPointerWidth();
4495 break;
4496 }
4497 }
4498 }
4499
 4500 SmallVector<unsigned, 8> Match;
 4501 FeatureBitset ErrorInfoMissingFeatures;
4502 FeatureBitset MissingFeatures;
4503 StringRef Base = (static_cast<X86Operand &>(*Operands[0])).getToken();
4504
 4505 // If an unsized push has an immediate operand, default its operand size to
 4506 // the pointer size.
4507 if (Mnemonic == "push" && Operands.size() == 2) {
4508 auto *X86Op = static_cast<X86Operand *>(Operands[1].get());
4509 if (X86Op->isImm()) {
 4510 // If it's not a constant, fall through and let the remainder take care of it.
4511 const auto *CE = dyn_cast<MCConstantExpr>(X86Op->getImm());
4512 unsigned Size = getPointerWidth();
4513 if (CE &&
4514 (isIntN(Size, CE->getValue()) || isUIntN(Size, CE->getValue()))) {
4515 SmallString<16> Tmp;
4516 Tmp += Base;
4517 Tmp += (is64BitMode())
4518 ? "q"
4519 : (is32BitMode()) ? "l" : (is16BitMode()) ? "w" : " ";
4520 Op.setTokenValue(Tmp);
4521 // Do match in ATT mode to allow explicit suffix usage.
4522 Match.push_back(MatchInstruction(Operands, Inst, ErrorInfo,
4523 MissingFeatures, MatchingInlineAsm,
4524 false /*isParsingIntelSyntax()*/));
4525 Op.setTokenValue(Base);
4526 }
4527 }
4528 }
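  // For example, in 64-bit mode the Intel-syntax "push 2" is also tried here as
  // the explicitly suffixed "pushq", so the pointer-sized form is preferred for
  // a small immediate.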
4529
4530 // If an unsized memory operand is present, try to match with each memory
4531 // operand size. In Intel assembly, the size is not part of the instruction
4532 // mnemonic.
4533 if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
4534 static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
4535 for (unsigned Size : MopSizes) {
4536 UnsizedMemOp->Mem.Size = Size;
4537 uint64_t ErrorInfoIgnore;
4538 unsigned LastOpcode = Inst.getOpcode();
4539 unsigned M = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
4540 MissingFeatures, MatchingInlineAsm,
4541 isParsingIntelSyntax());
4542 if (Match.empty() || LastOpcode != Inst.getOpcode())
4543 Match.push_back(M);
4544
4545 // If this returned as a missing feature failure, remember that.
4546 if (Match.back() == Match_MissingFeature)
4547 ErrorInfoMissingFeatures = MissingFeatures;
4548 }
4549
4550 // Restore the size of the unsized memory operand if we modified it.
4551 UnsizedMemOp->Mem.Size = 0;
4552 }
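  // For example, the Intel-syntax "inc [rax]" matches with several operand
  // sizes here and is rejected below as having an ambiguous operand size,
  // whereas "inc dword ptr [rax]" is unambiguous.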
4553
4554 // If we haven't matched anything yet, this is not a basic integer or FPU
4555 // operation. There shouldn't be any ambiguity in our mnemonic table, so try
4556 // matching with the unsized operand.
4557 if (Match.empty()) {
4558 Match.push_back(MatchInstruction(
4559 Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm,
4560 isParsingIntelSyntax()));
4561 // If this returned as a missing feature failure, remember that.
4562 if (Match.back() == Match_MissingFeature)
4563 ErrorInfoMissingFeatures = MissingFeatures;
4564 }
4565
4566 // Restore the size of the unsized memory operand if we modified it.
4567 if (UnsizedMemOp)
4568 UnsizedMemOp->Mem.Size = 0;
4569
4570 // If it's a bad mnemonic, all results will be the same.
4571 if (Match.back() == Match_MnemonicFail) {
4572 return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
4573 Op.getLocRange(), MatchingInlineAsm);
4574 }
4575
4576 unsigned NumSuccessfulMatches = llvm::count(Match, Match_Success);
4577
4578 // If matching was ambiguous and we had size information from the frontend,
 4579 // try again with that. This handles cases like "movzx eax, m8/m16".
4580 if (UnsizedMemOp && NumSuccessfulMatches > 1 &&
4581 UnsizedMemOp->getMemFrontendSize()) {
4582 UnsizedMemOp->Mem.Size = UnsizedMemOp->getMemFrontendSize();
4583 unsigned M = MatchInstruction(
4584 Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm,
4585 isParsingIntelSyntax());
4586 if (M == Match_Success)
4587 NumSuccessfulMatches = 1;
4588
4589 // Add a rewrite that encodes the size information we used from the
4590 // frontend.
4591 InstInfo->AsmRewrites->emplace_back(
4592 AOK_SizeDirective, UnsizedMemOp->getStartLoc(),
4593 /*Len=*/0, UnsizedMemOp->getMemFrontendSize());
4594 }
4595
4596 // If exactly one matched, then we treat that as a successful match (and the
4597 // instruction will already have been filled in correctly, since the failing
4598 // matches won't have modified it).
4599 if (NumSuccessfulMatches == 1) {
4600 if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
4601 return true;
4602 // Some instructions need post-processing to, for example, tweak which
4603 // encoding is selected. Loop on it while changes happen so the individual
4604 // transformations can chain off each other.
4605 if (!MatchingInlineAsm)
4606 while (processInstruction(Inst, Operands))
4607 ;
4608 Inst.setLoc(IDLoc);
4609 if (!MatchingInlineAsm)
4610 emitInstruction(Inst, Operands, Out);
4611 Opcode = Inst.getOpcode();
4612 return false;
4613 } else if (NumSuccessfulMatches > 1) {
4614 assert(UnsizedMemOp &&
4615 "multiple matches only possible with unsized memory operands");
4616 return Error(UnsizedMemOp->getStartLoc(),
4617 "ambiguous operand size for instruction '" + Mnemonic + "\'",
4618 UnsizedMemOp->getLocRange());
4619 }
4620
4621 // If one instruction matched as unsupported, report this as unsupported.
4622 if (llvm::count(Match, Match_Unsupported) == 1) {
4623 return Error(IDLoc, "unsupported instruction", EmptyRange,
4624 MatchingInlineAsm);
4625 }
4626
4627 // If one instruction matched with a missing feature, report this as a
4628 // missing feature.
4629 if (llvm::count(Match, Match_MissingFeature) == 1) {
4630 ErrorInfo = Match_MissingFeature;
4631 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures,
4632 MatchingInlineAsm);
4633 }
4634
4635 // If one instruction matched with an invalid operand, report this as an
4636 // operand failure.
4637 if (llvm::count(Match, Match_InvalidOperand) == 1) {
4638 return Error(IDLoc, "invalid operand for instruction", EmptyRange,
4639 MatchingInlineAsm);
4640 }
4641
4642 if (llvm::count(Match, Match_InvalidImmUnsignedi4) == 1) {
4643 SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc();
4644 if (ErrorLoc == SMLoc())
4645 ErrorLoc = IDLoc;
4646 return Error(ErrorLoc, "immediate must be an integer in range [0, 15]",
4647 EmptyRange, MatchingInlineAsm);
4648 }
4649
4650 // If all of these were an outright failure, report it in a useless way.
4651 return Error(IDLoc, "unknown instruction mnemonic", EmptyRange,
4652 MatchingInlineAsm);
4653}
4654
4655bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
4656 return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
4657}
4658
4659bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
4660 MCAsmParser &Parser = getParser();
4661 StringRef IDVal = DirectiveID.getIdentifier();
4662 if (IDVal.starts_with(".arch"))
4663 return parseDirectiveArch();
4664 if (IDVal.starts_with(".code"))
4665 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
4666 else if (IDVal.starts_with(".att_syntax")) {
4667 if (getLexer().isNot(AsmToken::EndOfStatement)) {
4668 if (Parser.getTok().getString() == "prefix")
4669 Parser.Lex();
4670 else if (Parser.getTok().getString() == "noprefix")
4671 return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
4672 "supported: registers must have a "
4673 "'%' prefix in .att_syntax");
4674 }
4675 getParser().setAssemblerDialect(0);
4676 return false;
4677 } else if (IDVal.starts_with(".intel_syntax")) {
4678 getParser().setAssemblerDialect(1);
4679 if (getLexer().isNot(AsmToken::EndOfStatement)) {
4680 if (Parser.getTok().getString() == "noprefix")
4681 Parser.Lex();
4682 else if (Parser.getTok().getString() == "prefix")
4683 return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
4684 "supported: registers must not have "
4685 "a '%' prefix in .intel_syntax");
4686 }
4687 return false;
4688 } else if (IDVal == ".nops")
4689 return parseDirectiveNops(DirectiveID.getLoc());
4690 else if (IDVal == ".even")
4691 return parseDirectiveEven(DirectiveID.getLoc());
4692 else if (IDVal == ".cv_fpo_proc")
4693 return parseDirectiveFPOProc(DirectiveID.getLoc());
4694 else if (IDVal == ".cv_fpo_setframe")
4695 return parseDirectiveFPOSetFrame(DirectiveID.getLoc());
4696 else if (IDVal == ".cv_fpo_pushreg")
4697 return parseDirectiveFPOPushReg(DirectiveID.getLoc());
4698 else if (IDVal == ".cv_fpo_stackalloc")
4699 return parseDirectiveFPOStackAlloc(DirectiveID.getLoc());
4700 else if (IDVal == ".cv_fpo_stackalign")
4701 return parseDirectiveFPOStackAlign(DirectiveID.getLoc());
4702 else if (IDVal == ".cv_fpo_endprologue")
4703 return parseDirectiveFPOEndPrologue(DirectiveID.getLoc());
4704 else if (IDVal == ".cv_fpo_endproc")
4705 return parseDirectiveFPOEndProc(DirectiveID.getLoc());
4706 else if (IDVal == ".seh_pushreg" ||
4707 (Parser.isParsingMasm() && IDVal.equals_insensitive(".pushreg")))
4708 return parseDirectiveSEHPushReg(DirectiveID.getLoc());
4709 else if (IDVal == ".seh_setframe" ||
4710 (Parser.isParsingMasm() && IDVal.equals_insensitive(".setframe")))
4711 return parseDirectiveSEHSetFrame(DirectiveID.getLoc());
4712 else if (IDVal == ".seh_savereg" ||
4713 (Parser.isParsingMasm() && IDVal.equals_insensitive(".savereg")))
4714 return parseDirectiveSEHSaveReg(DirectiveID.getLoc());
4715 else if (IDVal == ".seh_savexmm" ||
4716 (Parser.isParsingMasm() && IDVal.equals_insensitive(".savexmm128")))
4717 return parseDirectiveSEHSaveXMM(DirectiveID.getLoc());
4718 else if (IDVal == ".seh_pushframe" ||
4719 (Parser.isParsingMasm() && IDVal.equals_insensitive(".pushframe")))
4720 return parseDirectiveSEHPushFrame(DirectiveID.getLoc());
4721
4722 return true;
4723}
4724
4725bool X86AsmParser::parseDirectiveArch() {
4726 // Ignore .arch for now.
4727 getParser().parseStringToEndOfStatement();
4728 return false;
4729}
4730
4731/// parseDirectiveNops
4732/// ::= .nops size[, control]
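/// e.g. ".nops 16, 4" emits 16 bytes of NOPs, where the second operand limits
/// the length of each individual NOP.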
4733bool X86AsmParser::parseDirectiveNops(SMLoc L) {
4734 int64_t NumBytes = 0, Control = 0;
4735 SMLoc NumBytesLoc, ControlLoc;
4736 const MCSubtargetInfo& STI = getSTI();
4737 NumBytesLoc = getTok().getLoc();
4738 if (getParser().checkForValidSection() ||
4739 getParser().parseAbsoluteExpression(NumBytes))
4740 return true;
4741
4742 if (parseOptionalToken(AsmToken::Comma)) {
4743 ControlLoc = getTok().getLoc();
4744 if (getParser().parseAbsoluteExpression(Control))
4745 return true;
4746 }
4747 if (getParser().parseEOL())
4748 return true;
4749
4750 if (NumBytes <= 0) {
4751 Error(NumBytesLoc, "'.nops' directive with non-positive size");
4752 return false;
4753 }
4754
4755 if (Control < 0) {
4756 Error(ControlLoc, "'.nops' directive with negative NOP size");
4757 return false;
4758 }
4759
4760 /// Emit nops
4761 getParser().getStreamer().emitNops(NumBytes, Control, L, STI);
4762
4763 return false;
4764}
4765
4766/// parseDirectiveEven
4767/// ::= .even
4768bool X86AsmParser::parseDirectiveEven(SMLoc L) {
4769 if (parseEOL())
4770 return false;
4771
4772 const MCSection *Section = getStreamer().getCurrentSectionOnly();
4773 if (!Section) {
4774 getStreamer().initSections(false, getSTI());
4775 Section = getStreamer().getCurrentSectionOnly();
4776 }
4777 if (Section->useCodeAlign())
4778 getStreamer().emitCodeAlignment(Align(2), &getSTI(), 0);
4779 else
4780 getStreamer().emitValueToAlignment(Align(2), 0, 1, 0);
4781 return false;
4782}
4783
4784/// ParseDirectiveCode
4785/// ::= .code16 | .code32 | .code64
4786bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
4787 MCAsmParser &Parser = getParser();
4788 Code16GCC = false;
4789 if (IDVal == ".code16") {
4790 Parser.Lex();
4791 if (!is16BitMode()) {
4792 SwitchMode(X86::Is16Bit);
4793 getParser().getStreamer().emitAssemblerFlag(MCAF_Code16);
4794 }
4795 } else if (IDVal == ".code16gcc") {
4796 // .code16gcc parses as if in 32-bit mode, but emits code in 16-bit mode.
4797 Parser.Lex();
4798 Code16GCC = true;
4799 if (!is16BitMode()) {
4800 SwitchMode(X86::Is16Bit);
4801 getParser().getStreamer().emitAssemblerFlag(MCAF_Code16);
4802 }
4803 } else if (IDVal == ".code32") {
4804 Parser.Lex();
4805 if (!is32BitMode()) {
4806 SwitchMode(X86::Is32Bit);
4807 getParser().getStreamer().emitAssemblerFlag(MCAF_Code32);
4808 }
4809 } else if (IDVal == ".code64") {
4810 Parser.Lex();
4811 if (!is64BitMode()) {
4812 SwitchMode(X86::Is64Bit);
4813 getParser().getStreamer().emitAssemblerFlag(MCAF_Code64);
4814 }
4815 } else {
4816 Error(L, "unknown directive " + IDVal);
4817 return false;
4818 }
4819
4820 return false;
4821}
4822
4823// .cv_fpo_proc foo
4824bool X86AsmParser::parseDirectiveFPOProc(SMLoc L) {
4825 MCAsmParser &Parser = getParser();
4826 StringRef ProcName;
4827 int64_t ParamsSize;
4828 if (Parser.parseIdentifier(ProcName))
4829 return Parser.TokError("expected symbol name");
4830 if (Parser.parseIntToken(ParamsSize, "expected parameter byte count"))
4831 return true;
4832 if (!isUIntN(32, ParamsSize))
4833 return Parser.TokError("parameters size out of range");
4834 if (parseEOL())
4835 return true;
4836 MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName);
4837 return getTargetStreamer().emitFPOProc(ProcSym, ParamsSize, L);
4838}
4839
4840// .cv_fpo_setframe ebp
4841bool X86AsmParser::parseDirectiveFPOSetFrame(SMLoc L) {
 4842 MCRegister Reg;
 4843 SMLoc DummyLoc;
4844 if (parseRegister(Reg, DummyLoc, DummyLoc) || parseEOL())
4845 return true;
4846 return getTargetStreamer().emitFPOSetFrame(Reg, L);
4847}
4848
4849// .cv_fpo_pushreg ebx
4850bool X86AsmParser::parseDirectiveFPOPushReg(SMLoc L) {
 4851 MCRegister Reg;
 4852 SMLoc DummyLoc;
4853 if (parseRegister(Reg, DummyLoc, DummyLoc) || parseEOL())
4854 return true;
4855 return getTargetStreamer().emitFPOPushReg(Reg, L);
4856}
4857
4858// .cv_fpo_stackalloc 20
4859bool X86AsmParser::parseDirectiveFPOStackAlloc(SMLoc L) {
4860 MCAsmParser &Parser = getParser();
4861 int64_t Offset;
4862 if (Parser.parseIntToken(Offset, "expected offset") || parseEOL())
4863 return true;
4864 return getTargetStreamer().emitFPOStackAlloc(Offset, L);
4865}
4866
4867// .cv_fpo_stackalign 8
4868bool X86AsmParser::parseDirectiveFPOStackAlign(SMLoc L) {
4869 MCAsmParser &Parser = getParser();
4870 int64_t Offset;
4871 if (Parser.parseIntToken(Offset, "expected offset") || parseEOL())
4872 return true;
4873 return getTargetStreamer().emitFPOStackAlign(Offset, L);
4874}
4875
4876// .cv_fpo_endprologue
4877bool X86AsmParser::parseDirectiveFPOEndPrologue(SMLoc L) {
4878 MCAsmParser &Parser = getParser();
4879 if (Parser.parseEOL())
4880 return true;
4881 return getTargetStreamer().emitFPOEndPrologue(L);
4882}
4883
4884// .cv_fpo_endproc
4885bool X86AsmParser::parseDirectiveFPOEndProc(SMLoc L) {
4886 MCAsmParser &Parser = getParser();
4887 if (Parser.parseEOL())
4888 return true;
4889 return getTargetStreamer().emitFPOEndProc(L);
4890}
4891
4892bool X86AsmParser::parseSEHRegisterNumber(unsigned RegClassID,
4893 MCRegister &RegNo) {
4894 SMLoc startLoc = getLexer().getLoc();
4895 const MCRegisterInfo *MRI = getContext().getRegisterInfo();
4896
4897 // Try parsing the argument as a register first.
4898 if (getLexer().getTok().isNot(AsmToken::Integer)) {
4899 SMLoc endLoc;
4900 if (parseRegister(RegNo, startLoc, endLoc))
4901 return true;
4902
4903 if (!X86MCRegisterClasses[RegClassID].contains(RegNo)) {
4904 return Error(startLoc,
4905 "register is not supported for use with this directive");
4906 }
4907 } else {
4908 // Otherwise, an integer number matching the encoding of the desired
4909 // register may appear.
4910 int64_t EncodedReg;
4911 if (getParser().parseAbsoluteExpression(EncodedReg))
4912 return true;
4913
4914 // The SEH register number is the same as the encoding register number. Map
4915 // from the encoding back to the LLVM register number.
4916 RegNo = 0;
4917 for (MCPhysReg Reg : X86MCRegisterClasses[RegClassID]) {
4918 if (MRI->getEncodingValue(Reg) == EncodedReg) {
4919 RegNo = Reg;
4920 break;
4921 }
4922 }
4923 if (RegNo == 0) {
4924 return Error(startLoc,
4925 "incorrect register number for use with this directive");
4926 }
4927 }
4928
4929 return false;
4930}
4931
4932bool X86AsmParser::parseDirectiveSEHPushReg(SMLoc Loc) {
 4933 MCRegister Reg;
 4934 if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg))
4935 return true;
4936
4937 if (getLexer().isNot(AsmToken::EndOfStatement))
4938 return TokError("expected end of directive");
4939
4940 getParser().Lex();
4941 getStreamer().emitWinCFIPushReg(Reg, Loc);
4942 return false;
4943}
4944
4945bool X86AsmParser::parseDirectiveSEHSetFrame(SMLoc Loc) {
 4946 MCRegister Reg;
 4947 int64_t Off;
4948 if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg))
4949 return true;
4950 if (getLexer().isNot(AsmToken::Comma))
4951 return TokError("you must specify a stack pointer offset");
4952
4953 getParser().Lex();
4954 if (getParser().parseAbsoluteExpression(Off))
4955 return true;
4956
4957 if (getLexer().isNot(AsmToken::EndOfStatement))
4958 return TokError("expected end of directive");
4959
4960 getParser().Lex();
4961 getStreamer().emitWinCFISetFrame(Reg, Off, Loc);
4962 return false;
4963}
4964
4965bool X86AsmParser::parseDirectiveSEHSaveReg(SMLoc Loc) {
 4966 MCRegister Reg;
 4967 int64_t Off;
4968 if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg))
4969 return true;
4970 if (getLexer().isNot(AsmToken::Comma))
4971 return TokError("you must specify an offset on the stack");
4972
4973 getParser().Lex();
4974 if (getParser().parseAbsoluteExpression(Off))
4975 return true;
4976
4977 if (getLexer().isNot(AsmToken::EndOfStatement))
4978 return TokError("expected end of directive");
4979
4980 getParser().Lex();
4981 getStreamer().emitWinCFISaveReg(Reg, Off, Loc);
4982 return false;
4983}
4984
4985bool X86AsmParser::parseDirectiveSEHSaveXMM(SMLoc Loc) {
 4986 MCRegister Reg;
 4987 int64_t Off;
4988 if (parseSEHRegisterNumber(X86::VR128XRegClassID, Reg))
4989 return true;
4990 if (getLexer().isNot(AsmToken::Comma))
4991 return TokError("you must specify an offset on the stack");
4992
4993 getParser().Lex();
4994 if (getParser().parseAbsoluteExpression(Off))
4995 return true;
4996
4997 if (getLexer().isNot(AsmToken::EndOfStatement))
4998 return TokError("expected end of directive");
4999
5000 getParser().Lex();
5001 getStreamer().emitWinCFISaveXMM(Reg, Off, Loc);
5002 return false;
5003}
5004
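// .seh_pushframe [@code] (illustrative example: .seh_pushframe @code)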
5005bool X86AsmParser::parseDirectiveSEHPushFrame(SMLoc Loc) {
5006 bool Code = false;
5007 StringRef CodeID;
5008 if (getLexer().is(AsmToken::At)) {
5009 SMLoc startLoc = getLexer().getLoc();
5010 getParser().Lex();
5011 if (!getParser().parseIdentifier(CodeID)) {
5012 if (CodeID != "code")
5013 return Error(startLoc, "expected @code");
5014 Code = true;
5015 }
5016 }
5017
5018 if (getLexer().isNot(AsmToken::EndOfStatement))
5019 return TokError("expected end of directive");
5020
5021 getParser().Lex();
5022 getStreamer().emitWinCFIPushFrame(Code, Loc);
5023 return false;
5024}
5025
5026// Force static initialization.
5027extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86AsmParser() {
5028 RegisterMCAsmParser<X86AsmParser> X(getTheX86_32Target());
5029 RegisterMCAsmParser<X86AsmParser> Y(getTheX86_64Target());
5030}
5031
5032#define GET_MATCHER_IMPLEMENTATION
5033#include "X86GenAsmMatcher.inc"