LLVM 20.0.0git
MasmParser.cpp
Go to the documentation of this file.
1//===- MasmParser.cpp - Parser for Assembly Files -------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements MasmParser, the parser for MASM-style assembly files.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/ADT/APFloat.h"
14#include "llvm/ADT/APInt.h"
15#include "llvm/ADT/ArrayRef.h"
16#include "llvm/ADT/BitVector.h"
17#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/StringMap.h"
22#include "llvm/ADT/StringRef.h"
24#include "llvm/ADT/Twine.h"
27#include "llvm/MC/MCAsmInfo.h"
28#include "llvm/MC/MCCodeView.h"
29#include "llvm/MC/MCContext.h"
31#include "llvm/MC/MCDwarf.h"
32#include "llvm/MC/MCExpr.h"
34#include "llvm/MC/MCInstrDesc.h"
35#include "llvm/MC/MCInstrInfo.h"
44#include "llvm/MC/MCSection.h"
45#include "llvm/MC/MCStreamer.h"
47#include "llvm/MC/MCSymbol.h"
52#include "llvm/Support/Format.h"
53#include "llvm/Support/MD5.h"
56#include "llvm/Support/Path.h"
57#include "llvm/Support/SMLoc.h"
60#include <algorithm>
61#include <cassert>
62#include <climits>
63#include <cstddef>
64#include <cstdint>
65#include <ctime>
66#include <deque>
67#include <memory>
68#include <optional>
69#include <sstream>
70#include <string>
71#include <tuple>
72#include <utility>
73#include <vector>
74
75using namespace llvm;
76
77namespace {
78
79/// Helper types for tracking macro definitions.
80typedef std::vector<AsmToken> MCAsmMacroArgument;
81typedef std::vector<MCAsmMacroArgument> MCAsmMacroArguments;
82
83/// Helper class for storing information about an active macro instantiation.
84struct MacroInstantiation {
85 /// The location of the instantiation.
86 SMLoc InstantiationLoc;
87
88 /// The buffer where parsing should resume upon instantiation completion.
89 unsigned ExitBuffer;
90
91 /// The location where parsing should resume upon instantiation completion.
92 SMLoc ExitLoc;
93
94 /// The depth of TheCondStack at the start of the instantiation.
95 size_t CondStackDepth;
96};
97
/// State carried for a single parsed statement: the resulting opcode, an error
/// flag, an optional macro exit value and the rewrite list given by the caller.
98struct ParseStatementInfo {
99 /// The parsed operands from the last parsed statement.
 // NOTE(review): the member this comment describes is not visible here; its
 // declaration appears to have been lost from this copy of the file. Restore
 // it from the upstream source before building.
101
102 /// The opcode from the last parsed instruction.
103 unsigned Opcode = ~0U;
104
105 /// Was there an error parsing the inline assembly?
106 bool ParseError = false;
107
108 /// The value associated with a macro exit.
109 std::optional<std::string> ExitValue;
110
 /// Rewrite list passed in at construction. Held as a raw pointer; nothing in
 /// this struct releases it.
111 SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr;
112
 /// Default construction is disabled: a rewrite list pointer must be passed.
113 ParseStatementInfo() = delete;
114 ParseStatementInfo(SmallVectorImpl<AsmRewrite> *rewrites)
115 : AsmRewrites(rewrites) {}
116};
117
/// Selects which kind of data a struct field and its initializer hold.
118enum FieldType {
119 FT_INTEGRAL, // Initializer: integer expression, stored as an MCExpr.
120 FT_REAL, // Initializer: real number, stored as an APInt.
121 FT_STRUCT // Initializer: struct initializer, stored recursively.
122};
123
124struct FieldInfo;
/// Layout description of a struct or union: its fields, a by-name lookup table
/// and the offset/alignment bookkeeping that addField() maintains.
125struct StructInfo {
 // NOTE(review): the out-of-line constructor initializes a `Name` member, but
 // no such declaration is visible here; it appears to have been lost from
 // this copy of the file.
127 bool IsUnion = false;
128 bool Initializable = true;
 // Cap on per-field alignment: addField() aligns each new field to
 // min(Alignment, FieldAlignmentSize).
129 unsigned Alignment = 0;
 // Largest FieldAlignmentSize that addField() has been given so far.
130 unsigned AlignmentSize = 0;
 // Offset from which addField() aligns the next field.
131 unsigned NextOffset = 0;
132 unsigned Size = 0;
133 std::vector<FieldInfo> Fields;
 // Maps a lower-cased field name to the field's index in Fields.
134 StringMap<size_t> FieldsByName;
135
 /// Appends a field of kind \p FT, registers \p FieldName for lookup when it
 /// is non-empty, and returns a reference to the new entry in Fields.
136 FieldInfo &addField(StringRef FieldName, FieldType FT,
137 unsigned FieldAlignmentSize);
138
139 StructInfo() = default;
140 StructInfo(StringRef StructName, bool Union, unsigned AlignmentValue);
141};
142
143// FIXME: This should probably use a class hierarchy, raw pointers between the
144// objects, and dynamic type resolution instead of a union. On the other hand,
145// ownership then becomes much more complicated; the obvious thing would be to
146// use BumpPtrAllocator, but the lack of a destructor makes that messy.
147
148struct StructInitializer;
/// Payload of an integral field: a list of integer expressions.
149struct IntFieldInfo {
 // NOTE(review): both converting constructors assign to a `Values` member
 // whose declaration is not visible here; it appears to have been lost from
 // this copy of the file.
151
152 IntFieldInfo() = default;
 // Copies the given expression list into Values.
153 IntFieldInfo(const SmallVector<const MCExpr *, 1> &V) { Values = V; }
 // Takes over the given expression list by moving it into Values.
154 IntFieldInfo(SmallVector<const MCExpr *, 1> &&V) { Values = std::move(V); }
155};
156struct RealFieldInfo {
157 SmallVector<APInt, 1> AsIntValues;
158
159 RealFieldInfo() = default;
160 RealFieldInfo(const SmallVector<APInt, 1> &V) { AsIntValues = V; }
161 RealFieldInfo(SmallVector<APInt, 1> &&V) { AsIntValues = std::move(V); }
162};
/// Payload of a struct-typed field: the initializers for the field together
/// with a copy of the layout of the struct being initialized.
163struct StructFieldInfo {
164 std::vector<StructInitializer> Initializers;
165 StructInfo Structure;
166
167 StructFieldInfo() = default;
 // Defined out of line; stores V in Initializers and S in Structure.
168 StructFieldInfo(std::vector<StructInitializer> V, StructInfo S);
169};
170
/// Hand-managed tagged union holding the initializer data of one field.
///
/// FT records which of IntInfo / RealInfo / StructInfo is the live member. The
/// union members have non-trivial types, so every special member function is
/// defined out of line and constructs or destroys the live member explicitly.
171class FieldInitializer {
172public:
 // Discriminator: names the union member that is currently alive.
173 FieldType FT;
174 union {
175 IntFieldInfo IntInfo;
176 RealFieldInfo RealInfo;
177 StructFieldInfo StructInfo;
178 };
179
 // Destroys the member selected by FT.
180 ~FieldInitializer();
 // Default-constructs the member selected by FT.
181 FieldInitializer(FieldType FT);
182
 // Each of these fixes FT to the matching kind and takes over the given data.
183 FieldInitializer(SmallVector<const MCExpr *, 1> &&Values);
184 FieldInitializer(SmallVector<APInt, 1> &&AsIntValues);
185 FieldInitializer(std::vector<StructInitializer> &&Initializers,
186 struct StructInfo Structure);
187
188 FieldInitializer(const FieldInitializer &Initializer);
189 FieldInitializer(FieldInitializer &&Initializer);
190
191 FieldInitializer &operator=(const FieldInitializer &Initializer);
192 FieldInitializer &operator=(FieldInitializer &&Initializer);
193};
194
/// Initializer for one struct instance, held as a list of field initializers.
// NOTE(review): presumably one entry per field of the struct; confirm against
// the code that fills FieldInitializers.
195struct StructInitializer {
196 std::vector<FieldInitializer> FieldInitializers;
197};
198
/// Describes a single field of a struct: where it sits, how large it is and
/// what its initializer contents are.
199struct FieldInfo {
200 // Offset of the field within the containing STRUCT.
201 unsigned Offset = 0;
202
203 // Total size of the field (= LengthOf * Type).
204 unsigned SizeOf = 0;
205
206 // Number of elements in the field (1 if scalar, >1 if an array).
207 unsigned LengthOf = 0;
208
209 // Size of a single entry in this field, in bytes ("type" in MASM standards).
210 unsigned Type = 0;
211
 // Initializer contents; their kind is fixed by the FieldType given to the
 // constructor.
212 FieldInitializer Contents;
213
 // Creates a field whose Contents are default-constructed for kind FT.
214 FieldInfo(FieldType FT) : Contents(FT) {}
215};
216
217StructFieldInfo::StructFieldInfo(std::vector<StructInitializer> V,
218 StructInfo S) {
219 Initializers = std::move(V);
220 Structure = S;
221}
222
/// Creates a struct or union description with no fields yet.
///
/// \param StructName Stored in Name.
/// \param Union Stored in IsUnion.
/// \param AlignmentValue Stored in Alignment, which addField() uses as the
/// upper bound when aligning each field.
223StructInfo::StructInfo(StringRef StructName, bool Union,
224 unsigned AlignmentValue)
225 : Name(StructName), IsUnion(Union), Alignment(AlignmentValue) {}
226
227FieldInfo &StructInfo::addField(StringRef FieldName, FieldType FT,
228 unsigned FieldAlignmentSize) {
229 if (!FieldName.empty())
230 FieldsByName[FieldName.lower()] = Fields.size();
231 Fields.emplace_back(FT);
232 FieldInfo &Field = Fields.back();
233 Field.Offset =
234 llvm::alignTo(NextOffset, std::min(Alignment, FieldAlignmentSize));
235 if (!IsUnion) {
236 NextOffset = std::max(NextOffset, Field.Offset);
237 }
238 AlignmentSize = std::max(AlignmentSize, FieldAlignmentSize);
239 return Field;
240}
241
242FieldInitializer::~FieldInitializer() {
243 switch (FT) {
244 case FT_INTEGRAL:
245 IntInfo.~IntFieldInfo();
246 break;
247 case FT_REAL:
248 RealInfo.~RealFieldInfo();
249 break;
250 case FT_STRUCT:
251 StructInfo.~StructFieldInfo();
252 break;
253 }
254}
255
256FieldInitializer::FieldInitializer(FieldType FT) : FT(FT) {
257 switch (FT) {
258 case FT_INTEGRAL:
259 new (&IntInfo) IntFieldInfo();
260 break;
261 case FT_REAL:
262 new (&RealInfo) RealFieldInfo();
263 break;
264 case FT_STRUCT:
265 new (&StructInfo) StructFieldInfo();
266 break;
267 }
268}
269
270FieldInitializer::FieldInitializer(SmallVector<const MCExpr *, 1> &&Values)
271 : FT(FT_INTEGRAL) {
272 new (&IntInfo) IntFieldInfo(std::move(Values));
273}
274
275FieldInitializer::FieldInitializer(SmallVector<APInt, 1> &&AsIntValues)
276 : FT(FT_REAL) {
277 new (&RealInfo) RealFieldInfo(std::move(AsIntValues));
278}
279
280FieldInitializer::FieldInitializer(
281 std::vector<StructInitializer> &&Initializers, struct StructInfo Structure)
282 : FT(FT_STRUCT) {
283 new (&StructInfo) StructFieldInfo(std::move(Initializers), Structure);
284}
285
286FieldInitializer::FieldInitializer(const FieldInitializer &Initializer)
287 : FT(Initializer.FT) {
288 switch (FT) {
289 case FT_INTEGRAL:
290 new (&IntInfo) IntFieldInfo(Initializer.IntInfo);
291 break;
292 case FT_REAL:
293 new (&RealInfo) RealFieldInfo(Initializer.RealInfo);
294 break;
295 case FT_STRUCT:
296 new (&StructInfo) StructFieldInfo(Initializer.StructInfo);
297 break;
298 }
299}
300
301FieldInitializer::FieldInitializer(FieldInitializer &&Initializer)
302 : FT(Initializer.FT) {
303 switch (FT) {
304 case FT_INTEGRAL:
305 new (&IntInfo) IntFieldInfo(Initializer.IntInfo);
306 break;
307 case FT_REAL:
308 new (&RealInfo) RealFieldInfo(Initializer.RealInfo);
309 break;
310 case FT_STRUCT:
311 new (&StructInfo) StructFieldInfo(Initializer.StructInfo);
312 break;
313 }
314}
315
316FieldInitializer &
317FieldInitializer::operator=(const FieldInitializer &Initializer) {
318 if (FT != Initializer.FT) {
319 switch (FT) {
320 case FT_INTEGRAL:
321 IntInfo.~IntFieldInfo();
322 break;
323 case FT_REAL:
324 RealInfo.~RealFieldInfo();
325 break;
326 case FT_STRUCT:
327 StructInfo.~StructFieldInfo();
328 break;
329 }
330 }
331 FT = Initializer.FT;
332 switch (FT) {
333 case FT_INTEGRAL:
334 IntInfo = Initializer.IntInfo;
335 break;
336 case FT_REAL:
337 RealInfo = Initializer.RealInfo;
338 break;
339 case FT_STRUCT:
340 StructInfo = Initializer.StructInfo;
341 break;
342 }
343 return *this;
344}
345
346FieldInitializer &FieldInitializer::operator=(FieldInitializer &&Initializer) {
347 if (FT != Initializer.FT) {
348 switch (FT) {
349 case FT_INTEGRAL:
350 IntInfo.~IntFieldInfo();
351 break;
352 case FT_REAL:
353 RealInfo.~RealFieldInfo();
354 break;
355 case FT_STRUCT:
356 StructInfo.~StructFieldInfo();
357 break;
358 }
359 }
360 FT = Initializer.FT;
361 switch (FT) {
362 case FT_INTEGRAL:
363 IntInfo = Initializer.IntInfo;
364 break;
365 case FT_REAL:
366 RealInfo = Initializer.RealInfo;
367 break;
368 case FT_STRUCT:
369 StructInfo = Initializer.StructInfo;
370 break;
371 }
372 return *this;
373}
374
375/// The concrete assembly parser instance.
376// Note that this is a full MCAsmParser, not an MCAsmParserExtension!
377// It's a peer of AsmParser, not of COFFAsmParser, WasmAsmParser, etc.
378class MasmParser : public MCAsmParser {
379private:
380 AsmLexer Lexer;
381 MCContext &Ctx;
382 MCStreamer &Out;
383 const MCAsmInfo &MAI;
385 SourceMgr::DiagHandlerTy SavedDiagHandler;
386 void *SavedDiagContext;
387 std::unique_ptr<MCAsmParserExtension> PlatformParser;
388
389 /// This is the current buffer index we're lexing from as managed by the
390 /// SourceMgr object.
391 unsigned CurBuffer;
392
393 /// time of assembly
394 struct tm TM;
395
396 BitVector EndStatementAtEOFStack;
397
398 AsmCond TheCondState;
399 std::vector<AsmCond> TheCondStack;
400
401 /// maps directive names to handler methods in parser
402 /// extensions. Extensions register themselves in this map by calling
403 /// addDirectiveHandler.
404 StringMap<ExtensionDirectiveHandler> ExtensionDirectiveMap;
405
406 /// maps assembly-time variable names to variables.
407 struct Variable {
408 enum RedefinableKind { NOT_REDEFINABLE, WARN_ON_REDEFINITION, REDEFINABLE };
409
411 RedefinableKind Redefinable = REDEFINABLE;
412 bool IsText = false;
413 std::string TextValue;
414 };
415 StringMap<Variable> Variables;
416
417 /// Stack of active struct definitions.
418 SmallVector<StructInfo, 1> StructInProgress;
419
420 /// Maps struct tags to struct definitions.
421 StringMap<StructInfo> Structs;
422
423 /// Maps data location names to types.
424 StringMap<AsmTypeInfo> KnownType;
425
426 /// Stack of active macro instantiations.
427 std::vector<MacroInstantiation*> ActiveMacros;
428
429 /// List of bodies of anonymous macros.
430 std::deque<MCAsmMacro> MacroLikeBodies;
431
432 /// Keeps track of how many .macro's have been instantiated.
433 unsigned NumOfMacroInstantiations;
434
435 /// The values from the last parsed cpp hash file line comment if any.
436 struct CppHashInfoTy {
438 int64_t LineNumber;
439 SMLoc Loc;
440 unsigned Buf;
441 CppHashInfoTy() : LineNumber(0), Buf(0) {}
442 };
443 CppHashInfoTy CppHashInfo;
444
445 /// The filename from the first cpp hash file line comment, if any.
446 StringRef FirstCppHashFilename;
447
448 /// List of forward directional labels for diagnosis at the end.
450
451 /// AssemblerDialect. ~0U means unset value and use value provided by MAI.
452 /// Defaults to 1U, meaning Intel.
453 unsigned AssemblerDialect = 1U;
454
455 /// is Darwin compatibility enabled?
456 bool IsDarwin = false;
457
458 /// Are we parsing ms-style inline assembly?
459 bool ParsingMSInlineAsm = false;
460
461 /// Did we already inform the user about inconsistent MD5 usage?
462 bool ReportedInconsistentMD5 = false;
463
464 // Current <...> expression depth.
465 unsigned AngleBracketDepth = 0U;
466
467 // Number of locals defined.
468 uint16_t LocalCounter = 0;
469
470public:
471 MasmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
472 const MCAsmInfo &MAI, struct tm TM, unsigned CB = 0);
473 MasmParser(const MasmParser &) = delete;
474 MasmParser &operator=(const MasmParser &) = delete;
475 ~MasmParser() override;
476
477 bool Run(bool NoInitialTextSection, bool NoFinalize = false) override;
478
480 ExtensionDirectiveHandler Handler) override {
481 ExtensionDirectiveMap[Directive] = Handler;
482 DirectiveKindMap.try_emplace(Directive, DK_HANDLER_DIRECTIVE);
483 }
484
485 void addAliasForDirective(StringRef Directive, StringRef Alias) override {
486 DirectiveKindMap[Directive] = DirectiveKindMap[Alias];
487 }
488
489 /// @name MCAsmParser Interface
490 /// {
491
492 SourceMgr &getSourceManager() override { return SrcMgr; }
493 MCAsmLexer &getLexer() override { return Lexer; }
494 MCContext &getContext() override { return Ctx; }
495 MCStreamer &getStreamer() override { return Out; }
496
497 CodeViewContext &getCVContext() { return Ctx.getCVContext(); }
498
499 unsigned getAssemblerDialect() override {
500 if (AssemblerDialect == ~0U)
501 return MAI.getAssemblerDialect();
502 else
503 return AssemblerDialect;
504 }
505 void setAssemblerDialect(unsigned i) override {
506 AssemblerDialect = i;
507 }
508
509 void Note(SMLoc L, const Twine &Msg, SMRange Range = std::nullopt) override;
510 bool Warning(SMLoc L, const Twine &Msg,
511 SMRange Range = std::nullopt) override;
512 bool printError(SMLoc L, const Twine &Msg,
513 SMRange Range = std::nullopt) override;
514
515 enum ExpandKind { ExpandMacros, DoNotExpandMacros };
516 const AsmToken &Lex(ExpandKind ExpandNextToken);
517 const AsmToken &Lex() override { return Lex(ExpandMacros); }
518
519 void setParsingMSInlineAsm(bool V) override {
520 ParsingMSInlineAsm = V;
521 // When parsing MS inline asm, we must lex 0b1101 and 0ABCH as binary and
522 // hex integer literals.
523 Lexer.setLexMasmIntegers(V);
524 }
525 bool isParsingMSInlineAsm() override { return ParsingMSInlineAsm; }
526
527 bool isParsingMasm() const override { return true; }
528
529 bool defineMacro(StringRef Name, StringRef Value) override;
530
531 bool lookUpField(StringRef Name, AsmFieldInfo &Info) const override;
532 bool lookUpField(StringRef Base, StringRef Member,
533 AsmFieldInfo &Info) const override;
534
535 bool lookUpType(StringRef Name, AsmTypeInfo &Info) const override;
536
537 bool parseMSInlineAsm(std::string &AsmString, unsigned &NumOutputs,
538 unsigned &NumInputs,
539 SmallVectorImpl<std::pair<void *, bool>> &OpDecls,
540 SmallVectorImpl<std::string> &Constraints,
542 const MCInstrInfo *MII, MCInstPrinter *IP,
543 MCAsmParserSemaCallback &SI) override;
544
545 bool parseExpression(const MCExpr *&Res);
546 bool parseExpression(const MCExpr *&Res, SMLoc &EndLoc) override;
547 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
548 AsmTypeInfo *TypeInfo) override;
549 bool parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) override;
550 bool parseParenExprOfDepth(unsigned ParenDepth, const MCExpr *&Res,
551 SMLoc &EndLoc) override;
552 bool parseAbsoluteExpression(int64_t &Res) override;
553
554 /// Parse a floating point expression using the float \p Semantics
555 /// and set \p Res to the value.
556 bool parseRealValue(const fltSemantics &Semantics, APInt &Res);
557
558 /// Parse an identifier or string (as a quoted identifier)
559 /// and set \p Res to the identifier contents.
560 enum IdentifierPositionKind { StandardPosition, StartOfStatement };
561 bool parseIdentifier(StringRef &Res, IdentifierPositionKind Position);
562 bool parseIdentifier(StringRef &Res) override {
563 return parseIdentifier(Res, StandardPosition);
564 }
565 void eatToEndOfStatement() override;
566
567 bool checkForValidSection() override;
568
569 /// }
570
571private:
572 bool expandMacros();
573 const AsmToken peekTok(bool ShouldSkipSpace = true);
574
575 bool parseStatement(ParseStatementInfo &Info,
577 bool parseCurlyBlockScope(SmallVectorImpl<AsmRewrite>& AsmStrRewrites);
578 bool parseCppHashLineFilenameComment(SMLoc L);
579
580 bool expandMacro(raw_svector_ostream &OS, StringRef Body,
583 const std::vector<std::string> &Locals, SMLoc L);
584
585 /// Are we inside a macro instantiation?
586 bool isInsideMacroInstantiation() {return !ActiveMacros.empty();}
587
588 /// Handle entry to macro instantiation.
589 ///
590 /// \param M The macro.
591 /// \param NameLoc Instantiation location.
592 bool handleMacroEntry(
593 const MCAsmMacro *M, SMLoc NameLoc,
595
596 /// Handle invocation of macro function.
597 ///
598 /// \param M The macro.
599 /// \param NameLoc Invocation location.
600 bool handleMacroInvocation(const MCAsmMacro *M, SMLoc NameLoc);
601
602 /// Handle exit from macro instantiation.
603 void handleMacroExit();
604
605 /// Extract AsmTokens for a macro argument.
606 bool
607 parseMacroArgument(const MCAsmMacroParameter *MP, MCAsmMacroArgument &MA,
609
610 /// Parse all macro arguments for a given macro.
611 bool
612 parseMacroArguments(const MCAsmMacro *M, MCAsmMacroArguments &A,
614
615 void printMacroInstantiations();
616
617 bool expandStatement(SMLoc Loc);
618
619 void printMessage(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Msg,
620 SMRange Range = std::nullopt) const {
622 SrcMgr.PrintMessage(Loc, Kind, Msg, Ranges);
623 }
624 static void DiagHandler(const SMDiagnostic &Diag, void *Context);
625
626 bool lookUpField(const StructInfo &Structure, StringRef Member,
627 AsmFieldInfo &Info) const;
628
629 /// Should we emit DWARF describing this assembler source? (Returns false if
630 /// the source has .file directives, which means we don't want to generate
631 /// info describing the assembler source itself.)
632 bool enabledGenDwarfForAssembly();
633
634 /// Enter the specified file. This returns true on failure.
635 bool enterIncludeFile(const std::string &Filename);
636
637 /// Reset the current lexer position to that given by \p Loc. The
638 /// current token is not set; clients should ensure Lex() is called
639 /// subsequently.
640 ///
641 /// \param InBuffer If not 0, should be the known buffer id that contains the
642 /// location.
643 void jumpToLoc(SMLoc Loc, unsigned InBuffer = 0,
644 bool EndStatementAtEOF = true);
645
646 /// Parse up to a token of kind \p EndTok and return the contents from the
647 /// current token up to (but not including) this token; the current token on
648 /// exit will be either this kind or EOF. Reads through instantiated macro
649 /// functions and text macros.
650 SmallVector<StringRef, 1> parseStringRefsTo(AsmToken::TokenKind EndTok);
651 std::string parseStringTo(AsmToken::TokenKind EndTok);
652
653 /// Parse up to the end of statement and return the contents from the current
654 /// token until the end of the statement; the current token on exit will be
655 /// either the EndOfStatement or EOF.
657
658 bool parseTextItem(std::string &Data);
659
660 unsigned getBinOpPrecedence(AsmToken::TokenKind K,
662
663 bool parseBinOpRHS(unsigned Precedence, const MCExpr *&Res, SMLoc &EndLoc);
664 bool parseParenExpr(const MCExpr *&Res, SMLoc &EndLoc);
665 bool parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc);
666
667 bool parseRegisterOrRegisterNumber(int64_t &Register, SMLoc DirectiveLoc);
668
669 bool parseCVFunctionId(int64_t &FunctionId, StringRef DirectiveName);
670 bool parseCVFileId(int64_t &FileId, StringRef DirectiveName);
671
672 // Generic (target and platform independent) directive parsing.
673 enum DirectiveKind {
674 DK_NO_DIRECTIVE, // Placeholder
675 DK_HANDLER_DIRECTIVE,
676 DK_ASSIGN,
677 DK_EQU,
678 DK_TEXTEQU,
679 DK_ASCII,
680 DK_ASCIZ,
681 DK_STRING,
682 DK_BYTE,
683 DK_SBYTE,
684 DK_WORD,
685 DK_SWORD,
686 DK_DWORD,
687 DK_SDWORD,
688 DK_FWORD,
689 DK_QWORD,
690 DK_SQWORD,
691 DK_DB,
692 DK_DD,
693 DK_DF,
694 DK_DQ,
695 DK_DW,
696 DK_REAL4,
697 DK_REAL8,
698 DK_REAL10,
699 DK_ALIGN,
700 DK_EVEN,
701 DK_ORG,
702 DK_ENDR,
703 DK_EXTERN,
704 DK_PUBLIC,
705 DK_COMM,
706 DK_COMMENT,
707 DK_INCLUDE,
708 DK_REPEAT,
709 DK_WHILE,
710 DK_FOR,
711 DK_FORC,
712 DK_IF,
713 DK_IFE,
714 DK_IFB,
715 DK_IFNB,
716 DK_IFDEF,
717 DK_IFNDEF,
718 DK_IFDIF,
719 DK_IFDIFI,
720 DK_IFIDN,
721 DK_IFIDNI,
722 DK_ELSEIF,
723 DK_ELSEIFE,
724 DK_ELSEIFB,
725 DK_ELSEIFNB,
726 DK_ELSEIFDEF,
727 DK_ELSEIFNDEF,
728 DK_ELSEIFDIF,
729 DK_ELSEIFDIFI,
730 DK_ELSEIFIDN,
731 DK_ELSEIFIDNI,
732 DK_ELSE,
733 DK_ENDIF,
734 DK_FILE,
735 DK_LINE,
736 DK_LOC,
737 DK_STABS,
738 DK_CV_FILE,
739 DK_CV_FUNC_ID,
740 DK_CV_INLINE_SITE_ID,
741 DK_CV_LOC,
742 DK_CV_LINETABLE,
743 DK_CV_INLINE_LINETABLE,
744 DK_CV_DEF_RANGE,
745 DK_CV_STRINGTABLE,
746 DK_CV_STRING,
747 DK_CV_FILECHECKSUMS,
748 DK_CV_FILECHECKSUM_OFFSET,
749 DK_CV_FPO_DATA,
750 DK_CFI_SECTIONS,
751 DK_CFI_STARTPROC,
752 DK_CFI_ENDPROC,
753 DK_CFI_DEF_CFA,
754 DK_CFI_DEF_CFA_OFFSET,
755 DK_CFI_ADJUST_CFA_OFFSET,
756 DK_CFI_DEF_CFA_REGISTER,
757 DK_CFI_OFFSET,
758 DK_CFI_REL_OFFSET,
759 DK_CFI_PERSONALITY,
760 DK_CFI_LSDA,
761 DK_CFI_REMEMBER_STATE,
762 DK_CFI_RESTORE_STATE,
763 DK_CFI_SAME_VALUE,
764 DK_CFI_RESTORE,
765 DK_CFI_ESCAPE,
766 DK_CFI_RETURN_COLUMN,
767 DK_CFI_SIGNAL_FRAME,
768 DK_CFI_UNDEFINED,
769 DK_CFI_REGISTER,
770 DK_CFI_WINDOW_SAVE,
771 DK_CFI_B_KEY_FRAME,
772 DK_MACRO,
773 DK_EXITM,
774 DK_ENDM,
775 DK_PURGE,
776 DK_ERR,
777 DK_ERRB,
778 DK_ERRNB,
779 DK_ERRDEF,
780 DK_ERRNDEF,
781 DK_ERRDIF,
782 DK_ERRDIFI,
783 DK_ERRIDN,
784 DK_ERRIDNI,
785 DK_ERRE,
786 DK_ERRNZ,
787 DK_ECHO,
788 DK_STRUCT,
789 DK_UNION,
790 DK_ENDS,
791 DK_END,
792 DK_PUSHFRAME,
793 DK_PUSHREG,
794 DK_SAVEREG,
795 DK_SAVEXMM128,
796 DK_SETFRAME,
797 DK_RADIX,
798 };
799
800 /// Maps directive name --> DirectiveKind enum, for directives parsed by this
801 /// class.
802 StringMap<DirectiveKind> DirectiveKindMap;
803
804 bool isMacroLikeDirective();
805
806 // Codeview def_range type parsing.
807 enum CVDefRangeType {
808 CVDR_DEFRANGE = 0, // Placeholder
809 CVDR_DEFRANGE_REGISTER,
810 CVDR_DEFRANGE_FRAMEPOINTER_REL,
811 CVDR_DEFRANGE_SUBFIELD_REGISTER,
812 CVDR_DEFRANGE_REGISTER_REL
813 };
814
815 /// Maps Codeview def_range types --> CVDefRangeType enum, for Codeview
816 /// def_range types parsed by this class.
817 StringMap<CVDefRangeType> CVDefRangeTypeMap;
818
819 // Builtin symbols handled by this class.
820 enum BuiltinSymbol {
821 BI_NO_SYMBOL, // Placeholder
822 BI_DATE,
823 BI_TIME,
824 BI_VERSION,
825 BI_FILECUR,
826 BI_FILENAME,
827 BI_LINE,
828 BI_CURSEG,
829 BI_CPU,
830 BI_INTERFACE,
831 BI_CODE,
832 BI_DATA,
833 BI_FARDATA,
834 BI_WORDSIZE,
835 BI_CODESIZE,
836 BI_DATASIZE,
837 BI_MODEL,
838 BI_STACK,
839 };
840
841 /// Maps builtin name --> BuiltinSymbol enum, for builtins handled by this
842 /// class.
843 StringMap<BuiltinSymbol> BuiltinSymbolMap;
844
845 const MCExpr *evaluateBuiltinValue(BuiltinSymbol Symbol, SMLoc StartLoc);
846
847 std::optional<std::string> evaluateBuiltinTextMacro(BuiltinSymbol Symbol,
848 SMLoc StartLoc);
849
850 // ".ascii", ".asciz", ".string"
851 bool parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated);
852
853 // "byte", "word", ...
854 bool emitIntValue(const MCExpr *Value, unsigned Size);
855 bool parseScalarInitializer(unsigned Size,
857 unsigned StringPadLength = 0);
858 bool parseScalarInstList(
859 unsigned Size, SmallVectorImpl<const MCExpr *> &Values,
861 bool emitIntegralValues(unsigned Size, unsigned *Count = nullptr);
862 bool addIntegralField(StringRef Name, unsigned Size);
863 bool parseDirectiveValue(StringRef IDVal, unsigned Size);
864 bool parseDirectiveNamedValue(StringRef TypeName, unsigned Size,
865 StringRef Name, SMLoc NameLoc);
866
867 // "real4", "real8", "real10"
868 bool emitRealValues(const fltSemantics &Semantics, unsigned *Count = nullptr);
869 bool addRealField(StringRef Name, const fltSemantics &Semantics, size_t Size);
870 bool parseDirectiveRealValue(StringRef IDVal, const fltSemantics &Semantics,
871 size_t Size);
872 bool parseRealInstList(
873 const fltSemantics &Semantics, SmallVectorImpl<APInt> &Values,
875 bool parseDirectiveNamedRealValue(StringRef TypeName,
876 const fltSemantics &Semantics,
877 unsigned Size, StringRef Name,
878 SMLoc NameLoc);
879
880 bool parseOptionalAngleBracketOpen();
881 bool parseAngleBracketClose(const Twine &Msg = "expected '>'");
882
883 bool parseFieldInitializer(const FieldInfo &Field,
884 FieldInitializer &Initializer);
885 bool parseFieldInitializer(const FieldInfo &Field,
886 const IntFieldInfo &Contents,
887 FieldInitializer &Initializer);
888 bool parseFieldInitializer(const FieldInfo &Field,
889 const RealFieldInfo &Contents,
890 FieldInitializer &Initializer);
891 bool parseFieldInitializer(const FieldInfo &Field,
892 const StructFieldInfo &Contents,
893 FieldInitializer &Initializer);
894
895 bool parseStructInitializer(const StructInfo &Structure,
896 StructInitializer &Initializer);
897 bool parseStructInstList(
898 const StructInfo &Structure, std::vector<StructInitializer> &Initializers,
900
901 bool emitFieldValue(const FieldInfo &Field);
902 bool emitFieldValue(const FieldInfo &Field, const IntFieldInfo &Contents);
903 bool emitFieldValue(const FieldInfo &Field, const RealFieldInfo &Contents);
904 bool emitFieldValue(const FieldInfo &Field, const StructFieldInfo &Contents);
905
906 bool emitFieldInitializer(const FieldInfo &Field,
907 const FieldInitializer &Initializer);
908 bool emitFieldInitializer(const FieldInfo &Field,
909 const IntFieldInfo &Contents,
910 const IntFieldInfo &Initializer);
911 bool emitFieldInitializer(const FieldInfo &Field,
912 const RealFieldInfo &Contents,
913 const RealFieldInfo &Initializer);
914 bool emitFieldInitializer(const FieldInfo &Field,
915 const StructFieldInfo &Contents,
916 const StructFieldInfo &Initializer);
917
918 bool emitStructInitializer(const StructInfo &Structure,
919 const StructInitializer &Initializer);
920
921 // User-defined types (structs, unions):
922 bool emitStructValues(const StructInfo &Structure, unsigned *Count = nullptr);
923 bool addStructField(StringRef Name, const StructInfo &Structure);
924 bool parseDirectiveStructValue(const StructInfo &Structure,
925 StringRef Directive, SMLoc DirLoc);
926 bool parseDirectiveNamedStructValue(const StructInfo &Structure,
927 StringRef Directive, SMLoc DirLoc,
929
930 // "=", "equ", "textequ"
931 bool parseDirectiveEquate(StringRef IDVal, StringRef Name,
932 DirectiveKind DirKind, SMLoc NameLoc);
933
934 bool parseDirectiveOrg(); // "org"
935
936 bool emitAlignTo(int64_t Alignment);
937 bool parseDirectiveAlign(); // "align"
938 bool parseDirectiveEven(); // "even"
939
940 // ".file", ".line", ".loc", ".stabs"
941 bool parseDirectiveFile(SMLoc DirectiveLoc);
942 bool parseDirectiveLine();
943 bool parseDirectiveLoc();
944 bool parseDirectiveStabs();
945
946 // ".cv_file", ".cv_func_id", ".cv_inline_site_id", ".cv_loc", ".cv_linetable",
947 // ".cv_inline_linetable", ".cv_def_range", ".cv_string"
948 bool parseDirectiveCVFile();
949 bool parseDirectiveCVFuncId();
950 bool parseDirectiveCVInlineSiteId();
951 bool parseDirectiveCVLoc();
952 bool parseDirectiveCVLinetable();
953 bool parseDirectiveCVInlineLinetable();
954 bool parseDirectiveCVDefRange();
955 bool parseDirectiveCVString();
956 bool parseDirectiveCVStringTable();
957 bool parseDirectiveCVFileChecksums();
958 bool parseDirectiveCVFileChecksumOffset();
959 bool parseDirectiveCVFPOData();
960
961 // .cfi directives
962 bool parseDirectiveCFIRegister(SMLoc DirectiveLoc);
963 bool parseDirectiveCFIWindowSave(SMLoc DirectiveLoc);
964 bool parseDirectiveCFISections();
965 bool parseDirectiveCFIStartProc();
966 bool parseDirectiveCFIEndProc();
967 bool parseDirectiveCFIDefCfaOffset(SMLoc DirectiveLoc);
968 bool parseDirectiveCFIDefCfa(SMLoc DirectiveLoc);
969 bool parseDirectiveCFIAdjustCfaOffset(SMLoc DirectiveLoc);
970 bool parseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc);
971 bool parseDirectiveCFIOffset(SMLoc DirectiveLoc);
972 bool parseDirectiveCFIRelOffset(SMLoc DirectiveLoc);
973 bool parseDirectiveCFIPersonalityOrLsda(bool IsPersonality);
974 bool parseDirectiveCFIRememberState(SMLoc DirectiveLoc);
975 bool parseDirectiveCFIRestoreState(SMLoc DirectiveLoc);
976 bool parseDirectiveCFISameValue(SMLoc DirectiveLoc);
977 bool parseDirectiveCFIRestore(SMLoc DirectiveLoc);
978 bool parseDirectiveCFIEscape(SMLoc DirectiveLoc);
979 bool parseDirectiveCFIReturnColumn(SMLoc DirectiveLoc);
980 bool parseDirectiveCFISignalFrame();
981 bool parseDirectiveCFIUndefined(SMLoc DirectiveLoc);
982
983 // macro directives
984 bool parseDirectivePurgeMacro(SMLoc DirectiveLoc);
985 bool parseDirectiveExitMacro(SMLoc DirectiveLoc, StringRef Directive,
986 std::string &Value);
987 bool parseDirectiveEndMacro(StringRef Directive);
988 bool parseDirectiveMacro(StringRef Name, SMLoc NameLoc);
989
990 bool parseDirectiveStruct(StringRef Directive, DirectiveKind DirKind,
991 StringRef Name, SMLoc NameLoc);
992 bool parseDirectiveNestedStruct(StringRef Directive, DirectiveKind DirKind);
993 bool parseDirectiveEnds(StringRef Name, SMLoc NameLoc);
994 bool parseDirectiveNestedEnds();
995
996 bool parseDirectiveExtern();
997
998 /// Parse a directive like ".globl" which accepts a single symbol (which
999 /// should be a label or an external).
1000 bool parseDirectiveSymbolAttribute(MCSymbolAttr Attr);
1001
1002 bool parseDirectiveComm(bool IsLocal); // ".comm" and ".lcomm"
1003
1004 bool parseDirectiveComment(SMLoc DirectiveLoc); // "comment"
1005
1006 bool parseDirectiveInclude(); // "include"
1007
1008 // "if" or "ife"
1009 bool parseDirectiveIf(SMLoc DirectiveLoc, DirectiveKind DirKind);
1010 // "ifb" or "ifnb", depending on ExpectBlank.
1011 bool parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank);
1012 // "ifidn", "ifdif", "ifidni", or "ifdifi", depending on ExpectEqual and
1013 // CaseInsensitive.
1014 bool parseDirectiveIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
1015 bool CaseInsensitive);
1016 // "ifdef" or "ifndef", depending on expect_defined
1017 bool parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined);
1018 // "elseif" or "elseife"
1019 bool parseDirectiveElseIf(SMLoc DirectiveLoc, DirectiveKind DirKind);
1020 // "elseifb" or "elseifnb", depending on ExpectBlank.
1021 bool parseDirectiveElseIfb(SMLoc DirectiveLoc, bool ExpectBlank);
1022 // "elseifdef" or "elseifndef", depending on expect_defined
1023 bool parseDirectiveElseIfdef(SMLoc DirectiveLoc, bool expect_defined);
1024 // "elseifidn", "elseifdif", "elseifidni", or "elseifdifi", depending on
1025 // ExpectEqual and CaseInsensitive.
1026 bool parseDirectiveElseIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
1027 bool CaseInsensitive);
1028 bool parseDirectiveElse(SMLoc DirectiveLoc); // "else"
1029 bool parseDirectiveEndIf(SMLoc DirectiveLoc); // "endif"
1030 bool parseEscapedString(std::string &Data) override;
1031 bool parseAngleBracketString(std::string &Data) override;
1032
1033 // Macro-like directives
1034 MCAsmMacro *parseMacroLikeBody(SMLoc DirectiveLoc);
1035 void instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
1037 void instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
1038 SMLoc ExitLoc, raw_svector_ostream &OS);
1039 bool parseDirectiveRepeat(SMLoc DirectiveLoc, StringRef Directive);
1040 bool parseDirectiveFor(SMLoc DirectiveLoc, StringRef Directive);
1041 bool parseDirectiveForc(SMLoc DirectiveLoc, StringRef Directive);
1042 bool parseDirectiveWhile(SMLoc DirectiveLoc);
1043
1044 // "_emit" or "__emit"
1045 bool parseDirectiveMSEmit(SMLoc DirectiveLoc, ParseStatementInfo &Info,
1046 size_t Len);
1047
1048 // "align"
1049 bool parseDirectiveMSAlign(SMLoc DirectiveLoc, ParseStatementInfo &Info);
1050
1051 // "end"
1052 bool parseDirectiveEnd(SMLoc DirectiveLoc);
1053
1054 // ".err"
1055 bool parseDirectiveError(SMLoc DirectiveLoc);
1056 // ".errb" or ".errnb", depending on ExpectBlank.
1057 bool parseDirectiveErrorIfb(SMLoc DirectiveLoc, bool ExpectBlank);
1058 // ".errdef" or ".errndef", depending on ExpectBlank.
1059 bool parseDirectiveErrorIfdef(SMLoc DirectiveLoc, bool ExpectDefined);
1060 // ".erridn", ".errdif", ".erridni", or ".errdifi", depending on ExpectEqual
1061 // and CaseInsensitive.
1062 bool parseDirectiveErrorIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
1063 bool CaseInsensitive);
1064 // ".erre" or ".errnz", depending on ExpectZero.
1065 bool parseDirectiveErrorIfe(SMLoc DirectiveLoc, bool ExpectZero);
1066
1067 // ".radix"
1068 bool parseDirectiveRadix(SMLoc DirectiveLoc);
1069
1070 // "echo"
1071 bool parseDirectiveEcho(SMLoc DirectiveLoc);
1072
1073 void initializeDirectiveKindMap();
1074 void initializeCVDefRangeTypeMap();
1075 void initializeBuiltinSymbolMap();
1076};
1077
1078} // end anonymous namespace
1079
1080namespace llvm {
1081
1083
1085
1086} // end namespace llvm
1087
1089
1090MasmParser::MasmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
1091 const MCAsmInfo &MAI, struct tm TM, unsigned CB)
1092 : Lexer(MAI), Ctx(Ctx), Out(Out), MAI(MAI), SrcMgr(SM),
1093 CurBuffer(CB ? CB : SM.getMainFileID()), TM(TM) {
1094 HadError = false;
1095 // Save the old handler.
1096 SavedDiagHandler = SrcMgr.getDiagHandler();
1097 SavedDiagContext = SrcMgr.getDiagContext();
1098 // Set our own handler which calls the saved handler.
1100 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
1101 EndStatementAtEOFStack.push_back(true);
1102
1103 // Initialize the platform / file format parser.
1104 switch (Ctx.getObjectFileType()) {
1105 case MCContext::IsCOFF:
1106 PlatformParser.reset(createCOFFMasmParser());
1107 break;
1108 default:
1109 report_fatal_error("llvm-ml currently supports only COFF output.");
1110 break;
1111 }
1112
1113 initializeDirectiveKindMap();
1114 PlatformParser->Initialize(*this);
1115 initializeCVDefRangeTypeMap();
1116 initializeBuiltinSymbolMap();
1117
1118 NumOfMacroInstantiations = 0;
1119}
1120
1121MasmParser::~MasmParser() {
1122 assert((HadError || ActiveMacros.empty()) &&
1123 "Unexpected active macro instantiation!");
1124
1125 // Restore the saved diagnostics handler and context for use during
1126 // finalization.
1127 SrcMgr.setDiagHandler(SavedDiagHandler, SavedDiagContext);
1128}
1129
1130void MasmParser::printMacroInstantiations() {
1131 // Print the active macro instantiation stack.
1132 for (std::vector<MacroInstantiation *>::const_reverse_iterator
1133 it = ActiveMacros.rbegin(),
1134 ie = ActiveMacros.rend();
1135 it != ie; ++it)
1136 printMessage((*it)->InstantiationLoc, SourceMgr::DK_Note,
1137 "while in macro instantiation");
1138}
1139
1140void MasmParser::Note(SMLoc L, const Twine &Msg, SMRange Range) {
1141 printPendingErrors();
1142 printMessage(L, SourceMgr::DK_Note, Msg, Range);
1143 printMacroInstantiations();
1144}
1145
1146bool MasmParser::Warning(SMLoc L, const Twine &Msg, SMRange Range) {
1147 if (getTargetParser().getTargetOptions().MCNoWarn)
1148 return false;
1149 if (getTargetParser().getTargetOptions().MCFatalWarnings)
1150 return Error(L, Msg, Range);
1151 printMessage(L, SourceMgr::DK_Warning, Msg, Range);
1152 printMacroInstantiations();
1153 return false;
1154}
1155
1156bool MasmParser::printError(SMLoc L, const Twine &Msg, SMRange Range) {
1157 HadError = true;
1158 printMessage(L, SourceMgr::DK_Error, Msg, Range);
1159 printMacroInstantiations();
1160 return true;
1161}
1162
1163bool MasmParser::enterIncludeFile(const std::string &Filename) {
1164 std::string IncludedFile;
1165 unsigned NewBuf =
1166 SrcMgr.AddIncludeFile(Filename, Lexer.getLoc(), IncludedFile);
1167 if (!NewBuf)
1168 return true;
1169
1170 CurBuffer = NewBuf;
1171 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
1172 EndStatementAtEOFStack.push_back(true);
1173 return false;
1174}
1175
1176void MasmParser::jumpToLoc(SMLoc Loc, unsigned InBuffer,
1177 bool EndStatementAtEOF) {
1178 CurBuffer = InBuffer ? InBuffer : SrcMgr.FindBufferContainingLoc(Loc);
1179 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(),
1180 Loc.getPointer(), EndStatementAtEOF);
1181}
1182
1183bool MasmParser::expandMacros() {
1184 const AsmToken &Tok = getTok();
1185 const std::string IDLower = Tok.getIdentifier().lower();
1186
1187 const llvm::MCAsmMacro *M = getContext().lookupMacro(IDLower);
1188 if (M && M->IsFunction && peekTok().is(AsmToken::LParen)) {
1189 // This is a macro function invocation; expand it in place.
1190 const SMLoc MacroLoc = Tok.getLoc();
1191 const StringRef MacroId = Tok.getIdentifier();
1192 Lexer.Lex();
1193 if (handleMacroInvocation(M, MacroLoc)) {
1194 Lexer.UnLex(AsmToken(AsmToken::Error, MacroId));
1195 Lexer.Lex();
1196 }
1197 return false;
1198 }
1199
1200 std::optional<std::string> ExpandedValue;
1201 auto BuiltinIt = BuiltinSymbolMap.find(IDLower);
1202 if (BuiltinIt != BuiltinSymbolMap.end()) {
1203 ExpandedValue =
1204 evaluateBuiltinTextMacro(BuiltinIt->getValue(), Tok.getLoc());
1205 } else {
1206 auto VarIt = Variables.find(IDLower);
1207 if (VarIt != Variables.end() && VarIt->getValue().IsText) {
1208 ExpandedValue = VarIt->getValue().TextValue;
1209 }
1210 }
1211
1212 if (!ExpandedValue)
1213 return true;
1214 std::unique_ptr<MemoryBuffer> Instantiation =
1215 MemoryBuffer::getMemBufferCopy(*ExpandedValue, "<instantiation>");
1216
1217 // Jump to the macro instantiation and prime the lexer.
1218 CurBuffer =
1219 SrcMgr.AddNewSourceBuffer(std::move(Instantiation), Tok.getEndLoc());
1220 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(), nullptr,
1221 /*EndStatementAtEOF=*/false);
1222 EndStatementAtEOFStack.push_back(false);
1223 Lexer.Lex();
1224 return false;
1225}
1226
1227const AsmToken &MasmParser::Lex(ExpandKind ExpandNextToken) {
1228 if (Lexer.getTok().is(AsmToken::Error))
1229 Error(Lexer.getErrLoc(), Lexer.getErr());
1230
1231 // if it's a end of statement with a comment in it
1232 if (getTok().is(AsmToken::EndOfStatement)) {
1233 // if this is a line comment output it.
1234 if (!getTok().getString().empty() && getTok().getString().front() != '\n' &&
1235 getTok().getString().front() != '\r' && MAI.preserveAsmComments())
1236 Out.addExplicitComment(Twine(getTok().getString()));
1237 }
1238
1239 const AsmToken *tok = &Lexer.Lex();
1240 bool StartOfStatement = Lexer.isAtStartOfStatement();
1241
1242 while (ExpandNextToken == ExpandMacros && tok->is(AsmToken::Identifier)) {
1243 if (StartOfStatement) {
1244 AsmToken NextTok;
1245 MutableArrayRef<AsmToken> Buf(NextTok);
1246 size_t ReadCount = Lexer.peekTokens(Buf);
1247 if (ReadCount && NextTok.is(AsmToken::Identifier) &&
1248 (NextTok.getString().equals_insensitive("equ") ||
1249 NextTok.getString().equals_insensitive("textequ"))) {
1250 // This looks like an EQU or TEXTEQU directive; don't expand the
1251 // identifier, allowing for redefinitions.
1252 break;
1253 }
1254 }
1255 if (expandMacros())
1256 break;
1257 }
1258
1259 // Parse comments here to be deferred until end of next statement.
1260 while (tok->is(AsmToken::Comment)) {
1261 if (MAI.preserveAsmComments())
1262 Out.addExplicitComment(Twine(tok->getString()));
1263 tok = &Lexer.Lex();
1264 }
1265
1266 // Recognize and bypass line continuations.
1267 while (tok->is(AsmToken::BackSlash) &&
1268 peekTok().is(AsmToken::EndOfStatement)) {
1269 // Eat both the backslash and the end of statement.
1270 Lexer.Lex();
1271 tok = &Lexer.Lex();
1272 }
1273
1274 if (tok->is(AsmToken::Eof)) {
1275 // If this is the end of an included file, pop the parent file off the
1276 // include stack.
1277 SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1278 if (ParentIncludeLoc != SMLoc()) {
1279 EndStatementAtEOFStack.pop_back();
1280 jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1281 return Lex();
1282 }
1283 EndStatementAtEOFStack.pop_back();
1284 assert(EndStatementAtEOFStack.empty());
1285 }
1286
1287 return *tok;
1288}
1289
1290const AsmToken MasmParser::peekTok(bool ShouldSkipSpace) {
1291 AsmToken Tok;
1292
1294 size_t ReadCount = Lexer.peekTokens(Buf, ShouldSkipSpace);
1295
1296 if (ReadCount == 0) {
1297 // If this is the end of an included file, pop the parent file off the
1298 // include stack.
1299 SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1300 if (ParentIncludeLoc != SMLoc()) {
1301 EndStatementAtEOFStack.pop_back();
1302 jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1303 return peekTok(ShouldSkipSpace);
1304 }
1305 EndStatementAtEOFStack.pop_back();
1306 assert(EndStatementAtEOFStack.empty());
1307 }
1308
1309 assert(ReadCount == 1);
1310 return Tok;
1311}
1312
1313bool MasmParser::enabledGenDwarfForAssembly() {
1314 // Check whether the user specified -g.
1315 if (!getContext().getGenDwarfForAssembly())
1316 return false;
1317 // If we haven't encountered any .file directives (which would imply that
1318 // the assembler source was produced with debug info already) then emit one
1319 // describing the assembler source file itself.
1320 if (getContext().getGenDwarfFileNumber() == 0) {
1321 // Use the first #line directive for this, if any. It's preprocessed, so
1322 // there is no checksum, and of course no source directive.
1323 if (!FirstCppHashFilename.empty())
1324 getContext().setMCLineTableRootFile(
1325 /*CUID=*/0, getContext().getCompilationDir(), FirstCppHashFilename,
1326 /*Cksum=*/std::nullopt, /*Source=*/std::nullopt);
1327 const MCDwarfFile &RootFile =
1328 getContext().getMCDwarfLineTable(/*CUID=*/0).getRootFile();
1329 getContext().setGenDwarfFileNumber(getStreamer().emitDwarfFileDirective(
1330 /*CUID=*/0, getContext().getCompilationDir(), RootFile.Name,
1331 RootFile.Checksum, RootFile.Source));
1332 }
1333 return true;
1334}
1335
1336bool MasmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
1337 // Create the initial section, if requested.
1338 if (!NoInitialTextSection)
1339 Out.initSections(false, getTargetParser().getSTI());
1340
1341 // Prime the lexer.
1342 Lex();
1343
1344 HadError = false;
1345 AsmCond StartingCondState = TheCondState;
1346 SmallVector<AsmRewrite, 4> AsmStrRewrites;
1347
1348 // If we are generating dwarf for assembly source files save the initial text
1349 // section. (Don't use enabledGenDwarfForAssembly() here, as we aren't
1350 // emitting any actual debug info yet and haven't had a chance to parse any
1351 // embedded .file directives.)
1352 if (getContext().getGenDwarfForAssembly()) {
1353 MCSection *Sec = getStreamer().getCurrentSectionOnly();
1354 if (!Sec->getBeginSymbol()) {
1355 MCSymbol *SectionStartSym = getContext().createTempSymbol();
1356 getStreamer().emitLabel(SectionStartSym);
1357 Sec->setBeginSymbol(SectionStartSym);
1358 }
1359 bool InsertResult = getContext().addGenDwarfSection(Sec);
1360 assert(InsertResult && ".text section should not have debug info yet");
1361 (void)InsertResult;
1362 }
1363
1364 getTargetParser().onBeginOfFile();
1365
1366 // While we have input, parse each statement.
1367 while (Lexer.isNot(AsmToken::Eof) ||
1368 SrcMgr.getParentIncludeLoc(CurBuffer) != SMLoc()) {
1369 // Skip through the EOF at the end of an inclusion.
1370 if (Lexer.is(AsmToken::Eof))
1371 Lex();
1372
1373 ParseStatementInfo Info(&AsmStrRewrites);
1374 bool Parsed = parseStatement(Info, nullptr);
1375
1376 // If we have a Lexer Error we are on an Error Token. Load in Lexer Error
1377 // for printing ErrMsg via Lex() only if no (presumably better) parser error
1378 // exists.
1379 if (Parsed && !hasPendingError() && Lexer.getTok().is(AsmToken::Error)) {
1380 Lex();
1381 }
1382
1383 // parseStatement returned true so may need to emit an error.
1384 printPendingErrors();
1385
1386 // Skipping to the next line if needed.
1387 if (Parsed && !getLexer().isAtStartOfStatement())
1388 eatToEndOfStatement();
1389 }
1390
1391 getTargetParser().onEndOfFile();
1392 printPendingErrors();
1393
1394 // All errors should have been emitted.
1395 assert(!hasPendingError() && "unexpected error from parseStatement");
1396
1397 getTargetParser().flushPendingInstructions(getStreamer());
1398
1399 if (TheCondState.TheCond != StartingCondState.TheCond ||
1400 TheCondState.Ignore != StartingCondState.Ignore)
1401 printError(getTok().getLoc(), "unmatched .ifs or .elses");
1402 // Check to see there are no empty DwarfFile slots.
1403 const auto &LineTables = getContext().getMCDwarfLineTables();
1404 if (!LineTables.empty()) {
1405 unsigned Index = 0;
1406 for (const auto &File : LineTables.begin()->second.getMCDwarfFiles()) {
1407 if (File.Name.empty() && Index != 0)
1408 printError(getTok().getLoc(), "unassigned file number: " +
1409 Twine(Index) +
1410 " for .file directives");
1411 ++Index;
1412 }
1413 }
1414
1415 // Check to see that all assembler local symbols were actually defined.
1416 // Targets that don't do subsections via symbols may not want this, though,
1417 // so conservatively exclude them. Only do this if we're finalizing, though,
1418 // as otherwise we won't necessarily have seen everything yet.
1419 if (!NoFinalize) {
1420 if (MAI.hasSubsectionsViaSymbols()) {
1421 for (const auto &TableEntry : getContext().getSymbols()) {
1422 MCSymbol *Sym = TableEntry.getValue().Symbol;
1423 // Variable symbols may not be marked as defined, so check those
1424 // explicitly. If we know it's a variable, we have a definition for
1425 // the purposes of this check.
1426 if (Sym && Sym->isTemporary() && !Sym->isVariable() &&
1427 !Sym->isDefined())
1428 // FIXME: We would really like to refer back to where the symbol was
1429 // first referenced for a source location. We need to add something
1430 // to track that. Currently, we just point to the end of the file.
1431 printError(getTok().getLoc(), "assembler local symbol '" +
1432 Sym->getName() + "' not defined");
1433 }
1434 }
1435
1436 // Temporary symbols like the ones for directional jumps don't go in the
1437 // symbol table. They also need to be diagnosed in all (final) cases.
1438 for (std::tuple<SMLoc, CppHashInfoTy, MCSymbol *> &LocSym : DirLabels) {
1439 if (std::get<2>(LocSym)->isUndefined()) {
1440 // Reset the state of any "# line file" directives we've seen to the
1441 // context as it was at the diagnostic site.
1442 CppHashInfo = std::get<1>(LocSym);
1443 printError(std::get<0>(LocSym), "directional label undefined");
1444 }
1445 }
1446 }
1447
1448 // Finalize the output stream if there are no errors and if the client wants
1449 // us to.
1450 if (!HadError && !NoFinalize)
1451 Out.finish(Lexer.getLoc());
1452
1453 return HadError || getContext().hadError();
1454}
1455
1456bool MasmParser::checkForValidSection() {
1457 if (!ParsingMSInlineAsm && !getStreamer().getCurrentSectionOnly()) {
1458 Out.initSections(false, getTargetParser().getSTI());
1459 return Error(getTok().getLoc(),
1460 "expected section directive before assembly directive");
1461 }
1462 return false;
1463}
1464
1465/// Throw away the rest of the line for testing purposes.
1466void MasmParser::eatToEndOfStatement() {
1467 while (Lexer.isNot(AsmToken::EndOfStatement)) {
1468 if (Lexer.is(AsmToken::Eof)) {
1469 SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1470 if (ParentIncludeLoc == SMLoc()) {
1471 break;
1472 }
1473
1474 EndStatementAtEOFStack.pop_back();
1475 jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1476 }
1477
1478 Lexer.Lex();
1479 }
1480
1481 // Eat EOL.
1482 if (Lexer.is(AsmToken::EndOfStatement))
1483 Lexer.Lex();
1484}
1485
1487MasmParser::parseStringRefsTo(AsmToken::TokenKind EndTok) {
1489 const char *Start = getTok().getLoc().getPointer();
1490 while (Lexer.isNot(EndTok)) {
1491 if (Lexer.is(AsmToken::Eof)) {
1492 SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1493 if (ParentIncludeLoc == SMLoc()) {
1494 break;
1495 }
1496 Refs.emplace_back(Start, getTok().getLoc().getPointer() - Start);
1497
1498 EndStatementAtEOFStack.pop_back();
1499 jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1500 Lexer.Lex();
1501 Start = getTok().getLoc().getPointer();
1502 } else {
1503 Lexer.Lex();
1504 }
1505 }
1506 Refs.emplace_back(Start, getTok().getLoc().getPointer() - Start);
1507 return Refs;
1508}
1509
1510std::string MasmParser::parseStringTo(AsmToken::TokenKind EndTok) {
1511 SmallVector<StringRef, 1> Refs = parseStringRefsTo(EndTok);
1512 std::string Str;
1513 for (StringRef S : Refs) {
1514 Str.append(S.str());
1515 }
1516 return Str;
1517}
1518
1519StringRef MasmParser::parseStringToEndOfStatement() {
1520 const char *Start = getTok().getLoc().getPointer();
1521
1522 while (Lexer.isNot(AsmToken::EndOfStatement) && Lexer.isNot(AsmToken::Eof))
1523 Lexer.Lex();
1524
1525 const char *End = getTok().getLoc().getPointer();
1526 return StringRef(Start, End - Start);
1527}
1528
1529/// Parse a paren expression and return it.
1530/// NOTE: This assumes the leading '(' has already been consumed.
1531///
1532/// parenexpr ::= expr)
1533///
1534bool MasmParser::parseParenExpr(const MCExpr *&Res, SMLoc &EndLoc) {
1535 if (parseExpression(Res))
1536 return true;
1537 EndLoc = Lexer.getTok().getEndLoc();
1538 return parseRParen();
1539}
1540
1541/// Parse a bracket expression and return it.
1542/// NOTE: This assumes the leading '[' has already been consumed.
1543///
1544/// bracketexpr ::= expr]
1545///
1546bool MasmParser::parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc) {
1547 if (parseExpression(Res))
1548 return true;
1549 EndLoc = getTok().getEndLoc();
1550 if (parseToken(AsmToken::RBrac, "expected ']' in brackets expression"))
1551 return true;
1552 return false;
1553}
1554
1555/// Parse a primary expression and return it.
1556/// primaryexpr ::= (parenexpr
1557/// primaryexpr ::= symbol
1558/// primaryexpr ::= number
1559/// primaryexpr ::= '.'
1560/// primaryexpr ::= ~,+,-,'not' primaryexpr
1561/// primaryexpr ::= string
1562/// (a string is interpreted as a 64-bit number in big-endian base-256)
1563bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
1564 AsmTypeInfo *TypeInfo) {
1565 SMLoc FirstTokenLoc = getLexer().getLoc();
1566 AsmToken::TokenKind FirstTokenKind = Lexer.getKind();
1567 switch (FirstTokenKind) {
1568 default:
1569 return TokError("unknown token in expression");
1570 // If we have an error assume that we've already handled it.
1571 case AsmToken::Error:
1572 return true;
1573 case AsmToken::Exclaim:
1574 Lex(); // Eat the operator.
1575 if (parsePrimaryExpr(Res, EndLoc, nullptr))
1576 return true;
1577 Res = MCUnaryExpr::createLNot(Res, getContext(), FirstTokenLoc);
1578 return false;
1579 case AsmToken::Dollar:
1580 case AsmToken::At:
1581 case AsmToken::Identifier: {
1583 if (parseIdentifier(Identifier)) {
1584 // We may have failed but $ may be a valid token.
1585 if (getTok().is(AsmToken::Dollar)) {
1586 if (Lexer.getMAI().getDollarIsPC()) {
1587 Lex();
1588 // This is a '$' reference, which references the current PC. Emit a
1589 // temporary label to the streamer and refer to it.
1590 MCSymbol *Sym = Ctx.createTempSymbol();
1591 Out.emitLabel(Sym);
1593 getContext());
1594 EndLoc = FirstTokenLoc;
1595 return false;
1596 }
1597 return Error(FirstTokenLoc, "invalid token in expression");
1598 }
1599 }
1600 // Parse named bitwise negation.
1601 if (Identifier.equals_insensitive("not")) {
1602 if (parsePrimaryExpr(Res, EndLoc, nullptr))
1603 return true;
1604 Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc);
1605 return false;
1606 }
1607 // Parse directional local label references.
1608 if (Identifier.equals_insensitive("@b") ||
1609 Identifier.equals_insensitive("@f")) {
1610 bool Before = Identifier.equals_insensitive("@b");
1611 MCSymbol *Sym = getContext().getDirectionalLocalSymbol(0, Before);
1612 if (Before && Sym->isUndefined())
1613 return Error(FirstTokenLoc, "Expected @@ label before @B reference");
1614 Res = MCSymbolRefExpr::create(Sym, getContext());
1615 return false;
1616 }
1617 // Parse symbol variant.
1618 std::pair<StringRef, StringRef> Split;
1619 if (!MAI.useParensForSymbolVariant()) {
1620 Split = Identifier.split('@');
1621 } else if (Lexer.is(AsmToken::LParen)) {
1622 Lex(); // eat '('.
1623 StringRef VName;
1624 parseIdentifier(VName);
1625 // eat ')'.
1626 if (parseToken(AsmToken::RParen,
1627 "unexpected token in variant, expected ')'"))
1628 return true;
1629 Split = std::make_pair(Identifier, VName);
1630 }
1631
1632 EndLoc = SMLoc::getFromPointer(Identifier.end());
1633
1634 // This is a symbol reference.
1636 if (SymbolName.empty())
1637 return Error(getLexer().getLoc(), "expected a symbol reference");
1638
1640
1641 // Look up the symbol variant if used.
1642 if (!Split.second.empty()) {
1644 if (Variant != MCSymbolRefExpr::VK_Invalid) {
1645 SymbolName = Split.first;
1646 } else if (MAI.doesAllowAtInName() && !MAI.useParensForSymbolVariant()) {
1648 } else {
1649 return Error(SMLoc::getFromPointer(Split.second.begin()),
1650 "invalid variant '" + Split.second + "'");
1651 }
1652 }
1653
1654 // Find the field offset if used.
1656 Split = SymbolName.split('.');
1657 if (Split.second.empty()) {
1658 } else {
1659 SymbolName = Split.first;
1660 if (lookUpField(SymbolName, Split.second, Info)) {
1661 std::pair<StringRef, StringRef> BaseMember = Split.second.split('.');
1662 StringRef Base = BaseMember.first, Member = BaseMember.second;
1663 lookUpField(Base, Member, Info);
1664 } else if (Structs.count(SymbolName.lower())) {
1665 // This is actually a reference to a field offset.
1666 Res = MCConstantExpr::create(Info.Offset, getContext());
1667 return false;
1668 }
1669 }
1670
1671 MCSymbol *Sym = getContext().getInlineAsmLabel(SymbolName);
1672 if (!Sym) {
1673 // If this is a built-in numeric value, treat it as a constant.
1674 auto BuiltinIt = BuiltinSymbolMap.find(SymbolName.lower());
1675 const BuiltinSymbol Symbol = (BuiltinIt == BuiltinSymbolMap.end())
1676 ? BI_NO_SYMBOL
1677 : BuiltinIt->getValue();
1678 if (Symbol != BI_NO_SYMBOL) {
1679 const MCExpr *Value = evaluateBuiltinValue(Symbol, FirstTokenLoc);
1680 if (Value) {
1681 Res = Value;
1682 return false;
1683 }
1684 }
1685
1686 // Variables use case-insensitive symbol names; if this is a variable, we
1687 // find the symbol using its canonical name.
1688 auto VarIt = Variables.find(SymbolName.lower());
1689 if (VarIt != Variables.end())
1690 SymbolName = VarIt->second.Name;
1691 Sym = getContext().getOrCreateSymbol(SymbolName);
1692 }
1693
1694 // If this is an absolute variable reference, substitute it now to preserve
1695 // semantics in the face of reassignment.
1696 if (Sym->isVariable()) {
1697 auto V = Sym->getVariableValue(/*SetUsed=*/false);
1698 bool DoInline = isa<MCConstantExpr>(V) && !Variant;
1699 if (auto TV = dyn_cast<MCTargetExpr>(V))
1700 DoInline = TV->inlineAssignedExpr();
1701 if (DoInline) {
1702 if (Variant)
1703 return Error(EndLoc, "unexpected modifier on variable reference");
1704 Res = Sym->getVariableValue(/*SetUsed=*/false);
1705 return false;
1706 }
1707 }
1708
1709 // Otherwise create a symbol ref.
1710 const MCExpr *SymRef =
1711 MCSymbolRefExpr::create(Sym, Variant, getContext(), FirstTokenLoc);
1712 if (Info.Offset) {
1714 MCBinaryExpr::Add, SymRef,
1715 MCConstantExpr::create(Info.Offset, getContext()), getContext());
1716 } else {
1717 Res = SymRef;
1718 }
1719 if (TypeInfo) {
1720 if (Info.Type.Name.empty()) {
1721 auto TypeIt = KnownType.find(Identifier.lower());
1722 if (TypeIt != KnownType.end()) {
1723 Info.Type = TypeIt->second;
1724 }
1725 }
1726
1727 *TypeInfo = Info.Type;
1728 }
1729 return false;
1730 }
1731 case AsmToken::BigNum:
1732 return TokError("literal value out of range for directive");
1733 case AsmToken::Integer: {
1734 int64_t IntVal = getTok().getIntVal();
1735 Res = MCConstantExpr::create(IntVal, getContext());
1736 EndLoc = Lexer.getTok().getEndLoc();
1737 Lex(); // Eat token.
1738 return false;
1739 }
1740 case AsmToken::String: {
1741 // MASM strings (used as constants) are interpreted as big-endian base-256.
1742 SMLoc ValueLoc = getTok().getLoc();
1743 std::string Value;
1744 if (parseEscapedString(Value))
1745 return true;
1746 if (Value.size() > 8)
1747 return Error(ValueLoc, "literal value out of range");
1748 uint64_t IntValue = 0;
1749 for (const unsigned char CharVal : Value)
1750 IntValue = (IntValue << 8) | CharVal;
1751 Res = MCConstantExpr::create(IntValue, getContext());
1752 return false;
1753 }
1754 case AsmToken::Real: {
1755 APFloat RealVal(APFloat::IEEEdouble(), getTok().getString());
1756 uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
1757 Res = MCConstantExpr::create(IntVal, getContext());
1758 EndLoc = Lexer.getTok().getEndLoc();
1759 Lex(); // Eat token.
1760 return false;
1761 }
1762 case AsmToken::Dot: {
1763 // This is a '.' reference, which references the current PC. Emit a
1764 // temporary label to the streamer and refer to it.
1765 MCSymbol *Sym = Ctx.createTempSymbol();
1766 Out.emitLabel(Sym);
1768 EndLoc = Lexer.getTok().getEndLoc();
1769 Lex(); // Eat identifier.
1770 return false;
1771 }
1772 case AsmToken::LParen:
1773 Lex(); // Eat the '('.
1774 return parseParenExpr(Res, EndLoc);
1775 case AsmToken::LBrac:
1776 if (!PlatformParser->HasBracketExpressions())
1777 return TokError("brackets expression not supported on this target");
1778 Lex(); // Eat the '['.
1779 return parseBracketExpr(Res, EndLoc);
1780 case AsmToken::Minus:
1781 Lex(); // Eat the operator.
1782 if (parsePrimaryExpr(Res, EndLoc, nullptr))
1783 return true;
1784 Res = MCUnaryExpr::createMinus(Res, getContext(), FirstTokenLoc);
1785 return false;
1786 case AsmToken::Plus:
1787 Lex(); // Eat the operator.
1788 if (parsePrimaryExpr(Res, EndLoc, nullptr))
1789 return true;
1790 Res = MCUnaryExpr::createPlus(Res, getContext(), FirstTokenLoc);
1791 return false;
1792 case AsmToken::Tilde:
1793 Lex(); // Eat the operator.
1794 if (parsePrimaryExpr(Res, EndLoc, nullptr))
1795 return true;
1796 Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc);
1797 return false;
1798 // MIPS unary expression operators. The lexer won't generate these tokens if
1799 // MCAsmInfo::HasMipsExpressions is false for the target.
1824 Lex(); // Eat the operator.
1825 if (Lexer.isNot(AsmToken::LParen))
1826 return TokError("expected '(' after operator");
1827 Lex(); // Eat the operator.
1828 if (parseExpression(Res, EndLoc))
1829 return true;
1830 if (parseRParen())
1831 return true;
1832 Res = getTargetParser().createTargetUnaryExpr(Res, FirstTokenKind, Ctx);
1833 return !Res;
1834 }
1835}
1836
1837bool MasmParser::parseExpression(const MCExpr *&Res) {
1838 SMLoc EndLoc;
1839 return parseExpression(Res, EndLoc);
1840}
1841
1842/// This function checks if the next token is <string> type or arithmetic.
1843/// string that begin with character '<' must end with character '>'.
1844/// otherwise it is arithmetics.
1845/// If the function returns a 'true' value,
1846/// the End argument will be filled with the last location pointed to the '>'
1847/// character.
1848static bool isAngleBracketString(SMLoc &StrLoc, SMLoc &EndLoc) {
1849 assert((StrLoc.getPointer() != nullptr) &&
1850 "Argument to the function cannot be a NULL value");
1851 const char *CharPtr = StrLoc.getPointer();
1852 while ((*CharPtr != '>') && (*CharPtr != '\n') && (*CharPtr != '\r') &&
1853 (*CharPtr != '\0')) {
1854 if (*CharPtr == '!')
1855 CharPtr++;
1856 CharPtr++;
1857 }
1858 if (*CharPtr == '>') {
1859 EndLoc = StrLoc.getFromPointer(CharPtr + 1);
1860 return true;
1861 }
1862 return false;
1863}
1864
1865/// creating a string without the escape characters '!'.
1866static std::string angleBracketString(StringRef BracketContents) {
1867 std::string Res;
1868 for (size_t Pos = 0; Pos < BracketContents.size(); Pos++) {
1869 if (BracketContents[Pos] == '!')
1870 Pos++;
1871 Res += BracketContents[Pos];
1872 }
1873 return Res;
1874}
1875
1876/// Parse an expression and return it.
1877///
1878/// expr ::= expr &&,|| expr -> lowest.
1879/// expr ::= expr |,^,&,! expr
1880/// expr ::= expr ==,!=,<>,<,<=,>,>= expr
1881/// expr ::= expr <<,>> expr
1882/// expr ::= expr +,- expr
1883/// expr ::= expr *,/,% expr -> highest.
1884/// expr ::= primaryexpr
1885///
1886bool MasmParser::parseExpression(const MCExpr *&Res, SMLoc &EndLoc) {
1887 // Parse the expression.
1888 Res = nullptr;
1889 if (getTargetParser().parsePrimaryExpr(Res, EndLoc) ||
1890 parseBinOpRHS(1, Res, EndLoc))
1891 return true;
1892
1893 // Try to constant fold it up front, if possible. Do not exploit
1894 // assembler here.
1895 int64_t Value;
1896 if (Res->evaluateAsAbsolute(Value))
1897 Res = MCConstantExpr::create(Value, getContext());
1898
1899 return false;
1900}
1901
1902bool MasmParser::parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) {
1903 Res = nullptr;
1904 return parseParenExpr(Res, EndLoc) || parseBinOpRHS(1, Res, EndLoc);
1905}
1906
1907bool MasmParser::parseParenExprOfDepth(unsigned ParenDepth, const MCExpr *&Res,
1908 SMLoc &EndLoc) {
1909 if (parseParenExpr(Res, EndLoc))
1910 return true;
1911
1912 for (; ParenDepth > 0; --ParenDepth) {
1913 if (parseBinOpRHS(1, Res, EndLoc))
1914 return true;
1915
1916 // We don't Lex() the last RParen.
1917 // This is the same behavior as parseParenExpression().
1918 if (ParenDepth - 1 > 0) {
1919 EndLoc = getTok().getEndLoc();
1920 if (parseRParen())
1921 return true;
1922 }
1923 }
1924 return false;
1925}
1926
1927bool MasmParser::parseAbsoluteExpression(int64_t &Res) {
1928 const MCExpr *Expr;
1929
1930 SMLoc StartLoc = Lexer.getLoc();
1931 if (parseExpression(Expr))
1932 return true;
1933
1934 if (!Expr->evaluateAsAbsolute(Res, getStreamer().getAssemblerPtr()))
1935 return Error(StartLoc, "expected absolute expression");
1936
1937 return false;
1938}
1939
1942 bool ShouldUseLogicalShr,
1943 bool EndExpressionAtGreater) {
1944 switch (K) {
1945 default:
1946 return 0; // not a binop.
1947
1948 // Lowest Precedence: &&, ||
1949 case AsmToken::AmpAmp:
1950 Kind = MCBinaryExpr::LAnd;
1951 return 2;
1952 case AsmToken::PipePipe:
1953 Kind = MCBinaryExpr::LOr;
1954 return 1;
1955
1956 // Low Precedence: ==, !=, <>, <, <=, >, >=
1958 Kind = MCBinaryExpr::EQ;
1959 return 3;
1962 Kind = MCBinaryExpr::NE;
1963 return 3;
1964 case AsmToken::Less:
1965 Kind = MCBinaryExpr::LT;
1966 return 3;
1968 Kind = MCBinaryExpr::LTE;
1969 return 3;
1970 case AsmToken::Greater:
1971 if (EndExpressionAtGreater)
1972 return 0;
1973 Kind = MCBinaryExpr::GT;
1974 return 3;
1976 Kind = MCBinaryExpr::GTE;
1977 return 3;
1978
1979 // Low Intermediate Precedence: +, -
1980 case AsmToken::Plus:
1981 Kind = MCBinaryExpr::Add;
1982 return 4;
1983 case AsmToken::Minus:
1984 Kind = MCBinaryExpr::Sub;
1985 return 4;
1986
1987 // High Intermediate Precedence: |, &, ^
1988 case AsmToken::Pipe:
1989 Kind = MCBinaryExpr::Or;
1990 return 5;
1991 case AsmToken::Caret:
1992 Kind = MCBinaryExpr::Xor;
1993 return 5;
1994 case AsmToken::Amp:
1995 Kind = MCBinaryExpr::And;
1996 return 5;
1997
1998 // Highest Precedence: *, /, %, <<, >>
1999 case AsmToken::Star:
2000 Kind = MCBinaryExpr::Mul;
2001 return 6;
2002 case AsmToken::Slash:
2003 Kind = MCBinaryExpr::Div;
2004 return 6;
2005 case AsmToken::Percent:
2006 Kind = MCBinaryExpr::Mod;
2007 return 6;
2008 case AsmToken::LessLess:
2009 Kind = MCBinaryExpr::Shl;
2010 return 6;
2012 if (EndExpressionAtGreater)
2013 return 0;
2014 Kind = ShouldUseLogicalShr ? MCBinaryExpr::LShr : MCBinaryExpr::AShr;
2015 return 6;
2016 }
2017}
2018
2019unsigned MasmParser::getBinOpPrecedence(AsmToken::TokenKind K,
2020 MCBinaryExpr::Opcode &Kind) {
2021 bool ShouldUseLogicalShr = MAI.shouldUseLogicalShr();
2022 return getGNUBinOpPrecedence(K, Kind, ShouldUseLogicalShr,
2023 AngleBracketDepth > 0);
2024}
2025
2026/// Parse all binary operators with precedence >= 'Precedence'.
2027/// Res contains the LHS of the expression on input.
/// On return Res holds the expression combined so far. Returns true if
/// parsing a primary expression or a nested right-hand side failed, and false
/// once the next token has lower precedence than 'Precedence' (getting a
/// precedence of 0 marks a token that is not a binary operator).
2028bool MasmParser::parseBinOpRHS(unsigned Precedence, const MCExpr *&Res,
2029 SMLoc &EndLoc) {
2030 SMLoc StartLoc = Lexer.getLoc();
2031 while (true) {
// NOTE(review): embedded lines 2032, 2034, 2036-2037, 2039-2046 and 2049 are
// missing from this listing. The surviving fragments show identifier tokens
// being compared against operator keywords such as "and" and "xor" (CaseLower
// suggests a case-insensitive match) and mapped to operator token kinds, with
// TokKind as the fallback. The declarations of TokKind and Kind are
// presumably on the missing lines - confirm against upstream.
2033 if (Lexer.getKind() == AsmToken::Identifier) {
2035 .CaseLower("and", AsmToken::Amp)
2038 .CaseLower("xor", AsmToken::Caret)
2047 .Default(TokKind);
2048 }
2050 unsigned TokPrec = getBinOpPrecedence(TokKind, Kind);
2051
2052 // If the next token is lower precedence than we are allowed to eat, return
2053 // successfully with what we ate already.
2054 if (TokPrec < Precedence)
2055 return false;
2056
// Consume the operator token.
2057 Lex();
2058
2059 // Eat the next primary expression.
2060 const MCExpr *RHS;
2061 if (getTargetParser().parsePrimaryExpr(RHS, EndLoc))
2062 return true;
2063
2064 // If BinOp binds less tightly with RHS than the operator after RHS, let
2065 // the pending operator take RHS as its LHS.
// NOTE(review): embedded line 2066 is missing; presumably it declares the
// Dummy opcode that receives the unused Kind output below.
2067 unsigned NextTokPrec = getBinOpPrecedence(Lexer.getKind(), Dummy);
2068 if (TokPrec < NextTokPrec && parseBinOpRHS(TokPrec + 1, RHS, EndLoc))
2069 return true;
2070
2071 // Merge LHS and RHS according to operator.
2072 Res = MCBinaryExpr::create(Kind, Res, RHS, getContext(), StartLoc);
2073 }
2074}
2075
2076/// ParseStatement:
2077/// ::= % statement
2078/// ::= EndOfStatement
2079/// ::= Label* Directive ...Operands... EndOfStatement
2080/// ::= Label* Identifier OperandList* EndOfStatement
///
/// Parses a single statement from the lexer. In order, the body handles:
/// blank lines, the '%' expansion operator, conditional-assembly directives,
/// skipping inside an inactive conditional, labels, macro instantiations,
/// directives that start the statement, directives that follow their first
/// argument, and finally machine instructions. The paths visible below return
/// true where an error is reported and false otherwise.
2081bool MasmParser::parseStatement(ParseStatementInfo &Info,
// NOTE(review): embedded line 2082 (presumably the rest of the parameter
// list, which would declare the SI used further down) is missing from this
// listing - confirm against upstream.
2083 assert(!hasPendingError() && "parseStatement started with pending error");
2084 // Eat initial spaces and comments.
2085 while (Lexer.is(AsmToken::Space))
2086 Lex();
2087 if (Lexer.is(AsmToken::EndOfStatement)) {
2088 // If this is a line comment we can drop it safely.
2089 if (getTok().getString().empty() || getTok().getString().front() == '\r' ||
2090 getTok().getString().front() == '\n')
2091 Out.addBlankLine();
2092 Lex();
2093 return false;
2094 }
2095
2096 // If preceded by an expansion operator, first expand all text macros and
2097 // macro functions.
2098 if (getTok().is(AsmToken::Percent)) {
2099 SMLoc ExpansionLoc = getTok().getLoc();
2100 if (parseToken(AsmToken::Percent) || expandStatement(ExpansionLoc))
2101 return true;
2102 }
2103
2104 // Statements always start with an identifier, unless we're dealing with a
2105 // processor directive (.386, .686, etc.) that lexes as a real.
// ID keeps a copy of the first token; it is consulted again for the label
// check, the target directive hook and instruction parsing.
2106 AsmToken ID = getTok();
2107 SMLoc IDLoc = ID.getLoc();
2108 StringRef IDVal;
2109 if (Lexer.is(AsmToken::HashDirective))
2110 return parseCppHashLineFilenameComment(IDLoc);
2111 if (Lexer.is(AsmToken::Dot)) {
2112 // Treat '.' as a valid identifier in this context.
2113 Lex();
2114 IDVal = ".";
2115 } else if (Lexer.is(AsmToken::Real)) {
2116 // Treat ".<number>" as a valid identifier in this context.
2117 IDVal = getTok().getString();
2118 Lex(); // always eat a token
2119 if (!IDVal.starts_with("."))
2120 return Error(IDLoc, "unexpected token at start of statement");
2121 } else if (parseIdentifier(IDVal, StartOfStatement)) {
// No identifier could be parsed. This is only an error while assembling
// actively; inside an ignored conditional the statement is skipped below.
2122 if (!TheCondState.Ignore) {
2123 Lex(); // always eat a token
2124 return Error(IDLoc, "unexpected token at start of statement");
2125 }
2126 IDVal = "";
2127 }
2128
2129 // Handle conditional assembly here before checking for skipping. We
2130 // have to do this so that .endif isn't skipped in a ".if 0" block for
2131 // example.
// NOTE(review): embedded line 2132 is missing; presumably it declares
// DirKindIt, the result of the lowercase directive-name lookup below.
2133 DirectiveKindMap.find(IDVal.lower());
2134 DirectiveKind DirKind = (DirKindIt == DirectiveKindMap.end())
2135 ? DK_NO_DIRECTIVE
2136 : DirKindIt->getValue();
2137 switch (DirKind) {
2138 default:
2139 break;
2140 case DK_IF:
2141 case DK_IFE:
2142 return parseDirectiveIf(IDLoc, DirKind);
2143 case DK_IFB:
2144 return parseDirectiveIfb(IDLoc, true);
2145 case DK_IFNB:
2146 return parseDirectiveIfb(IDLoc, false);
2147 case DK_IFDEF:
2148 return parseDirectiveIfdef(IDLoc, true);
2149 case DK_IFNDEF:
2150 return parseDirectiveIfdef(IDLoc, false);
2151 case DK_IFDIF:
2152 return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/false,
2153 /*CaseInsensitive=*/false);
2154 case DK_IFDIFI:
2155 return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/false,
2156 /*CaseInsensitive=*/true);
2157 case DK_IFIDN:
2158 return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/true,
2159 /*CaseInsensitive=*/false);
2160 case DK_IFIDNI:
2161 return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/true,
2162 /*CaseInsensitive=*/true);
2163 case DK_ELSEIF:
2164 case DK_ELSEIFE:
2165 return parseDirectiveElseIf(IDLoc, DirKind);
2166 case DK_ELSEIFB:
2167 return parseDirectiveElseIfb(IDLoc, true);
2168 case DK_ELSEIFNB:
2169 return parseDirectiveElseIfb(IDLoc, false);
2170 case DK_ELSEIFDEF:
2171 return parseDirectiveElseIfdef(IDLoc, true);
2172 case DK_ELSEIFNDEF:
2173 return parseDirectiveElseIfdef(IDLoc, false);
2174 case DK_ELSEIFDIF:
2175 return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/false,
2176 /*CaseInsensitive=*/false);
2177 case DK_ELSEIFDIFI:
2178 return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/false,
2179 /*CaseInsensitive=*/true);
2180 case DK_ELSEIFIDN:
2181 return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/true,
2182 /*CaseInsensitive=*/false);
2183 case DK_ELSEIFIDNI:
2184 return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/true,
2185 /*CaseInsensitive=*/true);
2186 case DK_ELSE:
2187 return parseDirectiveElse(IDLoc);
2188 case DK_ENDIF:
2189 return parseDirectiveEndIf(IDLoc);
2190 }
2191
2192 // Ignore the statement if in the middle of inactive conditional
2193 // (e.g. ".if 0").
2194 if (TheCondState.Ignore) {
2195 eatToEndOfStatement();
2196 return false;
2197 }
2198
2199 // FIXME: Recurse on local labels?
2200
2201 // Check for a label.
2202 // ::= identifier ':'
2203 // ::= number ':'
2204 if (Lexer.is(AsmToken::Colon) && getTargetParser().isLabel(ID)) {
2205 if (checkForValidSection())
2206 return true;
2207
2208 // identifier ':' -> Label.
2209 Lex();
2210
2211 // Diagnose attempt to use '.' as a label.
2212 if (IDVal == ".")
2213 return Error(IDLoc, "invalid use of pseudo-symbol '.' as a label");
2214
2215 // Diagnose attempt to use a variable as a label.
2216 //
2217 // FIXME: Diagnostics. Note the location of the definition as a label.
2218 // FIXME: This doesn't diagnose assignment to a symbol which has been
2219 // implicitly marked as external.
2220 MCSymbol *Sym;
// For MS inline asm with a semantic callback, swap the label for its
// rewritten internal name and record the rewrite.
2221 if (ParsingMSInlineAsm && SI) {
2222 StringRef RewrittenLabel =
2223 SI->LookupInlineAsmLabel(IDVal, getSourceManager(), IDLoc, true);
2224 assert(!RewrittenLabel.empty() &&
2225 "We should have an internal name here.");
2226 Info.AsmRewrites->emplace_back(AOK_Label, IDLoc, IDVal.size(),
2227 RewrittenLabel);
2228 IDVal = RewrittenLabel;
2229 }
2230 // Handle directional local labels
// NOTE(review): embedded line 2232 (the body of the "@@" branch, which must
// assign Sym) is missing from this listing - confirm against upstream.
2231 if (IDVal == "@@") {
2233 } else {
2234 Sym = getContext().getOrCreateSymbol(IDVal);
2235 }
2236
2237 // End of Labels should be treated as end of line for lexing
2238 // purposes but that information is not available to the Lexer who
2239 // does not understand Labels. This may cause us to see a Hash
2240 // here instead of a preprocessor line comment.
2241 if (getTok().is(AsmToken::Hash)) {
2242 std::string CommentStr = parseStringTo(AsmToken::EndOfStatement);
2243 Lexer.Lex();
2244 Lexer.UnLex(AsmToken(AsmToken::EndOfStatement, CommentStr));
2245 }
2246
2247 // Consume any end of statement token, if present, to avoid spurious
2248 // addBlankLine() calls.
2249 if (getTok().is(AsmToken::EndOfStatement)) {
2250 Lex();
2251 }
2252
2253 getTargetParser().doBeforeLabelEmit(Sym, IDLoc);
2254
2255 // Emit the label.
2256 if (!getTargetParser().isParsingMSInlineAsm())
2257 Out.emitLabel(Sym, IDLoc);
2258
2259 // If we are generating dwarf for assembly source files then gather the
2260 // info to make a dwarf label entry for this label if needed.
2261 if (enabledGenDwarfForAssembly())
2262 MCGenDwarfLabelEntry::Make(Sym, &getStreamer(), getSourceManager(),
2263 IDLoc);
2264
2265 getTargetParser().onLabelParsed(Sym);
2266
2267 return false;
2268 }
2269
2270 // If macros are enabled, check to see if this is a macro instantiation.
// The lookup uses the lowercased name.
2271 if (const MCAsmMacro *M = getContext().lookupMacro(IDVal.lower())) {
2272 return handleMacroEntry(M, IDLoc);
2273 }
2274
2275 // Otherwise, we have a normal instruction or directive.
2276
2277 if (DirKind != DK_NO_DIRECTIVE) {
2278 // There are several entities interested in parsing directives:
2279 //
2280 // 1. Asm parser extensions. For example, platform-specific parsers
2281 // (like the ELF parser) register themselves as extensions.
2282 // 2. The target-specific assembly parser. Some directives are target
2283 // specific or may potentially behave differently on certain targets.
2284 // 3. The generic directive parser implemented by this class. These are
2285 // all the directives that behave in a target and platform independent
2286 // manner, or at least have a default behavior that's shared between
2287 // all targets and platforms.
2288
2289 getTargetParser().flushPendingInstructions(getStreamer());
2290
2291 // Special-case handling of structure-end directives at higher priority,
2292 // since ENDS is overloaded as a segment-end directive.
2293 if (IDVal.equals_insensitive("ends") && StructInProgress.size() > 1 &&
2294 getTok().is(AsmToken::EndOfStatement)) {
2295 return parseDirectiveNestedEnds();
2296 }
2297
2298 // First, check the extension directive map to see if any extension has
2299 // registered itself to parse this directive.
2300 std::pair<MCAsmParserExtension *, DirectiveHandler> Handler =
2301 ExtensionDirectiveMap.lookup(IDVal.lower());
2302 if (Handler.first)
2303 return (*Handler.second)(Handler.first, IDVal, IDLoc);
2304
2305 // Next, let the target-specific assembly parser try.
// A statement whose first token was not an identifier (the '.' and real
// number cases above) stops here and reports success without further
// handling.
2306 if (ID.isNot(AsmToken::Identifier))
2307 return false;
2308
2309 ParseStatus TPDirectiveReturn = getTargetParser().parseDirective(ID);
2310 assert(TPDirectiveReturn.isFailure() == hasPendingError() &&
2311 "Should only return Failure iff there was an error");
2312 if (TPDirectiveReturn.isFailure())
2313 return true;
2314 if (TPDirectiveReturn.isSuccess())
2315 return false;
2316
2317 // Finally, if no one else is interested in this directive, it must be
2318 // generic and familiar to this class.
2319 switch (DirKind) {
2320 default:
2321 break;
2322 case DK_ASCII:
2323 return parseDirectiveAscii(IDVal, false);
2324 case DK_ASCIZ:
2325 case DK_STRING:
2326 return parseDirectiveAscii(IDVal, true);
2327 case DK_BYTE:
2328 case DK_SBYTE:
2329 case DK_DB:
2330 return parseDirectiveValue(IDVal, 1);
2331 case DK_WORD:
2332 case DK_SWORD:
2333 case DK_DW:
2334 return parseDirectiveValue(IDVal, 2);
2335 case DK_DWORD:
2336 case DK_SDWORD:
2337 case DK_DD:
2338 return parseDirectiveValue(IDVal, 4);
2339 case DK_FWORD:
2340 case DK_DF:
2341 return parseDirectiveValue(IDVal, 6);
2342 case DK_QWORD:
2343 case DK_SQWORD:
2344 case DK_DQ:
2345 return parseDirectiveValue(IDVal, 8);
2346 case DK_REAL4:
2347 return parseDirectiveRealValue(IDVal, APFloat::IEEEsingle(), 4);
2348 case DK_REAL8:
2349 return parseDirectiveRealValue(IDVal, APFloat::IEEEdouble(), 8);
2350 case DK_REAL10:
2351 return parseDirectiveRealValue(IDVal, APFloat::x87DoubleExtended(), 10);
2352 case DK_STRUCT:
2353 case DK_UNION:
2354 return parseDirectiveNestedStruct(IDVal, DirKind);
2355 case DK_ENDS:
2356 return parseDirectiveNestedEnds();
2357 case DK_ALIGN:
2358 return parseDirectiveAlign();
2359 case DK_EVEN:
2360 return parseDirectiveEven();
2361 case DK_ORG:
2362 return parseDirectiveOrg();
2363 case DK_EXTERN:
2364 return parseDirectiveExtern();
2365 case DK_PUBLIC:
2366 return parseDirectiveSymbolAttribute(MCSA_Global);
2367 case DK_COMM:
2368 return parseDirectiveComm(/*IsLocal=*/false);
2369 case DK_COMMENT:
2370 return parseDirectiveComment(IDLoc);
2371 case DK_INCLUDE:
2372 return parseDirectiveInclude();
2373 case DK_REPEAT:
2374 return parseDirectiveRepeat(IDLoc, IDVal);
2375 case DK_WHILE:
2376 return parseDirectiveWhile(IDLoc);
2377 case DK_FOR:
2378 return parseDirectiveFor(IDLoc, IDVal);
2379 case DK_FORC:
2380 return parseDirectiveForc(IDLoc, IDVal);
2381 case DK_FILE:
2382 return parseDirectiveFile(IDLoc);
2383 case DK_LINE:
2384 return parseDirectiveLine();
2385 case DK_LOC:
2386 return parseDirectiveLoc();
2387 case DK_STABS:
2388 return parseDirectiveStabs();
2389 case DK_CV_FILE:
2390 return parseDirectiveCVFile();
2391 case DK_CV_FUNC_ID:
2392 return parseDirectiveCVFuncId();
2393 case DK_CV_INLINE_SITE_ID:
2394 return parseDirectiveCVInlineSiteId();
2395 case DK_CV_LOC:
2396 return parseDirectiveCVLoc();
2397 case DK_CV_LINETABLE:
2398 return parseDirectiveCVLinetable();
2399 case DK_CV_INLINE_LINETABLE:
2400 return parseDirectiveCVInlineLinetable();
2401 case DK_CV_DEF_RANGE:
2402 return parseDirectiveCVDefRange();
2403 case DK_CV_STRING:
2404 return parseDirectiveCVString();
2405 case DK_CV_STRINGTABLE:
2406 return parseDirectiveCVStringTable();
2407 case DK_CV_FILECHECKSUMS:
2408 return parseDirectiveCVFileChecksums();
2409 case DK_CV_FILECHECKSUM_OFFSET:
2410 return parseDirectiveCVFileChecksumOffset();
2411 case DK_CV_FPO_DATA:
2412 return parseDirectiveCVFPOData();
2413 case DK_CFI_SECTIONS:
2414 return parseDirectiveCFISections();
2415 case DK_CFI_STARTPROC:
2416 return parseDirectiveCFIStartProc();
2417 case DK_CFI_ENDPROC:
2418 return parseDirectiveCFIEndProc();
2419 case DK_CFI_DEF_CFA:
2420 return parseDirectiveCFIDefCfa(IDLoc);
2421 case DK_CFI_DEF_CFA_OFFSET:
2422 return parseDirectiveCFIDefCfaOffset(IDLoc);
2423 case DK_CFI_ADJUST_CFA_OFFSET:
2424 return parseDirectiveCFIAdjustCfaOffset(IDLoc);
2425 case DK_CFI_DEF_CFA_REGISTER:
2426 return parseDirectiveCFIDefCfaRegister(IDLoc);
2427 case DK_CFI_OFFSET:
2428 return parseDirectiveCFIOffset(IDLoc);
2429 case DK_CFI_REL_OFFSET:
2430 return parseDirectiveCFIRelOffset(IDLoc);
2431 case DK_CFI_PERSONALITY:
2432 return parseDirectiveCFIPersonalityOrLsda(true);
2433 case DK_CFI_LSDA:
2434 return parseDirectiveCFIPersonalityOrLsda(false);
2435 case DK_CFI_REMEMBER_STATE:
2436 return parseDirectiveCFIRememberState(IDLoc);
2437 case DK_CFI_RESTORE_STATE:
2438 return parseDirectiveCFIRestoreState(IDLoc);
2439 case DK_CFI_SAME_VALUE:
2440 return parseDirectiveCFISameValue(IDLoc);
2441 case DK_CFI_RESTORE:
2442 return parseDirectiveCFIRestore(IDLoc);
2443 case DK_CFI_ESCAPE:
2444 return parseDirectiveCFIEscape(IDLoc);
2445 case DK_CFI_RETURN_COLUMN:
2446 return parseDirectiveCFIReturnColumn(IDLoc);
2447 case DK_CFI_SIGNAL_FRAME:
2448 return parseDirectiveCFISignalFrame();
2449 case DK_CFI_UNDEFINED:
2450 return parseDirectiveCFIUndefined(IDLoc);
2451 case DK_CFI_REGISTER:
2452 return parseDirectiveCFIRegister(IDLoc);
2453 case DK_CFI_WINDOW_SAVE:
2454 return parseDirectiveCFIWindowSave(IDLoc);
2455 case DK_EXITM:
2456 Info.ExitValue = "";
2457 return parseDirectiveExitMacro(IDLoc, IDVal, *Info.ExitValue);
2458 case DK_ENDM:
2459 Info.ExitValue = "";
2460 return parseDirectiveEndMacro(IDVal);
2461 case DK_PURGE:
2462 return parseDirectivePurgeMacro(IDLoc);
2463 case DK_END:
2464 return parseDirectiveEnd(IDLoc);
2465 case DK_ERR:
2466 return parseDirectiveError(IDLoc);
2467 case DK_ERRB:
2468 return parseDirectiveErrorIfb(IDLoc, true);
2469 case DK_ERRNB:
2470 return parseDirectiveErrorIfb(IDLoc, false);
2471 case DK_ERRDEF:
2472 return parseDirectiveErrorIfdef(IDLoc, true);
2473 case DK_ERRNDEF:
2474 return parseDirectiveErrorIfdef(IDLoc, false);
2475 case DK_ERRDIF:
2476 return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/false,
2477 /*CaseInsensitive=*/false);
2478 case DK_ERRDIFI:
2479 return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/false,
2480 /*CaseInsensitive=*/true);
2481 case DK_ERRIDN:
2482 return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/true,
2483 /*CaseInsensitive=*/false);
2484 case DK_ERRIDNI:
2485 return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/true,
2486 /*CaseInsensitive=*/true);
2487 case DK_ERRE:
2488 return parseDirectiveErrorIfe(IDLoc, true);
2489 case DK_ERRNZ:
2490 return parseDirectiveErrorIfe(IDLoc, false);
2491 case DK_RADIX:
2492 return parseDirectiveRadix(IDLoc);
2493 case DK_ECHO:
2494 return parseDirectiveEcho(IDLoc);
2495 }
2496
2497 return Error(IDLoc, "unknown directive");
2498 }
2499
2500 // We also check if this is allocating memory with user-defined type.
2501 auto IDIt = Structs.find(IDVal.lower());
2502 if (IDIt != Structs.end())
2503 return parseDirectiveStructValue(/*Structure=*/IDIt->getValue(), IDVal,
2504 IDLoc);
2505
2506 // Non-conditional Microsoft directives sometimes follow their first argument.
// From here on IDVal is treated as that first argument and the current token
// (nextTok) as the candidate directive.
2507 const AsmToken nextTok = getTok();
2508 const StringRef nextVal = nextTok.getString();
2509 const SMLoc nextLoc = nextTok.getLoc();
2510
// One further token of lookahead, used below to recognize "<size> ptr".
2511 const AsmToken afterNextTok = peekTok();
2512
2513 // There are several entities interested in parsing infix directives:
2514 //
2515 // 1. Asm parser extensions. For example, platform-specific parsers
2516 // (like the ELF parser) register themselves as extensions.
2517 // 2. The generic directive parser implemented by this class. These are
2518 // all the directives that behave in a target and platform independent
2519 // manner, or at least have a default behavior that's shared between
2520 // all targets and platforms.
2521
2522 getTargetParser().flushPendingInstructions(getStreamer());
2523
2524 // Special-case handling of structure-end directives at higher priority, since
2525 // ENDS is overloaded as a segment-end directive.
2526 if (nextVal.equals_insensitive("ends") && StructInProgress.size() == 1) {
2527 Lex();
2528 return parseDirectiveEnds(IDVal, IDLoc);
2529 }
2530
2531 // First, check the extension directive map to see if any extension has
2532 // registered itself to parse this directive.
2533 std::pair<MCAsmParserExtension *, DirectiveHandler> Handler =
2534 ExtensionDirectiveMap.lookup(nextVal.lower());
2535 if (Handler.first) {
// Consume the directive token, then push the leading identifier back onto the
// lexer before invoking the handler (presumably so the handler reads it as
// its first operand - confirm against the extension handlers).
2536 Lex();
2537 Lexer.UnLex(ID);
2538 return (*Handler.second)(Handler.first, nextVal, nextLoc);
2539 }
2540
2541 // If no one else is interested in this directive, it must be
2542 // generic and familiar to this class.
2543 DirKindIt = DirectiveKindMap.find(nextVal.lower());
2544 DirKind = (DirKindIt == DirectiveKindMap.end())
2545 ? DK_NO_DIRECTIVE
2546 : DirKindIt->getValue();
2547 switch (DirKind) {
2548 default:
2549 break;
2550 case DK_ASSIGN:
2551 case DK_EQU:
2552 case DK_TEXTEQU:
2553 Lex();
2554 return parseDirectiveEquate(nextVal, IDVal, DirKind, IDLoc);
2555 case DK_BYTE:
2556 if (afterNextTok.is(AsmToken::Identifier) &&
2557 afterNextTok.getString().equals_insensitive("ptr")) {
2558 // Size directive; part of an instruction.
2559 break;
2560 }
2561 [[fallthrough]];
2562 case DK_SBYTE:
2563 case DK_DB:
2564 Lex();
2565 return parseDirectiveNamedValue(nextVal, 1, IDVal, IDLoc);
2566 case DK_WORD:
2567 if (afterNextTok.is(AsmToken::Identifier) &&
2568 afterNextTok.getString().equals_insensitive("ptr")) {
2569 // Size directive; part of an instruction.
2570 break;
2571 }
2572 [[fallthrough]];
2573 case DK_SWORD:
2574 case DK_DW:
2575 Lex();
2576 return parseDirectiveNamedValue(nextVal, 2, IDVal, IDLoc);
2577 case DK_DWORD:
2578 if (afterNextTok.is(AsmToken::Identifier) &&
2579 afterNextTok.getString().equals_insensitive("ptr")) {
2580 // Size directive; part of an instruction.
2581 break;
2582 }
2583 [[fallthrough]];
2584 case DK_SDWORD:
2585 case DK_DD:
2586 Lex();
2587 return parseDirectiveNamedValue(nextVal, 4, IDVal, IDLoc);
2588 case DK_FWORD:
2589 if (afterNextTok.is(AsmToken::Identifier) &&
2590 afterNextTok.getString().equals_insensitive("ptr")) {
2591 // Size directive; part of an instruction.
2592 break;
2593 }
2594 [[fallthrough]];
2595 case DK_DF:
2596 Lex();
2597 return parseDirectiveNamedValue(nextVal, 6, IDVal, IDLoc);
2598 case DK_QWORD:
2599 if (afterNextTok.is(AsmToken::Identifier) &&
2600 afterNextTok.getString().equals_insensitive("ptr")) {
2601 // Size directive; part of an instruction.
2602 break;
2603 }
2604 [[fallthrough]];
2605 case DK_SQWORD:
2606 case DK_DQ:
2607 Lex();
2608 return parseDirectiveNamedValue(nextVal, 8, IDVal, IDLoc);
2609 case DK_REAL4:
2610 Lex();
2611 return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEsingle(), 4,
2612 IDVal, IDLoc);
2613 case DK_REAL8:
2614 Lex();
2615 return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEdouble(), 8,
2616 IDVal, IDLoc);
2617 case DK_REAL10:
2618 Lex();
2619 return parseDirectiveNamedRealValue(nextVal, APFloat::x87DoubleExtended(),
2620 10, IDVal, IDLoc);
2621 case DK_STRUCT:
2622 case DK_UNION:
2623 Lex();
2624 return parseDirectiveStruct(nextVal, DirKind, IDVal, IDLoc);
2625 case DK_ENDS:
2626 Lex();
2627 return parseDirectiveEnds(IDVal, IDLoc);
2628 case DK_MACRO:
2629 Lex();
2630 return parseDirectiveMacro(IDVal, IDLoc);
2631 }
2632
2633 // Finally, we check if this is allocating a variable with user-defined type.
2634 auto NextIt = Structs.find(nextVal.lower());
2635 if (NextIt != Structs.end()) {
2636 Lex();
2637 return parseDirectiveNamedStructValue(/*Structure=*/NextIt->getValue(),
2638 nextVal, nextLoc, IDVal);
2639 }
2640
2641 // __asm _emit or __asm __emit
2642 if (ParsingMSInlineAsm && (IDVal == "_emit" || IDVal == "__emit" ||
2643 IDVal == "_EMIT" || IDVal == "__EMIT"))
2644 return parseDirectiveMSEmit(IDLoc, Info, IDVal.size());
2645
2646 // __asm align
2647 if (ParsingMSInlineAsm && (IDVal == "align" || IDVal == "ALIGN"))
2648 return parseDirectiveMSAlign(IDLoc, Info);
2649
// __asm even: only a rewrite is recorded; parsing then continues below.
2650 if (ParsingMSInlineAsm && (IDVal == "even" || IDVal == "EVEN"))
2651 Info.AsmRewrites->emplace_back(AOK_EVEN, IDLoc, 4);
2652 if (checkForValidSection())
2653 return true;
2654
2655 // Canonicalize the opcode to lower case.
2656 std::string OpcodeStr = IDVal.lower();
2657 ParseInstructionInfo IInfo(Info.AsmRewrites);
2658 bool ParseHadError = getTargetParser().parseInstruction(IInfo, OpcodeStr, ID,
2659 Info.ParsedOperands);
2660 Info.ParseError = ParseHadError;
2661
2662 // Dump the parsed representation, if requested.
2663 if (getShowParsedOperands()) {
2664 SmallString<256> Str;
// NOTE(review): embedded line 2665 is missing; presumably it declares the
// stream OS that writes into Str.
2666 OS << "parsed instruction: [";
2667 for (unsigned i = 0; i != Info.ParsedOperands.size(); ++i) {
2668 if (i != 0)
2669 OS << ", ";
2670 Info.ParsedOperands[i]->print(OS);
2671 }
2672 OS << "]";
2673
2674 printMessage(IDLoc, SourceMgr::DK_Note, OS.str());
2675 }
2676
2677 // Fail even if ParseInstruction erroneously returns false.
2678 if (hasPendingError() || ParseHadError)
2679 return true;
2680
2681 // If we are generating dwarf for the current section then generate a .loc
2682 // directive for the instruction.
// ParseHadError is necessarily false from here on because of the early
// return above, so the !ParseHadError tests below always pass.
2683 if (!ParseHadError && enabledGenDwarfForAssembly() &&
2684 getContext().getGenDwarfSectionSyms().count(
2685 getStreamer().getCurrentSectionOnly())) {
2686 unsigned Line;
// Inside a macro expansion the line is taken from the instantiation location
// of the first entry in ActiveMacros rather than from the expanded text.
2687 if (ActiveMacros.empty())
2688 Line = SrcMgr.FindLineNumber(IDLoc, CurBuffer);
2689 else
2690 Line = SrcMgr.FindLineNumber(ActiveMacros.front()->InstantiationLoc,
2691 ActiveMacros.front()->ExitBuffer);
2692
2693 // If we previously parsed a cpp hash file line comment then make sure the
2694 // current Dwarf File is for the CppHashFilename if not then emit the
2695 // Dwarf File table for it and adjust the line number for the .loc.
2696 if (!CppHashInfo.Filename.empty()) {
2697 unsigned FileNumber = getStreamer().emitDwarfFileDirective(
2698 0, StringRef(), CppHashInfo.Filename);
2699 getContext().setGenDwarfFileNumber(FileNumber);
2700
2701 unsigned CppHashLocLineNo =
2702 SrcMgr.FindLineNumber(CppHashInfo.Loc, CppHashInfo.Buf);
2703 Line = CppHashInfo.LineNumber - 1 + (Line - CppHashLocLineNo);
2704 }
2705
2706 getStreamer().emitDwarfLocDirective(
2707 getContext().getGenDwarfFileNumber(), Line, 0,
// NOTE(review): embedded line 2708 (further arguments of this call) is
// missing from this listing - confirm against upstream.
2709 StringRef());
2710 }
2711
2712 // If parsing succeeded, match the instruction.
2713 if (!ParseHadError) {
// NOTE(review): embedded line 2714 is missing; presumably it declares the
// ErrorInfo variable passed below.
2715 if (getTargetParser().matchAndEmitInstruction(
2716 IDLoc, Info.Opcode, Info.ParsedOperands, Out, ErrorInfo,
2717 getTargetParser().isParsingMSInlineAsm()))
2718 return true;
2719 }
2720 return false;
2721}
2722
2723// Parse and erase curly braces marking block start/end.
2724bool MasmParser::parseCurlyBlockScope(
2725 SmallVectorImpl<AsmRewrite> &AsmStrRewrites) {
2726 // Identify curly brace marking block start/end.
2727 if (Lexer.isNot(AsmToken::LCurly) && Lexer.isNot(AsmToken::RCurly))
2728 return false;
2729
2730 SMLoc StartLoc = Lexer.getLoc();
2731 Lex(); // Eat the brace.
2732 if (Lexer.is(AsmToken::EndOfStatement))
2733 Lex(); // Eat EndOfStatement following the brace.
2734
2735 // Erase the block start/end brace from the output asm string.
2736 AsmStrRewrites.emplace_back(AOK_Skip, StartLoc, Lexer.getLoc().getPointer() -
2737 StartLoc.getPointer());
2738 return true;
2739}
2740
2741/// parseCppHashLineFilenameComment as this:
2742/// ::= # number "filename"
2743bool MasmParser::parseCppHashLineFilenameComment(SMLoc L) {
2744 Lex(); // Eat the hash token.
2745 // Lexer only ever emits HashDirective if it fully formed if it's
2746 // done the checking already so this is an internal error.
2747 assert(getTok().is(AsmToken::Integer) &&
2748 "Lexing Cpp line comment: Expected Integer");
2749 int64_t LineNumber = getTok().getIntVal();
2750 Lex();
2751 assert(getTok().is(AsmToken::String) &&
2752 "Lexing Cpp line comment: Expected String");
2753 StringRef Filename = getTok().getString();
2754 Lex();
2755
2756 // Get rid of the enclosing quotes.
2757 Filename = Filename.substr(1, Filename.size() - 2);
2758
2759 // Save the SMLoc, Filename and LineNumber for later use by diagnostics
2760 // and possibly DWARF file info.
2761 CppHashInfo.Loc = L;
2762 CppHashInfo.Filename = Filename;
2763 CppHashInfo.LineNumber = LineNumber;
2764 CppHashInfo.Buf = CurBuffer;
2765 if (FirstCppHashFilename.empty())
2766 FirstCppHashFilename = Filename;
2767 return false;
2768}
2769
2770/// will use the last parsed cpp hash line filename comment
2771/// for the Filename and LineNo if any in the diagnostic.
2772void MasmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) {
2773 const MasmParser *Parser = static_cast<const MasmParser *>(Context);
2774 raw_ostream &OS = errs();
2775
2776 const SourceMgr &DiagSrcMgr = *Diag.getSourceMgr();
2777 SMLoc DiagLoc = Diag.getLoc();
2778 unsigned DiagBuf = DiagSrcMgr.FindBufferContainingLoc(DiagLoc);
2779 unsigned CppHashBuf =
2780 Parser->SrcMgr.FindBufferContainingLoc(Parser->CppHashInfo.Loc);
2781
2782 // Like SourceMgr::printMessage() we need to print the include stack if any
2783 // before printing the message.
2784 unsigned DiagCurBuffer = DiagSrcMgr.FindBufferContainingLoc(DiagLoc);
2785 if (!Parser->SavedDiagHandler && DiagCurBuffer &&
2786 DiagCurBuffer != DiagSrcMgr.getMainFileID()) {
2787 SMLoc ParentIncludeLoc = DiagSrcMgr.getParentIncludeLoc(DiagCurBuffer);
2788 DiagSrcMgr.PrintIncludeStack(ParentIncludeLoc, OS);
2789 }
2790
2791 // If we have not parsed a cpp hash line filename comment or the source
2792 // manager changed or buffer changed (like in a nested include) then just
2793 // print the normal diagnostic using its Filename and LineNo.
2794 if (!Parser->CppHashInfo.LineNumber || &DiagSrcMgr != &Parser->SrcMgr ||
2795 DiagBuf != CppHashBuf) {
2796 if (Parser->SavedDiagHandler)
2797 Parser->SavedDiagHandler(Diag, Parser->SavedDiagContext);
2798 else
2799 Diag.print(nullptr, OS);
2800 return;
2801 }
2802
2803 // Use the CppHashFilename and calculate a line number based on the
2804 // CppHashInfo.Loc and CppHashInfo.LineNumber relative to this Diag's SMLoc
2805 // for the diagnostic.
2806 const std::string &Filename = std::string(Parser->CppHashInfo.Filename);
2807
2808 int DiagLocLineNo = DiagSrcMgr.FindLineNumber(DiagLoc, DiagBuf);
2809 int CppHashLocLineNo =
2810 Parser->SrcMgr.FindLineNumber(Parser->CppHashInfo.Loc, CppHashBuf);
2811 int LineNo =
2812 Parser->CppHashInfo.LineNumber - 1 + (DiagLocLineNo - CppHashLocLineNo);
2813
2814 SMDiagnostic NewDiag(*Diag.getSourceMgr(), Diag.getLoc(), Filename, LineNo,
2815 Diag.getColumnNo(), Diag.getKind(), Diag.getMessage(),
2816 Diag.getLineContents(), Diag.getRanges());
2817
2818 if (Parser->SavedDiagHandler)
2819 Parser->SavedDiagHandler(NewDiag, Parser->SavedDiagContext);
2820 else
2821 NewDiag.print(nullptr, OS);
2822}
2823
2824// This is similar to the IsIdentifierChar function in AsmLexer.cpp, but does
2825// not accept '.'.
2826static bool isMacroParameterChar(char C) {
2827 return isAlnum(C) || C == '_' || C == '$' || C == '@' || C == '?';
2828}
2829
/// Expand one macro body: copy Body to OS while replacing each occurrence of
/// a macro parameter name with the tokens of the matching argument in A, and
/// each occurrence of a name from Locals with a freshly generated symbol name.
/// Returns the result of Error() when the parameter and argument counts
/// differ, and false otherwise.
/// NOTE(review): embedded lines 2831-2832 of the signature are missing from
/// this listing; presumably they declare the Parameters and A parameters used
/// below - confirm against upstream.
2830bool MasmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
2833 const std::vector<std::string> &Locals, SMLoc L) {
2834 unsigned NParameters = Parameters.size();
2835 if (NParameters != A.size())
2836 return Error(L, "Wrong number of arguments");
// Give every local a unique replacement name: "??" followed by the current
// LocalCounter value as at least four uppercase hex digits.
// NOTE(review): embedded line 2841 is missing; presumably it declares
// LocalName as a stream that appends to Name.
2837 StringMap<std::string> LocalSymbols;
2838 std::string Name;
2839 Name.reserve(6);
2840 for (StringRef Local : Locals) {
2842 LocalName << "??"
2843 << format_hex_no_prefix(LocalCounter++, 4, /*Upper=*/true);
2844 LocalSymbols.insert({Local, Name});
2845 Name.clear();
2846 }
2847
// Quote state persists across iterations of the outer loop, since Body is
// consumed piecewise.
2848 std::optional<char> CurrentQuote;
2849 while (!Body.empty()) {
2850 // Scan for the next substitution.
2851 std::size_t End = Body.size(), Pos = 0;
// Start of the identifier run currently being scanned inside quotes, or End
// when there is none.
2852 std::size_t IdentifierPos = End;
2853 for (; Pos != End; ++Pos) {
2854 // Find the next possible macro parameter, including preceding a '&'
2855 // inside quotes.
2856 if (Body[Pos] == '&')
2857 break;
2858 if (isMacroParameterChar(Body[Pos])) {
// Outside quotes any identifier character is a candidate. Inside quotes only
// remember where the identifier run began and keep scanning.
2859 if (!CurrentQuote)
2860 break;
2861 if (IdentifierPos == End)
2862 IdentifierPos = Pos;
2863 } else {
2864 IdentifierPos = End;
2865 }
2866
2867 // Track quotation status
2868 if (!CurrentQuote) {
2869 if (Body[Pos] == '\'' || Body[Pos] == '"')
2870 CurrentQuote = Body[Pos];
2871 } else if (Body[Pos] == CurrentQuote) {
2872 if (Pos + 1 != End && Body[Pos + 1] == CurrentQuote) {
2873 // Escaped quote, and quotes aren't identifier chars; skip
2874 ++Pos;
2875 continue;
2876 } else {
2877 CurrentQuote.reset();
2878 }
2879 }
2880 }
2881 if (IdentifierPos != End) {
2882 // An identifier run inside quotes ends right where the scan stopped (at a
2883 // '&' or at the end of the body); back up to its start to try expanding it.
2884 Pos = IdentifierPos;
2885 IdentifierPos = End;
2886 }
2887
2888 // Add the prefix.
2889 OS << Body.slice(0, Pos);
2890
2891 // Check if we reached the end.
2892 if (Pos == End)
2893 break;
2894
// Delimit the candidate name: skip a leading '&' if present, then take the
// longest run of macro-parameter characters.
2895 unsigned I = Pos;
2896 bool InitialAmpersand = (Body[I] == '&');
2897 if (InitialAmpersand) {
2898 ++I;
2899 ++Pos;
2900 }
2901 while (I < End && isMacroParameterChar(Body[I]))
2902 ++I;
2903
2904 const char *Begin = Body.data() + Pos;
2905 StringRef Argument(Begin, I - Pos);
2906 const std::string ArgumentLower = Argument.lower();
2907 unsigned Index = 0;
2908
// Parameter names are compared without regard to case.
2909 for (; Index < NParameters; ++Index)
2910 if (Parameters[Index].Name.equals_insensitive(ArgumentLower))
2911 break;
2912
2913 if (Index == NParameters) {
// Not a parameter: put back the '&' that was skipped, then emit either the
// generated name of a matching local or the text unchanged.
2914 if (InitialAmpersand)
2915 OS << '&';
2916 auto it = LocalSymbols.find(ArgumentLower);
2917 if (it != LocalSymbols.end())
2918 OS << it->second;
2919 else
2920 OS << Argument;
2921 Pos = I;
2922 } else {
2923 for (const AsmToken &Token : A[Index]) {
2924 // In MASM, you can write '%expr'.
2925 // The prefix '%' evaluates the expression 'expr'
2926 // and uses the result as a string (e.g. replace %(1+2) with the
2927 // string "3").
2928 // Here, we identify the integer token which is the result of the
2929 // absolute expression evaluation and replace it with its string
2930 // representation.
2931 if (Token.getString().front() == '%' && Token.is(AsmToken::Integer))
2932 // Emit an integer value to the buffer.
2933 OS << Token.getIntVal();
2934 else
2935 OS << Token.getString();
2936 }
2937
// Step past the parameter name and a '&' that directly follows it, so
// neither is copied to the output.
2938 Pos += Argument.size();
2939 if (Pos < End && Body[Pos] == '&') {
2940 ++Pos;
2941 }
2942 }
2943 // Update the scan point.
2944 Body = Body.substr(Pos);
2945 }
2946
2947 return false;
2948}
2949
2951 switch (kind) {
// Classifies a token kind: the kinds listed below yield true, every other
// kind yields false.
// NOTE(review): the signature line (embedded line 2950) and several case
// labels (embedded lines 2961, 2968, 2970, 2972 and 2974-2975) are missing
// from this listing, so the function name and the full set of accepted kinds
// cannot be read off here - confirm against upstream.
2952 default:
2953 return false;
2954 case AsmToken::Plus:
2955 case AsmToken::Minus:
2956 case AsmToken::Tilde:
2957 case AsmToken::Slash:
2958 case AsmToken::Star:
2959 case AsmToken::Dot:
2960 case AsmToken::Equal:
2962 case AsmToken::Pipe:
2963 case AsmToken::PipePipe:
2964 case AsmToken::Caret:
2965 case AsmToken::Amp:
2966 case AsmToken::AmpAmp:
2967 case AsmToken::Exclaim:
2969 case AsmToken::Less:
2971 case AsmToken::LessLess:
2973 case AsmToken::Greater:
2976 return true;
2977 }
2978}
2979
2980namespace {
2981
2982class AsmLexerSkipSpaceRAII {
2983public:
2984 AsmLexerSkipSpaceRAII(AsmLexer &Lexer, bool SkipSpace) : Lexer(Lexer) {
2985 Lexer.setSkipSpace(SkipSpace);
2986 }
2987
2988 ~AsmLexerSkipSpaceRAII() {
2989 Lexer.setSkipSpace(true);
2990 }
2991
2992private:
2993 AsmLexer &Lexer;
2994};
2995
2996} // end anonymous namespace
2997
2998bool MasmParser::parseMacroArgument(const MCAsmMacroParameter *MP,
2999 MCAsmMacroArgument &MA,
3000 AsmToken::TokenKind EndTok) {
3001 if (MP && MP->Vararg) {
3002 if (Lexer.isNot(EndTok)) {
3003 SmallVector<StringRef, 1> Str = parseStringRefsTo(EndTok);
3004 for (StringRef S : Str) {
3005 MA.emplace_back(AsmToken::String, S);
3006 }
3007 }
3008 return false;
3009 }
3010
3011 SMLoc StrLoc = Lexer.getLoc(), EndLoc;
3012 if (Lexer.is(AsmToken::Less) && isAngleBracketString(StrLoc, EndLoc)) {
3013 const char *StrChar = StrLoc.getPointer() + 1;
3014 const char *EndChar = EndLoc.getPointer() - 1;
3015 jumpToLoc(EndLoc, CurBuffer, EndStatementAtEOFStack.back());
3016 /// Eat from '<' to '>'.
3017 Lex();
3018 MA.emplace_back(AsmToken::String, StringRef(StrChar, EndChar - StrChar));
3019 return false;
3020 }
3021
3022 unsigned ParenLevel = 0;
3023
3024 // Darwin doesn't use spaces to delmit arguments.
3025 AsmLexerSkipSpaceRAII ScopedSkipSpace(Lexer, IsDarwin);
3026
3027 bool SpaceEaten;
3028
3029 while (true) {
3030 SpaceEaten = false;
3031 if (Lexer.is(AsmToken::Eof) || Lexer.is(AsmToken::Equal))
3032 return TokError("unexpected token");
3033
3034 if (ParenLevel == 0) {
3035 if (Lexer.is(AsmToken::Comma))
3036 break;
3037
3038 if (Lexer.is(AsmToken::Space)) {
3039 SpaceEaten = true;
3040 Lex(); // Eat spaces.
3041 }
3042
3043 // Spaces can delimit parameters, but could also be part an expression.
3044 // If the token after a space is an operator, add the token and the next
3045 // one into this argument
3046 if (!IsDarwin) {
3047 if (isOperator(Lexer.getKind()) && Lexer.isNot(EndTok)) {
3048 MA.push_back(getTok());
3049 Lex();
3050
3051 // Whitespace after an operator can be ignored.
3052 if (Lexer.is(AsmToken::Space))
3053 Lex();
3054
3055 continue;
3056 }
3057 }
3058 if (SpaceEaten)
3059 break;
3060 }
3061
3062 // handleMacroEntry relies on not advancing the lexer here
3063 // to be able to fill in the remaining default parameter values
3064 if (Lexer.is(EndTok) && (EndTok != AsmToken::RParen || ParenLevel == 0))
3065 break;
3066
3067 // Adjust the current parentheses level.
3068 if (Lexer.is(AsmToken::LParen))
3069 ++ParenLevel;
3070 else if (Lexer.is(AsmToken::RParen) && ParenLevel)
3071 --ParenLevel;
3072
3073 // Append the token to the current argument list.
3074 MA.push_back(getTok());
3075 Lex();
3076 }
3077
3078 if (ParenLevel != 0)
3079 return TokError("unbalanced parentheses in argument");
3080
3081 if (MA.empty() && MP) {
3082 if (MP->Required) {
3083 return TokError("missing value for required parameter '" + MP->Name +
3084 "'");
3085 } else {
3086 MA = MP->Value;
3087 }
3088 }
3089 return false;
3090}
3091
3092// Parse the macro instantiation arguments.
3093bool MasmParser::parseMacroArguments(const MCAsmMacro *M,
3094 MCAsmMacroArguments &A,
3095 AsmToken::TokenKind EndTok) {
3096 const unsigned NParameters = M ? M->Parameters.size() : 0;
3097 bool NamedParametersFound = false;
3098 SmallVector<SMLoc, 4> FALocs;
3099
3100 A.resize(NParameters);
3101 FALocs.resize(NParameters);
3102
3103 // Parse two kinds of macro invocations:
3104 // - macros defined without any parameters accept an arbitrary number of them
3105 // - macros defined with parameters accept at most that many of them
3106 for (unsigned Parameter = 0; !NParameters || Parameter < NParameters;
3107 ++Parameter) {
3108 SMLoc IDLoc = Lexer.getLoc();
3110
3111 if (Lexer.is(AsmToken::Identifier) && peekTok().is(AsmToken::Equal)) {
3112 if (parseIdentifier(FA.Name))
3113 return Error(IDLoc, "invalid argument identifier for formal argument");
3114
3115 if (Lexer.isNot(AsmToken::Equal))
3116 return TokError("expected '=' after formal parameter identifier");
3117
3118 Lex();
3119
3120 NamedParametersFound = true;
3121 }
3122
3123 if (NamedParametersFound && FA.Name.empty())
3124 return Error(IDLoc, "cannot mix positional and keyword arguments");
3125
3126 unsigned PI = Parameter;
3127 if (!FA.Name.empty()) {
3128 assert(M && "expected macro to be defined");
3129 unsigned FAI = 0;
3130 for (FAI = 0; FAI < NParameters; ++FAI)
3131 if (M->Parameters[FAI].Name == FA.Name)
3132 break;
3133
3134 if (FAI >= NParameters) {
3135 return Error(IDLoc, "parameter named '" + FA.Name +
3136 "' does not exist for macro '" + M->Name + "'");
3137 }
3138 PI = FAI;
3139 }
3140 const MCAsmMacroParameter *MP = nullptr;
3141 if (M && PI < NParameters)
3142 MP = &M->Parameters[PI];
3143
3144 SMLoc StrLoc = Lexer.getLoc();
3145 SMLoc EndLoc;
3146 if (Lexer.is(AsmToken::Percent)) {
3147 const MCExpr *AbsoluteExp;
3148 int64_t Value;
3149 /// Eat '%'.
3150 Lex();
3151 if (parseExpression(AbsoluteExp, EndLoc))
3152 return false;
3153 if (!AbsoluteExp->evaluateAsAbsolute(Value,
3154 getStreamer().getAssemblerPtr()))
3155 return Error(StrLoc, "expected absolute expression");
3156 const char *StrChar = StrLoc.getPointer();
3157 const char *EndChar = EndLoc.getPointer();
3158 AsmToken newToken(AsmToken::Integer,
3159 StringRef(StrChar, EndChar - StrChar), Value);
3160 FA.Value.push_back(newToken);
3161 } else if (parseMacroArgument(MP, FA.Value, EndTok)) {
3162 if (M)
3163 return addErrorSuffix(" in '" + M->Name + "' macro");
3164 else
3165 return true;
3166 }
3167
3168 if (!FA.Value.empty()) {
3169 if (A.size() <= PI)
3170 A.resize(PI + 1);
3171 A[PI] = FA.Value;
3172
3173 if (FALocs.size() <= PI)
3174 FALocs.resize(PI + 1);
3175
3176 FALocs[PI] = Lexer.getLoc();
3177 }
3178
3179 // At the end of the statement, fill in remaining arguments that have
3180 // default values. If there aren't any, then the next argument is
3181 // required but missing
3182 if (Lexer.is(EndTok)) {
3183 bool Failure = false;
3184 for (unsigned FAI = 0; FAI < NParameters; ++FAI) {
3185 if (A[FAI].empty()) {
3186 if (M->Parameters[FAI].Required) {
3187 Error(FALocs[FAI].isValid() ? FALocs[FAI] : Lexer.getLoc(),
3188 "missing value for required parameter "
3189 "'" +
3190 M->Parameters[FAI].Name + "' in macro '" + M->Name + "'");
3191 Failure = true;
3192 }
3193
3194 if (!M->Parameters[FAI].Value.empty())
3195 A[FAI] = M->Parameters[FAI].Value;
3196 }
3197 }
3198 return Failure;
3199 }
3200
3201 if (Lexer.is(AsmToken::Comma))
3202 Lex();
3203 }
3204
3205 return TokError("too many positional arguments");
3206}
3207
3208bool MasmParser::handleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc,
3209 AsmToken::TokenKind ArgumentEndTok) {
3210 // Arbitrarily limit macro nesting depth (default matches 'as'). We can
3211 // eliminate this, although we should protect against infinite loops.
3212 unsigned MaxNestingDepth = AsmMacroMaxNestingDepth;
3213 if (ActiveMacros.size() == MaxNestingDepth) {
3214 std::ostringstream MaxNestingDepthError;
3215 MaxNestingDepthError << "macros cannot be nested more than "
3216 << MaxNestingDepth << " levels deep."
3217 << " Use -asm-macro-max-nesting-depth to increase "
3218 "this limit.";
3219 return TokError(MaxNestingDepthError.str());
3220 }
3221
3222 MCAsmMacroArguments A;
3223 if (parseMacroArguments(M, A, ArgumentEndTok))
3224 return true;
3225
3226 // Macro instantiation is lexical, unfortunately. We construct a new buffer
3227 // to hold the macro body with substitutions.
3228 SmallString<256> Buf;
3229 StringRef Body = M->Body;
3231
3232 if (expandMacro(OS, Body, M->Parameters, A, M->Locals, getTok().getLoc()))
3233 return true;
3234
3235 // We include the endm in the buffer as our cue to exit the macro
3236 // instantiation.
3237 OS << "endm\n";
3238
3239 std::unique_ptr<MemoryBuffer> Instantiation =
3240 MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>");
3241
3242 // Create the macro instantiation object and add to the current macro
3243 // instantiation stack.
3244 MacroInstantiation *MI = new MacroInstantiation{
3245 NameLoc, CurBuffer, getTok().getLoc(), TheCondStack.size()};
3246 ActiveMacros.push_back(MI);
3247
3248 ++NumOfMacroInstantiations;
3249
3250 // Jump to the macro instantiation and prime the lexer.
3251 CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Instantiation), SMLoc());
3252 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
3253 EndStatementAtEOFStack.push_back(true);
3254 Lex();
3255
3256 return false;
3257}
3258
3259void MasmParser::handleMacroExit() {
3260 // Jump to the token we should return to, and consume it.
3261 EndStatementAtEOFStack.pop_back();
3262 jumpToLoc(ActiveMacros.back()->ExitLoc, ActiveMacros.back()->ExitBuffer,
3263 EndStatementAtEOFStack.back());
3264 Lex();
3265
3266 // Pop the instantiation entry.
3267 delete ActiveMacros.back();
3268 ActiveMacros.pop_back();
3269}
3270
3271bool MasmParser::handleMacroInvocation(const MCAsmMacro *M, SMLoc NameLoc) {
3272 if (!M->IsFunction)
3273 return Error(NameLoc, "cannot invoke macro procedure as function");
3274
3275 if (parseToken(AsmToken::LParen, "invoking macro function '" + M->Name +
3276 "' requires arguments in parentheses") ||
3277 handleMacroEntry(M, NameLoc, AsmToken::RParen))
3278 return true;
3279
3280 // Parse all statements in the macro, retrieving the exit value when it ends.
3281 std::string ExitValue;
3282 SmallVector<AsmRewrite, 4> AsmStrRewrites;
3283 while (Lexer.isNot(AsmToken::Eof)) {
3284 ParseStatementInfo Info(&AsmStrRewrites);
3285 bool Parsed = parseStatement(Info, nullptr);
3286
3287 if (!Parsed && Info.ExitValue) {
3288 ExitValue = std::move(*Info.ExitValue);
3289 break;
3290 }
3291
3292 // If we have a Lexer Error we are on an Error Token. Load in Lexer Error
3293 // for printing ErrMsg via Lex() only if no (presumably better) parser error
3294 // exists.
3295 if (Parsed && !hasPendingError() && Lexer.getTok().is(AsmToken::Error)) {
3296 Lex();
3297 }
3298
3299 // parseStatement returned true so may need to emit an error.
3300 printPendingErrors();
3301
3302 // Skipping to the next line if needed.
3303 if (Parsed && !getLexer().isAtStartOfStatement())
3304 eatToEndOfStatement();
3305 }
3306
3307 // Consume the right-parenthesis on the other side of the arguments.
3308 if (parseRParen())
3309 return true;
3310
3311 // Exit values may require lexing, unfortunately. We construct a new buffer to
3312 // hold the exit value.
3313 std::unique_ptr<MemoryBuffer> MacroValue =
3314 MemoryBuffer::getMemBufferCopy(ExitValue, "<macro-value>");
3315
3316 // Jump from this location to the instantiated exit value, and prime the
3317 // lexer.
3318 CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(MacroValue), Lexer.getLoc());
3319 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(), nullptr,
3320 /*EndStatementAtEOF=*/false);
3321 EndStatementAtEOFStack.push_back(false);
3322 Lex();
3323
3324 return false;
3325}
3326
3327/// parseIdentifier:
3328/// ::= identifier
3329/// ::= string
3330bool MasmParser::parseIdentifier(StringRef &Res,
3331 IdentifierPositionKind Position) {
3332 // The assembler has relaxed rules for accepting identifiers, in particular we
3333 // allow things like '.globl $foo' and '.def @feat.00', which would normally
3334 // be separate tokens. At this level, we have already lexed so we cannot
3335 // (currently) handle this as a context dependent token, instead we detect
3336 // adjacent tokens and return the combined identifier.
3337 if (Lexer.is(AsmToken::Dollar) || Lexer.is(AsmToken::At)) {
3338 SMLoc PrefixLoc = getLexer().getLoc();
3339
3340 // Consume the prefix character, and check for a following identifier.
3341
3342 AsmToken nextTok = peekTok(false);
3343
3344 if (nextTok.isNot(AsmToken::Identifier))
3345 return true;
3346
3347 // We have a '$' or '@' followed by an identifier, make sure they are adjacent.
3348 if (PrefixLoc.getPointer() + 1 != nextTok.getLoc().getPointer())
3349 return true;
3350
3351 // eat $ or @
3352 Lexer.Lex(); // Lexer's Lex guarantees consecutive token.
3353 // Construct the joined identifier and consume the token.
3354 Res =
3355 StringRef(PrefixLoc.getPointer(), getTok().getIdentifier().size() + 1);
3356 Lex(); // Parser Lex to maintain invariants.
3357 return false;
3358 }
3359
3360 if (Lexer.isNot(AsmToken::Identifier) && Lexer.isNot(AsmToken::String))
3361 return true;
3362
3363 Res = getTok().getIdentifier();
3364
3365 // Consume the identifier token - but if parsing certain directives, avoid
3366 // lexical expansion of the next token.
3367 ExpandKind ExpandNextToken = ExpandMacros;
3368 if (Position == StartOfStatement &&
3370 .CaseLower("echo", true)
3371 .CasesLower("ifdef", "ifndef", "elseifdef", "elseifndef", true)
3372 .Default(false)) {
3373 ExpandNextToken = DoNotExpandMacros;
3374 }
3375 Lex(ExpandNextToken);
3376
3377 return false;
3378}
3379
3380/// parseDirectiveEquate:
3381/// ::= name "=" expression
3382/// | name "equ" expression (not redefinable)
3383/// | name "equ" text-list
3384/// | name "textequ" text-list (redefinability unspecified)
3385bool MasmParser::parseDirectiveEquate(StringRef IDVal, StringRef Name,
3386 DirectiveKind DirKind, SMLoc NameLoc) {
3387 auto BuiltinIt = BuiltinSymbolMap.find(Name.lower());
3388 if (BuiltinIt != BuiltinSymbolMap.end())
3389 return Error(NameLoc, "cannot redefine a built-in symbol");
3390
3391 Variable &Var = Variables[Name.lower()];
3392 if (Var.Name.empty()) {
3393 Var.Name = Name;
3394 }
3395
3396 SMLoc StartLoc = Lexer.getLoc();
3397 if (DirKind == DK_EQU || DirKind == DK_TEXTEQU) {
3398 // "equ" and "textequ" both allow text expressions.
3399 std::string Value;
3400 std::string TextItem;
3401 if (!parseTextItem(TextItem)) {
3402 Value += TextItem;
3403
3404 // Accept a text-list, not just one text-item.
3405 auto parseItem = [&]() -> bool {
3406 if (parseTextItem(TextItem))
3407 return TokError("expected text item");
3408 Value += TextItem;
3409 return false;
3410 };
3411 if (parseOptionalToken(AsmToken::Comma) && parseMany(parseItem))
3412 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3413
3414 if (!Var.IsText || Var.TextValue != Value) {
3415 switch (Var.Redefinable) {
3416 case Variable::NOT_REDEFINABLE:
3417 return Error(getTok().getLoc(), "invalid variable redefinition");
3418 case Variable::WARN_ON_REDEFINITION:
3419 if (Warning(NameLoc, "redefining '" + Name +
3420 "', already defined on the command line")) {
3421 return true;
3422 }
3423 break;
3424 default:
3425 break;
3426 }
3427 }
3428 Var.IsText = true;
3429 Var.TextValue = Value;
3430 Var.Redefinable = Variable::REDEFINABLE;
3431
3432 return false;
3433 }
3434 }
3435 if (DirKind == DK_TEXTEQU)
3436 return TokError("expected <text> in '" + Twine(IDVal) + "' directive");
3437
3438 // Parse as expression assignment.
3439 const MCExpr *Expr;
3440 SMLoc EndLoc;
3441 if (parseExpression(Expr, EndLoc))
3442 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3443 StringRef ExprAsString = StringRef(
3444 StartLoc.getPointer(), EndLoc.getPointer() - StartLoc.getPointer());
3445
3446 int64_t Value;
3447 if (!Expr->evaluateAsAbsolute(Value, getStreamer().getAssemblerPtr())) {
3448 if (DirKind == DK_ASSIGN)
3449 return Error(
3450 StartLoc,
3451 "expected absolute expression; not all symbols have known values",
3452 {StartLoc, EndLoc});
3453
3454 // Not an absolute expression; define as a text replacement.
3455 if (!Var.IsText || Var.TextValue != ExprAsString) {
3456 switch (Var.Redefinable) {
3457 case Variable::NOT_REDEFINABLE:
3458 return Error(getTok().getLoc(), "invalid variable redefinition");
3459 case Variable::WARN_ON_REDEFINITION:
3460 if (Warning(NameLoc, "redefining '" + Name +
3461 "', already defined on the command line")) {
3462 return true;
3463 }
3464 break;
3465 default:
3466 break;
3467 }
3468 }
3469
3470 Var.IsText = true;
3471 Var.TextValue = ExprAsString.str();
3472 Var.Redefinable = Variable::REDEFINABLE;
3473
3474 return false;
3475 }
3476
3477 MCSymbol *Sym = getContext().getOrCreateSymbol(Var.Name);
3478
3479 const MCConstantExpr *PrevValue =
3480 Sym->isVariable() ? dyn_cast_or_null<MCConstantExpr>(
3481 Sym->getVariableValue(/*SetUsed=*/false))
3482 : nullptr;
3483 if (Var.IsText || !PrevValue || PrevValue->getValue() != Value) {
3484 switch (Var.Redefinable) {
3485 case Variable::NOT_REDEFINABLE:
3486 return Error(getTok().getLoc(), "invalid variable redefinition");
3487 case Variable::WARN_ON_REDEFINITION:
3488 if (Warning(NameLoc, "redefining '" + Name +
3489 "', already defined on the command line")) {
3490 return true;
3491 }
3492 break;
3493 default:
3494 break;
3495 }
3496 }
3497
3498 Var.IsText = false;
3499 Var.TextValue.clear();
3500 Var.Redefinable = (DirKind == DK_ASSIGN) ? Variable::REDEFINABLE
3501 : Variable::NOT_REDEFINABLE;
3502
3503 Sym->setRedefinable(Var.Redefinable != Variable::NOT_REDEFINABLE);
3504 Sym->setVariableValue(Expr);
3505 Sym->setExternal(false);
3506
3507 return false;
3508}
3509
3510bool MasmParser::parseEscapedString(std::string &Data) {
3511 if (check(getTok().isNot(AsmToken::String), "expected string"))
3512 return true;
3513
3514 Data = "";
3515 char Quote = getTok().getString().front();
3516 StringRef Str = getTok().getStringContents();
3517 Data.reserve(Str.size());
3518 for (size_t i = 0, e = Str.size(); i != e; ++i) {
3519 Data.push_back(Str[i]);
3520 if (Str[i] == Quote) {
3521 // MASM treats doubled delimiting quotes as an escaped delimiting quote.
3522 // If we're escaping the string's trailing delimiter, we're definitely
3523 // missing a quotation mark.
3524 if (i + 1 == Str.size())
3525 return Error(getTok().getLoc(), "missing quotation mark in string");
3526 if (Str[i + 1] == Quote)
3527 ++i;
3528 }
3529 }
3530
3531 Lex();
3532 return false;
3533}
3534
3535bool MasmParser::parseAngleBracketString(std::string &Data) {
3536 SMLoc EndLoc, StartLoc = getTok().getLoc();
3537 if (isAngleBracketString(StartLoc, EndLoc)) {
3538 const char *StartChar = StartLoc.getPointer() + 1;
3539 const char *EndChar = EndLoc.getPointer() - 1;
3540 jumpToLoc(EndLoc, CurBuffer, EndStatementAtEOFStack.back());
3541 // Eat from '<' to '>'.
3542 Lex();
3543
3544 Data = angleBracketString(StringRef(StartChar, EndChar - StartChar));
3545 return false;
3546 }
3547 return true;
3548}
3549
3550/// textItem ::= textLiteral | textMacroID | % constExpr
3551bool MasmParser::parseTextItem(std::string &Data) {
3552 switch (getTok().getKind()) {
3553 default:
3554 return true;
3555 case AsmToken::Percent: {
3556 int64_t Res;
3557 if (parseToken(AsmToken::Percent) || parseAbsoluteExpression(Res))
3558 return true;
3559 Data = std::to_string(Res);
3560 return false;
3561 }
3562 case AsmToken::Less:
3564 case AsmToken::LessLess:
3566 return parseAngleBracketString(Data);
3567 case AsmToken::Identifier: {
3568 // This must be a text macro; we need to expand it accordingly.
3569 StringRef ID;
3570 SMLoc StartLoc = getTok().getLoc();
3571 if (parseIdentifier(ID))
3572 return true;
3573 Data = ID.str();
3574
3575 bool Expanded = false;
3576 while (true) {
3577 // Try to resolve as a built-in text macro
3578 auto BuiltinIt = BuiltinSymbolMap.find(ID.lower());
3579 if (BuiltinIt != BuiltinSymbolMap.end()) {
3580 std::optional<std::string> BuiltinText =
3581 evaluateBuiltinTextMacro(BuiltinIt->getValue(), StartLoc);
3582 if (!BuiltinText) {
3583 // Not a text macro; break without substituting
3584 break;
3585 }
3586 Data = std::move(*BuiltinText);
3587 ID = StringRef(Data);
3588 Expanded = true;
3589 continue;
3590 }
3591
3592 // Try to resolve as a variable text macro
3593 auto VarIt = Variables.find(ID.lower());
3594 if (VarIt != Variables.end()) {
3595 const Variable &Var = VarIt->getValue();
3596 if (!Var.IsText) {
3597 // Not a text macro; break without substituting
3598 break;
3599 }
3600 Data = Var.TextValue;
3601 ID = StringRef(Data);
3602 Expanded = true;
3603 continue;
3604 }
3605
3606 break;
3607 }
3608
3609 if (!Expanded) {
3610 // Not a text macro; not usable in TextItem context. Since we haven't used
3611 // the token, put it back for better error recovery.
3612 getLexer().UnLex(AsmToken(AsmToken::Identifier, ID));
3613 return true;
3614 }
3615 return false;
3616 }
3617 }
3618 llvm_unreachable("unhandled token kind");
3619}
3620
3621/// parseDirectiveAscii:
3622/// ::= ( .ascii | .asciz | .string ) [ "string" ( , "string" )* ]
3623bool MasmParser::parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) {
3624 auto parseOp = [&]() -> bool {
3625 std::string Data;
3626 if (checkForValidSection() || parseEscapedString(Data))
3627 return true;
3628 getStreamer().emitBytes(Data);
3629 if (ZeroTerminated)
3630 getStreamer().emitBytes(StringRef("\0", 1));
3631 return false;
3632 };
3633
3634 if (parseMany(parseOp))
3635 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3636 return false;
3637}
3638
3639bool MasmParser::emitIntValue(const MCExpr *Value, unsigned Size) {
3640 // Special case constant expressions to match code generator.
3641 if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
3642 assert(Size <= 8 && "Invalid size");
3643 int64_t IntValue = MCE->getValue();
3644 if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
3645 return Error(MCE->getLoc(), "out of range literal value");
3646 getStreamer().emitIntValue(IntValue, Size);
3647 } else {
3648 const MCSymbolRefExpr *MSE = dyn_cast<MCSymbolRefExpr>(Value);
3649 if (MSE && MSE->getSymbol().getName() == "?") {
3650 // ? initializer; treat as 0.
3651 getStreamer().emitIntValue(0, Size);
3652 } else {
3653 getStreamer().emitValue(Value, Size, Value->getLoc());
3654 }
3655 }
3656 return false;
3657}
3658
3659bool MasmParser::parseScalarInitializer(unsigned Size,
3661 unsigned StringPadLength) {
3662 if (Size == 1 && getTok().is(AsmToken::String)) {
3663 std::string Value;
3664 if (parseEscapedString(Value))
3665 return true;
3666 // Treat each character as an initializer.
3667 for (const unsigned char CharVal : Value)
3668 Values.push_back(MCConstantExpr::create(CharVal, getContext()));
3669
3670 // Pad the string with spaces to the specified length.
3671 for (size_t i = Value.size(); i < StringPadLength; ++i)
3672 Values.push_back(MCConstantExpr::create(' ', getContext()));
3673 } else {
3674 const MCExpr *Value;
3675 if (parseExpression(Value))
3676 return true;
3677 if (getTok().is(AsmToken::Identifier) &&
3678 getTok().getString().equals_insensitive("dup")) {
3679 Lex(); // Eat 'dup'.
3680 const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
3681 if (!MCE)
3682 return Error(Value->getLoc(),
3683 "cannot repeat value a non-constant number of times");
3684 const int64_t Repetitions = MCE->getValue();
3685 if (Repetitions < 0)
3686 return Error(Value->getLoc(),
3687 "cannot repeat value a negative number of times");
3688
3689 SmallVector<const MCExpr *, 1> DuplicatedValues;
3690 if (parseToken(AsmToken::LParen,
3691 "parentheses required for 'dup' contents") ||
3692 parseScalarInstList(Size, DuplicatedValues) || parseRParen())
3693 return true;
3694
3695 for (int i = 0; i < Repetitions; ++i)
3696 Values.append(DuplicatedValues.begin(), DuplicatedValues.end());
3697 } else {
3698 Values.push_back(Value);
3699 }
3700 }
3701 return false;
3702}
3703
3704bool MasmParser::parseScalarInstList(unsigned Size,
3706 const AsmToken::TokenKind EndToken) {
3707 while (getTok().isNot(EndToken) &&
3708 (EndToken != AsmToken::Greater ||
3709 getTok().isNot(AsmToken::GreaterGreater))) {
3710 parseScalarInitializer(Size, Values);
3711
3712 // If we see a comma, continue, and allow line continuation.
3713 if (!parseOptionalToken(AsmToken::Comma))
3714 break;
3715 parseOptionalToken(AsmToken::EndOfStatement);
3716 }
3717 return false;
3718}
3719
3720bool MasmParser::emitIntegralValues(unsigned Size, unsigned *Count) {
3722 if (checkForValidSection() || parseScalarInstList(Size, Values))
3723 return true;
3724
3725 for (const auto *Value : Values) {
3726 emitIntValue(Value, Size);
3727 }
3728 if (Count)
3729 *Count = Values.size();
3730 return false;
3731}
3732
3733// Add a field to the current structure.
3734bool MasmParser::addIntegralField(StringRef Name, unsigned Size) {
3735 StructInfo &Struct = StructInProgress.back();
3736 FieldInfo &Field = Struct.addField(Name, FT_INTEGRAL, Size);
3737 IntFieldInfo &IntInfo = Field.Contents.IntInfo;
3738
3739 Field.Type = Size;
3740
3741 if (parseScalarInstList(Size, IntInfo.Values))
3742 return true;
3743
3744 Field.SizeOf = Field.Type * IntInfo.Values.size();
3745 Field.LengthOf = IntInfo.Values.size();
3746 const unsigned FieldEnd = Field.Offset + Field.SizeOf;
3747 if (!Struct.IsUnion) {
3748 Struct.NextOffset = FieldEnd;
3749 }
3750 Struct.Size = std::max(Struct.Size, FieldEnd);
3751 return false;
3752}
3753
3754/// parseDirectiveValue
3755/// ::= (byte | word | ... ) [ expression (, expression)* ]
3756bool MasmParser::parseDirectiveValue(StringRef IDVal, unsigned Size) {
3757 if (StructInProgress.empty()) {
3758 // Initialize data value.
3759 if (emitIntegralValues(Size))
3760 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3761 } else if (addIntegralField("", Size)) {
3762 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3763 }
3764
3765 return false;
3766}
3767
3768/// parseDirectiveNamedValue
3769/// ::= name (byte | word | ... ) [ expression (, expression)* ]
3770bool MasmParser::parseDirectiveNamedValue(StringRef TypeName, unsigned Size,
3771 StringRef Name, SMLoc NameLoc) {
3772 if (StructInProgress.empty()) {
3773 // Initialize named data value.
3774 MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
3775 getStreamer().emitLabel(Sym);
3776 unsigned Count;
3777 if (emitIntegralValues(Size, &Count))
3778 return addErrorSuffix(" in '" + Twine(TypeName) + "' directive");
3779
3781 Type.Name = TypeName;
3782 Type.Size = Size * Count;
3783 Type.ElementSize = Size;
3784 Type.Length = Count;
3785 KnownType[Name.lower()] = Type;
3786 } else if (addIntegralField(Name, Size)) {
3787 return addErrorSuffix(" in '" + Twine(TypeName) + "' directive");
3788 }
3789
3790 return false;
3791}
3792
3793static bool parseHexOcta(MasmParser &Asm, uint64_t &hi, uint64_t &lo) {
3794 if (Asm.getTok().isNot(AsmToken::Integer) &&
3795 Asm.getTok().isNot(AsmToken::BigNum))
3796 return Asm.TokError("unknown token in expression");
3797 SMLoc ExprLoc = Asm.getTok().getLoc();
3798 APInt IntValue = Asm.getTok().getAPIntVal();
3799 Asm.Lex();
3800 if (!IntValue.isIntN(128))
3801 return Asm.Error(ExprLoc, "out of range literal value");
3802 if (!IntValue.isIntN(64)) {
3803 hi = IntValue.getHiBits(IntValue.getBitWidth() - 64).getZExtValue();
3804 lo = IntValue.getLoBits(64).getZExtValue();
3805 } else {
3806 hi = 0;
3807 lo = IntValue.getZExtValue();
3808 }
3809 return false;
3810}
3811
3812bool MasmParser::parseRealValue(const fltSemantics &Semantics, APInt &Res) {
3813 // We don't truly support arithmetic on floating point expressions, so we
3814 // have to manually parse unary prefixes.
3815 bool IsNeg = false;
3816 SMLoc SignLoc;
3817 if (getLexer().is(AsmToken::Minus)) {
3818 SignLoc = getLexer().getLoc();
3819 Lexer.Lex();
3820 IsNeg = true;
3821 } else if (getLexer().is(AsmToken::Plus)) {
3822 SignLoc = getLexer().getLoc();
3823 Lexer.Lex();
3824 }
3825
3826 if (Lexer.is(AsmToken::Error))
3827 return TokError(Lexer.getErr());
3828 if (Lexer.isNot(AsmToken::Integer) && Lexer.isNot(AsmToken::Real) &&
3830 return TokError("unexpected token in directive");
3831
3832 // Convert to an APFloat.
3833 APFloat Value(Semantics);
3834 StringRef IDVal = getTok().getString();
3835 if (getLexer().is(AsmToken::Identifier)) {
3836 if (IDVal.equals_insensitive("infinity") || IDVal.equals_insensitive("inf"))
3837 Value = APFloat::getInf(Semantics);
3838 else if (IDVal.equals_insensitive("nan"))
3839 Value = APFloat::getNaN(Semantics, false, ~0);
3840 else if (IDVal.equals_insensitive("?"))
3841 Value = APFloat::getZero(Semantics);
3842 else
3843 return TokError("invalid floating point literal");
3844 } else if (IDVal.consume_back("r") || IDVal.consume_back("R")) {
3845 // MASM hexadecimal floating-point literal; no APFloat conversion needed.
3846 // To match ML64.exe, ignore the initial sign.
3847 unsigned SizeInBits = Value.getSizeInBits(Semantics);
3848 if (SizeInBits != (IDVal.size() << 2))
3849 return TokError("invalid floating point literal");
3850
3851 // Consume the numeric token.
3852 Lex();
3853
3854 Res = APInt(SizeInBits, IDVal, 16);
3855 if (SignLoc.isValid())
3856 return Warning(SignLoc, "MASM-style hex floats ignore explicit sign");
3857 return false;
3858 } else if (errorToBool(
3859 Value.convertFromString(IDVal, APFloat::rmNearestTiesToEven)
3860 .takeError())) {
3861 return TokError("invalid floating point literal");
3862 }
3863 if (IsNeg)
3864 Value.changeSign();
3865
3866 // Consume the numeric token.
3867 Lex();
3868
3869 Res = Value.bitcastToAPInt();
3870
3871 return false;
3872}
3873
3874bool MasmParser::parseRealInstList(const fltSemantics &Semantics,
3875 SmallVectorImpl<APInt> &ValuesAsInt,
3876 const AsmToken::TokenKind EndToken) {
3877 while (getTok().isNot(EndToken) ||
3878 (EndToken == AsmToken::Greater &&
3879 getTok().isNot(AsmToken::GreaterGreater))) {
3880 const AsmToken NextTok = peekTok();
3881 if (NextTok.is(AsmToken::Identifier) &&
3882 NextTok.getString().equals_insensitive("dup")) {
3883 const MCExpr *Value;
3884 if (parseExpression(Value) || parseToken(AsmToken::Identifier))
3885 return true;
3886 const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
3887 if (!MCE)
3888 return Error(Value->getLoc(),
3889 "cannot repeat value a non-constant number of times");
3890 const int64_t Repetitions = MCE->getValue();
3891 if (Repetitions < 0)
3892 return Error(Value->getLoc(),
3893 "cannot repeat value a negative number of times");
3894
3895 SmallVector<APInt, 1> DuplicatedValues;
3896 if (parseToken(AsmToken::LParen,
3897 "parentheses required for 'dup' contents") ||
3898 parseRealInstList(Semantics, DuplicatedValues) || parseRParen())
3899 return true;
3900
3901 for (int i = 0; i < Repetitions; ++i)
3902 ValuesAsInt.append(DuplicatedValues.begin(), DuplicatedValues.end());
3903 } else {
3904 APInt AsInt;
3905 if (parseRealValue(Semantics, AsInt))
3906 return true;
3907 ValuesAsInt.push_back(AsInt);
3908 }
3909
3910 // Continue if we see a comma. (Also, allow line continuation.)
3911 if (!parseOptionalToken(AsmToken::Comma))
3912 break;
3913 parseOptionalToken(AsmToken::EndOfStatement);
3914 }
3915
3916 return false;
3917}
3918
3919// Initialize real data values.
3920bool MasmParser::emitRealValues(const fltSemantics &Semantics,
3921 unsigned *Count) {
3922 if (checkForValidSection())
3923 return true;
3924
3925 SmallVector<APInt, 1> ValuesAsInt;
3926 if (parseRealInstList(Semantics, ValuesAsInt))
3927 return true;
3928
3929 for (const APInt &AsInt : ValuesAsInt) {
3930 getStreamer().emitIntValue(AsInt);
3931 }
3932 if (Count)
3933 *Count = ValuesAsInt.size();
3934 return false;
3935}
3936
3937// Add a real field to the current struct.
3938bool MasmParser::addRealField(StringRef Name, const fltSemantics &Semantics,
3939 size_t Size) {
3940 StructInfo &Struct = StructInProgress.back();
3941 FieldInfo &Field = Struct.addField(Name, FT_REAL, Size);
3942 RealFieldInfo &RealInfo = Field.Contents.RealInfo;
3943
3944 Field.SizeOf = 0;
3945
3946 if (parseRealInstList(Semantics, RealInfo.AsIntValues))
3947 return true;
3948
3949 Field.Type = RealInfo.AsIntValues.back().getBitWidth() / 8;
3950 Field.LengthOf = RealInfo.AsIntValues.size();
3951 Field.SizeOf = Field.Type * Field.LengthOf;
3952
3953 const unsigned FieldEnd = Field.Offset + Field.SizeOf;
3954 if (!Struct.IsUnion) {
3955 Struct.NextOffset = FieldEnd;
3956 }
3957 Struct.Size = std::max(Struct.Size, FieldEnd);
3958 return false;
3959}
3960
3961/// parseDirectiveRealValue
3962/// ::= (real4 | real8 | real10) [ expression (, expression)* ]
3963bool MasmParser::parseDirectiveRealValue(StringRef IDVal,
3964 const fltSemantics &Semantics,
3965 size_t Size) {
3966 if (StructInProgress.empty()) {
3967 // Initialize data value.
3968 if (emitRealValues(Semantics))
3969 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3970 } else if (addRealField("", Semantics, Size)) {
3971 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3972 }
3973 return false;
3974}
3975
3976/// parseDirectiveNamedRealValue
3977/// ::= name (real4 | real8 | real10) [ expression (, expression)* ]
3978bool MasmParser::parseDirectiveNamedRealValue(StringRef TypeName,
3979 const fltSemantics &Semantics,
3980 unsigned Size, StringRef Name,
3981 SMLoc NameLoc) {
3982 if (StructInProgress.empty()) {
3983 // Initialize named data value.
3984 MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
3985 getStreamer().emitLabel(Sym);
3986 unsigned Count;
3987 if (emitRealValues(Semantics, &Count))
3988 return addErrorSuffix(" in '" + TypeName + "' directive");
3989
3991 Type.Name = TypeName;
3992 Type.Size = Size * Count;
3993 Type.ElementSize = Size;
3994 Type.Length = Count;
3995 KnownType[Name.lower()] = Type;
3996 } else if (addRealField(Name, Semantics, Size)) {
3997 return addErrorSuffix(" in '" + TypeName + "' directive");
3998 }
3999 return false;
4000}
4001
4002bool MasmParser::parseOptionalAngleBracketOpen() {
4003 const AsmToken Tok = getTok();
4004 if (parseOptionalToken(AsmToken::LessLess)) {
4005 AngleBracketDepth++;
4006 Lexer.UnLex(AsmToken(AsmToken::Less, Tok.getString().substr(1)));
4007 return true;
4008 } else if (parseOptionalToken(AsmToken::LessGreater)) {
4009 AngleBracketDepth++;
4011 return true;
4012 } else if (parseOptionalToken(AsmToken::Less)) {
4013 AngleBracketDepth++;
4014 return true;
4015 }
4016
4017 return false;
4018}
4019
4020bool MasmParser::parseAngleBracketClose(const Twine &Msg) {
4021 const AsmToken Tok = getTok();
4022 if (parseOptionalToken(AsmToken::GreaterGreater)) {
4024 } else if (parseToken(AsmToken::Greater, Msg)) {
4025 return true;
4026 }
4027 AngleBracketDepth--;
4028 return false;
4029}
4030
4031bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
4032 const IntFieldInfo &Contents,
4033 FieldInitializer &Initializer) {
4034 SMLoc Loc = getTok().getLoc();
4035
4037 if (parseOptionalToken(AsmToken::LCurly)) {
4038 if (Field.LengthOf == 1 && Field.Type > 1)
4039 return Error(Loc, "Cannot initialize scalar field with array value");
4040 if (parseScalarInstList(Field.Type, Values, AsmToken::RCurly) ||
4041 parseToken(AsmToken::RCurly))
4042 return true;
4043 } else if (parseOptionalAngleBracketOpen()) {
4044 if (Field.LengthOf == 1 && Field.Type > 1)
4045 return Error(Loc, "Cannot initialize scalar field with array value");
4046 if (parseScalarInstList(Field.Type, Values, AsmToken::Greater) ||
4047 parseAngleBracketClose())
4048 return true;
4049 } else if (Field.LengthOf > 1 && Field.Type > 1) {
4050 return Error(Loc, "Cannot initialize array field with scalar value");
4051 } else if (parseScalarInitializer(Field.Type, Values,
4052 /*StringPadLength=*/Field.LengthOf)) {
4053 return true;
4054 }
4055
4056 if (Values.size() > Field.LengthOf) {
4057 return Error(Loc, "Initializer too long for field; expected at most " +
4058 std::to_string(Field.LengthOf) + " elements, got " +
4059 std::to_string(Values.size()));
4060 }
4061 // Default-initialize all remaining values.
4062 Values.append(Contents.Values.begin() + Values.size(), Contents.Values.end());
4063
4064 Initializer = FieldInitializer(std::move(Values));
4065 return false;
4066}
4067
4068bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
4069 const RealFieldInfo &Contents,
4070 FieldInitializer &Initializer) {
4071 const fltSemantics *Semantics;
4072 switch (Field.Type) {
4073 case 4:
4074 Semantics = &APFloat::IEEEsingle();
4075 break;
4076 case 8:
4077 Semantics = &APFloat::IEEEdouble();
4078 break;
4079 case 10:
4080 Semantics = &APFloat::x87DoubleExtended();
4081 break;
4082 default:
4083 llvm_unreachable("unknown real field type");
4084 }
4085
4086 SMLoc Loc = getTok().getLoc();
4087
4088 SmallVector<APInt, 1> AsIntValues;
4089 if (parseOptionalToken(AsmToken::LCurly)) {
4090 if (Field.LengthOf == 1)
4091 return Error(Loc, "Cannot initialize scalar field with array value");
4092 if (parseRealInstList(*Semantics, AsIntValues, AsmToken::RCurly) ||
4093 parseToken(AsmToken::RCurly))
4094 return true;
4095 } else if (parseOptionalAngleBracketOpen()) {
4096 if (Field.LengthOf == 1)
4097 return Error(Loc, "Cannot initialize scalar field with array value");
4098 if (parseRealInstList(*Semantics, AsIntValues, AsmToken::Greater) ||
4099 parseAngleBracketClose())
4100 return true;
4101 } else if (Field.LengthOf > 1) {
4102 return Error(Loc, "Cannot initialize array field with scalar value");
4103 } else {
4104 AsIntValues.emplace_back();
4105 if (parseRealValue(*Semantics, AsIntValues.back()))
4106 return true;
4107 }
4108
4109 if (AsIntValues.size() > Field.LengthOf) {
4110 return Error(Loc, "Initializer too long for field; expected at most " +
4111 std::to_string(Field.LengthOf) + " elements, got " +
4112 std::to_string(AsIntValues.size()));
4113 }
4114 // Default-initialize all remaining values.
4115 AsIntValues.append(Contents.AsIntValues.begin() + AsIntValues.size(),
4116 Contents.AsIntValues.end());
4117
4118 Initializer = FieldInitializer(std::move(AsIntValues));
4119 return false;
4120}
4121
4122bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
4123 const StructFieldInfo &Contents,
4124 FieldInitializer &Initializer) {
4125 SMLoc Loc = getTok().getLoc();
4126
4127 std::vector<StructInitializer> Initializers;
4128 if (Field.LengthOf > 1) {
4129 if (parseOptionalToken(AsmToken::LCurly)) {
4130 if (parseStructInstList(Contents.Structure, Initializers,
4132 parseToken(AsmToken::RCurly))
4133 return true;
4134 } else if (parseOptionalAngleBracketOpen()) {
4135 if (parseStructInstList(Contents.Structure, Initializers,
4137 parseAngleBracketClose())
4138 return true;
4139 } else {
4140 return Error(Loc, "Cannot initialize array field with scalar value");
4141 }
4142 } else {
4143 Initializers.emplace_back();
4144 if (parseStructInitializer(Contents.Structure, Initializers.back()))
4145 return true;
4146 }
4147
4148 if (Initializers.size() > Field.LengthOf) {
4149 return Error(Loc, "Initializer too long for field; expected at most " +
4150 std::to_string(Field.LengthOf) + " elements, got " +
4151 std::to_string(Initializers.size()));
4152 }
4153 // Default-initialize all remaining values.
4154 Initializers.insert(Initializers.end(),
4155 Contents.Initializers.begin() + Initializers.size(),
4156 Contents.Initializers.end());
4157
4158 Initializer = FieldInitializer(std::move(Initializers), Contents.Structure);
4159 return false;
4160}
4161
4162bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
4163 FieldInitializer &Initializer) {
4164 switch (Field.Contents.FT) {
4165 case FT_INTEGRAL:
4166 return parseFieldInitializer(Field, Field.Contents.IntInfo, Initializer);
4167 case FT_REAL:
4168 return parseFieldInitializer(Field, Field.Contents.RealInfo, Initializer);
4169 case FT_STRUCT:
4170 return parseFieldInitializer(Field, Field.Contents.StructInfo, Initializer);
4171 }
4172 llvm_unreachable("Unhandled FieldType enum");
4173}
4174
4175bool MasmParser::parseStructInitializer(const StructInfo &Structure,
4176 StructInitializer &Initializer) {
4177 const AsmToken FirstToken = getTok();
4178
4179 std::optional<AsmToken::TokenKind> EndToken;
4180 if (parseOptionalToken(AsmToken::LCurly)) {
4181 EndToken = AsmToken::RCurly;
4182 } else if (parseOptionalAngleBracketOpen()) {
4183 EndToken = AsmToken::Greater;
4184 AngleBracketDepth++;
4185 } else if (FirstToken.is(AsmToken::Identifier) &&
4186 FirstToken.getString() == "?") {
4187 // ? initializer; leave EndToken uninitialized to treat as empty.
4188 if (parseToken(AsmToken::Identifier))
4189 return true;
4190 } else {
4191 return Error(FirstToken.getLoc(), "Expected struct initializer");
4192 }
4193
4194 auto &FieldInitializers = Initializer.FieldInitializers;
4195 size_t FieldIndex = 0;
4196 if (EndToken) {
4197 // Initialize all fields with given initializers.
4198 while (getTok().isNot(*EndToken) && FieldIndex < Structure.Fields.size()) {
4199 const FieldInfo &Field = Structure.Fields[FieldIndex++];
4200 if (parseOptionalToken(AsmToken::Comma)) {
4201 // Empty initializer; use the default and continue. (Also, allow line
4202 // continuation.)
4203 FieldInitializers.push_back(Field.Contents);
4204 parseOptionalToken(AsmToken::EndOfStatement);
4205 continue;
4206 }
4207 FieldInitializers.emplace_back(Field.Contents.FT);
4208 if (parseFieldInitializer(Field, FieldInitializers.back()))
4209 return true;
4210
4211 // Continue if we see a comma. (Also, allow line continuation.)
4212 SMLoc CommaLoc = getTok().getLoc();
4213 if (!parseOptionalToken(AsmToken::Comma))
4214 break;
4215 if (FieldIndex == Structure.Fields.size())
4216 return Error(CommaLoc, "'" + Structure.Name +
4217 "' initializer initializes too many fields");
4218 parseOptionalToken(AsmToken::EndOfStatement);
4219 }
4220 }
4221 // Default-initialize all remaining fields.
4222 for (const FieldInfo &Field : llvm::drop_begin(Structure.Fields, FieldIndex))
4223 FieldInitializers.push_back(Field.Contents);
4224
4225 if (EndToken) {
4226 if (*EndToken == AsmToken::Greater)
4227 return parseAngleBracketClose();
4228
4229 return parseToken(*EndToken);
4230 }
4231
4232 return false;
4233}
4234
4235bool MasmParser::parseStructInstList(
4236 const StructInfo &Structure, std::vector<StructInitializer> &Initializers,
4237 const AsmToken::TokenKind EndToken) {
4238 while (getTok().isNot(EndToken) ||
4239 (EndToken == AsmToken::Greater &&
4240 getTok().isNot(AsmToken::GreaterGreater))) {
4241 const AsmToken NextTok = peekTok();
4242 if (NextTok.is(AsmToken::Identifier) &&
4243 NextTok.getString().equals_insensitive("dup")) {
4244 const MCExpr *Value;
4245 if (parseExpression(Value) || parseToken(AsmToken::Identifier))
4246 return true;
4247 const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
4248 if (!MCE)
4249 return Error(Value->getLoc(),
4250 "cannot repeat value a non-constant number of times");
4251 const int64_t Repetitions = MCE->getValue();
4252 if (Repetitions < 0)
4253 return Error(Value->getLoc(),
4254 "cannot repeat value a negative number of times");
4255
4256 std::vector<StructInitializer> DuplicatedValues;
4257 if (parseToken(AsmToken::LParen,
4258 "parentheses required for 'dup' contents") ||
4259 parseStructInstList(Structure, DuplicatedValues) || parseRParen())
4260 return true;
4261
4262 for (int i = 0; i < Repetitions; ++i)
4263 llvm::append_range(Initializers, DuplicatedValues);
4264 } else {
4265 Initializers.emplace_back();
4266 if (parseStructInitializer(Structure, Initializers.back()))
4267 return true;
4268 }
4269
4270 // Continue if we see a comma. (Also, allow line continuation.)
4271 if (!parseOptionalToken(AsmToken::Comma))
4272 break;
4273 parseOptionalToken(AsmToken::EndOfStatement);
4274 }
4275
4276 return false;
4277}
4278
4279bool MasmParser::emitFieldValue(const FieldInfo &Field,
4280 const IntFieldInfo &Contents) {
4281 // Default-initialize all values.
4282 for (const MCExpr *Value : Contents.Values) {
4283 if (emitIntValue(Value, Field.Type))
4284 return true;
4285 }
4286 return false;
4287}
4288
4289bool MasmParser::emitFieldValue(const FieldInfo &Field,
4290 const RealFieldInfo &Contents) {
4291 for (const APInt &AsInt : Contents.AsIntValues) {
4292 getStreamer().emitIntValue(AsInt.getLimitedValue(),
4293 AsInt.getBitWidth() / 8);
4294 }
4295 return false;
4296}
4297
4298bool MasmParser::emitFieldValue(const FieldInfo &Field,
4299 const StructFieldInfo &Contents) {
4300 for (const auto &Initializer : Contents.Initializers) {
4301 size_t Index = 0, Offset = 0;
4302 for (const auto &SubField : Contents.Structure.Fields) {
4303 getStreamer().emitZeros(SubField.Offset - Offset);
4304 Offset = SubField.Offset + SubField.SizeOf;
4305 emitFieldInitializer(SubField, Initializer.FieldInitializers[Index++]);
4306 }
4307 }
4308 return false;
4309}
4310
4311bool MasmParser::emitFieldValue(const FieldInfo &Field) {
4312 switch (Field.Contents.FT) {
4313 case FT_INTEGRAL:
4314 return emitFieldValue(Field, Field.Contents.IntInfo);
4315 case FT_REAL:
4316 return emitFieldValue(Field, Field.Contents.RealInfo);
4317 case FT_STRUCT:
4318 return emitFieldValue(Field, Field.Contents.StructInfo);
4319 }
4320 llvm_unreachable("Unhandled FieldType enum");
4321}
4322
4323bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4324 const IntFieldInfo &Contents,
4325 const IntFieldInfo &Initializer) {
4326 for (const auto &Value : Initializer.Values) {
4327 if (emitIntValue(Value, Field.Type))
4328 return true;
4329 }
4330 // Default-initialize all remaining values.
4331 for (const auto &Value :
4332 llvm::drop_begin(Contents.Values, Initializer.Values.size())) {
4333 if (emitIntValue(Value, Field.Type))
4334 return true;
4335 }
4336 return false;
4337}
4338
4339bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4340 const RealFieldInfo &Contents,
4341 const RealFieldInfo &Initializer) {
4342 for (const auto &AsInt : Initializer.AsIntValues) {
4343 getStreamer().emitIntValue(AsInt.getLimitedValue(),
4344 AsInt.getBitWidth() / 8);
4345 }
4346 // Default-initialize all remaining values.
4347 for (const auto &AsInt :
4348 llvm::drop_begin(Contents.AsIntValues, Initializer.AsIntValues.size())) {
4349 getStreamer().emitIntValue(AsInt.getLimitedValue(),
4350 AsInt.getBitWidth() / 8);
4351 }
4352 return false;
4353}
4354
4355bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4356 const StructFieldInfo &Contents,
4357 const StructFieldInfo &Initializer) {
4358 for (const auto &Init : Initializer.Initializers) {
4359 if (emitStructInitializer(Contents.Structure, Init))
4360 return true;
4361 }
4362 // Default-initialize all remaining values.
4363 for (const auto &Init : llvm::drop_begin(Contents.Initializers,
4364 Initializer.Initializers.size())) {
4365 if (emitStructInitializer(Contents.Structure, Init))
4366 return true;
4367 }
4368 return false;
4369}
4370
4371bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4372 const FieldInitializer &Initializer) {
4373 switch (Field.Contents.FT) {
4374 case FT_INTEGRAL:
4375 return emitFieldInitializer(Field, Field.Contents.IntInfo,
4376 Initializer.IntInfo);
4377 case FT_REAL:
4378 return emitFieldInitializer(Field, Field.Contents.RealInfo,
4379 Initializer.RealInfo);
4380 case FT_STRUCT:
4381 return emitFieldInitializer(Field, Field.Contents.StructInfo,
4382 Initializer.StructInfo);
4383 }
4384 llvm_unreachable("Unhandled FieldType enum");
4385}
4386
4387bool MasmParser::emitStructInitializer(const StructInfo &Structure,
4388 const StructInitializer &Initializer) {
4389 if (!Structure.Initializable)
4390 return Error(getLexer().getLoc(),
4391 "cannot initialize a value of type '" + Structure.Name +
4392 "'; 'org' was used in the type's declaration");
4393 size_t Index = 0, Offset = 0;
4394 for (const auto &Init : Initializer.FieldInitializers) {
4395 const auto &Field = Structure.Fields[Index++];
4396 getStreamer().emitZeros(Field.Offset - Offset);
4397 Offset = Field.Offset + Field.SizeOf;
4398 if (emitFieldInitializer(Field, Init))
4399 return true;
4400 }
4401 // Default-initialize all remaining fields.
4402 for (const auto &Field : llvm::drop_begin(
4403 Structure.Fields, Initializer.FieldInitializers.size())) {
4404 getStreamer().emitZeros(Field.Offset - Offset);
4405 Offset = Field.Offset + Field.SizeOf;
4406 if (emitFieldValue(Field))
4407 return true;
4408 }
4409 // Add final padding.
4410 if (Offset != Structure.Size)
4411 getStreamer().emitZeros(Structure.Size - Offset);
4412 return false;
4413}
4414
4415// Set data values from initializers.
4416bool MasmParser::emitStructValues(const StructInfo &Structure,
4417 unsigned *Count) {
4418 std::vector<StructInitializer> Initializers;
4419 if (parseStructInstList(Structure, Initializers))
4420 return true;
4421
4422 for (const auto &Initializer : Initializers) {
4423 if (emitStructInitializer(Structure, Initializer))
4424 return true;
4425 }
4426
4427 if (Count)
4428 *Count = Initializers.size();
4429 return false;
4430}
4431
4432// Declare a field in the current struct.
4433bool MasmParser::addStructField(StringRef Name, const StructInfo &Structure) {
4434 StructInfo &OwningStruct = StructInProgress.back();
4435 FieldInfo &Field =
4436 OwningStruct.addField(Name, FT_STRUCT, Structure.AlignmentSize);
4437 StructFieldInfo &StructInfo = Field.Contents.StructInfo;
4438
4439 StructInfo.Structure = Structure;
4440 Field.Type = Structure.Size;
4441
4442 if (parseStructInstList(Structure, StructInfo.Initializers))
4443 return true;
4444
4445 Field.LengthOf = StructInfo.Initializers.size();
4446 Field.SizeOf = Field.Type * Field.LengthOf;
4447
4448 const unsigned FieldEnd = Field.Offset + Field.SizeOf;
4449 if (!OwningStruct.IsUnion) {
4450 OwningStruct.NextOffset = FieldEnd;
4451 }
4452 OwningStruct.Size = std::max(OwningStruct.Size, FieldEnd);
4453
4454 return false;
4455}
4456
4457/// parseDirectiveStructValue
4458/// ::= struct-id (<struct-initializer> | {struct-initializer})
4459/// [, (<struct-initializer> | {struct-initializer})]*
4460bool MasmParser::parseDirectiveStructValue(const StructInfo &Structure,
4461 StringRef Directive, SMLoc DirLoc) {
4462 if (StructInProgress.empty()) {
4463 if (emitStructValues(Structure))
4464 return true;
4465 } else if (addStructField("", Structure)) {
4466 return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4467 }
4468
4469 return false;
4470}
4471
4472/// parseDirectiveNamedValue
4473/// ::= name (byte | word | ... ) [ expression (, expression)* ]
4474bool MasmParser::parseDirectiveNamedStructValue(const StructInfo &Structure,
4476 SMLoc DirLoc, StringRef Name) {
4477 if (StructInProgress.empty()) {
4478 // Initialize named data value.
4479 MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
4480 getStreamer().emitLabel(Sym);
4481 unsigned Count;
4482 if (emitStructValues(Structure, &Count))
4483 return true;
4485 Type.Name = Structure.Name;
4486 Type.Size = Structure.Size * Count;
4487 Type.ElementSize = Structure.Size;
4488 Type.Length = Count;
4489 KnownType[Name.lower()] = Type;
4490 } else if (addStructField(Name, Structure)) {
4491 return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4492 }
4493
4494 return false;
4495}
4496
4497/// parseDirectiveStruct
4498/// ::= <name> (STRUC | STRUCT | UNION) [fieldAlign] [, NONUNIQUE]
4499/// (dataDir | generalDir | offsetDir | nestedStruct)+
4500/// <name> ENDS
4501////// dataDir = data declaration
4502////// offsetDir = EVEN, ORG, ALIGN
4503bool MasmParser::parseDirectiveStruct(StringRef Directive,
4504 DirectiveKind DirKind, StringRef Name,
4505 SMLoc NameLoc) {
4506 // We ignore NONUNIQUE; we do not support OPTION M510 or OPTION OLDSTRUCTS
4507 // anyway, so all field accesses must be qualified.
4508 AsmToken NextTok = getTok();
4509 int64_t AlignmentValue = 1;
4510 if (NextTok.isNot(AsmToken::Comma) &&
4512 parseAbsoluteExpression(AlignmentValue)) {
4513 return addErrorSuffix(" in alignment value for '" + Twine(Directive) +
4514 "' directive");
4515 }
4516 if (!isPowerOf2_64(AlignmentValue)) {
4517 return Error(NextTok.getLoc(), "alignment must be a power of two; was " +
4518 std::to_string(AlignmentValue));
4519 }
4520
4522 SMLoc QualifierLoc;
4523 if (parseOptionalToken(AsmToken::Comma)) {
4524 QualifierLoc = getTok().getLoc();
4525 if (parseIdentifier(Qualifier))
4526 return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4527 if (!Qualifier.equals_insensitive("nonunique"))
4528 return Error(QualifierLoc, "Unrecognized qualifier for '" +
4529 Twine(Directive) +
4530 "' directive; expected none or NONUNIQUE");
4531 }
4532
4533 if (parseEOL())
4534 return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4535
4536 StructInProgress.emplace_back(Name, DirKind == DK_UNION, AlignmentValue);
4537 return false;
4538}
4539
4540/// parseDirectiveNestedStruct
4541/// ::= (STRUC | STRUCT | UNION) [name]
4542/// (dataDir | generalDir | offsetDir | nestedStruct)+
4543/// ENDS
4544bool MasmParser::parseDirectiveNestedStruct(StringRef Directive,
4545 DirectiveKind DirKind) {
4546 if (StructInProgress.empty())
4547 return TokError("missing name in top-level '" + Twine(Directive) +
4548 "' directive");
4549
4551 if (getTok().is(AsmToken::Identifier)) {
4552 Name = getTok().getIdentifier();
4553 parseToken(AsmToken::Identifier);
4554 }
4555 if (parseEOL())
4556 return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4557
4558 // Reserve space to ensure Alignment doesn't get invalidated when
4559 // StructInProgress grows.
4560 StructInProgress.reserve(StructInProgress.size() + 1);
4561 StructInProgress.emplace_back(Name, DirKind == DK_UNION,
4562 StructInProgress.back().Alignment);
4563 return false;
4564}
4565
4566bool MasmParser::parseDirectiveEnds(StringRef Name, SMLoc NameLoc) {
4567 if (StructInProgress.empty())
4568 return Error(NameLoc, "ENDS directive without matching STRUC/STRUCT/UNION");
4569 if (StructInProgress.size() > 1)
4570 return Error(NameLoc, "unexpected name in nested ENDS directive");
4571 if (StructInProgress.back().Name.compare_insensitive(Name))
4572 return Error(NameLoc, "mismatched name in ENDS directive; expected '" +
4573 StructInProgress.back().Name + "'");
4574 StructInfo Structure = StructInProgress.pop_back_val();
4575 // Pad to make the structure's size divisible by the smaller of its alignment
4576 // and the size of its largest field.
4577 Structure.Size = llvm::alignTo(
4578 Structure.Size, std::min(Structure.Alignment, Structure.AlignmentSize));
4579 Structs[Name.lower()] = Structure;
4580
4581 if (parseEOL())
4582 return addErrorSuffix(" in ENDS directive");
4583
4584 return false;
4585}
4586
4587bool MasmParser::parseDirectiveNestedEnds() {
4588 if (StructInProgress.empty())
4589 return TokError("ENDS directive without matching STRUC/STRUCT/UNION");
4590 if (StructInProgress.size() == 1)
4591 return TokError("missing name in top-level ENDS directive");
4592
4593 if (parseEOL())
4594 return addErrorSuffix(" in nested ENDS directive");
4595
4596 StructInfo Structure = StructInProgress.pop_back_val();
4597 // Pad to make the structure's size divisible by its alignment.
4598 Structure.Size = llvm::alignTo(Structure.Size, Structure.Alignment);
4599
4600 StructInfo &ParentStruct = StructInProgress.back();
4601 if (Structure.Name.empty()) {
4602 // Anonymous substructures' fields are addressed as if they belong to the
4603 // parent structure - so we transfer them to the parent here.
4604 const size_t OldFields = ParentStruct.Fields.size();
4605 ParentStruct.Fields.insert(
4606 ParentStruct.Fields.end(),
4607 std::make_move_iterator(Structure.Fields.begin()),
4608 std::make_move_iterator(Structure.Fields.end()));
4609 for (const auto &FieldByName : Structure.FieldsByName) {
4610 ParentStruct.FieldsByName[FieldByName.getKey()] =
4611 FieldByName.getValue() + OldFields;
4612 }
4613
4614 unsigned FirstFieldOffset = 0;
4615 if (!Structure.Fields.empty() && !ParentStruct.IsUnion) {
4616 FirstFieldOffset = llvm::alignTo(
4617 ParentStruct.NextOffset,
4618 std::min(ParentStruct.Alignment, Structure.AlignmentSize));
4619 }
4620
4621 if (ParentStruct.IsUnion) {
4622 ParentStruct.Size = std::max(ParentStruct.Size, Structure.Size);
4623 } else {
4624 for (auto &Field : llvm::drop_begin(ParentStruct.Fields, OldFields))
4625 Field.Offset += FirstFieldOffset;
4626
4627 const unsigned StructureEnd = FirstFieldOffset + Structure.Size;
4628 if (!ParentStruct.IsUnion) {
4629 ParentStruct.NextOffset = StructureEnd;
4630 }
4631 ParentStruct.Size = std::max(ParentStruct.Size, StructureEnd);
4632 }
4633 } else {
4634 FieldInfo &Field = ParentStruct.addField(Structure.Name, FT_STRUCT,
4635 Structure.AlignmentSize);
4636 StructFieldInfo &StructInfo = Field.Contents.StructInfo;
4637 Field.Type = Structure.Size;
4638 Field.LengthOf = 1;
4639 Field.SizeOf = Structure.Size;
4640
4641 const unsigned StructureEnd = Field.Offset + Field.SizeOf;
4642 if (!ParentStruct.IsUnion) {
4643 ParentStruct.NextOffset = StructureEnd;
4644 }
4645 ParentStruct.Size = std::max(ParentStruct.Size, StructureEnd);
4646
4647 StructInfo.Structure = Structure;
4648 StructInfo.Initializers.emplace_back();
4649 auto &FieldInitializers = StructInfo.Initializers.back().FieldInitializers;
4650 for (const auto &SubField : Structure.Fields) {
4651 FieldInitializers.push_back(SubField.Contents);
4652 }
4653 }
4654
4655 return false;
4656}
4657
4658/// parseDirectiveOrg
4659/// ::= org expression
4660bool MasmParser::parseDirectiveOrg() {
4661 const MCExpr *Offset;
4662 SMLoc OffsetLoc = Lexer.getLoc();
4663 if (checkForValidSection() || parseExpression(Offset))
4664 return true;
4665 if (parseEOL())
4666 return addErrorSuffix(" in 'org' directive");
4667
4668 if (StructInProgress.empty()) {
4669 // Not in a struct; change the offset for the next instruction or data
4670 if (checkForValidSection())
4671 return addErrorSuffix(" in 'org' directive");
4672
4673 getStreamer().emitValueToOffset(Offset, 0, OffsetLoc);
4674 } else {
4675 // Offset the next field of this struct
4676 StructInfo &Structure = StructInProgress.back();
4677 int64_t OffsetRes;
4678 if (!Offset->evaluateAsAbsolute(OffsetRes, getStreamer().getAssemblerPtr()))
4679 return Error(OffsetLoc,
4680 "expected absolute expression in 'org' directive");
4681 if (OffsetRes < 0)
4682 return Error(
4683 OffsetLoc,
4684 "expected non-negative value in struct's 'org' directive; was " +
4685 std::to_string(OffsetRes));
4686 Structure.NextOffset = static_cast<unsigned>(OffsetRes);
4687
4688 // ORG-affected structures cannot be initialized
4689 Structure.Initializable = false;
4690 }
4691
4692 return false;
4693}
4694
4695bool MasmParser::emitAlignTo(int64_t Alignment) {
4696 if (StructInProgress.empty()) {
4697 // Not in a struct; align the next instruction or data
4698 if (checkForValidSection())
4699 return true;
4700
4701 // Check whether we should use optimal code alignment for this align
4702 // directive.
4703 const MCSection *Section = getStreamer().getCurrentSectionOnly();
4704 assert(Section && "must have section to emit alignment");
4705 if (Section->useCodeAlign()) {
4706 getStreamer().emitCodeAlignment(Align(Alignment),
4707 &getTargetParser().getSTI(),
4708 /*MaxBytesToEmit=*/0);
4709 } else {
4710 // FIXME: Target specific behavior about how the "extra" bytes are filled.
4711 getStreamer().emitValueToAlignment(Align(Alignment), /*Value=*/0,
4712 /*ValueSize=*/1,
4713 /*MaxBytesToEmit=*/0);
4714 }
4715 } else {
4716 // Align the next field of this struct
4717 StructInfo &Structure = StructInProgress.back();
4718 Structure.NextOffset = llvm::alignTo(Structure.NextOffset, Alignment);
4719 }
4720
4721 return false;
4722}
4723
4724/// parseDirectiveAlign
4725/// ::= align expression
4726bool MasmParser::parseDirectiveAlign() {
4727 SMLoc AlignmentLoc = getLexer().getLoc();
4728 int64_t Alignment;
4729
4730 // Ignore empty 'align' directives.
4731 if (getTok().is(AsmToken::EndOfStatement)) {
4732 return Warning(AlignmentLoc,
4733 "align directive with no operand is ignored") &&
4734 parseEOL();
4735 }
4736 if (parseAbsoluteExpression(Alignment) || parseEOL())
4737 return addErrorSuffix(" in align directive");
4738
4739 // Always emit an alignment here even if we throw an error.
4740 bool ReturnVal = false;
4741
4742 // Reject alignments that aren't either a power of two or zero, for ML.exe
4743 // compatibility. Alignment of zero is silently rounded up to one.
4744 if (Alignment == 0)
4745 Alignment = 1;
4746 if (!isPowerOf2_64(Alignment))
4747 ReturnVal |= Error(AlignmentLoc, "alignment must be a power of 2; was " +
4748 std::to_string(Alignment));
4749
4750 if (emitAlignTo(Alignment))
4751 ReturnVal |= addErrorSuffix(" in align directive");
4752
4753 return ReturnVal;
4754}
4755
4756/// parseDirectiveEven
4757/// ::= even
4758bool MasmParser::parseDirectiveEven() {
4759 if (parseEOL() || emitAlignTo(2))
4760 return addErrorSuffix(" in even directive");
4761
4762 return false;
4763}
4764
4765/// parseDirectiveFile
4766/// ::= .file filename
4767/// ::= .file number [directory] filename [md5 checksum] [source source-text]
4768bool MasmParser::parseDirectiveFile(SMLoc DirectiveLoc) {
4769 // FIXME: I'm not sure what this is.
4770 int64_t FileNumber = -1;
4771 if (getLexer().is(AsmToken::Integer)) {
4772 FileNumber = getTok().getIntVal();
4773 Lex();
4774
4775 if (FileNumber < 0)
4776 return TokError("negative file number");
4777 }
4778
4779 std::string Path;
4780
4781 // Usually the directory and filename together, otherwise just the directory.
4782 // Allow the strings to have escaped octal character sequence.
4783 if (check(getTok().isNot(AsmToken::String),
4784 "unexpected token in '.file' directive") ||
4785 parseEscapedString(Path))
4786 return true;
4787
4788 StringRef Directory;
4790 std::string FilenameData;
4791 if (getLexer().is(AsmToken::String)) {
4792 if (check(FileNumber == -1,
4793 "explicit path specified, but no file number") ||
4794 parseEscapedString(FilenameData))
4795 return true;
4796 Filename = FilenameData;
4797 Directory = Path;
4798 } else {
4799 Filename = Path;
4800 }
4801
4802 uint64_t MD5Hi, MD5Lo;
4803 bool HasMD5 = false;
4804
4805 std::optional<StringRef> Source;
4806 bool HasSource = false;
4807 std::string SourceString;
4808
4809 while (!parseOptionalToken(AsmToken::EndOfStatement)) {
4811 if (check(getTok().isNot(AsmToken::Identifier),
4812 "unexpected token in '.file' directive") ||
4813 parseIdentifier(Keyword))
4814 return true;
4815 if (Keyword == "md5") {
4816 HasMD5 = true;
4817 if (check(FileNumber == -1,
4818 "MD5 checksum specified, but no file number") ||
4819 parseHexOcta(*this, MD5Hi, MD5Lo))
4820 return true;
4821 } else if (Keyword == "source") {
4822 HasSource = true;
4823 if (check(FileNumber == -1,
4824 "source specified, but no file number") ||
4825 check(getTok().isNot(AsmToken::String),
4826 "unexpected token in '.file' directive") ||
4827 parseEscapedString(SourceString))
4828 return true;
4829 } else {
4830 return TokError("unexpected token in '.file' directive");
4831 }
4832 }
4833
4834 if (FileNumber == -1) {
4835 // Ignore the directive if there is no number and the target doesn't support
4836 // numberless .file directives. This allows some portability of assembler
4837 // between different object file formats.
4838 if (getContext().getAsmInfo()->hasSingleParameterDotFile())
4839 getStreamer().emitFileDirective(Filename);
4840 } else {
4841 // In case there is a -g option as well as debug info from directive .file,
4842 // we turn off the -g option, directly use the existing debug info instead.
4843 // Throw away any implicit file table for the assembler source.
4844 if (Ctx.getGenDwarfForAssembly()) {
4846 Ctx.setGenDwarfForAssembly(false);
4847 }
4848
4849 std::optional<MD5::MD5Result> CKMem;
4850 if (HasMD5) {
4851 MD5::MD5Result Sum;
4852 for (unsigned i = 0; i != 8; ++i) {
4853 Sum[i] = uint8_t(MD5Hi >> ((7 - i) * 8));
4854 Sum[i + 8] = uint8_t(MD5Lo >> ((7 - i) * 8));
4855 }
4856 CKMem = Sum;
4857 }
4858 if (HasSource) {
4859 char *SourceBuf = static_cast<char *>(Ctx.allocate(SourceString.size()));
4860 memcpy(SourceBuf, SourceString.data(), SourceString.size());
4861 Source = StringRef(SourceBuf, SourceString.size());
4862 }
4863 if (FileNumber == 0) {
4864 if (Ctx.getDwarfVersion() < 5)
4865 return Warning(DirectiveLoc, "file 0 not supported prior to DWARF-5");
4866 getStreamer().emitDwarfFile0Directive(Directory, Filename, CKMem, Source);
4867 } else {
4868 Expected<unsigned> FileNumOrErr = getStreamer().tryEmitDwarfFileDirective(
4869 FileNumber, Directory, Filename, CKMem, Source);
4870 if (!FileNumOrErr)
4871 return Error(DirectiveLoc, toString(FileNumOrErr.takeError()));
4872 }
4873 // Alert the user if there are some .file directives with MD5 and some not.
4874 // But only do that once.
4875 if (!ReportedInconsistentMD5 && !Ctx.isDwarfMD5UsageConsistent(0)) {
4876 ReportedInconsistentMD5 = true;
4877 return Warning(DirectiveLoc, "inconsistent use of MD5 checksums");
4878 }
4879 }
4880
4881 return false;
4882}
4883
4884/// parseDirectiveLine
4885/// ::= .line [number]
4886bool MasmParser::parseDirectiveLine() {
4887 int64_t LineNumber;
4888 if (getLexer().is(AsmToken::Integer)) {
4889 if (parseIntToken(LineNumber, "unexpected token in '.line' directive"))
4890 return true;
4891 (void)LineNumber;
4892 // FIXME: Do something with the .line.
4893 }
4894 if (parseEOL())
4895 return true;
4896
4897 return false;
4898}
4899
4900/// parseDirectiveLoc
4901/// ::= .loc FileNumber [LineNumber] [ColumnPos] [basic_block] [prologue_end]
4902/// [epilogue_begin] [is_stmt VALUE] [isa VALUE]
4903/// The first number is a file number, must have been previously assigned with
4904/// a .file directive, the second number is the line number and optionally the
4905/// third number is a column position (zero if not specified). The remaining
4906/// optional items are .loc sub-directives.
4907bool MasmParser::parseDirectiveLoc() {
4908 int64_t FileNumber = 0, LineNumber = 0;
4909 SMLoc Loc = getTok().getLoc();
4910 if (parseIntToken(FileNumber, "unexpected token in '.loc' directive") ||
4911 check(FileNumber < 1 && Ctx.getDwarfVersion() < 5, Loc,
4912 "file number less than one in '.loc' directive") ||
4913 check(!getContext().isValidDwarfFileNumber(FileNumber), Loc,
4914 "unassigned file number in '.loc' directive"))
4915 return true;
4916
4917 // optional
4918 if (getLexer().is(AsmToken::Integer)) {
4919 LineNumber = getTok().getIntVal();
4920 if (LineNumber < 0)
4921 return TokError("line number less than zero in '.loc' directive");
4922 Lex();
4923 }
4924
4925 int64_t ColumnPos = 0;
4926 if (getLexer().is(AsmToken::Integer)) {
4927 ColumnPos = getTok().getIntVal();
4928 if (ColumnPos < 0)
4929 return TokError("column position less than zero in '.loc' directive");
4930 Lex();
4931 }
4932
4933 auto PrevFlags = getContext().getCurrentDwarfLoc().getFlags();
4934 unsigned Flags = PrevFlags & DWARF2_FLAG_IS_STMT;
4935 unsigned Isa = 0;
4936 int64_t Discriminator = 0;
4937
4938 auto parseLocOp = [&]() -> bool {
4940 SMLoc Loc = getTok().getLoc();
4941 if (parseIdentifier(Name))
4942 return TokError("unexpected token in '.loc' directive");
4943
4944 if (Name == "basic_block")
4946 else if (Name == "prologue_end")
4948 else if (Name == "epilogue_begin")
4950 else if (Name == "is_stmt") {
4951 Loc = getTok().getLoc();
4952 const MCExpr *Value;
4953 if (parseExpression(Value))
4954 return true;
4955 // The expression must be the constant 0 or 1.
4956 if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
4957 int Value = MCE->getValue();
4958 if (Value == 0)
4959 Flags &= ~DWARF2_FLAG_IS_STMT;
4960 else if (Value == 1)
4962 else
4963 return Error(Loc, "is_stmt value not 0 or 1");
4964 } else {
4965 return Error(Loc, "is_stmt value not the constant value of 0 or 1");
4966 }
4967 } else if (Name == "isa") {
4968 Loc = getTok().getLoc();
4969 const MCExpr *Value;
4970 if (parseExpression(Value))
4971 return true;
4972 // The expression must be a constant greater or equal to 0.
4973 if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
4974 int Value = MCE->getValue();
4975 if (Value < 0)
4976 return Error(Loc, "isa number less than zero");
4977 Isa = Value;
4978 } else {
4979 return Error(Loc, "isa number not a constant value");
4980 }
4981 } else if (Name == "discriminator") {
4982 if (parseAbsoluteExpression(Discriminator))
4983 return true;
4984 } else {
4985 return Error(Loc, "unknown sub-directive in '.loc' directive");
4986 }
4987 return false;
4988 };
4989
4990 if (parseMany(parseLocOp, false /*hasComma*/))
4991 return true;
4992
4993 getStreamer().emitDwarfLocDirective(FileNumber, LineNumber, ColumnPos, Flags,
4994 Isa, Discriminator, StringRef());
4995
4996 return false;
4997}
4998
4999/// parseDirectiveStabs
5000/// ::= .stabs string, number, number, number
5001bool MasmParser::parseDirectiveStabs() {
5002 return TokError("unsupported directive '.stabs'");
5003}
5004
5005/// parseDirectiveCVFile
5006/// ::= .cv_file number filename [checksum] [checksumkind]
5007bool MasmParser::parseDirectiveCVFile() {
5008 SMLoc FileNumberLoc = getTok().getLoc();
5009 int64_t FileNumber;
5010 std::string Filename;
5011 std::string Checksum;
5012 int64_t ChecksumKind = 0;
5013
5014 if (parseIntToken(FileNumber,
5015 "expected file number in '.cv_file' directive") ||
5016 check(FileNumber < 1, FileNumberLoc, "file number less than one") ||
5017 check(getTok().isNot(AsmToken::String),
5018 "unexpected token in '.cv_file' directive") ||
5019 parseEscapedString(Filename))
5020 return true;
5021 if (!parseOptionalToken(AsmToken::EndOfStatement)) {
5022 if (check(getTok().isNot(AsmToken::String),
5023 "unexpected token in '.cv_file' directive") ||
5024 parseEscapedString(Checksum) ||
5025 parseIntToken(ChecksumKind,
5026 "expected checksum kind in '.cv_file' directive") ||
5027 parseEOL())
5028 return true;
5029 }
5030
5031 Checksum = fromHex(Checksum);
5032 void *CKMem = Ctx.allocate(Checksum.size(), 1);
5033 memcpy(CKMem, Checksum.data(), Checksum.size());
5034 ArrayRef<uint8_t> ChecksumAsBytes(reinterpret_cast<const uint8_t *>(CKMem),
5035 Checksum.size());
5036
5037 if (!getStreamer().emitCVFileDirective(FileNumber, Filename, ChecksumAsBytes,
5038 static_cast<uint8_t>(ChecksumKind)))
5039 return Error(FileNumberLoc, "file number already allocated");
5040
5041 return false;
5042}
5043
5044bool MasmParser::parseCVFunctionId(int64_t &FunctionId,
5045 StringRef DirectiveName) {
5046 SMLoc Loc;
5047 return parseTokenLoc(Loc) ||
5048 parseIntToken(FunctionId, "expected function id in '" + DirectiveName +
5049 "' directive") ||
5050 check(FunctionId < 0 || FunctionId >= UINT_MAX, Loc,
5051 "expected function id within range [0, UINT_MAX)");
5052}
5053
5054bool MasmParser::parseCVFileId(int64_t &FileNumber, StringRef DirectiveName) {
5055 SMLoc Loc;
5056 return parseTokenLoc(Loc) ||
5057 parseIntToken(FileNumber, "expected integer in '" + DirectiveName +
5058 "' directive") ||
5059 check(FileNumber < 1, Loc, "file number less than one in '" +
5060 DirectiveName + "' directive") ||
5061 check(!getCVContext().isValidFileNumber(FileNumber), Loc,
5062 "unassigned file number in '" + DirectiveName + "' directive");
5063}
5064
5065/// parseDirectiveCVFuncId
5066/// ::= .cv_func_id FunctionId
5067///
5068/// Introduces a function ID that can be used with .cv_loc.
5069bool MasmParser::parseDirectiveCVFuncId() {
5070 SMLoc FunctionIdLoc = getTok().getLoc();
5071 int64_t FunctionId;
5072
5073 if (parseCVFunctionId(FunctionId, ".cv_func_id") || parseEOL())
5074 return true;
5075
5076 if (!getStreamer().emitCVFuncIdDirective(FunctionId))
5077 return Error(FunctionIdLoc, "function id already allocated");
5078
5079 return false;
5080}
5081
5082/// parseDirectiveCVInlineSiteId
5083/// ::= .cv_inline_site_id FunctionId
5084/// "within" IAFunc
5085/// "inlined_at" IAFile IALine [IACol]
5086///
5087/// Introduces a function ID that can be used with .cv_loc. Includes "inlined
5088/// at" source location information for use in the line table of the caller,
5089/// whether the caller is a real function or another inlined call site.
5090bool MasmParser::parseDirectiveCVInlineSiteId() {
5091 SMLoc FunctionIdLoc = getTok().getLoc();
5092 int64_t FunctionId;
5093 int64_t IAFunc;
5094 int64_t IAFile;
5095 int64_t IALine;
5096 int64_t IACol = 0;
5097
5098 // FunctionId
5099 if (parseCVFunctionId(FunctionId, ".cv_inline_site_id"))
5100 return true;
5101
5102 // "within"
5103 if (check((getLexer().isNot(AsmToken::Identifier) ||
5104 getTok().getIdentifier() != "within"),
5105 "expected 'within' identifier in '.cv_inline_site_id' directive"))
5106 return true;
5107 Lex();
5108
5109 // IAFunc
5110 if (parseCVFunctionId(IAFunc, ".cv_inline_site_id"))
5111 return true;
5112
5113 // "inlined_at"
5114 if (check((getLexer().isNot(AsmToken::Identifier) ||
5115 getTok().getIdentifier() != "inlined_at"),
5116 "expected 'inlined_at' identifier in '.cv_inline_site_id' "
5117 "directive") )
5118 return true;
5119 Lex();
5120
5121 // IAFile IALine
5122 if (parseCVFileId(IAFile, ".cv_inline_site_id") ||
5123 parseIntToken(IALine, "expected line number after 'inlined_at'"))
5124 return true;
5125
5126 // [IACol]
5127 if (getLexer().is(AsmToken::Integer)) {
5128 IACol = getTok().getIntVal();
5129 Lex();
5130 }
5131
5132 if (parseEOL())
5133 return true;
5134
5135 if (!getStreamer().emitCVInlineSiteIdDirective(FunctionId, IAFunc, IAFile,
5136 IALine, IACol, FunctionIdLoc))
5137 return Error(FunctionIdLoc, "function id already allocated");
5138
5139 return false;
5140}
5141
5142/// parseDirectiveCVLoc
5143/// ::= .cv_loc FunctionId FileNumber [LineNumber] [ColumnPos] [prologue_end]
5144/// [is_stmt VALUE]
5145/// The first number is a file number, must have been previously assigned with
5146/// a .file directive, the second number is the line number and optionally the
5147/// third number is a column position (zero if not specified). The remaining
5148/// optional items are .loc sub-directives.
5149bool MasmParser::parseDirectiveCVLoc() {
5150 SMLoc DirectiveLoc = getTok().getLoc();
5151 int64_t FunctionId, FileNumber;
5152 if (parseCVFunctionId(FunctionId, ".cv_loc") ||
5153 parseCVFileId(FileNumber, ".cv_loc"))
5154 return true;
5155
5156 int64_t LineNumber = 0;
5157 if (getLexer().is(AsmToken::Integer)) {
5158 LineNumber = getTok().getIntVal();
5159 if (LineNumber < 0)
5160 return TokError("line number less than zero in '.cv_loc' directive");
5161 Lex();
5162 }
5163
5164 int64_t ColumnPos = 0;
5165 if (getLexer().is(AsmToken::Integer)) {
5166 ColumnPos = getTok().getIntVal();
5167 if (ColumnPos < 0)
5168 return TokError("column position less than zero in '.cv_loc' directive");
5169 Lex();
5170 }
5171
5172 bool PrologueEnd = false;
5173 uint64_t IsStmt = 0;
5174
5175 auto parseOp = [&]() -> bool {
5177 SMLoc Loc = getTok().getLoc();
5178 if (parseIdentifier(Name))
5179 return TokError("unexpected token in '.cv_loc' directive");
5180 if (Name == "prologue_end")
5181 PrologueEnd = true;
5182 else if (Name == "is_stmt") {
5183 Loc = getTok().getLoc();
5184 const MCExpr *Value;
5185 if (parseExpression(Value))
5186 return true;
5187 // The expression must be the constant 0 or 1.
5188 IsStmt = ~0ULL;
5189 if (const auto *MCE = dyn_cast<MCConstantExpr>(Value))
5190 IsStmt = MCE->getValue();
5191
5192 if (IsStmt > 1)
5193 return Error(Loc, "is_stmt value not 0 or 1");
5194 } else {
5195 return Error(Loc, "unknown sub-directive in '.cv_loc' directive");
5196 }
5197 return false;
5198 };
5199
5200 if (parseMany(parseOp, false /*hasComma*/))
5201 return true;
5202
5203 getStreamer().emitCVLocDirective(FunctionId, FileNumber, LineNumber,
5204 ColumnPos, PrologueEnd, IsStmt, StringRef(),
5205 DirectiveLoc);
5206 return false;
5207}
5208
5209/// parseDirectiveCVLinetable
5210/// ::= .cv_linetable FunctionId, FnStart, FnEnd
5211bool MasmParser::parseDirectiveCVLinetable() {
5212 int64_t FunctionId;
5213 StringRef FnStartName, FnEndName;
5214 SMLoc Loc = getTok().getLoc();
5215 if (parseCVFunctionId(FunctionId, ".cv_linetable") ||
5216 parseToken(AsmToken::Comma,
5217 "unexpected token in '.cv_linetable' directive") ||
5218 parseTokenLoc(Loc) || check(parseIdentifier(FnStartName), Loc,
5219 "expected identifier in directive") ||
5220 parseToken(AsmToken::Comma,
5221 "unexpected token in '.cv_linetable' directive") ||
5222 parseTokenLoc(Loc) || check(parseIdentifier(FnEndName), Loc,
5223 "expected identifier in directive"))
5224 return true;
5225
5226 MCSymbol *FnStartSym = getContext().getOrCreateSymbol(FnStartName);
5227 MCSymbol *FnEndSym = getContext().getOrCreateSymbol(FnEndName);
5228
5229 getStreamer().emitCVLinetableDirective(FunctionId, FnStartSym, FnEndSym);
5230 return false;
5231}
5232
5233/// parseDirectiveCVInlineLinetable
5234/// ::= .cv_inline_linetable PrimaryFunctionId FileId LineNum FnStart FnEnd
5235bool MasmParser::parseDirectiveCVInlineLinetable() {
5236 int64_t PrimaryFunctionId, SourceFileId, SourceLineNum;
5237 StringRef FnStartName, FnEndName;
5238 SMLoc Loc = getTok().getLoc();
5239 if (parseCVFunctionId(PrimaryFunctionId, ".cv_inline_linetable") ||
5240 parseTokenLoc(Loc) ||
5241 parseIntToken(
5242 SourceFileId,
5243 "expected SourceField in '.cv_inline_linetable' directive") ||
5244 check(SourceFileId <= 0, Loc,
5245 "File id less than zero in '.cv_inline_linetable' directive") ||
5246 parseTokenLoc(Loc) ||
5247 parseIntToken(
5248 SourceLineNum,
5249 "expected SourceLineNum in '.cv_inline_linetable' directive") ||
5250 check(SourceLineNum < 0, Loc,
5251 "Line number less than zero in '.cv_inline_linetable' directive") ||
5252 parseTokenLoc(Loc) || check(parseIdentifier(FnStartName), Loc,
5253 "expected identifier in directive") ||
5254 parseTokenLoc(Loc) || check(parseIdentifier(FnEndName), Loc,
5255 "expected identifier in directive"))
5256 return true;
5257
5258 if (parseEOL())
5259 return true;
5260
5261 MCSymbol *FnStartSym = getContext().getOrCreateSymbol(FnStartName);
5262 MCSymbol *FnEndSym = getContext().getOrCreateSymbol(FnEndName);
5263 getStreamer().emitCVInlineLinetableDirective(PrimaryFunctionId, SourceFileId,
5264 SourceLineNum, FnStartSym,
5265 FnEndSym);
5266 return false;
5267}
5268
5269void MasmParser::initializeCVDefRangeTypeMap() {
5270 CVDefRangeTypeMap["reg"] = CVDR_DEFRANGE_REGISTER;
5271 CVDefRangeTypeMap["frame_ptr_rel"] = CVDR_DEFRANGE_FRAMEPOINTER_REL;
5272 CVDefRangeTypeMap["subfield_reg"] = CVDR_DEFRANGE_SUBFIELD_REGISTER;
5273 CVDefRangeTypeMap["reg_rel"] = CVDR_DEFRANGE_REGISTER_REL;
5274}
5275
5276/// parseDirectiveCVDefRange
5277/// ::= .cv_def_range RangeStart RangeEnd (GapStart GapEnd)*, bytes*
// Summary: reads pairs of identifiers, turning each pair into a (start, end)
// symbol pair collected in Ranges. Then reads a def_range type keyword, looks
// it up in CVDefRangeTypeMap, parses the comma-separated absolute expressions
// that the selected type needs, fills a header (DRHdr) from them and passes
// Ranges plus the header to the streamer.
//
// NOTE(review): this listing appears to have lost some lines. The first
// argument of two parseToken calls, the declaration of CVTypeIt and the
// declarations of DRHdr in each case are not visible here. Confirm them
// against the real source file before editing.
5278bool MasmParser::parseDirectiveCVDefRange() {
5279 SMLoc Loc;
5280 std::vector<std::pair<const MCSymbol *, const MCSymbol *>> Ranges;
// Each iteration consumes two identifiers and records them as one range.
5281 while (getLexer().is(AsmToken::Identifier)) {
5282 Loc = getLexer().getLoc();
5283 StringRef GapStartName;
5284 if (parseIdentifier(GapStartName))
5285 return Error(Loc, "expected identifier in directive");
5286 MCSymbol *GapStartSym = getContext().getOrCreateSymbol(GapStartName);
5287
5288 Loc = getLexer().getLoc();
5289 StringRef GapEndName;
5290 if (parseIdentifier(GapEndName))
5291 return Error(Loc, "expected identifier in directive");
5292 MCSymbol *GapEndSym = getContext().getOrCreateSymbol(GapEndName);
5293
5294 Ranges.push_back({GapStartSym, GapEndSym});
5295 }
5296
// Loc is not updated past this point, so every later diagnostic reuses the
// location left by the loop above (or a default SMLoc if the loop never ran).
5297 StringRef CVDefRangeTypeStr;
5298 if (parseToken(
5300 "expected comma before def_range type in .cv_def_range directive") ||
5301 parseIdentifier(CVDefRangeTypeStr))
5302 return Error(Loc, "expected def_range type in directive");
5303
// A keyword that is not in the map selects CVDR_DEFRANGE, which falls into
// the default case of the switch and is reported as an error.
5305 CVDefRangeTypeMap.find(CVDefRangeTypeStr);
5306 CVDefRangeType CVDRType = (CVTypeIt == CVDefRangeTypeMap.end())
5307 ? CVDR_DEFRANGE
5308 : CVTypeIt->getValue();
5309 switch (CVDRType) {
// "reg": one operand, the register number.
5310 case CVDR_DEFRANGE_REGISTER: {
5311 int64_t DRRegister;
5312 if (parseToken(AsmToken::Comma, "expected comma before register number in "
5313 ".cv_def_range directive") ||
5314 parseAbsoluteExpression(DRRegister))
5315 return Error(Loc, "expected register number");
5316
5318 DRHdr.Register = DRRegister;
5319 DRHdr.MayHaveNoName = 0;
5320 getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5321 break;
5322 }
// "frame_ptr_rel": one operand, the offset.
5323 case CVDR_DEFRANGE_FRAMEPOINTER_REL: {
5324 int64_t DROffset;
5325 if (parseToken(AsmToken::Comma,
5326 "expected comma before offset in .cv_def_range directive") ||
5327 parseAbsoluteExpression(DROffset))
5328 return Error(Loc, "expected offset value");
5329
5331 DRHdr.Offset = DROffset;
5332 getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5333 break;
5334 }
// "subfield_reg": two operands, the register number and the offset in the
// parent.
5335 case CVDR_DEFRANGE_SUBFIELD_REGISTER: {
5336 int64_t DRRegister;
5337 int64_t DROffsetInParent;
5338 if (parseToken(AsmToken::Comma, "expected comma before register number in "
5339 ".cv_def_range directive") ||
5340 parseAbsoluteExpression(DRRegister))
5341 return Error(Loc, "expected register number");
5342 if (parseToken(AsmToken::Comma,
5343 "expected comma before offset in .cv_def_range directive") ||
5344 parseAbsoluteExpression(DROffsetInParent))
5345 return Error(Loc, "expected offset value");
5346
5348 DRHdr.Register = DRRegister;
5349 DRHdr.MayHaveNoName = 0;
5350 DRHdr.OffsetInParent = DROffsetInParent;
5351 getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5352 break;
5353 }
// "reg_rel": three operands, the register, the flags and the base pointer
// offset.
5354 case CVDR_DEFRANGE_REGISTER_REL: {
5355 int64_t DRRegister;
5356 int64_t DRFlags;
5357 int64_t DRBasePointerOffset;
5358 if (parseToken(AsmToken::Comma, "expected comma before register number in "
5359 ".cv_def_range directive") ||
5360 parseAbsoluteExpression(DRRegister))
5361 return Error(Loc, "expected register value");
5362 if (parseToken(
5364 "expected comma before flag value in .cv_def_range directive") ||
5365 parseAbsoluteExpression(DRFlags))
5366 return Error(Loc, "expected flag value");
5367 if (parseToken(AsmToken::Comma, "expected comma before base pointer offset "
5368 "in .cv_def_range directive") ||
5369 parseAbsoluteExpression(DRBasePointerOffset))
5370 return Error(Loc, "expected base pointer offset value");
5371
5373 DRHdr.Register = DRRegister;
5374 DRHdr.Flags = DRFlags;
5375 DRHdr.BasePointerOffset = DRBasePointerOffset;
5376 getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5377 break;
5378 }
5379 default:
5380 return Error(Loc, "unexpected def_range type in .cv_def_range directive");
5381 }
// NOTE(review): every successful case breaks out of the switch and lands on
// this `return true`, although no diagnostic has been issued on that path.
// The other directive parsers in this file return false on success. Confirm
// whether this is intentional before relying on the result.
5382 return true;
5383}
5384
5385/// parseDirectiveCVString
5386/// ::= .cv_stringtable "string"
5387bool MasmParser::parseDirectiveCVString() {
5388 std::string Data;
5389 if (checkForValidSection() || parseEscapedString(Data))
5390 return addErrorSuffix(" in '.cv_string' directive");
5391
5392 // Put the string in the table and emit the offset.
5393 std::pair<StringRef, unsigned> Insertion =
5394 getCVContext().addToStringTable(Data);
5395 getStreamer().emitIntValue(Insertion.second, 4);
5396 return false;
5397}
5398
5399/// parseDirectiveCVStringTable
5400/// ::= .cv_stringtable
5401bool MasmParser::parseDirectiveCVStringTable() {
5402 getStreamer().emitCVStringTableDirective();
5403 return false;
5404}
5405
5406/// parseDirectiveCVFileChecksums
5407/// ::= .cv_filechecksums
5408bool MasmParser::parseDirectiveCVFileChecksums() {
5409 getStreamer().emitCVFileChecksumsDirective();
5410 return false;
5411}
5412
5413/// parseDirectiveCVFileChecksumOffset
5414/// ::= .cv_filechecksumoffset fileno
5415bool MasmParser::parseDirectiveCVFileChecksumOffset() {
5416 int64_t FileNo;
5417 if (parseIntToken(FileNo, "expected identifier in directive"))
5418 return true;
5419 if (parseEOL())
5420 return true;
5421 getStreamer().emitCVFileChecksumOffsetDirective(FileNo);
5422 return false;
5423}
5424
5425/// parseDirectiveCVFPOData
5426/// ::= .cv_fpo_data procsym
5427bool MasmParser::parseDirectiveCVFPOData() {
5428 SMLoc DirLoc = getLexer().getLoc();
5429 StringRef ProcName;
5430 if (parseIdentifier(ProcName))
5431 return TokError("expected symbol name");
5432 if (parseEOL("unexpected tokens"))
5433 return addErrorSuffix(" in '.cv_fpo_data' directive");
5434 MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName);
5435 getStreamer().emitCVFPOData(ProcSym, DirLoc);
5436 return false;
5437}
5438
5439/// parseDirectiveCFISections
5440/// ::= .cfi_sections section [, section]
5441bool MasmParser::parseDirectiveCFISections() {
5443 bool EH = false;
5444 bool Debug = false;
5445
5446 if (parseIdentifier(Name))
5447 return TokError("Expected an identifier");
5448
5449 if (Name == ".eh_frame")
5450 EH = true;
5451 else if (Name == ".debug_frame")
5452 Debug = true;
5453
5454 if (getLexer().is(AsmToken::Comma)) {
5455 Lex();