LLVM  14.0.0git
MasmParser.cpp
Go to the documentation of this file.
1 //===- MasmParser.cpp - Parser for Assembly Files -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This class implements the parser for assembly files.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/ADT/APFloat.h"
14 #include "llvm/ADT/APInt.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/None.h"
18 #include "llvm/ADT/Optional.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallString.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/StringExtras.h"
23 #include "llvm/ADT/StringMap.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/ADT/StringSwitch.h"
26 #include "llvm/ADT/Twine.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCCodeView.h"
31 #include "llvm/MC/MCContext.h"
32 #include "llvm/MC/MCDirectives.h"
33 #include "llvm/MC/MCDwarf.h"
34 #include "llvm/MC/MCExpr.h"
35 #include "llvm/MC/MCInstPrinter.h"
36 #include "llvm/MC/MCInstrDesc.h"
37 #include "llvm/MC/MCInstrInfo.h"
47 #include "llvm/MC/MCRegisterInfo.h"
48 #include "llvm/MC/MCSection.h"
49 #include "llvm/MC/MCStreamer.h"
50 #include "llvm/MC/MCSymbol.h"
52 #include "llvm/MC/MCValue.h"
53 #include "llvm/Support/Casting.h"
56 #include "llvm/Support/Format.h"
57 #include "llvm/Support/MD5.h"
60 #include "llvm/Support/Path.h"
61 #include "llvm/Support/SMLoc.h"
62 #include "llvm/Support/SourceMgr.h"
64 #include <algorithm>
65 #include <cassert>
66 #include <cctype>
67 #include <climits>
68 #include <cstddef>
69 #include <cstdint>
70 #include <ctime>
71 #include <deque>
72 #include <memory>
73 #include <sstream>
74 #include <string>
75 #include <tuple>
76 #include <utility>
77 #include <vector>
78 
79 using namespace llvm;
80 
82 
83 namespace {
84 
85 /// Helper types for tracking macro definitions.
86 typedef std::vector<AsmToken> MCAsmMacroArgument;
87 typedef std::vector<MCAsmMacroArgument> MCAsmMacroArguments;
88 
89 /// Helper class for storing information about an active macro instantiation.
90 struct MacroInstantiation {
91  /// The location of the instantiation.
92  SMLoc InstantiationLoc;
93 
94  /// The buffer where parsing should resume upon instantiation completion.
95  unsigned ExitBuffer;
96 
97  /// The location where parsing should resume upon instantiation completion.
98  SMLoc ExitLoc;
99 
100  /// The depth of TheCondStack at the start of the instantiation.
101  size_t CondStackDepth;
102 };
103 
// State recorded while parsing a single statement. An instance must be created
// with an AsmRewrite list pointer: the default constructor is deleted and the
// only other constructor stores its argument in AsmRewrites.
104 struct ParseStatementInfo {
105  /// The parsed operands from the last parsed statement.
// NOTE(review): the member declaration this comment describes is not present
// in this view of the file (there is a gap in the original line numbering
// here) -- confirm against the upstream source.
107 
108  /// The opcode from the last parsed instruction.
// ~0U is the initial value, i.e. before any opcode has been stored.
109  unsigned Opcode = ~0U;
110 
111  /// Was there an error parsing the inline assembly?
112  bool ParseError = false;
113 
114  /// The value associated with a macro exit.
115  Optional<std::string> ExitValue;
116 
// Rewrite list handed in through the constructor; this struct only stores the
// pointer.
117  SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr;
118 
119  ParseStatementInfo() = delete;
120  ParseStatementInfo(SmallVectorImpl<AsmRewrite> *rewrites)
121  : AsmRewrites(rewrites) {}
122 };
123 
/// Discriminates the kind of initializer data stored for a struct field.
enum FieldType {
  /// Initializer: integer expression, stored as an MCExpr.
  FT_INTEGRAL = 0,
  /// Initializer: real number, stored as an APInt.
  FT_REAL = 1,
  /// Initializer: struct initializer, stored recursively.
  FT_STRUCT = 2
};
129 
130 struct FieldInfo;
131 struct StructInfo {
132  StringRef Name;
133  bool IsUnion = false;
134  bool Initializable = true;
135  unsigned Alignment = 0;
136  unsigned AlignmentSize = 0;
137  unsigned NextOffset = 0;
138  unsigned Size = 0;
139  std::vector<FieldInfo> Fields;
140  StringMap<size_t> FieldsByName;
141 
142  FieldInfo &addField(StringRef FieldName, FieldType FT,
143  unsigned FieldAlignmentSize);
144 
145  StructInfo() = default;
146 
147  StructInfo(StringRef StructName, bool Union, unsigned AlignmentValue)
148  : Name(StructName), IsUnion(Union), Alignment(AlignmentValue) {}
149 };
150 
151 // FIXME: This should probably use a class hierarchy, raw pointers between the
152 // objects, and dynamic type resolution instead of a union. On the other hand,
153 // ownership then becomes much more complicated; the obvious thing would be to
154 // use BumpPtrAllocator, but the lack of a destructor makes that messy.
155 
156 struct StructInitializer;
157 struct IntFieldInfo {
159 
160  IntFieldInfo() = default;
161  IntFieldInfo(const SmallVector<const MCExpr *, 1> &V) { Values = V; }
162  IntFieldInfo(SmallVector<const MCExpr *, 1> &&V) { Values = V; }
163 };
164 struct RealFieldInfo {
165  SmallVector<APInt, 1> AsIntValues;
166 
167  RealFieldInfo() = default;
168  RealFieldInfo(const SmallVector<APInt, 1> &V) { AsIntValues = V; }
169  RealFieldInfo(SmallVector<APInt, 1> &&V) { AsIntValues = V; }
170 };
171 struct StructFieldInfo {
172  std::vector<StructInitializer> Initializers;
173  StructInfo Structure;
174 
175  StructFieldInfo() = default;
176  StructFieldInfo(const std::vector<StructInitializer> &V, StructInfo S) {
177  Initializers = V;
178  Structure = S;
179  }
180  StructFieldInfo(std::vector<StructInitializer> &&V, StructInfo S) {
181  Initializers = V;
182  Structure = S;
183  }
184 };
185 
186 class FieldInitializer {
187 public:
188  FieldType FT;
189  union {
190  IntFieldInfo IntInfo;
191  RealFieldInfo RealInfo;
192  StructFieldInfo StructInfo;
193  };
194 
195  ~FieldInitializer() {
196  switch (FT) {
197  case FT_INTEGRAL:
198  IntInfo.~IntFieldInfo();
199  break;
200  case FT_REAL:
201  RealInfo.~RealFieldInfo();
202  break;
203  case FT_STRUCT:
204  StructInfo.~StructFieldInfo();
205  break;
206  }
207  }
208 
209  FieldInitializer(FieldType FT) : FT(FT) {
210  switch (FT) {
211  case FT_INTEGRAL:
212  new (&IntInfo) IntFieldInfo();
213  break;
214  case FT_REAL:
215  new (&RealInfo) RealFieldInfo();
216  break;
217  case FT_STRUCT:
218  new (&StructInfo) StructFieldInfo();
219  break;
220  }
221  }
222 
223  FieldInitializer(SmallVector<const MCExpr *, 1> &&Values) : FT(FT_INTEGRAL) {
224  new (&IntInfo) IntFieldInfo(Values);
225  }
226 
227  FieldInitializer(SmallVector<APInt, 1> &&AsIntValues) : FT(FT_REAL) {
228  new (&RealInfo) RealFieldInfo(AsIntValues);
229  }
230 
231  FieldInitializer(std::vector<StructInitializer> &&Initializers,
232  struct StructInfo Structure)
233  : FT(FT_STRUCT) {
234  new (&StructInfo) StructFieldInfo(Initializers, Structure);
235  }
236 
237  FieldInitializer(const FieldInitializer &Initializer) : FT(Initializer.FT) {
238  switch (FT) {
239  case FT_INTEGRAL:
240  new (&IntInfo) IntFieldInfo(Initializer.IntInfo);
241  break;
242  case FT_REAL:
243  new (&RealInfo) RealFieldInfo(Initializer.RealInfo);
244  break;
245  case FT_STRUCT:
246  new (&StructInfo) StructFieldInfo(Initializer.StructInfo);
247  break;
248  }
249  }
250 
251  FieldInitializer(FieldInitializer &&Initializer) : FT(Initializer.FT) {
252  switch (FT) {
253  case FT_INTEGRAL:
254  new (&IntInfo) IntFieldInfo(Initializer.IntInfo);
255  break;
256  case FT_REAL:
257  new (&RealInfo) RealFieldInfo(Initializer.RealInfo);
258  break;
259  case FT_STRUCT:
260  new (&StructInfo) StructFieldInfo(Initializer.StructInfo);
261  break;
262  }
263  }
264 
265  FieldInitializer &operator=(const FieldInitializer &Initializer) {
266  if (FT != Initializer.FT) {
267  switch (FT) {
268  case FT_INTEGRAL:
269  IntInfo.~IntFieldInfo();
270  break;
271  case FT_REAL:
272  RealInfo.~RealFieldInfo();
273  break;
274  case FT_STRUCT:
275  StructInfo.~StructFieldInfo();
276  break;
277  }
278  }
279  FT = Initializer.FT;
280  switch (FT) {
281  case FT_INTEGRAL:
282  IntInfo = Initializer.IntInfo;
283  break;
284  case FT_REAL:
285  RealInfo = Initializer.RealInfo;
286  break;
287  case FT_STRUCT:
288  StructInfo = Initializer.StructInfo;
289  break;
290  }
291  return *this;
292  }
293 
294  FieldInitializer &operator=(FieldInitializer &&Initializer) {
295  if (FT != Initializer.FT) {
296  switch (FT) {
297  case FT_INTEGRAL:
298  IntInfo.~IntFieldInfo();
299  break;
300  case FT_REAL:
301  RealInfo.~RealFieldInfo();
302  break;
303  case FT_STRUCT:
304  StructInfo.~StructFieldInfo();
305  break;
306  }
307  }
308  FT = Initializer.FT;
309  switch (FT) {
310  case FT_INTEGRAL:
311  IntInfo = Initializer.IntInfo;
312  break;
313  case FT_REAL:
314  RealInfo = Initializer.RealInfo;
315  break;
316  case FT_STRUCT:
317  StructInfo = Initializer.StructInfo;
318  break;
319  }
320  return *this;
321  }
322 };
323 
324 struct StructInitializer {
325  std::vector<FieldInitializer> FieldInitializers;
326 };
327 
328 struct FieldInfo {
329  // Offset of the field within the containing STRUCT.
330  unsigned Offset = 0;
331 
332  // Total size of the field (= LengthOf * Type).
333  unsigned SizeOf = 0;
334 
335  // Number of elements in the field (1 if scalar, >1 if an array).
336  unsigned LengthOf = 0;
337 
338  // Size of a single entry in this field, in bytes ("type" in MASM standards).
339  unsigned Type = 0;
340 
341  FieldInitializer Contents;
342 
343  FieldInfo(FieldType FT) : Contents(FT) {}
344 };
345 
346 FieldInfo &StructInfo::addField(StringRef FieldName, FieldType FT,
347  unsigned FieldAlignmentSize) {
348  if (!FieldName.empty())
349  FieldsByName[FieldName.lower()] = Fields.size();
350  Fields.emplace_back(FT);
351  FieldInfo &Field = Fields.back();
352  Field.Offset =
353  llvm::alignTo(NextOffset, std::min(Alignment, FieldAlignmentSize));
354  if (!IsUnion) {
355  NextOffset = std::max(NextOffset, Field.Offset);
356  }
357  AlignmentSize = std::max(AlignmentSize, FieldAlignmentSize);
358  return Field;
359 }
360 
361 /// The concrete assembly parser instance.
362 // Note that this is a full MCAsmParser, not an MCAsmParserExtension!
363 // It's a peer of AsmParser, not of COFFAsmParser, WasmAsmParser, etc.
364 class MasmParser : public MCAsmParser {
365 private:
366  AsmLexer Lexer;
367  MCContext &Ctx;
368  MCStreamer &Out;
369  const MCAsmInfo &MAI;
370  SourceMgr &SrcMgr;
371  SourceMgr::DiagHandlerTy SavedDiagHandler;
372  void *SavedDiagContext;
373  std::unique_ptr<MCAsmParserExtension> PlatformParser;
374 
375  /// This is the current buffer index we're lexing from as managed by the
376  /// SourceMgr object.
377  unsigned CurBuffer;
378 
379  /// time of assembly
380  struct tm TM;
381 
382  std::vector<bool> EndStatementAtEOFStack;
383 
384  AsmCond TheCondState;
385  std::vector<AsmCond> TheCondStack;
386 
387  /// maps directive names to handler methods in parser
388  /// extensions. Extensions register themselves in this map by calling
389  /// addDirectiveHandler.
390  StringMap<ExtensionDirectiveHandler> ExtensionDirectiveMap;
391 
392  /// maps assembly-time variable names to variables.
393  struct Variable {
394  enum RedefinableKind { NOT_REDEFINABLE, WARN_ON_REDEFINITION, REDEFINABLE };
395 
396  StringRef Name;
397  RedefinableKind Redefinable = REDEFINABLE;
398  bool IsText = false;
399  std::string TextValue;
400  };
401  StringMap<Variable> Variables;
402 
403  /// Stack of active struct definitions.
404  SmallVector<StructInfo, 1> StructInProgress;
405 
406  /// Maps struct tags to struct definitions.
407  StringMap<StructInfo> Structs;
408 
409  /// Maps data location names to types.
410  StringMap<AsmTypeInfo> KnownType;
411 
412  /// Stack of active macro instantiations.
413  std::vector<MacroInstantiation*> ActiveMacros;
414 
415  /// List of bodies of anonymous macros.
416  std::deque<MCAsmMacro> MacroLikeBodies;
417 
418  /// Keeps track of how many .macro's have been instantiated.
419  unsigned NumOfMacroInstantiations;
420 
421  /// The values from the last parsed cpp hash file line comment if any.
422  struct CppHashInfoTy {
423  StringRef Filename;
424  int64_t LineNumber;
425  SMLoc Loc;
426  unsigned Buf;
427  CppHashInfoTy() : Filename(), LineNumber(0), Loc(), Buf(0) {}
428  };
429  CppHashInfoTy CppHashInfo;
430 
431  /// The filename from the first cpp hash file line comment, if any.
432  StringRef FirstCppHashFilename;
433 
434  /// List of forward directional labels for diagnosis at the end.
436 
437  /// AssemblerDialect. ~0U means unset value and use value provided by MAI.
438  /// Defaults to 1U, meaning Intel.
439  unsigned AssemblerDialect = 1U;
440 
441  /// is Darwin compatibility enabled?
442  bool IsDarwin = false;
443 
444  /// Are we parsing ms-style inline assembly?
445  bool ParsingMSInlineAsm = false;
446 
447  /// Did we already inform the user about inconsistent MD5 usage?
448  bool ReportedInconsistentMD5 = false;
449 
450  // Current <...> expression depth.
451  unsigned AngleBracketDepth = 0U;
452 
453  // Number of locals defined.
454  uint16_t LocalCounter = 0;
455 
456 public:
457  MasmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
458  const MCAsmInfo &MAI, struct tm TM, unsigned CB = 0);
459  MasmParser(const MasmParser &) = delete;
460  MasmParser &operator=(const MasmParser &) = delete;
461  ~MasmParser() override;
462 
463  bool Run(bool NoInitialTextSection, bool NoFinalize = false) override;
464 
465  void addDirectiveHandler(StringRef Directive,
466  ExtensionDirectiveHandler Handler) override {
467  ExtensionDirectiveMap[Directive] = Handler;
468  if (DirectiveKindMap.find(Directive) == DirectiveKindMap.end()) {
469  DirectiveKindMap[Directive] = DK_HANDLER_DIRECTIVE;
470  }
471  }
472 
473  void addAliasForDirective(StringRef Directive, StringRef Alias) override {
474  DirectiveKindMap[Directive] = DirectiveKindMap[Alias];
475  }
476 
477  /// @name MCAsmParser Interface
478  /// {
479 
480  SourceMgr &getSourceManager() override { return SrcMgr; }
481  MCAsmLexer &getLexer() override { return Lexer; }
482  MCContext &getContext() override { return Ctx; }
483  MCStreamer &getStreamer() override { return Out; }
484 
485  CodeViewContext &getCVContext() { return Ctx.getCVContext(); }
486 
487  unsigned getAssemblerDialect() override {
488  if (AssemblerDialect == ~0U)
489  return MAI.getAssemblerDialect();
490  else
491  return AssemblerDialect;
492  }
493  void setAssemblerDialect(unsigned i) override {
494  AssemblerDialect = i;
495  }
496 
497  void Note(SMLoc L, const Twine &Msg, SMRange Range = None) override;
498  bool Warning(SMLoc L, const Twine &Msg, SMRange Range = None) override;
499  bool printError(SMLoc L, const Twine &Msg, SMRange Range = None) override;
500 
501  enum ExpandKind { ExpandMacros, DoNotExpandMacros };
502  const AsmToken &Lex(ExpandKind ExpandNextToken);
503  const AsmToken &Lex() override { return Lex(ExpandMacros); }
504 
505  void setParsingMSInlineAsm(bool V) override {
506  ParsingMSInlineAsm = V;
507  // When parsing MS inline asm, we must lex 0b1101 and 0ABCH as binary and
508  // hex integer literals.
509  Lexer.setLexMasmIntegers(V);
510  }
511  bool isParsingMSInlineAsm() override { return ParsingMSInlineAsm; }
512 
513  bool isParsingMasm() const override { return true; }
514 
515  bool defineMacro(StringRef Name, StringRef Value) override;
516 
517  bool lookUpField(StringRef Name, AsmFieldInfo &Info) const override;
518  bool lookUpField(StringRef Base, StringRef Member,
519  AsmFieldInfo &Info) const override;
520 
521  bool lookUpType(StringRef Name, AsmTypeInfo &Info) const override;
522 
523  bool parseMSInlineAsm(std::string &AsmString, unsigned &NumOutputs,
524  unsigned &NumInputs,
525  SmallVectorImpl<std::pair<void *, bool>> &OpDecls,
526  SmallVectorImpl<std::string> &Constraints,
528  const MCInstrInfo *MII, const MCInstPrinter *IP,
529  MCAsmParserSemaCallback &SI) override;
530 
531  bool parseExpression(const MCExpr *&Res);
532  bool parseExpression(const MCExpr *&Res, SMLoc &EndLoc) override;
533  bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
534  AsmTypeInfo *TypeInfo) override;
535  bool parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) override;
536  bool parseParenExprOfDepth(unsigned ParenDepth, const MCExpr *&Res,
537  SMLoc &EndLoc) override;
538  bool parseAbsoluteExpression(int64_t &Res) override;
539 
540  /// Parse a floating point expression using the float \p Semantics
541  /// and set \p Res to the value.
542  bool parseRealValue(const fltSemantics &Semantics, APInt &Res);
543 
544  /// Parse an identifier or string (as a quoted identifier)
545  /// and set \p Res to the identifier contents.
546  enum IdentifierPositionKind { StandardPosition, StartOfStatement };
547  bool parseIdentifier(StringRef &Res, IdentifierPositionKind Position);
548  bool parseIdentifier(StringRef &Res) override {
549  return parseIdentifier(Res, StandardPosition);
550  }
551  void eatToEndOfStatement() override;
552 
553  bool checkForValidSection() override;
554 
555  /// }
556 
557 private:
558  bool expandMacros();
559  const AsmToken peekTok(bool ShouldSkipSpace = true);
560 
561  bool parseStatement(ParseStatementInfo &Info,
563  bool parseCurlyBlockScope(SmallVectorImpl<AsmRewrite>& AsmStrRewrites);
564  bool parseCppHashLineFilenameComment(SMLoc L);
565 
566  bool expandMacro(raw_svector_ostream &OS, StringRef Body,
569  const std::vector<std::string> &Locals, SMLoc L);
570 
571  /// Are we inside a macro instantiation?
572  bool isInsideMacroInstantiation() {return !ActiveMacros.empty();}
573 
574  /// Handle entry to macro instantiation.
575  ///
576  /// \param M The macro.
577  /// \param NameLoc Instantiation location.
578  bool handleMacroEntry(
579  const MCAsmMacro *M, SMLoc NameLoc,
581 
582  /// Handle invocation of macro function.
583  ///
584  /// \param M The macro.
585  /// \param NameLoc Invocation location.
586  bool handleMacroInvocation(const MCAsmMacro *M, SMLoc NameLoc);
587 
588  /// Handle exit from macro instantiation.
589  void handleMacroExit();
590 
591  /// Extract AsmTokens for a macro argument.
592  bool
593  parseMacroArgument(const MCAsmMacroParameter *MP, MCAsmMacroArgument &MA,
595 
596  /// Parse all macro arguments for a given macro.
597  bool
598  parseMacroArguments(const MCAsmMacro *M, MCAsmMacroArguments &A,
600 
601  void printMacroInstantiations();
602 
603  bool expandStatement(SMLoc Loc);
604 
605  void printMessage(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Msg,
606  SMRange Range = None) const {
607  ArrayRef<SMRange> Ranges(Range);
608  SrcMgr.PrintMessage(Loc, Kind, Msg, Ranges);
609  }
610  static void DiagHandler(const SMDiagnostic &Diag, void *Context);
611 
612  bool lookUpField(const StructInfo &Structure, StringRef Member,
613  AsmFieldInfo &Info) const;
614 
615  /// Should we emit DWARF describing this assembler source? (Returns false if
616  /// the source has .file directives, which means we don't want to generate
617  /// info describing the assembler source itself.)
618  bool enabledGenDwarfForAssembly();
619 
620  /// Enter the specified file. This returns true on failure.
621  bool enterIncludeFile(const std::string &Filename);
622 
623  /// Reset the current lexer position to that given by \p Loc. The
624  /// current token is not set; clients should ensure Lex() is called
625  /// subsequently.
626  ///
627  /// \param InBuffer If not 0, should be the known buffer id that contains the
628  /// location.
629  void jumpToLoc(SMLoc Loc, unsigned InBuffer = 0,
630  bool EndStatementAtEOF = true);
631 
632  /// Parse up to a token of kind \p EndTok and return the contents from the
633  /// current token up to (but not including) this token; the current token on
634  /// exit will be either this kind or EOF. Reads through instantiated macro
635  /// functions and text macros.
636  SmallVector<StringRef, 1> parseStringRefsTo(AsmToken::TokenKind EndTok);
637  std::string parseStringTo(AsmToken::TokenKind EndTok);
638 
639  /// Parse up to the end of statement and return the contents from the current
640  /// token until the end of the statement; the current token on exit will be
641  /// either the EndOfStatement or EOF.
642  StringRef parseStringToEndOfStatement() override;
643 
644  bool parseTextItem(std::string &Data);
645 
646  unsigned getBinOpPrecedence(AsmToken::TokenKind K,
648 
649  bool parseBinOpRHS(unsigned Precedence, const MCExpr *&Res, SMLoc &EndLoc);
650  bool parseParenExpr(const MCExpr *&Res, SMLoc &EndLoc);
651  bool parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc);
652 
653  bool parseRegisterOrRegisterNumber(int64_t &Register, SMLoc DirectiveLoc);
654 
655  bool parseCVFunctionId(int64_t &FunctionId, StringRef DirectiveName);
656  bool parseCVFileId(int64_t &FileId, StringRef DirectiveName);
657 
658  // Generic (target and platform independent) directive parsing.
659  enum DirectiveKind {
660  DK_NO_DIRECTIVE, // Placeholder
661  DK_HANDLER_DIRECTIVE,
662  DK_ASSIGN,
663  DK_EQU,
664  DK_TEXTEQU,
665  DK_ASCII,
666  DK_ASCIZ,
667  DK_STRING,
668  DK_BYTE,
669  DK_SBYTE,
670  DK_WORD,
671  DK_SWORD,
672  DK_DWORD,
673  DK_SDWORD,
674  DK_FWORD,
675  DK_QWORD,
676  DK_SQWORD,
677  DK_DB,
678  DK_DD,
679  DK_DF,
680  DK_DQ,
681  DK_DW,
682  DK_REAL4,
683  DK_REAL8,
684  DK_REAL10,
685  DK_ALIGN,
686  DK_EVEN,
687  DK_ORG,
688  DK_ENDR,
689  DK_EXTERN,
690  DK_PUBLIC,
691  DK_COMM,
692  DK_COMMENT,
693  DK_INCLUDE,
694  DK_REPEAT,
695  DK_WHILE,
696  DK_FOR,
697  DK_FORC,
698  DK_IF,
699  DK_IFE,
700  DK_IFB,
701  DK_IFNB,
702  DK_IFDEF,
703  DK_IFNDEF,
704  DK_IFDIF,
705  DK_IFDIFI,
706  DK_IFIDN,
707  DK_IFIDNI,
708  DK_ELSEIF,
709  DK_ELSEIFE,
710  DK_ELSEIFB,
711  DK_ELSEIFNB,
712  DK_ELSEIFDEF,
713  DK_ELSEIFNDEF,
714  DK_ELSEIFDIF,
715  DK_ELSEIFDIFI,
716  DK_ELSEIFIDN,
717  DK_ELSEIFIDNI,
718  DK_ELSE,
719  DK_ENDIF,
720  DK_FILE,
721  DK_LINE,
722  DK_LOC,
723  DK_STABS,
724  DK_CV_FILE,
725  DK_CV_FUNC_ID,
726  DK_CV_INLINE_SITE_ID,
727  DK_CV_LOC,
728  DK_CV_LINETABLE,
729  DK_CV_INLINE_LINETABLE,
730  DK_CV_DEF_RANGE,
731  DK_CV_STRINGTABLE,
732  DK_CV_STRING,
733  DK_CV_FILECHECKSUMS,
734  DK_CV_FILECHECKSUM_OFFSET,
735  DK_CV_FPO_DATA,
736  DK_CFI_SECTIONS,
737  DK_CFI_STARTPROC,
738  DK_CFI_ENDPROC,
739  DK_CFI_DEF_CFA,
740  DK_CFI_DEF_CFA_OFFSET,
741  DK_CFI_ADJUST_CFA_OFFSET,
742  DK_CFI_DEF_CFA_REGISTER,
743  DK_CFI_OFFSET,
744  DK_CFI_REL_OFFSET,
745  DK_CFI_PERSONALITY,
746  DK_CFI_LSDA,
747  DK_CFI_REMEMBER_STATE,
748  DK_CFI_RESTORE_STATE,
749  DK_CFI_SAME_VALUE,
750  DK_CFI_RESTORE,
751  DK_CFI_ESCAPE,
752  DK_CFI_RETURN_COLUMN,
753  DK_CFI_SIGNAL_FRAME,
754  DK_CFI_UNDEFINED,
755  DK_CFI_REGISTER,
756  DK_CFI_WINDOW_SAVE,
757  DK_CFI_B_KEY_FRAME,
758  DK_MACRO,
759  DK_EXITM,
760  DK_ENDM,
761  DK_PURGE,
762  DK_ERR,
763  DK_ERRB,
764  DK_ERRNB,
765  DK_ERRDEF,
766  DK_ERRNDEF,
767  DK_ERRDIF,
768  DK_ERRDIFI,
769  DK_ERRIDN,
770  DK_ERRIDNI,
771  DK_ERRE,
772  DK_ERRNZ,
773  DK_ECHO,
774  DK_STRUCT,
775  DK_UNION,
776  DK_ENDS,
777  DK_END,
778  DK_PUSHFRAME,
779  DK_PUSHREG,
780  DK_SAVEREG,
781  DK_SAVEXMM128,
782  DK_SETFRAME,
783  DK_RADIX,
784  };
785 
786  /// Maps directive name --> DirectiveKind enum, for directives parsed by this
787  /// class.
788  StringMap<DirectiveKind> DirectiveKindMap;
789 
790  bool isMacroLikeDirective();
791 
792  // Codeview def_range type parsing.
793  enum CVDefRangeType {
794  CVDR_DEFRANGE = 0, // Placeholder
795  CVDR_DEFRANGE_REGISTER,
796  CVDR_DEFRANGE_FRAMEPOINTER_REL,
797  CVDR_DEFRANGE_SUBFIELD_REGISTER,
798  CVDR_DEFRANGE_REGISTER_REL
799  };
800 
801  /// Maps Codeview def_range types --> CVDefRangeType enum, for Codeview
802  /// def_range types parsed by this class.
803  StringMap<CVDefRangeType> CVDefRangeTypeMap;
804 
805  // Built-in symbols (date, time, line, ...) recognized by this parser.
806  enum BuiltinSymbol {
807  BI_NO_SYMBOL, // Placeholder
808  BI_DATE,
809  BI_TIME,
810  BI_VERSION,
811  BI_FILECUR,
812  BI_FILENAME,
813  BI_LINE,
814  BI_CURSEG,
815  BI_CPU,
816  BI_INTERFACE,
817  BI_CODE,
818  BI_DATA,
819  BI_FARDATA,
820  BI_WORDSIZE,
821  BI_CODESIZE,
822  BI_DATASIZE,
823  BI_MODEL,
824  BI_STACK,
825  };
826 
827  /// Maps builtin name --> BuiltinSymbol enum, for builtins handled by this
828  /// class.
829  StringMap<BuiltinSymbol> BuiltinSymbolMap;
830 
831  const MCExpr *evaluateBuiltinValue(BuiltinSymbol Symbol, SMLoc StartLoc);
832 
833  llvm::Optional<std::string> evaluateBuiltinTextMacro(BuiltinSymbol Symbol,
834  SMLoc StartLoc);
835 
836  // ".ascii", ".asciz", ".string"
837  bool parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated);
838 
839  // "byte", "word", ...
840  bool emitIntValue(const MCExpr *Value, unsigned Size);
841  bool parseScalarInitializer(unsigned Size,
843  unsigned StringPadLength = 0);
844  bool parseScalarInstList(
845  unsigned Size, SmallVectorImpl<const MCExpr *> &Values,
847  bool emitIntegralValues(unsigned Size, unsigned *Count = nullptr);
848  bool addIntegralField(StringRef Name, unsigned Size);
849  bool parseDirectiveValue(StringRef IDVal, unsigned Size);
850  bool parseDirectiveNamedValue(StringRef TypeName, unsigned Size,
851  StringRef Name, SMLoc NameLoc);
852 
853  // "real4", "real8", "real10"
854  bool emitRealValues(const fltSemantics &Semantics, unsigned *Count = nullptr);
855  bool addRealField(StringRef Name, const fltSemantics &Semantics, size_t Size);
856  bool parseDirectiveRealValue(StringRef IDVal, const fltSemantics &Semantics,
857  size_t Size);
858  bool parseRealInstList(
859  const fltSemantics &Semantics, SmallVectorImpl<APInt> &Values,
861  bool parseDirectiveNamedRealValue(StringRef TypeName,
862  const fltSemantics &Semantics,
863  unsigned Size, StringRef Name,
864  SMLoc NameLoc);
865 
866  bool parseOptionalAngleBracketOpen();
867  bool parseAngleBracketClose(const Twine &Msg = "expected '>'");
868 
869  bool parseFieldInitializer(const FieldInfo &Field,
870  FieldInitializer &Initializer);
871  bool parseFieldInitializer(const FieldInfo &Field,
872  const IntFieldInfo &Contents,
873  FieldInitializer &Initializer);
874  bool parseFieldInitializer(const FieldInfo &Field,
875  const RealFieldInfo &Contents,
876  FieldInitializer &Initializer);
877  bool parseFieldInitializer(const FieldInfo &Field,
878  const StructFieldInfo &Contents,
879  FieldInitializer &Initializer);
880 
881  bool parseStructInitializer(const StructInfo &Structure,
882  StructInitializer &Initializer);
883  bool parseStructInstList(
884  const StructInfo &Structure, std::vector<StructInitializer> &Initializers,
886 
887  bool emitFieldValue(const FieldInfo &Field);
888  bool emitFieldValue(const FieldInfo &Field, const IntFieldInfo &Contents);
889  bool emitFieldValue(const FieldInfo &Field, const RealFieldInfo &Contents);
890  bool emitFieldValue(const FieldInfo &Field, const StructFieldInfo &Contents);
891 
892  bool emitFieldInitializer(const FieldInfo &Field,
893  const FieldInitializer &Initializer);
894  bool emitFieldInitializer(const FieldInfo &Field,
895  const IntFieldInfo &Contents,
896  const IntFieldInfo &Initializer);
897  bool emitFieldInitializer(const FieldInfo &Field,
898  const RealFieldInfo &Contents,
899  const RealFieldInfo &Initializer);
900  bool emitFieldInitializer(const FieldInfo &Field,
901  const StructFieldInfo &Contents,
902  const StructFieldInfo &Initializer);
903 
904  bool emitStructInitializer(const StructInfo &Structure,
905  const StructInitializer &Initializer);
906 
907  // User-defined types (structs, unions):
908  bool emitStructValues(const StructInfo &Structure, unsigned *Count = nullptr);
909  bool addStructField(StringRef Name, const StructInfo &Structure);
910  bool parseDirectiveStructValue(const StructInfo &Structure,
911  StringRef Directive, SMLoc DirLoc);
912  bool parseDirectiveNamedStructValue(const StructInfo &Structure,
913  StringRef Directive, SMLoc DirLoc,
914  StringRef Name);
915 
916  // "=", "equ", "textequ"
917  bool parseDirectiveEquate(StringRef IDVal, StringRef Name,
918  DirectiveKind DirKind, SMLoc NameLoc);
919 
920  bool parseDirectiveOrg(); // "org"
921 
922  bool emitAlignTo(int64_t Alignment);
923  bool parseDirectiveAlign(); // "align"
924  bool parseDirectiveEven(); // "even"
925 
926  // ".file", ".line", ".loc", ".stabs"
927  bool parseDirectiveFile(SMLoc DirectiveLoc);
928  bool parseDirectiveLine();
929  bool parseDirectiveLoc();
930  bool parseDirectiveStabs();
931 
932  // ".cv_file", ".cv_func_id", ".cv_inline_site_id", ".cv_loc", ".cv_linetable",
933  // ".cv_inline_linetable", ".cv_def_range", ".cv_string"
934  bool parseDirectiveCVFile();
935  bool parseDirectiveCVFuncId();
936  bool parseDirectiveCVInlineSiteId();
937  bool parseDirectiveCVLoc();
938  bool parseDirectiveCVLinetable();
939  bool parseDirectiveCVInlineLinetable();
940  bool parseDirectiveCVDefRange();
941  bool parseDirectiveCVString();
942  bool parseDirectiveCVStringTable();
943  bool parseDirectiveCVFileChecksums();
944  bool parseDirectiveCVFileChecksumOffset();
945  bool parseDirectiveCVFPOData();
946 
947  // .cfi directives
948  bool parseDirectiveCFIRegister(SMLoc DirectiveLoc);
949  bool parseDirectiveCFIWindowSave();
950  bool parseDirectiveCFISections();
951  bool parseDirectiveCFIStartProc();
952  bool parseDirectiveCFIEndProc();
953  bool parseDirectiveCFIDefCfaOffset();
954  bool parseDirectiveCFIDefCfa(SMLoc DirectiveLoc);
955  bool parseDirectiveCFIAdjustCfaOffset();
956  bool parseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc);
957  bool parseDirectiveCFIOffset(SMLoc DirectiveLoc);
958  bool parseDirectiveCFIRelOffset(SMLoc DirectiveLoc);
959  bool parseDirectiveCFIPersonalityOrLsda(bool IsPersonality);
960  bool parseDirectiveCFIRememberState();
961  bool parseDirectiveCFIRestoreState();
962  bool parseDirectiveCFISameValue(SMLoc DirectiveLoc);
963  bool parseDirectiveCFIRestore(SMLoc DirectiveLoc);
964  bool parseDirectiveCFIEscape();
965  bool parseDirectiveCFIReturnColumn(SMLoc DirectiveLoc);
966  bool parseDirectiveCFISignalFrame();
967  bool parseDirectiveCFIUndefined(SMLoc DirectiveLoc);
968 
969  // macro directives
970  bool parseDirectivePurgeMacro(SMLoc DirectiveLoc);
971  bool parseDirectiveExitMacro(SMLoc DirectiveLoc, StringRef Directive,
972  std::string &Value);
973  bool parseDirectiveEndMacro(StringRef Directive);
974  bool parseDirectiveMacro(StringRef Name, SMLoc NameLoc);
975 
976  bool parseDirectiveStruct(StringRef Directive, DirectiveKind DirKind,
977  StringRef Name, SMLoc NameLoc);
978  bool parseDirectiveNestedStruct(StringRef Directive, DirectiveKind DirKind);
979  bool parseDirectiveEnds(StringRef Name, SMLoc NameLoc);
980  bool parseDirectiveNestedEnds();
981 
982  /// Parse a directive like ".globl" which accepts a single symbol (which
983  /// should be a label or an external).
984  bool parseDirectiveSymbolAttribute(MCSymbolAttr Attr);
985 
986  bool parseDirectiveComm(bool IsLocal); // ".comm" and ".lcomm"
987 
988  bool parseDirectiveComment(SMLoc DirectiveLoc); // "comment"
989 
990  bool parseDirectiveInclude(); // "include"
991 
992  // "if" or "ife"
993  bool parseDirectiveIf(SMLoc DirectiveLoc, DirectiveKind DirKind);
994  // "ifb" or "ifnb", depending on ExpectBlank.
995  bool parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank);
996  // "ifidn", "ifdif", "ifidni", or "ifdifi", depending on ExpectEqual and
997  // CaseInsensitive.
998  bool parseDirectiveIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
999  bool CaseInsensitive);
1000  // "ifdef" or "ifndef", depending on expect_defined
1001  bool parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined);
1002  // "elseif" or "elseife"
1003  bool parseDirectiveElseIf(SMLoc DirectiveLoc, DirectiveKind DirKind);
1004  // "elseifb" or "elseifnb", depending on ExpectBlank.
1005  bool parseDirectiveElseIfb(SMLoc DirectiveLoc, bool ExpectBlank);
1006  // ".elseifdef" or ".elseifndef", depending on expect_defined
1007  bool parseDirectiveElseIfdef(SMLoc DirectiveLoc, bool expect_defined);
1008  // "elseifidn", "elseifdif", "elseifidni", or "elseifdifi", depending on
1009  // ExpectEqual and CaseInsensitive.
1010  bool parseDirectiveElseIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
1011  bool CaseInsensitive);
1012  bool parseDirectiveElse(SMLoc DirectiveLoc); // "else"
1013  bool parseDirectiveEndIf(SMLoc DirectiveLoc); // "endif"
1014  bool parseEscapedString(std::string &Data) override;
1015  bool parseAngleBracketString(std::string &Data) override;
1016 
1017  // Macro-like directives
1018  MCAsmMacro *parseMacroLikeBody(SMLoc DirectiveLoc);
1019  void instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
1020  raw_svector_ostream &OS);
1021  void instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
1022  SMLoc ExitLoc, raw_svector_ostream &OS);
1023  bool parseDirectiveRepeat(SMLoc DirectiveLoc, StringRef Directive);
1024  bool parseDirectiveFor(SMLoc DirectiveLoc, StringRef Directive);
1025  bool parseDirectiveForc(SMLoc DirectiveLoc, StringRef Directive);
1026  bool parseDirectiveWhile(SMLoc DirectiveLoc);
1027 
1028  // "_emit" or "__emit"
1029  bool parseDirectiveMSEmit(SMLoc DirectiveLoc, ParseStatementInfo &Info,
1030  size_t Len);
1031 
1032  // "align"
1033  bool parseDirectiveMSAlign(SMLoc DirectiveLoc, ParseStatementInfo &Info);
1034 
1035  // "end"
1036  bool parseDirectiveEnd(SMLoc DirectiveLoc);
1037 
1038  // ".err"
1039  bool parseDirectiveError(SMLoc DirectiveLoc);
1040  // ".errb" or ".errnb", depending on ExpectBlank.
1041  bool parseDirectiveErrorIfb(SMLoc DirectiveLoc, bool ExpectBlank);
1042  // ".errdef" or ".errndef", depending on ExpectDefined.
1043  bool parseDirectiveErrorIfdef(SMLoc DirectiveLoc, bool ExpectDefined);
1044  // ".erridn", ".errdif", ".erridni", or ".errdifi", depending on ExpectEqual
1045  // and CaseInsensitive.
1046  bool parseDirectiveErrorIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
1047  bool CaseInsensitive);
1048  // ".erre" or ".errnz", depending on ExpectZero.
1049  bool parseDirectiveErrorIfe(SMLoc DirectiveLoc, bool ExpectZero);
1050 
1051  // ".radix"
1052  bool parseDirectiveRadix(SMLoc DirectiveLoc);
1053 
1054  // "echo"
1055  bool parseDirectiveEcho(SMLoc DirectiveLoc);
1056 
1057  void initializeDirectiveKindMap();
1058  void initializeCVDefRangeTypeMap();
1059  void initializeBuiltinSymbolMap();
1060 };
1061 
1062 } // end anonymous namespace
1063 
1064 namespace llvm {
1065 
1067 
1068 } // end namespace llvm
1069 
/// Numeric identifier of the default address space (zero).
enum {
  DEFAULT_ADDRSPACE = 0
};
1071 
1072 MasmParser::MasmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
1073  const MCAsmInfo &MAI, struct tm TM, unsigned CB)
1074  : Lexer(MAI), Ctx(Ctx), Out(Out), MAI(MAI), SrcMgr(SM),
1075  CurBuffer(CB ? CB : SM.getMainFileID()), TM(TM) {
1076  HadError = false;
1077  // Save the old handler.
1078  SavedDiagHandler = SrcMgr.getDiagHandler();
1079  SavedDiagContext = SrcMgr.getDiagContext();
1080  // Set our own handler which calls the saved handler.
1082  Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
1083  EndStatementAtEOFStack.push_back(true);
1084 
1085  // Initialize the platform / file format parser.
1086  switch (Ctx.getObjectFileType()) {
1087  case MCContext::IsCOFF:
1088  PlatformParser.reset(createCOFFMasmParser());
1089  break;
1090  default:
1091  report_fatal_error("llvm-ml currently supports only COFF output.");
1092  break;
1093  }
1094 
1095  initializeDirectiveKindMap();
1096  PlatformParser->Initialize(*this);
1097  initializeCVDefRangeTypeMap();
1098  initializeBuiltinSymbolMap();
1099 
1100  NumOfMacroInstantiations = 0;
1101 }
1102 
1103 MasmParser::~MasmParser() {
1104  assert((HadError || ActiveMacros.empty()) &&
1105  "Unexpected active macro instantiation!");
1106 
1107  // Restore the saved diagnostics handler and context for use during
1108  // finalization.
1109  SrcMgr.setDiagHandler(SavedDiagHandler, SavedDiagContext);
1110 }
1111 
1112 void MasmParser::printMacroInstantiations() {
1113  // Print the active macro instantiation stack.
1114  for (std::vector<MacroInstantiation *>::const_reverse_iterator
1115  it = ActiveMacros.rbegin(),
1116  ie = ActiveMacros.rend();
1117  it != ie; ++it)
1118  printMessage((*it)->InstantiationLoc, SourceMgr::DK_Note,
1119  "while in macro instantiation");
1120 }
1121 
1122 void MasmParser::Note(SMLoc L, const Twine &Msg, SMRange Range) {
1123  printPendingErrors();
1124  printMessage(L, SourceMgr::DK_Note, Msg, Range);
1125  printMacroInstantiations();
1126 }
1127 
1128 bool MasmParser::Warning(SMLoc L, const Twine &Msg, SMRange Range) {
1129  if (getTargetParser().getTargetOptions().MCNoWarn)
1130  return false;
1131  if (getTargetParser().getTargetOptions().MCFatalWarnings)
1132  return Error(L, Msg, Range);
1133  printMessage(L, SourceMgr::DK_Warning, Msg, Range);
1134  printMacroInstantiations();
1135  return false;
1136 }
1137 
1138 bool MasmParser::printError(SMLoc L, const Twine &Msg, SMRange Range) {
1139  HadError = true;
1140  printMessage(L, SourceMgr::DK_Error, Msg, Range);
1141  printMacroInstantiations();
1142  return true;
1143 }
1144 
1145 bool MasmParser::enterIncludeFile(const std::string &Filename) {
1146  std::string IncludedFile;
1147  unsigned NewBuf =
1148  SrcMgr.AddIncludeFile(Filename, Lexer.getLoc(), IncludedFile);
1149  if (!NewBuf)
1150  return true;
1151 
1152  CurBuffer = NewBuf;
1153  Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
1154  EndStatementAtEOFStack.push_back(true);
1155  return false;
1156 }
1157 
1158 void MasmParser::jumpToLoc(SMLoc Loc, unsigned InBuffer,
1159  bool EndStatementAtEOF) {
1160  CurBuffer = InBuffer ? InBuffer : SrcMgr.FindBufferContainingLoc(Loc);
1161  Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(),
1162  Loc.getPointer(), EndStatementAtEOF);
1163 }
1164 
1165 bool MasmParser::expandMacros() {
1166  const AsmToken &Tok = getTok();
1167  const std::string IDLower = Tok.getIdentifier().lower();
1168 
1169  const llvm::MCAsmMacro *M = getContext().lookupMacro(IDLower);
1170  if (M && M->IsFunction && peekTok().is(AsmToken::LParen)) {
1171  // This is a macro function invocation; expand it in place.
1172  const SMLoc MacroLoc = Tok.getLoc();
1173  const StringRef MacroId = Tok.getIdentifier();
1174  Lexer.Lex();
1175  if (handleMacroInvocation(M, MacroLoc)) {
1176  Lexer.UnLex(AsmToken(AsmToken::Error, MacroId));
1177  Lexer.Lex();
1178  }
1179  return false;
1180  }
1181 
1182  llvm::Optional<std::string> ExpandedValue;
1183  auto BuiltinIt = BuiltinSymbolMap.find(IDLower);
1184  if (BuiltinIt != BuiltinSymbolMap.end()) {
1185  ExpandedValue =
1186  evaluateBuiltinTextMacro(BuiltinIt->getValue(), Tok.getLoc());
1187  } else {
1188  auto VarIt = Variables.find(IDLower);
1189  if (VarIt != Variables.end() && VarIt->getValue().IsText) {
1190  ExpandedValue = VarIt->getValue().TextValue;
1191  }
1192  }
1193 
1194  if (!ExpandedValue.hasValue())
1195  return true;
1196  std::unique_ptr<MemoryBuffer> Instantiation =
1197  MemoryBuffer::getMemBufferCopy(*ExpandedValue, "<instantiation>");
1198 
1199  // Jump to the macro instantiation and prime the lexer.
1200  CurBuffer =
1201  SrcMgr.AddNewSourceBuffer(std::move(Instantiation), Tok.getEndLoc());
1202  Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(), nullptr,
1203  /*EndStatementAtEOF=*/false);
1204  EndStatementAtEOFStack.push_back(false);
1205  Lexer.Lex();
1206  return false;
1207 }
1208 
1209 const AsmToken &MasmParser::Lex(ExpandKind ExpandNextToken) {
1210  if (Lexer.getTok().is(AsmToken::Error))
1211  Error(Lexer.getErrLoc(), Lexer.getErr());
1212 
1213  // if it's a end of statement with a comment in it
1214  if (getTok().is(AsmToken::EndOfStatement)) {
1215  // if this is a line comment output it.
1216  if (!getTok().getString().empty() && getTok().getString().front() != '\n' &&
1217  getTok().getString().front() != '\r' && MAI.preserveAsmComments())
1218  Out.addExplicitComment(Twine(getTok().getString()));
1219  }
1220 
1221  const AsmToken *tok = &Lexer.Lex();
1222  bool StartOfStatement = Lexer.isAtStartOfStatement();
1223 
1224  while (ExpandNextToken == ExpandMacros && tok->is(AsmToken::Identifier)) {
1225  if (StartOfStatement) {
1226  AsmToken NextTok;
1227  MutableArrayRef<AsmToken> Buf(NextTok);
1228  size_t ReadCount = Lexer.peekTokens(Buf);
1229  if (ReadCount && NextTok.is(AsmToken::Identifier) &&
1230  (NextTok.getString().equals_insensitive("equ") ||
1231  NextTok.getString().equals_insensitive("textequ"))) {
1232  // This looks like an EQU or TEXTEQU directive; don't expand the
1233  // identifier, allowing for redefinitions.
1234  break;
1235  }
1236  }
1237  if (expandMacros())
1238  break;
1239  }
1240 
1241  // Parse comments here to be deferred until end of next statement.
1242  while (tok->is(AsmToken::Comment)) {
1243  if (MAI.preserveAsmComments())
1244  Out.addExplicitComment(Twine(tok->getString()));
1245  tok = &Lexer.Lex();
1246  }
1247 
1248  // Recognize and bypass line continuations.
1249  while (tok->is(AsmToken::BackSlash) &&
1250  peekTok().is(AsmToken::EndOfStatement)) {
1251  // Eat both the backslash and the end of statement.
1252  Lexer.Lex();
1253  tok = &Lexer.Lex();
1254  }
1255 
1256  if (tok->is(AsmToken::Eof)) {
1257  // If this is the end of an included file, pop the parent file off the
1258  // include stack.
1259  SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1260  if (ParentIncludeLoc != SMLoc()) {
1261  EndStatementAtEOFStack.pop_back();
1262  jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1263  return Lex();
1264  }
1265  EndStatementAtEOFStack.pop_back();
1266  assert(EndStatementAtEOFStack.empty());
1267  }
1268 
1269  return *tok;
1270 }
1271 
1272 const AsmToken MasmParser::peekTok(bool ShouldSkipSpace) {
1273  AsmToken Tok;
1274 
1275  MutableArrayRef<AsmToken> Buf(Tok);
1276  size_t ReadCount = Lexer.peekTokens(Buf, ShouldSkipSpace);
1277 
1278  if (ReadCount == 0) {
1279  // If this is the end of an included file, pop the parent file off the
1280  // include stack.
1281  SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1282  if (ParentIncludeLoc != SMLoc()) {
1283  EndStatementAtEOFStack.pop_back();
1284  jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1285  return peekTok(ShouldSkipSpace);
1286  }
1287  EndStatementAtEOFStack.pop_back();
1288  assert(EndStatementAtEOFStack.empty());
1289  }
1290 
1291  assert(ReadCount == 1);
1292  return Tok;
1293 }
1294 
1295 bool MasmParser::enabledGenDwarfForAssembly() {
1296  // Check whether the user specified -g.
1297  if (!getContext().getGenDwarfForAssembly())
1298  return false;
1299  // If we haven't encountered any .file directives (which would imply that
1300  // the assembler source was produced with debug info already) then emit one
1301  // describing the assembler source file itself.
1302  if (getContext().getGenDwarfFileNumber() == 0) {
1303  // Use the first #line directive for this, if any. It's preprocessed, so
1304  // there is no checksum, and of course no source directive.
1305  if (!FirstCppHashFilename.empty())
1306  getContext().setMCLineTableRootFile(/*CUID=*/0,
1307  getContext().getCompilationDir(),
1308  FirstCppHashFilename,
1309  /*Cksum=*/None, /*Source=*/None);
1310  const MCDwarfFile &RootFile =
1311  getContext().getMCDwarfLineTable(/*CUID=*/0).getRootFile();
1312  getContext().setGenDwarfFileNumber(getStreamer().emitDwarfFileDirective(
1313  /*CUID=*/0, getContext().getCompilationDir(), RootFile.Name,
1314  RootFile.Checksum, RootFile.Source));
1315  }
1316  return true;
1317 }
1318 
1319 bool MasmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
1320  // Create the initial section, if requested.
1321  if (!NoInitialTextSection)
1322  Out.initSections(false, getTargetParser().getSTI());
1323 
1324  // Prime the lexer.
1325  Lex();
1326 
1327  HadError = false;
1328  AsmCond StartingCondState = TheCondState;
1329  SmallVector<AsmRewrite, 4> AsmStrRewrites;
1330 
1331  // If we are generating dwarf for assembly source files save the initial text
1332  // section. (Don't use enabledGenDwarfForAssembly() here, as we aren't
1333  // emitting any actual debug info yet and haven't had a chance to parse any
1334  // embedded .file directives.)
1335  if (getContext().getGenDwarfForAssembly()) {
1336  MCSection *Sec = getStreamer().getCurrentSectionOnly();
1337  if (!Sec->getBeginSymbol()) {
1338  MCSymbol *SectionStartSym = getContext().createTempSymbol();
1339  getStreamer().emitLabel(SectionStartSym);
1340  Sec->setBeginSymbol(SectionStartSym);
1341  }
1342  bool InsertResult = getContext().addGenDwarfSection(Sec);
1343  assert(InsertResult && ".text section should not have debug info yet");
1344  (void)InsertResult;
1345  }
1346 
1347  getTargetParser().onBeginOfFile();
1348 
1349  // While we have input, parse each statement.
1350  while (Lexer.isNot(AsmToken::Eof) ||
1351  SrcMgr.getParentIncludeLoc(CurBuffer) != SMLoc()) {
1352  // Skip through the EOF at the end of an inclusion.
1353  if (Lexer.is(AsmToken::Eof))
1354  Lex();
1355 
1356  ParseStatementInfo Info(&AsmStrRewrites);
1357  bool Parsed = parseStatement(Info, nullptr);
1358 
1359  // If we have a Lexer Error we are on an Error Token. Load in Lexer Error
1360  // for printing ErrMsg via Lex() only if no (presumably better) parser error
1361  // exists.
1362  if (Parsed && !hasPendingError() && Lexer.getTok().is(AsmToken::Error)) {
1363  Lex();
1364  }
1365 
1366  // parseStatement returned true so may need to emit an error.
1367  printPendingErrors();
1368 
1369  // Skipping to the next line if needed.
1370  if (Parsed && !getLexer().isAtStartOfStatement())
1371  eatToEndOfStatement();
1372  }
1373 
1374  getTargetParser().onEndOfFile();
1375  printPendingErrors();
1376 
1377  // All errors should have been emitted.
1378  assert(!hasPendingError() && "unexpected error from parseStatement");
1379 
1380  getTargetParser().flushPendingInstructions(getStreamer());
1381 
1382  if (TheCondState.TheCond != StartingCondState.TheCond ||
1383  TheCondState.Ignore != StartingCondState.Ignore)
1384  printError(getTok().getLoc(), "unmatched .ifs or .elses");
1385  // Check to see there are no empty DwarfFile slots.
1386  const auto &LineTables = getContext().getMCDwarfLineTables();
1387  if (!LineTables.empty()) {
1388  unsigned Index = 0;
1389  for (const auto &File : LineTables.begin()->second.getMCDwarfFiles()) {
1390  if (File.Name.empty() && Index != 0)
1391  printError(getTok().getLoc(), "unassigned file number: " +
1392  Twine(Index) +
1393  " for .file directives");
1394  ++Index;
1395  }
1396  }
1397 
1398  // Check to see that all assembler local symbols were actually defined.
1399  // Targets that don't do subsections via symbols may not want this, though,
1400  // so conservatively exclude them. Only do this if we're finalizing, though,
1401  // as otherwise we won't necessarilly have seen everything yet.
1402  if (!NoFinalize) {
1403  if (MAI.hasSubsectionsViaSymbols()) {
1404  for (const auto &TableEntry : getContext().getSymbols()) {
1405  MCSymbol *Sym = TableEntry.getValue();
1406  // Variable symbols may not be marked as defined, so check those
1407  // explicitly. If we know it's a variable, we have a definition for
1408  // the purposes of this check.
1409  if (Sym->isTemporary() && !Sym->isVariable() && !Sym->isDefined())
1410  // FIXME: We would really like to refer back to where the symbol was
1411  // first referenced for a source location. We need to add something
1412  // to track that. Currently, we just point to the end of the file.
1413  printError(getTok().getLoc(), "assembler local symbol '" +
1414  Sym->getName() + "' not defined");
1415  }
1416  }
1417 
1418  // Temporary symbols like the ones for directional jumps don't go in the
1419  // symbol table. They also need to be diagnosed in all (final) cases.
1420  for (std::tuple<SMLoc, CppHashInfoTy, MCSymbol *> &LocSym : DirLabels) {
1421  if (std::get<2>(LocSym)->isUndefined()) {
1422  // Reset the state of any "# line file" directives we've seen to the
1423  // context as it was at the diagnostic site.
1424  CppHashInfo = std::get<1>(LocSym);
1425  printError(std::get<0>(LocSym), "directional label undefined");
1426  }
1427  }
1428  }
1429 
1430  // Finalize the output stream if there are no errors and if the client wants
1431  // us to.
1432  if (!HadError && !NoFinalize)
1433  Out.Finish(Lexer.getLoc());
1434 
1435  return HadError || getContext().hadError();
1436 }
1437 
1438 bool MasmParser::checkForValidSection() {
1439  if (!ParsingMSInlineAsm && !getStreamer().getCurrentSectionOnly()) {
1440  Out.initSections(false, getTargetParser().getSTI());
1441  return Error(getTok().getLoc(),
1442  "expected section directive before assembly directive");
1443  }
1444  return false;
1445 }
1446 
1447 /// Throw away the rest of the line for testing purposes.
1448 void MasmParser::eatToEndOfStatement() {
1449  while (Lexer.isNot(AsmToken::EndOfStatement)) {
1450  if (Lexer.is(AsmToken::Eof)) {
1451  SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1452  if (ParentIncludeLoc == SMLoc()) {
1453  break;
1454  }
1455 
1456  EndStatementAtEOFStack.pop_back();
1457  jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1458  }
1459 
1460  Lexer.Lex();
1461  }
1462 
1463  // Eat EOL.
1464  if (Lexer.is(AsmToken::EndOfStatement))
1465  Lexer.Lex();
1466 }
1467 
1469 MasmParser::parseStringRefsTo(AsmToken::TokenKind EndTok) {
1471  const char *Start = getTok().getLoc().getPointer();
1472  while (Lexer.isNot(EndTok)) {
1473  if (Lexer.is(AsmToken::Eof)) {
1474  SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1475  if (ParentIncludeLoc == SMLoc()) {
1476  break;
1477  }
1478  Refs.emplace_back(Start, getTok().getLoc().getPointer() - Start);
1479 
1480  EndStatementAtEOFStack.pop_back();
1481  jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1482  Lexer.Lex();
1483  Start = getTok().getLoc().getPointer();
1484  } else {
1485  Lexer.Lex();
1486  }
1487  }
1488  Refs.emplace_back(Start, getTok().getLoc().getPointer() - Start);
1489  return Refs;
1490 }
1491 
1492 std::string MasmParser::parseStringTo(AsmToken::TokenKind EndTok) {
1493  SmallVector<StringRef, 1> Refs = parseStringRefsTo(EndTok);
1494  std::string Str;
1495  for (StringRef S : Refs) {
1496  Str.append(S.str());
1497  }
1498  return Str;
1499 }
1500 
1501 StringRef MasmParser::parseStringToEndOfStatement() {
1502  const char *Start = getTok().getLoc().getPointer();
1503 
1504  while (Lexer.isNot(AsmToken::EndOfStatement) && Lexer.isNot(AsmToken::Eof))
1505  Lexer.Lex();
1506 
1507  const char *End = getTok().getLoc().getPointer();
1508  return StringRef(Start, End - Start);
1509 }
1510 
1511 /// Parse a paren expression and return it.
1512 /// NOTE: This assumes the leading '(' has already been consumed.
1513 ///
1514 /// parenexpr ::= expr)
1515 ///
1516 bool MasmParser::parseParenExpr(const MCExpr *&Res, SMLoc &EndLoc) {
1517  if (parseExpression(Res))
1518  return true;
1519  if (Lexer.isNot(AsmToken::RParen))
1520  return TokError("expected ')' in parentheses expression");
1521  EndLoc = Lexer.getTok().getEndLoc();
1522  Lex();
1523  return false;
1524 }
1525 
1526 /// Parse a bracket expression and return it.
1527 /// NOTE: This assumes the leading '[' has already been consumed.
1528 ///
1529 /// bracketexpr ::= expr]
1530 ///
1531 bool MasmParser::parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc) {
1532  if (parseExpression(Res))
1533  return true;
1534  EndLoc = getTok().getEndLoc();
1535  if (parseToken(AsmToken::RBrac, "expected ']' in brackets expression"))
1536  return true;
1537  return false;
1538 }
1539 
1540 /// Parse a primary expression and return it.
1541 /// primaryexpr ::= (parenexpr
1542 /// primaryexpr ::= symbol
1543 /// primaryexpr ::= number
1544 /// primaryexpr ::= '.'
1545 /// primaryexpr ::= ~,+,-,'not' primaryexpr
1546 /// primaryexpr ::= string
1547 /// (a string is interpreted as a 64-bit number in big-endian base-256)
1548 bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
1549  AsmTypeInfo *TypeInfo) {
1550  SMLoc FirstTokenLoc = getLexer().getLoc();
1551  AsmToken::TokenKind FirstTokenKind = Lexer.getKind();
1552  switch (FirstTokenKind) {
1553  default:
1554  return TokError("unknown token in expression");
1555  // If we have an error assume that we've already handled it.
1556  case AsmToken::Error:
1557  return true;
1558  case AsmToken::Exclaim:
1559  Lex(); // Eat the operator.
1560  if (parsePrimaryExpr(Res, EndLoc, nullptr))
1561  return true;
1562  Res = MCUnaryExpr::createLNot(Res, getContext(), FirstTokenLoc);
1563  return false;
1564  case AsmToken::Dollar:
1565  case AsmToken::At:
1566  case AsmToken::Identifier: {
1568  if (parseIdentifier(Identifier)) {
1569  // We may have failed but $ may be a valid token.
1570  if (getTok().is(AsmToken::Dollar)) {
1571  if (Lexer.getMAI().getDollarIsPC()) {
1572  Lex();
1573  // This is a '$' reference, which references the current PC. Emit a
1574  // temporary label to the streamer and refer to it.
1575  MCSymbol *Sym = Ctx.createTempSymbol();
1576  Out.emitLabel(Sym);
1577  Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None,
1578  getContext());
1579  EndLoc = FirstTokenLoc;
1580  return false;
1581  }
1582  return Error(FirstTokenLoc, "invalid token in expression");
1583  }
1584  }
1585  // Parse named bitwise negation.
1586  if (Identifier.equals_insensitive("not")) {
1587  if (parsePrimaryExpr(Res, EndLoc, nullptr))
1588  return true;
1589  Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc);
1590  return false;
1591  }
1592  // Parse symbol variant.
1593  std::pair<StringRef, StringRef> Split;
1594  if (!MAI.useParensForSymbolVariant()) {
1595  if (FirstTokenKind == AsmToken::String) {
1596  if (Lexer.is(AsmToken::At)) {
1597  Lex(); // eat @
1598  SMLoc AtLoc = getLexer().getLoc();
1599  StringRef VName;
1600  if (parseIdentifier(VName))
1601  return Error(AtLoc, "expected symbol variant after '@'");
1602 
1603  Split = std::make_pair(Identifier, VName);
1604  }
1605  } else {
1606  Split = Identifier.split('@');
1607  }
1608  } else if (Lexer.is(AsmToken::LParen)) {
1609  Lex(); // eat '('.
1610  StringRef VName;
1611  parseIdentifier(VName);
1612  // eat ')'.
1613  if (parseToken(AsmToken::RParen,
1614  "unexpected token in variant, expected ')'"))
1615  return true;
1616  Split = std::make_pair(Identifier, VName);
1617  }
1618 
1619  EndLoc = SMLoc::getFromPointer(Identifier.end());
1620 
1621  // This is a symbol reference.
1623  if (SymbolName.empty())
1624  return Error(getLexer().getLoc(), "expected a symbol reference");
1625 
1626  MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1627 
1628  // Look up the symbol variant if used.
1629  if (!Split.second.empty()) {
1630  Variant = MCSymbolRefExpr::getVariantKindForName(Split.second);
1631  if (Variant != MCSymbolRefExpr::VK_Invalid) {
1632  SymbolName = Split.first;
1633  } else if (MAI.doesAllowAtInName() && !MAI.useParensForSymbolVariant()) {
1634  Variant = MCSymbolRefExpr::VK_None;
1635  } else {
1636  return Error(SMLoc::getFromPointer(Split.second.begin()),
1637  "invalid variant '" + Split.second + "'");
1638  }
1639  }
1640 
1641  // Find the field offset if used.
1643  Split = SymbolName.split('.');
1644  if (Split.second.empty()) {
1645  } else {
1646  SymbolName = Split.first;
1647  if (lookUpField(SymbolName, Split.second, Info)) {
1648  std::pair<StringRef, StringRef> BaseMember = Split.second.split('.');
1649  StringRef Base = BaseMember.first, Member = BaseMember.second;
1650  lookUpField(Base, Member, Info);
1651  } else if (Structs.count(SymbolName.lower())) {
1652  // This is actually a reference to a field offset.
1653  Res = MCConstantExpr::create(Info.Offset, getContext());
1654  return false;
1655  }
1656  }
1657 
1658  MCSymbol *Sym = getContext().getInlineAsmLabel(SymbolName);
1659  if (!Sym) {
1660  // If this is a built-in numeric value, treat it as a constant.
1661  auto BuiltinIt = BuiltinSymbolMap.find(SymbolName.lower());
1662  const BuiltinSymbol Symbol = (BuiltinIt == BuiltinSymbolMap.end())
1663  ? BI_NO_SYMBOL
1664  : BuiltinIt->getValue();
1665  if (Symbol != BI_NO_SYMBOL) {
1666  const MCExpr *Value = evaluateBuiltinValue(Symbol, FirstTokenLoc);
1667  if (Value) {
1668  Res = Value;
1669  return false;
1670  }
1671  }
1672 
1673  // Variables use case-insensitive symbol names; if this is a variable, we
1674  // find the symbol using its canonical name.
1675  auto VarIt = Variables.find(SymbolName.lower());
1676  if (VarIt != Variables.end())
1677  SymbolName = VarIt->second.Name;
1678  Sym = getContext().getOrCreateSymbol(SymbolName);
1679  }
1680 
1681  // If this is an absolute variable reference, substitute it now to preserve
1682  // semantics in the face of reassignment.
1683  if (Sym->isVariable()) {
1684  auto V = Sym->getVariableValue(/*SetUsed=*/false);
1685  bool DoInline = isa<MCConstantExpr>(V) && !Variant;
1686  if (auto TV = dyn_cast<MCTargetExpr>(V))
1687  DoInline = TV->inlineAssignedExpr();
1688  if (DoInline) {
1689  if (Variant)
1690  return Error(EndLoc, "unexpected modifier on variable reference");
1691  Res = Sym->getVariableValue(/*SetUsed=*/false);
1692  return false;
1693  }
1694  }
1695 
1696  // Otherwise create a symbol ref.
1697  const MCExpr *SymRef =
1698  MCSymbolRefExpr::create(Sym, Variant, getContext(), FirstTokenLoc);
1699  if (Info.Offset) {
1700  Res = MCBinaryExpr::create(
1701  MCBinaryExpr::Add, SymRef,
1702  MCConstantExpr::create(Info.Offset, getContext()), getContext());
1703  } else {
1704  Res = SymRef;
1705  }
1706  if (TypeInfo) {
1707  if (Info.Type.Name.empty()) {
1708  auto TypeIt = KnownType.find(Identifier.lower());
1709  if (TypeIt != KnownType.end()) {
1710  Info.Type = TypeIt->second;
1711  }
1712  }
1713 
1714  *TypeInfo = Info.Type;
1715  }
1716  return false;
1717  }
1718  case AsmToken::BigNum:
1719  return TokError("literal value out of range for directive");
1720  case AsmToken::Integer: {
1721  SMLoc Loc = getTok().getLoc();
1722  int64_t IntVal = getTok().getIntVal();
1723  Res = MCConstantExpr::create(IntVal, getContext());
1724  EndLoc = Lexer.getTok().getEndLoc();
1725  Lex(); // Eat token.
1726  // Look for 'b' or 'f' following an Integer as a directional label.
1727  if (Lexer.getKind() == AsmToken::Identifier) {
1728  StringRef IDVal = getTok().getString();
1729  // Look up the symbol variant if used.
1730  std::pair<StringRef, StringRef> Split = IDVal.split('@');
1731  MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1732  if (Split.first.size() != IDVal.size()) {
1733  Variant = MCSymbolRefExpr::getVariantKindForName(Split.second);
1734  if (Variant == MCSymbolRefExpr::VK_Invalid)
1735  return TokError("invalid variant '" + Split.second + "'");
1736  IDVal = Split.first;
1737  }
1738  if (IDVal == "f" || IDVal == "b") {
1739  MCSymbol *Sym =
1740  Ctx.getDirectionalLocalSymbol(IntVal, IDVal == "b");
1741  Res = MCSymbolRefExpr::create(Sym, Variant, getContext());
1742  if (IDVal == "b" && Sym->isUndefined())
1743  return Error(Loc, "directional label undefined");
1744  DirLabels.push_back(std::make_tuple(Loc, CppHashInfo, Sym));
1745  EndLoc = Lexer.getTok().getEndLoc();
1746  Lex(); // Eat identifier.
1747  }
1748  }
1749  return false;
1750  }
1751  case AsmToken::String: {
1752  // MASM strings (used as constants) are interpreted as big-endian base-256.
1753  SMLoc ValueLoc = getTok().getLoc();
1754  std::string Value;
1755  if (parseEscapedString(Value))
1756  return true;
1757  if (Value.size() > 8)
1758  return Error(ValueLoc, "literal value out of range");
1759  uint64_t IntValue = 0;
1760  for (const unsigned char CharVal : Value)
1761  IntValue = (IntValue << 8) | CharVal;
1762  Res = MCConstantExpr::create(IntValue, getContext());
1763  return false;
1764  }
1765  case AsmToken::Real: {
1766  APFloat RealVal(APFloat::IEEEdouble(), getTok().getString());
1767  uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
1768  Res = MCConstantExpr::create(IntVal, getContext());
1769  EndLoc = Lexer.getTok().getEndLoc();
1770  Lex(); // Eat token.
1771  return false;
1772  }
1773  case AsmToken::Dot: {
1774  // This is a '.' reference, which references the current PC. Emit a
1775  // temporary label to the streamer and refer to it.
1776  MCSymbol *Sym = Ctx.createTempSymbol();
1777  Out.emitLabel(Sym);
1778  Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
1779  EndLoc = Lexer.getTok().getEndLoc();
1780  Lex(); // Eat identifier.
1781  return false;
1782  }
1783  case AsmToken::LParen:
1784  Lex(); // Eat the '('.
1785  return parseParenExpr(Res, EndLoc);
1786  case AsmToken::LBrac:
1787  if (!PlatformParser->HasBracketExpressions())
1788  return TokError("brackets expression not supported on this target");
1789  Lex(); // Eat the '['.
1790  return parseBracketExpr(Res, EndLoc);
1791  case AsmToken::Minus:
1792  Lex(); // Eat the operator.
1793  if (parsePrimaryExpr(Res, EndLoc, nullptr))
1794  return true;
1795  Res = MCUnaryExpr::createMinus(Res, getContext(), FirstTokenLoc);
1796  return false;
1797  case AsmToken::Plus:
1798  Lex(); // Eat the operator.
1799  if (parsePrimaryExpr(Res, EndLoc, nullptr))
1800  return true;
1801  Res = MCUnaryExpr::createPlus(Res, getContext(), FirstTokenLoc);
1802  return false;
1803  case AsmToken::Tilde:
1804  Lex(); // Eat the operator.
1805  if (parsePrimaryExpr(Res, EndLoc, nullptr))
1806  return true;
1807  Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc);
1808  return false;
1809  // MIPS unary expression operators. The lexer won't generate these tokens if
1810  // MCAsmInfo::HasMipsExpressions is false for the target.
1811  case AsmToken::PercentCall16:
1812  case AsmToken::PercentCall_Hi:
1813  case AsmToken::PercentCall_Lo:
1814  case AsmToken::PercentDtprel_Hi:
1815  case AsmToken::PercentDtprel_Lo:
1816  case AsmToken::PercentGot:
1817  case AsmToken::PercentGot_Disp:
1818  case AsmToken::PercentGot_Hi:
1819  case AsmToken::PercentGot_Lo:
1820  case AsmToken::PercentGot_Ofst:
1821  case AsmToken::PercentGot_Page:
1822  case AsmToken::PercentGottprel:
1823  case AsmToken::PercentGp_Rel:
1824  case AsmToken::PercentHi:
1825  case AsmToken::PercentHigher:
1826  case AsmToken::PercentHighest:
1827  case AsmToken::PercentLo:
1828  case AsmToken::PercentNeg:
1829  case AsmToken::PercentPcrel_Hi:
1830  case AsmToken::PercentPcrel_Lo:
1831  case AsmToken::PercentTlsgd:
1832  case AsmToken::PercentTlsldm:
1833  case AsmToken::PercentTprel_Hi:
1834  case AsmToken::PercentTprel_Lo:
1835  Lex(); // Eat the operator.
1836  if (Lexer.isNot(AsmToken::LParen))
1837  return TokError("expected '(' after operator");
1838  Lex(); // Eat the operator.
1839  if (parseExpression(Res, EndLoc))
1840  return true;
1841  if (Lexer.isNot(AsmToken::RParen))
1842  return TokError("expected ')'");
1843  Lex(); // Eat the operator.
1844  Res = getTargetParser().createTargetUnaryExpr(Res, FirstTokenKind, Ctx);
1845  return !Res;
1846  }
1847 }
1848 
1849 bool MasmParser::parseExpression(const MCExpr *&Res) {
1850  SMLoc EndLoc;
1851  return parseExpression(Res, EndLoc);
1852 }
1853 
1854 /// This function checks if the next token is <string> type or arithmetic.
1855 /// string that begin with character '<' must end with character '>'.
1856 /// otherwise it is arithmetics.
1857 /// If the function returns a 'true' value,
1858 /// the End argument will be filled with the last location pointed to the '>'
1859 /// character.
1860 static bool isAngleBracketString(SMLoc &StrLoc, SMLoc &EndLoc) {
1861  assert((StrLoc.getPointer() != nullptr) &&
1862  "Argument to the function cannot be a NULL value");
1863  const char *CharPtr = StrLoc.getPointer();
1864  while ((*CharPtr != '>') && (*CharPtr != '\n') && (*CharPtr != '\r') &&
1865  (*CharPtr != '\0')) {
1866  if (*CharPtr == '!')
1867  CharPtr++;
1868  CharPtr++;
1869  }
1870  if (*CharPtr == '>') {
1871  EndLoc = StrLoc.getFromPointer(CharPtr + 1);
1872  return true;
1873  }
1874  return false;
1875 }
1876 
1877 /// creating a string without the escape characters '!'.
1878 static std::string angleBracketString(StringRef BracketContents) {
1879  std::string Res;
1880  for (size_t Pos = 0; Pos < BracketContents.size(); Pos++) {
1881  if (BracketContents[Pos] == '!')
1882  Pos++;
1883  Res += BracketContents[Pos];
1884  }
1885  return Res;
1886 }
1887 
1888 /// Parse an expression and return it.
1889 ///
1890 /// expr ::= expr &&,|| expr -> lowest.
1891 /// expr ::= expr |,^,&,! expr
1892 /// expr ::= expr ==,!=,<>,<,<=,>,>= expr
1893 /// expr ::= expr <<,>> expr
1894 /// expr ::= expr +,- expr
1895 /// expr ::= expr *,/,% expr -> highest.
1896 /// expr ::= primaryexpr
1897 ///
1898 bool MasmParser::parseExpression(const MCExpr *&Res, SMLoc &EndLoc) {
1899  // Parse the expression.
1900  Res = nullptr;
1901  if (getTargetParser().parsePrimaryExpr(Res, EndLoc) ||
1902  parseBinOpRHS(1, Res, EndLoc))
1903  return true;
1904 
1905  // Try to constant fold it up front, if possible. Do not exploit
1906  // assembler here.
1907  int64_t Value;
1908  if (Res->evaluateAsAbsolute(Value))
1909  Res = MCConstantExpr::create(Value, getContext());
1910 
1911  return false;
1912 }
1913 
1914 bool MasmParser::parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) {
1915  Res = nullptr;
1916  return parseParenExpr(Res, EndLoc) || parseBinOpRHS(1, Res, EndLoc);
1917 }
1918 
1919 bool MasmParser::parseParenExprOfDepth(unsigned ParenDepth, const MCExpr *&Res,
1920  SMLoc &EndLoc) {
1921  if (parseParenExpr(Res, EndLoc))
1922  return true;
1923 
1924  for (; ParenDepth > 0; --ParenDepth) {
1925  if (parseBinOpRHS(1, Res, EndLoc))
1926  return true;
1927 
1928  // We don't Lex() the last RParen.
1929  // This is the same behavior as parseParenExpression().
1930  if (ParenDepth - 1 > 0) {
1931  EndLoc = getTok().getEndLoc();
1932  if (parseToken(AsmToken::RParen,
1933  "expected ')' in parentheses expression"))
1934  return true;
1935  }
1936  }
1937  return false;
1938 }
1939 
1940 bool MasmParser::parseAbsoluteExpression(int64_t &Res) {
1941  const MCExpr *Expr;
1942 
1943  SMLoc StartLoc = Lexer.getLoc();
1944  if (parseExpression(Expr))
1945  return true;
1946 
1947  if (!Expr->evaluateAsAbsolute(Res, getStreamer().getAssemblerPtr()))
1948  return Error(StartLoc, "expected absolute expression");
1949 
1950  return false;
1951 }
1952 
// NOTE(review): the opening of this definition (return type, function name and
// the parameters `K` and `Kind` that the body uses) is not present in this
// listing. The four-argument call in MasmParser::getBinOpPrecedence suggests
// this is getGNUBinOpPrecedence -- confirm against the real file.
//
// Maps token kind `K` to a binary-operator precedence. Returns 0 when the
// token is not treated as a binary operator, otherwise a value from 1 (binds
// loosest) to 6 (binds tightest). Where the assignment is visible, `Kind` is
// set to the matching MCBinaryExpr opcode before a non-zero value is returned.
//   ShouldUseLogicalShr    - selects LShr instead of AShr for '>>'.
//   EndExpressionAtGreater - when true, '>' and '>>' are reported as
//                            "not a binop" (0).
1955  bool ShouldUseLogicalShr,
1956  bool EndExpressionAtGreater) {
1957  switch (K) {
1958  default:
1959  return 0; // not a binop.
1960
1961  // Lowest Precedence: &&, ||
// Note that '||' (1) binds more loosely than '&&' (2).
1962  case AsmToken::AmpAmp:
1963  Kind = MCBinaryExpr::LAnd;
1964  return 2;
1965  case AsmToken::PipePipe:
1966  Kind = MCBinaryExpr::LOr;
1967  return 1;
1968
1969  // Low Precedence: ==, !=, <>, <, <=, >, >=
1970  case AsmToken::EqualEqual:
// NOTE(review): listing line 1971 is missing; presumably the Kind assignment.
1972  return 3;
1973  case AsmToken::ExclaimEqual:
1974  case AsmToken::LessGreater:
// NOTE(review): listing line 1975 is missing; presumably the Kind assignment.
1976  return 3;
1977  case AsmToken::Less:
// NOTE(review): listing line 1978 is missing; presumably the Kind assignment.
1979  return 3;
1980  case AsmToken::LessEqual:
1981  Kind = MCBinaryExpr::LTE;
1982  return 3;
1983  case AsmToken::Greater:
// Inside an angle-bracket context the caller wants '>' to end the expression.
1984  if (EndExpressionAtGreater)
1985  return 0;
// NOTE(review): listing line 1986 is missing; presumably the Kind assignment.
1987  return 3;
1988  case AsmToken::GreaterEqual:
1989  Kind = MCBinaryExpr::GTE;
1990  return 3;
1991
1992  // Low Intermediate Precedence: +, -
1993  case AsmToken::Plus:
// NOTE(review): listing line 1994 is missing; presumably the Kind assignment.
1995  return 4;
1996  case AsmToken::Minus:
1997  Kind = MCBinaryExpr::Sub;
1998  return 4;
1999
2000  // High Intermediate Precedence: |, &, ^
2001  case AsmToken::Pipe:
2002  Kind = MCBinaryExpr::Or;
2003  return 5;
2004  case AsmToken::Caret:
2005  Kind = MCBinaryExpr::Xor;
2006  return 5;
2007  case AsmToken::Amp:
2008  Kind = MCBinaryExpr::And;
2009  return 5;
2010
2011  // Highest Precedence: *, /, %, <<, >>
2012  case AsmToken::Star:
2013  Kind = MCBinaryExpr::Mul;
2014  return 6;
2015  case AsmToken::Slash:
2016  Kind = MCBinaryExpr::Div;
2017  return 6;
2018  case AsmToken::Percent:
// NOTE(review): listing line 2019 is missing; presumably the Kind assignment.
2020  return 6;
2021  case AsmToken::LessLess:
2022  Kind = MCBinaryExpr::Shl;
2023  return 6;
2024  case AsmToken::GreaterGreater:
// Same rule as '>': do not treat '>>' as an operator when asked to stop at '>'.
2025  if (EndExpressionAtGreater)
2026  return 0;
2027  Kind = ShouldUseLogicalShr ? MCBinaryExpr::LShr : MCBinaryExpr::AShr;
2028  return 6;
2029  }
2030 }
2031 
// Returns the binary-operator precedence of token kind `K` by delegating to
// getGNUBinOpPrecedence(). The logical-shift setting is read from MAI, and
// `AngleBracketDepth > 0` is passed as the final argument.
// NOTE(review): listing line 2033 (the rest of the parameter list, which must
// declare the `Kind` parameter used below) is missing from this listing.
2032 unsigned MasmParser::getBinOpPrecedence(AsmToken::TokenKind K,
2034  bool ShouldUseLogicalShr = MAI.shouldUseLogicalShr();
2035  return getGNUBinOpPrecedence(K, Kind, ShouldUseLogicalShr,
2036  AngleBracketDepth > 0);
2037 }
2038 
2039 /// Parse all binary operators with precedence >= 'Precedence'.
2040 /// Res contains the LHS of the expression on input.
// On success Res holds the combined expression. Returns true on error and
// false once the next token is an operator of lower precedence (or not an
// operator at all, since those report precedence 0).
2041 bool MasmParser::parseBinOpRHS(unsigned Precedence, const MCExpr *&Res,
2042  SMLoc &EndLoc) {
2043  SMLoc StartLoc = Lexer.getLoc();
2044  while (true) {
// NOTE(review): listing line 2045 is missing; presumably it declares `TokKind`
// from the current token kind.
2046  if (Lexer.getKind() == AsmToken::Identifier) {
// Identifier spellings of operators (matched case-insensitively via
// CaseLower) are mapped onto the equivalent punctuation token kinds.
// NOTE(review): listing lines 2047 (start of the switch expression assigned
// to TokKind) and 2051 (one more CaseLower entry) are missing.
2048  .CaseLower("and", AsmToken::Amp)
2049  .CaseLower("not", AsmToken::Exclaim)
2050  .CaseLower("or", AsmToken::Pipe)
2052  .CaseLower("ne", AsmToken::ExclaimEqual)
2053  .CaseLower("lt", AsmToken::Less)
2054  .CaseLower("le", AsmToken::LessEqual)
2055  .CaseLower("gt", AsmToken::Greater)
2056  .CaseLower("ge", AsmToken::GreaterEqual)
2057  .Default(TokKind);
2058  }
// NOTE(review): listing line 2059 is missing; presumably it declares `Kind`.
2060  unsigned TokPrec = getBinOpPrecedence(TokKind, Kind);
2061
2062  // If the next token is lower precedence than we are allowed to eat, return
2063  // successfully with what we ate already.
2064  if (TokPrec < Precedence)
2065  return false;
2066
// Consume the operator token.
2067  Lex();
2068
2069  // Eat the next primary expression.
2070  const MCExpr *RHS;
2071  if (getTargetParser().parsePrimaryExpr(RHS, EndLoc))
2072  return true;
2073
2074  // If BinOp binds less tightly with RHS than the operator after RHS, let
2075  // the pending operator take RHS as its LHS.
// NOTE(review): listing line 2076 is missing; presumably it declares `Dummy`.
// The lookahead here uses the raw lexer token kind, not the identifier
// mapping applied to TokKind above.
2077  unsigned NextTokPrec = getBinOpPrecedence(Lexer.getKind(), Dummy);
2078  if (TokPrec < NextTokPrec && parseBinOpRHS(TokPrec + 1, RHS, EndLoc))
2079  return true;
2080
2081  // Merge LHS and RHS according to operator.
2082  Res = MCBinaryExpr::create(Kind, Res, RHS, getContext(), StartLoc);
2083  }
2084 }
2085 
2086 /// ParseStatement:
2087 /// ::= % statement
2088 /// ::= EndOfStatement
2089 /// ::= Label* Directive ...Operands... EndOfStatement
2090 /// ::= Label* Identifier OperandList* EndOfStatement
// Parses a single statement and dispatches it, in this order: blank line,
// '%' expansion, conditional-assembly directives, label, macro instantiation,
// prefix directive, struct-typed allocation, infix directive (name first,
// directive second), and finally a target instruction.
// Returns true on error; returns false on success, including when the
// statement is skipped inside an inactive conditional block.
// NOTE(review): listing line 2092 (the rest of the parameter list) is missing;
// the body uses a parameter named `SI` that must be declared there.
2091 bool MasmParser::parseStatement(ParseStatementInfo &Info,
2093  assert(!hasPendingError() && "parseStatement started with pending error");
2094  // Eat initial spaces and comments.
2095  while (Lexer.is(AsmToken::Space))
2096  Lex();
2097  if (Lexer.is(AsmToken::EndOfStatement)) {
2098  // If this is a line comment we can drop it safely.
2099  if (getTok().getString().empty() || getTok().getString().front() == '\r' ||
2100  getTok().getString().front() == '\n')
2101  Out.AddBlankLine();
2102  Lex();
2103  return false;
2104  }
2105
2106  // If preceded by an expansion operator, first expand all text macros and
2107  // macro functions.
2108  if (getTok().is(AsmToken::Percent)) {
2109  SMLoc ExpansionLoc = getTok().getLoc();
2110  if (parseToken(AsmToken::Percent) || expandStatement(ExpansionLoc))
2111  return true;
2112  }
2113
2114  // Statements always start with an identifier, unless we're dealing with a
2115  // processor directive (.386, .686, etc.) that lexes as a real.
2116  AsmToken ID = getTok();
2117  SMLoc IDLoc = ID.getLoc();
2118  StringRef IDVal;
// -1 means "not a numeric local label"; see the check in the label case below.
2119  int64_t LocalLabelVal = -1;
2120  if (Lexer.is(AsmToken::HashDirective))
2121  return parseCppHashLineFilenameComment(IDLoc);
2122  // Allow an integer followed by a ':' as a directional local label.
2123  if (Lexer.is(AsmToken::Integer)) {
2124  LocalLabelVal = getTok().getIntVal();
2125  if (LocalLabelVal < 0) {
// Errors are only reported when not inside an ignored conditional block;
// otherwise IDVal is left empty and the statement is skipped further down.
2126  if (!TheCondState.Ignore) {
2127  Lex(); // always eat a token
2128  return Error(IDLoc, "unexpected token at start of statement");
2129  }
2130  IDVal = "";
2131  } else {
2132  IDVal = getTok().getString();
2133  Lex(); // Consume the integer token to be used as an identifier token.
2134  if (Lexer.getKind() != AsmToken::Colon) {
2135  if (!TheCondState.Ignore) {
2136  Lex(); // always eat a token
2137  return Error(IDLoc, "unexpected token at start of statement");
2138  }
2139  }
2140  }
2141  } else if (Lexer.is(AsmToken::Dot)) {
2142  // Treat '.' as a valid identifier in this context.
2143  Lex();
2144  IDVal = ".";
2145  } else if (Lexer.is(AsmToken::LCurly)) {
2146  // Treat '{' as a valid identifier in this context.
2147  Lex();
2148  IDVal = "{";
2149
2150  } else if (Lexer.is(AsmToken::RCurly)) {
2151  // Treat '}' as a valid identifier in this context.
2152  Lex();
2153  IDVal = "}";
2154  } else if (Lexer.is(AsmToken::Star) &&
2155  getTargetParser().starIsStartOfStatement()) {
2156  // Accept '*' as a valid start of statement.
2157  Lex();
2158  IDVal = "*";
2159  } else if (Lexer.is(AsmToken::Real)) {
2160  // Treat ".<number>" as a valid identifier in this context.
2161  IDVal = getTok().getString();
2162  Lex(); // always eat a token
2163  if (!IDVal.startswith("."))
2164  return Error(IDLoc, "unexpected token at start of statement");
2165  } else if (parseIdentifier(IDVal, StartOfStatement)) {
2166  if (!TheCondState.Ignore) {
2167  Lex(); // always eat a token
2168  return Error(IDLoc, "unexpected token at start of statement");
2169  }
2170  IDVal = "";
2171  }
2172
2173  // Handle conditional assembly here before checking for skipping. We
2174  // have to do this so that .endif isn't skipped in a ".if 0" block for
2175  // example.
// Directive names are looked up in lower case, i.e. case-insensitively.
// NOTE(review): listing line 2176 is missing; presumably it declares the
// iterator `DirKindIt` that the next line initializes.
2177  DirectiveKindMap.find(IDVal.lower());
2178  DirectiveKind DirKind = (DirKindIt == DirectiveKindMap.end())
2179  ? DK_NO_DIRECTIVE
2180  : DirKindIt->getValue();
2181  switch (DirKind) {
2182  default:
2183  break;
2184  case DK_IF:
2185  case DK_IFE:
2186  return parseDirectiveIf(IDLoc, DirKind);
2187  case DK_IFB:
2188  return parseDirectiveIfb(IDLoc, true);
2189  case DK_IFNB:
2190  return parseDirectiveIfb(IDLoc, false);
2191  case DK_IFDEF:
2192  return parseDirectiveIfdef(IDLoc, true);
2193  case DK_IFNDEF:
2194  return parseDirectiveIfdef(IDLoc, false);
2195  case DK_IFDIF:
2196  return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/false,
2197  /*CaseInsensitive=*/false);
2198  case DK_IFDIFI:
2199  return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/false,
2200  /*CaseInsensitive=*/true);
2201  case DK_IFIDN:
2202  return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/true,
2203  /*CaseInsensitive=*/false);
2204  case DK_IFIDNI:
2205  return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/true,
2206  /*CaseInsensitive=*/true);
2207  case DK_ELSEIF:
2208  case DK_ELSEIFE:
2209  return parseDirectiveElseIf(IDLoc, DirKind);
2210  case DK_ELSEIFB:
2211  return parseDirectiveElseIfb(IDLoc, true);
2212  case DK_ELSEIFNB:
2213  return parseDirectiveElseIfb(IDLoc, false);
2214  case DK_ELSEIFDEF:
2215  return parseDirectiveElseIfdef(IDLoc, true);
2216  case DK_ELSEIFNDEF:
2217  return parseDirectiveElseIfdef(IDLoc, false);
2218  case DK_ELSEIFDIF:
2219  return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/false,
2220  /*CaseInsensitive=*/false);
2221  case DK_ELSEIFDIFI:
2222  return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/false,
2223  /*CaseInsensitive=*/true);
2224  case DK_ELSEIFIDN:
2225  return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/true,
2226  /*CaseInsensitive=*/false);
2227  case DK_ELSEIFIDNI:
2228  return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/true,
2229  /*CaseInsensitive=*/true);
2230  case DK_ELSE:
2231  return parseDirectiveElse(IDLoc);
2232  case DK_ENDIF:
2233  return parseDirectiveEndIf(IDLoc);
2234  }
2235
2236  // Ignore the statement if in the middle of inactive conditional
2237  // (e.g. ".if 0").
2238  if (TheCondState.Ignore) {
2239  eatToEndOfStatement();
2240  return false;
2241  }
2242
2243  // FIXME: Recurse on local labels?
2244
2245  // See what kind of statement we have.
2246  switch (Lexer.getKind()) {
2247  case AsmToken::Colon: {
// The target parser may decline to treat ID as a label; in that case the
// statement is handled as an instruction/directive below.
2248  if (!getTargetParser().isLabel(ID))
2249  break;
2250  if (checkForValidSection())
2251  return true;
2252
2253  // identifier ':' -> Label.
2254  Lex();
2255
2256  // Diagnose attempt to use '.' as a label.
2257  if (IDVal == ".")
2258  return Error(IDLoc, "invalid use of pseudo-symbol '.' as a label");
2259
2260  // Diagnose attempt to use a variable as a label.
2261  //
2262  // FIXME: Diagnostics. Note the location of the definition as a label.
2263  // FIXME: This doesn't diagnose assignment to a symbol which has been
2264  // implicitly marked as external.
2265  MCSymbol *Sym;
2266  if (LocalLabelVal == -1) {
// For MS inline asm with a semantic callback, the label is renamed to the
// callback-provided internal name and the rewrite is recorded.
2267  if (ParsingMSInlineAsm && SI) {
2268  StringRef RewrittenLabel =
2269  SI->LookupInlineAsmLabel(IDVal, getSourceManager(), IDLoc, true);
2270  assert(!RewrittenLabel.empty() &&
2271  "We should have an internal name here.");
2272  Info.AsmRewrites->emplace_back(AOK_Label, IDLoc, IDVal.size(),
2273  RewrittenLabel);
2274  IDVal = RewrittenLabel;
2275  }
2276  Sym = getContext().getOrCreateSymbol(IDVal);
2277  } else
2278  Sym = Ctx.createDirectionalLocalSymbol(LocalLabelVal);
2279  // End of Labels should be treated as end of line for lexing
2280  // purposes but that information is not available to the Lexer who
2281  // does not understand Labels. This may cause us to see a Hash
2282  // here instead of a preprocessor line comment.
2283  if (getTok().is(AsmToken::Hash)) {
2284  std::string CommentStr = parseStringTo(AsmToken::EndOfStatement);
2285  Lexer.Lex();
2286  Lexer.UnLex(AsmToken(AsmToken::EndOfStatement, CommentStr));
2287  }
2288
2289  // Consume any end of statement token, if present, to avoid spurious
2290  // AddBlankLine calls().
2291  if (getTok().is(AsmToken::EndOfStatement)) {
2292  Lex();
2293  }
2294
2295  getTargetParser().doBeforeLabelEmit(Sym);
2296
2297  // Emit the label.
2298  if (!getTargetParser().isParsingMSInlineAsm())
2299  Out.emitLabel(Sym, IDLoc);
2300
2301  // If we are generating dwarf for assembly source files then gather the
2302  // info to make a dwarf label entry for this label if needed.
2303  if (enabledGenDwarfForAssembly())
2304  MCGenDwarfLabelEntry::Make(Sym, &getStreamer(), getSourceManager(),
2305  IDLoc);
2306
2307  getTargetParser().onLabelParsed(Sym);
2308
2309  return false;
2310  }
2311
2312  default: // Normal instruction or directive.
2313  break;
2314  }
2315
2316  // If macros are enabled, check to see if this is a macro instantiation.
2317  if (const MCAsmMacro *M = getContext().lookupMacro(IDVal.lower())) {
2318  return handleMacroEntry(M, IDLoc);
2319  }
2320
2321  // Otherwise, we have a normal instruction or directive.
2322
// Prefix form: the first token of the statement is itself a known directive.
2323  if (DirKind != DK_NO_DIRECTIVE) {
2324  // There are several entities interested in parsing directives:
2325  //
2326  // 1. Asm parser extensions. For example, platform-specific parsers
2327  // (like the ELF parser) register themselves as extensions.
2328  // 2. The target-specific assembly parser. Some directives are target
2329  // specific or may potentially behave differently on certain targets.
2330  // 3. The generic directive parser implemented by this class. These are
2331  // all the directives that behave in a target and platform independent
2332  // manner, or at least have a default behavior that's shared between
2333  // all targets and platforms.
2334
2335  getTargetParser().flushPendingInstructions(getStreamer());
2336
2337  // Special-case handling of structure-end directives at higher priority,
2338  // since ENDS is overloaded as a segment-end directive.
2339  if (IDVal.equals_insensitive("ends") && StructInProgress.size() > 1 &&
2340  getTok().is(AsmToken::EndOfStatement)) {
2341  return parseDirectiveNestedEnds();
2342  }
2343
2344  // First, check the extension directive map to see if any extension has
2345  // registered itself to parse this directive.
2346  std::pair<MCAsmParserExtension *, DirectiveHandler> Handler =
2347  ExtensionDirectiveMap.lookup(IDVal.lower());
2348  if (Handler.first)
2349  return (*Handler.second)(Handler.first, IDVal, IDLoc);
2350
2351  // Next, let the target-specific assembly parser try.
// StartTokLoc is used below to detect whether the target parser consumed
// any tokens.
2352  SMLoc StartTokLoc = getTok().getLoc();
2353  bool TPDirectiveReturn =
2354  ID.is(AsmToken::Identifier) && getTargetParser().ParseDirective(ID);
2355
2356  if (hasPendingError())
2357  return true;
2358  // Currently the return value should be true if we are
2359  // uninterested but as this is at odds with the standard parsing
2360  // convention (return true = error) we have instances of a parsed
2361  // directive that fails returning true as an error. Catch these
2362  // cases as best as possible errors here.
2363  if (TPDirectiveReturn && StartTokLoc != getTok().getLoc())
2364  return true;
2365  // Return if we did some parsing or believe we succeeded.
2366  if (!TPDirectiveReturn || StartTokLoc != getTok().getLoc())
2367  return false;
2368
2369  // Finally, if no one else is interested in this directive, it must be
2370  // generic and familiar to this class.
2371  switch (DirKind) {
2372  default:
2373  break;
2374  case DK_ASCII:
2375  return parseDirectiveAscii(IDVal, false);
2376  case DK_ASCIZ:
2377  case DK_STRING:
2378  return parseDirectiveAscii(IDVal, true);
// Data-definition directives; the integer argument is the element size in
// bytes (1, 2, 4, 6, 8 for the integer forms; 4, 8, 10 for the reals).
2379  case DK_BYTE:
2380  case DK_SBYTE:
2381  case DK_DB:
2382  return parseDirectiveValue(IDVal, 1);
2383  case DK_WORD:
2384  case DK_SWORD:
2385  case DK_DW:
2386  return parseDirectiveValue(IDVal, 2);
2387  case DK_DWORD:
2388  case DK_SDWORD:
2389  case DK_DD:
2390  return parseDirectiveValue(IDVal, 4);
2391  case DK_FWORD:
2392  case DK_DF:
2393  return parseDirectiveValue(IDVal, 6);
2394  case DK_QWORD:
2395  case DK_SQWORD:
2396  case DK_DQ:
2397  return parseDirectiveValue(IDVal, 8);
2398  case DK_REAL4:
2399  return parseDirectiveRealValue(IDVal, APFloat::IEEEsingle(), 4);
2400  case DK_REAL8:
2401  return parseDirectiveRealValue(IDVal, APFloat::IEEEdouble(), 8);
2402  case DK_REAL10:
2403  return parseDirectiveRealValue(IDVal, APFloat::x87DoubleExtended(), 10);
2404  case DK_STRUCT:
2405  case DK_UNION:
2406  return parseDirectiveNestedStruct(IDVal, DirKind);
2407  case DK_ENDS:
2408  return parseDirectiveNestedEnds();
2409  case DK_ALIGN:
2410  return parseDirectiveAlign();
2411  case DK_EVEN:
2412  return parseDirectiveEven();
2413  case DK_ORG:
2414  return parseDirectiveOrg();
2415  case DK_EXTERN:
2416  eatToEndOfStatement(); // .extern is the default, ignore it.
2417  return false;
2418  case DK_PUBLIC:
2419  return parseDirectiveSymbolAttribute(MCSA_Global);
2420  case DK_COMM:
2421  return parseDirectiveComm(/*IsLocal=*/false);
2422  case DK_COMMENT:
2423  return parseDirectiveComment(IDLoc);
2424  case DK_INCLUDE:
2425  return parseDirectiveInclude();
2426  case DK_REPEAT:
2427  return parseDirectiveRepeat(IDLoc, IDVal);
2428  case DK_WHILE:
2429  return parseDirectiveWhile(IDLoc);
2430  case DK_FOR:
2431  return parseDirectiveFor(IDLoc, IDVal);
2432  case DK_FORC:
2433  return parseDirectiveForc(IDLoc, IDVal);
2434  case DK_FILE:
2435  return parseDirectiveFile(IDLoc);
2436  case DK_LINE:
2437  return parseDirectiveLine();
2438  case DK_LOC:
2439  return parseDirectiveLoc();
2440  case DK_STABS:
2441  return parseDirectiveStabs();
2442  case DK_CV_FILE:
2443  return parseDirectiveCVFile();
2444  case DK_CV_FUNC_ID:
2445  return parseDirectiveCVFuncId();
2446  case DK_CV_INLINE_SITE_ID:
2447  return parseDirectiveCVInlineSiteId();
2448  case DK_CV_LOC:
2449  return parseDirectiveCVLoc();
2450  case DK_CV_LINETABLE:
2451  return parseDirectiveCVLinetable();
2452  case DK_CV_INLINE_LINETABLE:
2453  return parseDirectiveCVInlineLinetable();
2454  case DK_CV_DEF_RANGE:
2455  return parseDirectiveCVDefRange();
2456  case DK_CV_STRING:
2457  return parseDirectiveCVString();
2458  case DK_CV_STRINGTABLE:
2459  return parseDirectiveCVStringTable();
2460  case DK_CV_FILECHECKSUMS:
2461  return parseDirectiveCVFileChecksums();
2462  case DK_CV_FILECHECKSUM_OFFSET:
2463  return parseDirectiveCVFileChecksumOffset();
2464  case DK_CV_FPO_DATA:
2465  return parseDirectiveCVFPOData();
2466  case DK_CFI_SECTIONS:
2467  return parseDirectiveCFISections();
2468  case DK_CFI_STARTPROC:
2469  return parseDirectiveCFIStartProc();
2470  case DK_CFI_ENDPROC:
2471  return parseDirectiveCFIEndProc();
2472  case DK_CFI_DEF_CFA:
2473  return parseDirectiveCFIDefCfa(IDLoc);
2474  case DK_CFI_DEF_CFA_OFFSET:
2475  return parseDirectiveCFIDefCfaOffset();
2476  case DK_CFI_ADJUST_CFA_OFFSET:
2477  return parseDirectiveCFIAdjustCfaOffset();
2478  case DK_CFI_DEF_CFA_REGISTER:
2479  return parseDirectiveCFIDefCfaRegister(IDLoc);
2480  case DK_CFI_OFFSET:
2481  return parseDirectiveCFIOffset(IDLoc);
2482  case DK_CFI_REL_OFFSET:
2483  return parseDirectiveCFIRelOffset(IDLoc);
2484  case DK_CFI_PERSONALITY:
2485  return parseDirectiveCFIPersonalityOrLsda(true);
2486  case DK_CFI_LSDA:
2487  return parseDirectiveCFIPersonalityOrLsda(false);
2488  case DK_CFI_REMEMBER_STATE:
2489  return parseDirectiveCFIRememberState();
2490  case DK_CFI_RESTORE_STATE:
2491  return parseDirectiveCFIRestoreState();
2492  case DK_CFI_SAME_VALUE:
2493  return parseDirectiveCFISameValue(IDLoc);
2494  case DK_CFI_RESTORE:
2495  return parseDirectiveCFIRestore(IDLoc);
2496  case DK_CFI_ESCAPE:
2497  return parseDirectiveCFIEscape();
2498  case DK_CFI_RETURN_COLUMN:
2499  return parseDirectiveCFIReturnColumn(IDLoc);
2500  case DK_CFI_SIGNAL_FRAME:
2501  return parseDirectiveCFISignalFrame();
2502  case DK_CFI_UNDEFINED:
2503  return parseDirectiveCFIUndefined(IDLoc);
2504  case DK_CFI_REGISTER:
2505  return parseDirectiveCFIRegister(IDLoc);
2506  case DK_CFI_WINDOW_SAVE:
2507  return parseDirectiveCFIWindowSave();
2508  case DK_EXITM:
// Info.ExitValue is set to an empty string before the handler runs; EXITM
// passes it on so the handler can fill it in.
2509  Info.ExitValue = "";
2510  return parseDirectiveExitMacro(IDLoc, IDVal, *Info.ExitValue);
2511  case DK_ENDM:
2512  Info.ExitValue = "";
2513  return parseDirectiveEndMacro(IDVal);
2514  case DK_PURGE:
2515  return parseDirectivePurgeMacro(IDLoc);
2516  case DK_END:
2517  return parseDirectiveEnd(IDLoc);
2518  case DK_ERR:
2519  return parseDirectiveError(IDLoc);
2520  case DK_ERRB:
2521  return parseDirectiveErrorIfb(IDLoc, true);
2522  case DK_ERRNB:
2523  return parseDirectiveErrorIfb(IDLoc, false);
2524  case DK_ERRDEF:
2525  return parseDirectiveErrorIfdef(IDLoc, true);
2526  case DK_ERRNDEF:
2527  return parseDirectiveErrorIfdef(IDLoc, false);
2528  case DK_ERRDIF:
2529  return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/false,
2530  /*CaseInsensitive=*/false);
2531  case DK_ERRDIFI:
2532  return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/false,
2533  /*CaseInsensitive=*/true);
2534  case DK_ERRIDN:
2535  return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/true,
2536  /*CaseInsensitive=*/false);
2537  case DK_ERRIDNI:
2538  return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/true,
2539  /*CaseInsensitive=*/true);
2540  case DK_ERRE:
2541  return parseDirectiveErrorIfe(IDLoc, true);
2542  case DK_ERRNZ:
2543  return parseDirectiveErrorIfe(IDLoc, false);
2544  case DK_RADIX:
2545  return parseDirectiveRadix(IDLoc);
2546  case DK_ECHO:
2547  return parseDirectiveEcho(IDLoc);
2548  }
2549
// A directive kind that is in the map but has no prefix-form handler above
// (for example one that is only handled in the infix switch below) ends up
// here.
2550  return Error(IDLoc, "unknown directive");
2551  }
2552
2553  // We also check if this is allocating memory with user-defined type.
2554  auto IDIt = Structs.find(IDVal.lower());
2555  if (IDIt != Structs.end())
2556  return parseDirectiveStructValue(/*Structure=*/IDIt->getValue(), IDVal,
2557  IDLoc);
2558
2559  // Non-conditional Microsoft directives sometimes follow their first argument.
// Infix form: IDVal (already consumed) is the name, and the current token
// (nextTok/nextVal) is the candidate directive keyword. afterNextTok is only
// peeked, not consumed.
2560  const AsmToken nextTok = getTok();
2561  const StringRef nextVal = nextTok.getString();
2562  const SMLoc nextLoc = nextTok.getLoc();
2563
2564  const AsmToken afterNextTok = peekTok();
2565
2566  // There are several entities interested in parsing infix directives:
2567  //
2568  // 1. Asm parser extensions. For example, platform-specific parsers
2569  // (like the ELF parser) register themselves as extensions.
2570  // 2. The generic directive parser implemented by this class. These are
2571  // all the directives that behave in a target and platform independent
2572  // manner, or at least have a default behavior that's shared between
2573  // all targets and platforms.
2574
2575  getTargetParser().flushPendingInstructions(getStreamer());
2576
2577  // Special-case handling of structure-end directives at higher priority, since
2578  // ENDS is overloaded as a segment-end directive.
2579  if (nextVal.equals_insensitive("ends") && StructInProgress.size() == 1) {
2580  Lex();
2581  return parseDirectiveEnds(IDVal, IDLoc);
2582  }
2583
2584  // First, check the extension directive map to see if any extension has
2585  // registered itself to parse this directive.
2586  std::pair<MCAsmParserExtension *, DirectiveHandler> Handler =
2587  ExtensionDirectiveMap.lookup(nextVal.lower());
2588  if (Handler.first) {
// Consume the directive keyword, then push the leading identifier back so it
// is the next token the extension handler sees.
2589  Lex();
2590  Lexer.UnLex(ID);
2591  return (*Handler.second)(Handler.first, nextVal, nextLoc);
2592  }
2593
2594  // If no one else is interested in this directive, it must be
2595  // generic and familiar to this class.
2596  DirKindIt = DirectiveKindMap.find(nextVal.lower());
2597  DirKind = (DirKindIt == DirectiveKindMap.end())
2598  ? DK_NO_DIRECTIVE
2599  : DirKindIt->getValue();
2600  switch (DirKind) {
2601  default:
2602  break;
2603  case DK_ASSIGN:
2604  case DK_EQU:
2605  case DK_TEXTEQU:
2606  Lex();
2607  return parseDirectiveEquate(nextVal, IDVal, DirKind, IDLoc);
2608  case DK_BYTE:
2609  if (afterNextTok.is(AsmToken::Identifier) &&
2610  afterNextTok.getString().equals_insensitive("ptr")) {
2611  // Size directive; part of an instruction.
2612  break;
2613  }
// NOTE(review): listing line 2614 is missing. Control continues into the
// cases below when the "ptr" check fails; presumably the missing line is an
// explicit fallthrough annotation -- confirm against the real file. The same
// applies to the gaps after the WORD, DWORD, FWORD and QWORD checks below.
2615  case DK_SBYTE:
2616  case DK_DB:
2617  Lex();
2618  return parseDirectiveNamedValue(nextVal, 1, IDVal, IDLoc);
2619  case DK_WORD:
2620  if (afterNextTok.is(AsmToken::Identifier) &&
2621  afterNextTok.getString().equals_insensitive("ptr")) {
2622  // Size directive; part of an instruction.
2623  break;
2624  }
2626  case DK_SWORD:
2627  case DK_DW:
2628  Lex();
2629  return parseDirectiveNamedValue(nextVal, 2, IDVal, IDLoc);
2630  case DK_DWORD:
2631  if (afterNextTok.is(AsmToken::Identifier) &&
2632  afterNextTok.getString().equals_insensitive("ptr")) {
2633  // Size directive; part of an instruction.
2634  break;
2635  }
2637  case DK_SDWORD:
2638  case DK_DD:
2639  Lex();
2640  return parseDirectiveNamedValue(nextVal, 4, IDVal, IDLoc);
2641  case DK_FWORD:
2642  if (afterNextTok.is(AsmToken::Identifier) &&
2643  afterNextTok.getString().equals_insensitive("ptr")) {
2644  // Size directive; part of an instruction.
2645  break;
2646  }
2648  case DK_DF:
2649  Lex();
2650  return parseDirectiveNamedValue(nextVal, 6, IDVal, IDLoc);
2651  case DK_QWORD:
2652  if (afterNextTok.is(AsmToken::Identifier) &&
2653  afterNextTok.getString().equals_insensitive("ptr")) {
2654  // Size directive; part of an instruction.
2655  break;
2656  }
2658  case DK_SQWORD:
2659  case DK_DQ:
2660  Lex();
2661  return parseDirectiveNamedValue(nextVal, 8, IDVal, IDLoc);
2662  case DK_REAL4:
2663  Lex();
2664  return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEsingle(), 4,
2665  IDVal, IDLoc);
2666  case DK_REAL8:
2667  Lex();
2668  return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEdouble(), 8,
2669  IDVal, IDLoc);
2670  case DK_REAL10:
2671  Lex();
2672  return parseDirectiveNamedRealValue(nextVal, APFloat::x87DoubleExtended(),
2673  10, IDVal, IDLoc);
2674  case DK_STRUCT:
2675  case DK_UNION:
2676  Lex();
2677  return parseDirectiveStruct(nextVal, DirKind, IDVal, IDLoc);
2678  case DK_ENDS:
2679  Lex();
2680  return parseDirectiveEnds(IDVal, IDLoc);
2681  case DK_MACRO:
2682  Lex();
2683  return parseDirectiveMacro(IDVal, IDLoc);
2684  }
2685
2686  // Finally, we check if this is allocating a variable with user-defined type.
2687  auto NextIt = Structs.find(nextVal.lower());
2688  if (NextIt != Structs.end()) {
2689  Lex();
2690  return parseDirectiveNamedStructValue(/*Structure=*/NextIt->getValue(),
2691  nextVal, nextLoc, IDVal);
2692  }
2693
// The MS inline-asm keyword checks below compare against all-lowercase and
// all-uppercase spellings only; mixed-case spellings do not match.
2694  // __asm _emit or __asm __emit
2695  if (ParsingMSInlineAsm && (IDVal == "_emit" || IDVal == "__emit" ||
2696  IDVal == "_EMIT" || IDVal == "__EMIT"))
2697  return parseDirectiveMSEmit(IDLoc, Info, IDVal.size());
2698
2699  // __asm align
2700  if (ParsingMSInlineAsm && (IDVal == "align" || IDVal == "ALIGN"))
2701  return parseDirectiveMSAlign(IDLoc, Info);
2702
// Unlike the two cases above, "even" only records a rewrite and then
// continues on to instruction parsing.
2703  if (ParsingMSInlineAsm && (IDVal == "even" || IDVal == "EVEN"))
2704  Info.AsmRewrites->emplace_back(AOK_EVEN, IDLoc, 4);
2705  if (checkForValidSection())
2706  return true;
2707
2708  // Canonicalize the opcode to lower case.
2709  std::string OpcodeStr = IDVal.lower();
2710  ParseInstructionInfo IInfo(Info.AsmRewrites);
2711  bool ParseHadError = getTargetParser().ParseInstruction(IInfo, OpcodeStr, ID,
2712  Info.ParsedOperands);
2713  Info.ParseError = ParseHadError;
2714
2715  // Dump the parsed representation, if requested.
2716  if (getShowParsedOperands()) {
2717  SmallString<256> Str;
2718  raw_svector_ostream OS(Str);
2719  OS << "parsed instruction: [";
2720  for (unsigned i = 0; i != Info.ParsedOperands.size(); ++i) {
2721  if (i != 0)
2722  OS << ", ";
2723  Info.ParsedOperands[i]->print(OS);
2724  }
2725  OS << "]";
2726
2727  printMessage(IDLoc, SourceMgr::DK_Note, OS.str());
2728  }
2729
2730  // Fail even if ParseInstruction erroneously returns false.
2731  if (hasPendingError() || ParseHadError)
2732  return true;
2733
// ParseHadError is necessarily false from here on because of the early
// return above, so the `!ParseHadError` tests below are always true.
2734  // If we are generating dwarf for the current section then generate a .loc
2735  // directive for the instruction.
2736  if (!ParseHadError && enabledGenDwarfForAssembly() &&
2737  getContext().getGenDwarfSectionSyms().count(
2738  getStreamer().getCurrentSectionOnly())) {
2739  unsigned Line;
// Inside a macro expansion the line is taken from the front entry of
// ActiveMacros (its instantiation location) rather than from IDLoc.
2740  if (ActiveMacros.empty())
2741  Line = SrcMgr.FindLineNumber(IDLoc, CurBuffer);
2742  else
2743  Line = SrcMgr.FindLineNumber(ActiveMacros.front()->InstantiationLoc,
2744  ActiveMacros.front()->ExitBuffer);
2745
2746  // If we previously parsed a cpp hash file line comment then make sure the
2747  // current Dwarf File is for the CppHashFilename if not then emit the
2748  // Dwarf File table for it and adjust the line number for the .loc.
2749  if (!CppHashInfo.Filename.empty()) {
2750  unsigned FileNumber = getStreamer().emitDwarfFileDirective(
2751  0, StringRef(), CppHashInfo.Filename);
2752  getContext().setGenDwarfFileNumber(FileNumber);
2753
2754  unsigned CppHashLocLineNo =
2755  SrcMgr.FindLineNumber(CppHashInfo.Loc, CppHashInfo.Buf);
// Rebase the line: the line number recorded for the '#' comment, minus one,
// plus the distance of this statement from that comment.
2756  Line = CppHashInfo.LineNumber - 1 + (Line - CppHashLocLineNo);
2757  }
2758
2759  getStreamer().emitDwarfLocDirective(
2760  getContext().getGenDwarfFileNumber(), Line, 0,
// NOTE(review): listing line 2761 (the middle arguments of this call) is
// missing from this listing.
2762  StringRef());
2763  }
2764
2765  // If parsing succeeded, match the instruction.
2766  if (!ParseHadError) {
// NOTE(review): listing line 2767 is missing; presumably it declares
// `ErrorInfo`, which is passed to MatchAndEmitInstruction below.
2768  if (getTargetParser().MatchAndEmitInstruction(
2769  IDLoc, Info.Opcode, Info.ParsedOperands, Out, ErrorInfo,
2770  getTargetParser().isParsingMSInlineAsm()))
2771  return true;
2772  }
2773  return false;
2774 }
2775 
2776 // Parse and erase curly braces marking block start/end.
2777 bool MasmParser::parseCurlyBlockScope(
2778  SmallVectorImpl<AsmRewrite> &AsmStrRewrites) {
2779  // Identify curly brace marking block start/end.
2780  if (Lexer.isNot(AsmToken::LCurly) && Lexer.isNot(AsmToken::RCurly))
2781  return false;
2782 
2783  SMLoc StartLoc = Lexer.getLoc();
2784  Lex(); // Eat the brace.
2785  if (Lexer.is(AsmToken::EndOfStatement))
2786  Lex(); // Eat EndOfStatement following the brace.
2787 
2788  // Erase the block start/end brace from the output asm string.
2789  AsmStrRewrites.emplace_back(AOK_Skip, StartLoc, Lexer.getLoc().getPointer() -
2790  StartLoc.getPointer());
2791  return true;
2792 }
2793 
2794 /// parseCppHashLineFilenameComment as this:
2795 /// ::= # number "filename"
2796 bool MasmParser::parseCppHashLineFilenameComment(SMLoc L) {
2797  Lex(); // Eat the hash token.
2798  // Lexer only ever emits HashDirective if it fully formed if it's
2799  // done the checking already so this is an internal error.
2800  assert(getTok().is(AsmToken::Integer) &&
2801  "Lexing Cpp line comment: Expected Integer");
2802  int64_t LineNumber = getTok().getIntVal();
2803  Lex();
2804  assert(getTok().is(AsmToken::String) &&
2805  "Lexing Cpp line comment: Expected String");
2806  StringRef Filename = getTok().getString();
2807  Lex();
2808 
2809  // Get rid of the enclosing quotes.
2810  Filename = Filename.substr(1, Filename.size() - 2);
2811 
2812  // Save the SMLoc, Filename and LineNumber for later use by diagnostics
2813  // and possibly DWARF file info.
2814  CppHashInfo.Loc = L;
2815  CppHashInfo.Filename = Filename;
2816  CppHashInfo.LineNumber = LineNumber;
2817  CppHashInfo.Buf = CurBuffer;
2818  if (FirstCppHashFilename.empty())
2819  FirstCppHashFilename = Filename;
2820  return false;
2821 }
2822 
2823 /// will use the last parsed cpp hash line filename comment
2824 /// for the Filename and LineNo if any in the diagnostic.
2825 void MasmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) {
2826  const MasmParser *Parser = static_cast<const MasmParser *>(Context);
2827  raw_ostream &OS = errs();
2828 
2829  const SourceMgr &DiagSrcMgr = *Diag.getSourceMgr();
2830  SMLoc DiagLoc = Diag.getLoc();
2831  unsigned DiagBuf = DiagSrcMgr.FindBufferContainingLoc(DiagLoc);
2832  unsigned CppHashBuf =
2833  Parser->SrcMgr.FindBufferContainingLoc(Parser->CppHashInfo.Loc);
2834 
2835  // Like SourceMgr::printMessage() we need to print the include stack if any
2836  // before printing the message.
2837  unsigned DiagCurBuffer = DiagSrcMgr.FindBufferContainingLoc(DiagLoc);
2838  if (!Parser->SavedDiagHandler && DiagCurBuffer &&
2839  DiagCurBuffer != DiagSrcMgr.getMainFileID()) {
2840  SMLoc ParentIncludeLoc = DiagSrcMgr.getParentIncludeLoc(DiagCurBuffer);
2841  DiagSrcMgr.PrintIncludeStack(ParentIncludeLoc, OS);
2842  }
2843 
2844  // If we have not parsed a cpp hash line filename comment or the source
2845  // manager changed or buffer changed (like in a nested include) then just
2846  // print the normal diagnostic using its Filename and LineNo.
2847  if (!Parser->CppHashInfo.LineNumber || &DiagSrcMgr != &Parser->SrcMgr ||
2848  DiagBuf != CppHashBuf) {
2849  if (Parser->SavedDiagHandler)
2850  Parser->SavedDiagHandler(Diag, Parser->SavedDiagContext);
2851  else
2852  Diag.print(nullptr, OS);
2853  return;
2854  }
2855 
2856  // Use the CppHashFilename and calculate a line number based on the
2857  // CppHashInfo.Loc and CppHashInfo.LineNumber relative to this Diag's SMLoc
2858  // for the diagnostic.
2859  const std::string &Filename = std::string(Parser->CppHashInfo.Filename);
2860 
2861  int DiagLocLineNo = DiagSrcMgr.FindLineNumber(DiagLoc, DiagBuf);
2862  int CppHashLocLineNo =
2863  Parser->SrcMgr.FindLineNumber(Parser->CppHashInfo.Loc, CppHashBuf);
2864  int LineNo =
2865  Parser->CppHashInfo.LineNumber - 1 + (DiagLocLineNo - CppHashLocLineNo);
2866 
2867  SMDiagnostic NewDiag(*Diag.getSourceMgr(), Diag.getLoc(), Filename, LineNo,
2868  Diag.getColumnNo(), Diag.getKind(), Diag.getMessage(),
2869  Diag.getLineContents(), Diag.getRanges());
2870 
2871  if (Parser->SavedDiagHandler)
2872  Parser->SavedDiagHandler(NewDiag, Parser->SavedDiagContext);
2873  else
2874  NewDiag.print(nullptr, OS);
2875 }
2876 
2877 // This is similar to the IsIdentifierChar function in AsmLexer.cpp, but does
2878 // not accept '.'.
2879 static bool isMacroParameterChar(char C) {
2880  return isAlnum(C) || C == '_' || C == '$' || C == '@' || C == '?';
2881 }
2882 
2883 bool MasmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
2884  ArrayRef<MCAsmMacroParameter> Parameters,
2886  const std::vector<std::string> &Locals, SMLoc L) {
2887  unsigned NParameters = Parameters.size();
2888  if (NParameters != A.size())
2889  return Error(L, "Wrong number of arguments");
2890  StringMap<std::string> LocalSymbols;
2891  std::string Name;
2892  Name.reserve(6);
2893  for (StringRef Local : Locals) {
2895  LocalName << "??"
2896  << format_hex_no_prefix(LocalCounter++, 4, /*Upper=*/true);
2897  LocalSymbols.insert({Local, LocalName.str()});
2898  Name.clear();
2899  }
2900 
2901  Optional<char> CurrentQuote;
2902  while (!Body.empty()) {
2903  // Scan for the next substitution.
2904  std::size_t End = Body.size(), Pos = 0;
2905  std::size_t IdentifierPos = End;
2906  for (; Pos != End; ++Pos) {
2907  // Find the next possible macro parameter, including preceding a '&'
2908  // inside quotes.
2909  if (Body[Pos] == '&')
2910  break;
2911  if (isMacroParameterChar(Body[Pos])) {
2912  if (!CurrentQuote.hasValue())
2913  break;
2914  if (IdentifierPos == End)
2915  IdentifierPos = Pos;
2916  } else {
2917  IdentifierPos = End;
2918  }
2919 
2920  // Track quotation status
2921  if (!CurrentQuote.hasValue()) {
2922  if (Body[Pos] == '\'' || Body[Pos] == '"')
2923  CurrentQuote = Body[Pos];
2924  } else if (Body[Pos] == CurrentQuote) {
2925  if (Pos + 1 != End && Body[Pos + 1] == CurrentQuote) {
2926  // Escaped quote, and quotes aren't identifier chars; skip
2927  ++Pos;
2928  continue;
2929  } else {
2930  CurrentQuote.reset();
2931  }
2932  }
2933  }
2934  if (IdentifierPos != End) {
2935  // We've recognized an identifier before an apostrophe inside quotes;
2936  // check once to see if we can expand it.
2937  Pos = IdentifierPos;
2938  IdentifierPos = End;
2939  }
2940 
2941  // Add the prefix.
2942  OS << Body.slice(0, Pos);
2943 
2944  // Check if we reached the end.
2945  if (Pos == End)
2946  break;
2947 
2948  unsigned I = Pos;
2949  bool InitialAmpersand = (Body[I] == '&');
2950  if (InitialAmpersand) {
2951  ++I;
2952  ++Pos;
2953  }
2954  while (I < End && isMacroParameterChar(Body[I]))
2955  ++I;
2956 
2957  const char *Begin = Body.data() + Pos;
2958  StringRef Argument(Begin, I - Pos);
2959  const std::string ArgumentLower = Argument.lower();
2960  unsigned Index = 0;
2961 
2962  for (; Index < NParameters; ++Index)
2963  if (Parameters[Index].Name.equals_insensitive(ArgumentLower))
2964  break;
2965 
2966  if (Index == NParameters) {
2967  if (InitialAmpersand)
2968  OS << '&';
2969  auto it = LocalSymbols.find(ArgumentLower);
2970  if (it != LocalSymbols.end())
2971  OS << it->second;
2972  else
2973  OS << Argument;
2974  Pos = I;
2975  } else {
2976  for (const AsmToken &Token : A[Index]) {
2977  // In MASM, you can write '%expr'.
2978  // The prefix '%' evaluates the expression 'expr'
2979  // and uses the result as a string (e.g. replace %(1+2) with the
2980  // string "3").
2981  // Here, we identify the integer token which is the result of the
2982  // absolute expression evaluation and replace it with its string
2983  // representation.
2984  if (Token.getString().front() == '%' && Token.is(AsmToken::Integer))
2985  // Emit an integer value to the buffer.
2986  OS << Token.getIntVal();
2987  else
2988  OS << Token.getString();
2989  }
2990 
2991  Pos += Argument.size();
2992  if (Pos < End && Body[Pos] == '&') {
2993  ++Pos;
2994  }
2995  }
2996  // Update the scan point.
2997  Body = Body.substr(Pos);
2998  }
2999 
3000  return false;
3001 }
3002 
3003 static bool isOperator(AsmToken::TokenKind kind) {
3004  switch (kind) {
3005  default:
3006  return false;
3007  case AsmToken::Plus:
3008  case AsmToken::Minus:
3009  case AsmToken::Tilde:
3010  case AsmToken::Slash:
3011  case AsmToken::Star:
3012  case AsmToken::Dot:
3013  case AsmToken::Equal:
3014  case AsmToken::EqualEqual:
3015  case AsmToken::Pipe:
3016  case AsmToken::PipePipe:
3017  case AsmToken::Caret:
3018  case AsmToken::Amp:
3019  case AsmToken::AmpAmp:
3020  case AsmToken::Exclaim:
3021  case AsmToken::ExclaimEqual:
3022  case AsmToken::Less:
3023  case AsmToken::LessEqual:
3024  case AsmToken::LessLess:
3025  case AsmToken::LessGreater:
3026  case AsmToken::Greater:
3027  case AsmToken::GreaterEqual:
3028  case AsmToken::GreaterGreater:
3029  return true;
3030  }
3031 }
3032 
3033 namespace {
3034 
3035 class AsmLexerSkipSpaceRAII {
3036 public:
3037  AsmLexerSkipSpaceRAII(AsmLexer &Lexer, bool SkipSpace) : Lexer(Lexer) {
3038  Lexer.setSkipSpace(SkipSpace);
3039  }
3040 
3041  ~AsmLexerSkipSpaceRAII() {
3042  Lexer.setSkipSpace(true);
3043  }
3044 
3045 private:
3046  AsmLexer &Lexer;
3047 };
3048 
3049 } // end anonymous namespace
3050 
3051 bool MasmParser::parseMacroArgument(const MCAsmMacroParameter *MP,
3052  MCAsmMacroArgument &MA,
3053  AsmToken::TokenKind EndTok) {
3054  if (MP && MP->Vararg) {
3055  if (Lexer.isNot(EndTok)) {
3056  SmallVector<StringRef, 1> Str = parseStringRefsTo(EndTok);
3057  for (StringRef S : Str) {
3058  MA.emplace_back(AsmToken::String, S);
3059  }
3060  }
3061  return false;
3062  }
3063 
3064  SMLoc StrLoc = Lexer.getLoc(), EndLoc;
3065  if (Lexer.is(AsmToken::Less) && isAngleBracketString(StrLoc, EndLoc)) {
3066  const char *StrChar = StrLoc.getPointer() + 1;
3067  const char *EndChar = EndLoc.getPointer() - 1;
3068  jumpToLoc(EndLoc, CurBuffer, EndStatementAtEOFStack.back());
3069  /// Eat from '<' to '>'.
3070  Lex();
3071  MA.emplace_back(AsmToken::String, StringRef(StrChar, EndChar - StrChar));
3072  return false;
3073  }
3074 
3075  unsigned ParenLevel = 0;
3076 
3077  // Darwin doesn't use spaces to delmit arguments.
3078  AsmLexerSkipSpaceRAII ScopedSkipSpace(Lexer, IsDarwin);
3079 
3080  bool SpaceEaten;
3081 
3082  while (true) {
3083  SpaceEaten = false;
3084  if (Lexer.is(AsmToken::Eof) || Lexer.is(AsmToken::Equal))
3085  return TokError("unexpected token");
3086 
3087  if (ParenLevel == 0) {
3088  if (Lexer.is(AsmToken::Comma))
3089  break;
3090 
3091  if (Lexer.is(AsmToken::Space)) {
3092  SpaceEaten = true;
3093  Lex(); // Eat spaces.
3094  }
3095 
3096  // Spaces can delimit parameters, but could also be part an expression.
3097  // If the token after a space is an operator, add the token and the next
3098  // one into this argument
3099  if (!IsDarwin) {
3100  if (isOperator(Lexer.getKind()) && Lexer.isNot(EndTok)) {
3101  MA.push_back(getTok());
3102  Lex();
3103 
3104  // Whitespace after an operator can be ignored.
3105  if (Lexer.is(AsmToken::Space))
3106  Lex();
3107 
3108  continue;
3109  }
3110  }
3111  if (SpaceEaten)
3112  break;
3113  }
3114 
3115  // handleMacroEntry relies on not advancing the lexer here
3116  // to be able to fill in the remaining default parameter values
3117  if (Lexer.is(EndTok) && (EndTok != AsmToken::RParen || ParenLevel == 0))
3118  break;
3119 
3120  // Adjust the current parentheses level.
3121  if (Lexer.is(AsmToken::LParen))
3122  ++ParenLevel;
3123  else if (Lexer.is(AsmToken::RParen) && ParenLevel)
3124  --ParenLevel;
3125 
3126  // Append the token to the current argument list.
3127  MA.push_back(getTok());
3128  Lex();
3129  }
3130 
3131  if (ParenLevel != 0)
3132  return TokError("unbalanced parentheses in argument");
3133 
3134  if (MA.empty() && MP) {
3135  if (MP->Required) {
3136  return TokError("missing value for required parameter '" + MP->Name +
3137  "'");
3138  } else {
3139  MA = MP->Value;
3140  }
3141  }
3142  return false;
3143 }
3144 
3145 // Parse the macro instantiation arguments.
3146 bool MasmParser::parseMacroArguments(const MCAsmMacro *M,
3147  MCAsmMacroArguments &A,
3148  AsmToken::TokenKind EndTok) {
3149  const unsigned NParameters = M ? M->Parameters.size() : 0;
3150  bool NamedParametersFound = false;
3151  SmallVector<SMLoc, 4> FALocs;
3152 
3153  A.resize(NParameters);
3154  FALocs.resize(NParameters);
3155 
3156  // Parse two kinds of macro invocations:
3157  // - macros defined without any parameters accept an arbitrary number of them
3158  // - macros defined with parameters accept at most that many of them
3159  for (unsigned Parameter = 0; !NParameters || Parameter < NParameters;
3160  ++Parameter) {
3161  SMLoc IDLoc = Lexer.getLoc();
3163 
3164  if (Lexer.is(AsmToken::Identifier) && peekTok().is(AsmToken::Equal)) {
3165  if (parseIdentifier(FA.Name))
3166  return Error(IDLoc, "invalid argument identifier for formal argument");
3167 
3168  if (Lexer.isNot(AsmToken::Equal))
3169  return TokError("expected '=' after formal parameter identifier");
3170 
3171  Lex();
3172 
3173  NamedParametersFound = true;
3174  }
3175 
3176  if (NamedParametersFound && FA.Name.empty())
3177  return Error(IDLoc, "cannot mix positional and keyword arguments");
3178 
3179  unsigned PI = Parameter;
3180  if (!FA.Name.empty()) {
3181  assert(M && "expected macro to be defined");
3182  unsigned FAI = 0;
3183  for (FAI = 0; FAI < NParameters; ++FAI)
3184  if (M->Parameters[FAI].Name == FA.Name)
3185  break;
3186 
3187  if (FAI >= NParameters) {
3188  return Error(IDLoc, "parameter named '" + FA.Name +
3189  "' does not exist for macro '" + M->Name + "'");
3190  }
3191  PI = FAI;
3192  }
3193  const MCAsmMacroParameter *MP = nullptr;
3194  if (M && PI < NParameters)
3195  MP = &M->Parameters[PI];
3196 
3197  SMLoc StrLoc = Lexer.getLoc();
3198  SMLoc EndLoc;
3199  if (Lexer.is(AsmToken::Percent)) {
3200  const MCExpr *AbsoluteExp;
3201  int64_t Value;
3202  /// Eat '%'.
3203  Lex();
3204  if (parseExpression(AbsoluteExp, EndLoc))
3205  return false;
3206  if (!AbsoluteExp->evaluateAsAbsolute(Value,
3207  getStreamer().getAssemblerPtr()))
3208  return Error(StrLoc, "expected absolute expression");
3209  const char *StrChar = StrLoc.getPointer();
3210  const char *EndChar = EndLoc.getPointer();
3211  AsmToken newToken(AsmToken::Integer,
3212  StringRef(StrChar, EndChar - StrChar), Value);
3213  FA.Value.push_back(newToken);
3214  } else if (parseMacroArgument(MP, FA.Value, EndTok)) {
3215  if (M)
3216  return addErrorSuffix(" in '" + M->Name + "' macro");
3217  else
3218  return true;
3219  }
3220 
3221  if (!FA.Value.empty()) {
3222  if (A.size() <= PI)
3223  A.resize(PI + 1);
3224  A[PI] = FA.Value;
3225 
3226  if (FALocs.size() <= PI)
3227  FALocs.resize(PI + 1);
3228 
3229  FALocs[PI] = Lexer.getLoc();
3230  }
3231 
3232  // At the end of the statement, fill in remaining arguments that have
3233  // default values. If there aren't any, then the next argument is
3234  // required but missing
3235  if (Lexer.is(EndTok)) {
3236  bool Failure = false;
3237  for (unsigned FAI = 0; FAI < NParameters; ++FAI) {
3238  if (A[FAI].empty()) {
3239  if (M->Parameters[FAI].Required) {
3240  Error(FALocs[FAI].isValid() ? FALocs[FAI] : Lexer.getLoc(),
3241  "missing value for required parameter "
3242  "'" +
3243  M->Parameters[FAI].Name + "' in macro '" + M->Name + "'");
3244  Failure = true;
3245  }
3246 
3247  if (!M->Parameters[FAI].Value.empty())
3248  A[FAI] = M->Parameters[FAI].Value;
3249  }
3250  }
3251  return Failure;
3252  }
3253 
3254  if (Lexer.is(AsmToken::Comma))
3255  Lex();
3256  }
3257 
3258  return TokError("too many positional arguments");
3259 }
3260 
3261 bool MasmParser::handleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc,
3262  AsmToken::TokenKind ArgumentEndTok) {
3263  // Arbitrarily limit macro nesting depth (default matches 'as'). We can
3264  // eliminate this, although we should protect against infinite loops.
3265  unsigned MaxNestingDepth = AsmMacroMaxNestingDepth;
3266  if (ActiveMacros.size() == MaxNestingDepth) {
3267  std::ostringstream MaxNestingDepthError;
3268  MaxNestingDepthError << "macros cannot be nested more than "
3269  << MaxNestingDepth << " levels deep."
3270  << " Use -asm-macro-max-nesting-depth to increase "
3271  "this limit.";
3272  return TokError(MaxNestingDepthError.str());
3273  }
3274 
3275  MCAsmMacroArguments A;
3276  if (parseMacroArguments(M, A, ArgumentEndTok))
3277  return true;
3278 
3279  // Macro instantiation is lexical, unfortunately. We construct a new buffer
3280  // to hold the macro body with substitutions.
3281  SmallString<256> Buf;
3282  StringRef Body = M->Body;
3283  raw_svector_ostream OS(Buf);
3284 
3285  if (expandMacro(OS, Body, M->Parameters, A, M->Locals, getTok().getLoc()))
3286  return true;
3287 
3288  // We include the endm in the buffer as our cue to exit the macro
3289  // instantiation.
3290  OS << "endm\n";
3291 
3292  std::unique_ptr<MemoryBuffer> Instantiation =
3293  MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>");
3294 
3295  // Create the macro instantiation object and add to the current macro
3296  // instantiation stack.
3297  MacroInstantiation *MI = new MacroInstantiation{
3298  NameLoc, CurBuffer, getTok().getLoc(), TheCondStack.size()};
3299  ActiveMacros.push_back(MI);
3300 
3301  ++NumOfMacroInstantiations;
3302 
3303  // Jump to the macro instantiation and prime the lexer.
3304  CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Instantiation), SMLoc());
3305  Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
3306  EndStatementAtEOFStack.push_back(true);
3307  Lex();
3308 
3309  return false;
3310 }
3311 
3312 void MasmParser::handleMacroExit() {
3313  // Jump to the token we should return to, and consume it.
3314  EndStatementAtEOFStack.pop_back();
3315  jumpToLoc(ActiveMacros.back()->ExitLoc, ActiveMacros.back()->ExitBuffer,
3316  EndStatementAtEOFStack.back());
3317  Lex();
3318 
3319  // Pop the instantiation entry.
3320  delete ActiveMacros.back();
3321  ActiveMacros.pop_back();
3322 }
3323 
3324 bool MasmParser::handleMacroInvocation(const MCAsmMacro *M, SMLoc NameLoc) {
3325  if (!M->IsFunction)
3326  return Error(NameLoc, "cannot invoke macro procedure as function");
3327 
3328  if (parseToken(AsmToken::LParen, "invoking macro function '" + M->Name +
3329  "' requires arguments in parentheses") ||
3330  handleMacroEntry(M, NameLoc, AsmToken::RParen))
3331  return true;
3332 
3333  // Parse all statements in the macro, retrieving the exit value when it ends.
3334  std::string ExitValue;
3335  SmallVector<AsmRewrite, 4> AsmStrRewrites;
3336  while (Lexer.isNot(AsmToken::Eof)) {
3337  ParseStatementInfo Info(&AsmStrRewrites);
3338  bool Parsed = parseStatement(Info, nullptr);
3339 
3340  if (!Parsed && Info.ExitValue.hasValue()) {
3341  ExitValue = std::move(*Info.ExitValue);
3342  break;
3343  }
3344 
3345  // If we have a Lexer Error we are on an Error Token. Load in Lexer Error
3346  // for printing ErrMsg via Lex() only if no (presumably better) parser error
3347  // exists.
3348  if (Parsed && !hasPendingError() && Lexer.getTok().is(AsmToken::Error)) {
3349  Lex();
3350  }
3351 
3352  // parseStatement returned true so may need to emit an error.
3353  printPendingErrors();
3354 
3355  // Skipping to the next line if needed.
3356  if (Parsed && !getLexer().isAtStartOfStatement())
3357  eatToEndOfStatement();
3358  }
3359 
3360  // Consume the right-parenthesis on the other side of the arguments.
3361  if (parseToken(AsmToken::RParen, "invoking macro function '" + M->Name +
3362  "' requires arguments in parentheses"))
3363  return true;
3364 
3365  // Exit values may require lexing, unfortunately. We construct a new buffer to
3366  // hold the exit value.
3367  std::unique_ptr<MemoryBuffer> MacroValue =
3368  MemoryBuffer::getMemBufferCopy(ExitValue, "<macro-value>");
3369 
3370  // Jump from this location to the instantiated exit value, and prime the
3371  // lexer.
3372  CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(MacroValue), Lexer.getLoc());
3373  Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(), nullptr,
3374  /*EndStatementAtEOF=*/false);
3375  EndStatementAtEOFStack.push_back(false);
3376  Lex();
3377 
3378  return false;
3379 }
3380 
3381 /// parseIdentifier:
3382 /// ::= identifier
3383 /// ::= string
3384 bool MasmParser::parseIdentifier(StringRef &Res,
3385  IdentifierPositionKind Position) {
3386  // The assembler has relaxed rules for accepting identifiers, in particular we
3387  // allow things like '.globl $foo' and '.def @feat.00', which would normally
3388  // be separate tokens. At this level, we have already lexed so we cannot
3389  // (currently) handle this as a context dependent token, instead we detect
3390  // adjacent tokens and return the combined identifier.
3391  if (Lexer.is(AsmToken::Dollar) || Lexer.is(AsmToken::At)) {
3392  SMLoc PrefixLoc = getLexer().getLoc();
3393 
3394  // Consume the prefix character, and check for a following identifier.
3395 
3396  AsmToken nextTok = peekTok(false);
3397 
3398  if (nextTok.isNot(AsmToken::Identifier))
3399  return true;
3400 
3401  // We have a '$' or '@' followed by an identifier, make sure they are adjacent.
3402  if (PrefixLoc.getPointer() + 1 != nextTok.getLoc().getPointer())
3403  return true;
3404 
3405  // eat $ or @
3406  Lexer.Lex(); // Lexer's Lex guarantees consecutive token.
3407  // Construct the joined identifier and consume the token.
3408  Res =
3409  StringRef(PrefixLoc.getPointer(), getTok().getIdentifier().size() + 1);
3410  Lex(); // Parser Lex to maintain invariants.
3411  return false;
3412  }
3413 
3414  if (Lexer.isNot(AsmToken::Identifier) && Lexer.isNot(AsmToken::String))
3415  return true;
3416 
3417  Res = getTok().getIdentifier();
3418 
3419  // Consume the identifier token - but if parsing certain directives, avoid
3420  // lexical expansion of the next token.
3421  ExpandKind ExpandNextToken = ExpandMacros;
3422  if (Position == StartOfStatement &&
3423  StringSwitch<bool>(Res)
3424  .CaseLower("echo", true)
3425  .CasesLower("ifdef", "ifndef", "elseifdef", "elseifndef", true)
3426  .Default(false)) {
3427  ExpandNextToken = DoNotExpandMacros;
3428  }
3429  Lex(ExpandNextToken);
3430 
3431  return false;
3432 }
3433 
3434 /// parseDirectiveEquate:
3435 /// ::= name "=" expression
3436 /// | name "equ" expression (not redefinable)
3437 /// | name "equ" text-list
3438 /// | name "textequ" text-list (redefinability unspecified)
3439 bool MasmParser::parseDirectiveEquate(StringRef IDVal, StringRef Name,
3440  DirectiveKind DirKind, SMLoc NameLoc) {
3441  auto BuiltinIt = BuiltinSymbolMap.find(Name.lower());
3442  if (BuiltinIt != BuiltinSymbolMap.end())
3443  return Error(NameLoc, "cannot redefine a built-in symbol");
3444 
3445  Variable &Var = Variables[Name.lower()];
3446  if (Var.Name.empty()) {
3447  Var.Name = Name;
3448  }
3449 
3450  SMLoc StartLoc = Lexer.getLoc();
3451  if (DirKind == DK_EQU || DirKind == DK_TEXTEQU) {
3452  // "equ" and "textequ" both allow text expressions.
3453  std::string Value;
3454  std::string TextItem;
3455  if (!parseTextItem(TextItem)) {
3456  Value += TextItem;
3457 
3458  // Accept a text-list, not just one text-item.
3459  auto parseItem = [&]() -> bool {
3460  if (parseTextItem(TextItem))
3461  return TokError("expected text item");
3462  Value += TextItem;
3463  return false;
3464  };
3465  if (parseOptionalToken(AsmToken::Comma) && parseMany(parseItem))
3466  return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3467 
3468  if (!Var.IsText || Var.TextValue != Value) {
3469  switch (Var.Redefinable) {
3470  case Variable::NOT_REDEFINABLE:
3471  return Error(getTok().getLoc(), "invalid variable redefinition");
3472  case Variable::WARN_ON_REDEFINITION:
3473  if (Warning(NameLoc, "redefining '" + Name +
3474  "', already defined on the command line")) {
3475  return true;
3476  }
3477  break;
3478  default:
3479  break;
3480  }
3481  }
3482  Var.IsText = true;
3483  Var.TextValue = Value;
3484  Var.Redefinable = Variable::REDEFINABLE;
3485 
3486  return false;
3487  }
3488  }
3489  if (DirKind == DK_TEXTEQU)
3490  return TokError("expected <text> in '" + Twine(IDVal) + "' directive");
3491 
3492  // Parse as expression assignment.
3493  const MCExpr *Expr;
3494  SMLoc EndLoc;
3495  if (parseExpression(Expr, EndLoc))
3496  return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3497  StringRef ExprAsString = StringRef(
3498  StartLoc.getPointer(), EndLoc.getPointer() - StartLoc.getPointer());
3499 
3500  int64_t Value;
3501  if (!Expr->evaluateAsAbsolute(Value, getStreamer().getAssemblerPtr())) {
3502  if (DirKind == DK_ASSIGN)
3503  return Error(
3504  StartLoc,
3505  "expected absolute expression; not all symbols have known values",
3506  {StartLoc, EndLoc});
3507 
3508  // Not an absolute expression; define as a text replacement.
3509  if (!Var.IsText || Var.TextValue != ExprAsString) {
3510  switch (Var.Redefinable) {
3511  case Variable::NOT_REDEFINABLE:
3512  return Error(getTok().getLoc(), "invalid variable redefinition");
3513  case Variable::WARN_ON_REDEFINITION:
3514  if (Warning(NameLoc, "redefining '" + Name +
3515  "', already defined on the command line")) {
3516  return true;
3517  }
3518  break;
3519  default:
3520  break;
3521  }
3522  }
3523 
3524  Var.IsText = true;
3525  Var.TextValue = ExprAsString.str();
3526  Var.Redefinable = Variable::REDEFINABLE;
3527 
3528  return false;
3529  }
3530 
3531  MCSymbol *Sym = getContext().getOrCreateSymbol(Var.Name);
3532 
3533  const MCConstantExpr *PrevValue =
3534  Sym->isVariable() ? dyn_cast_or_null<MCConstantExpr>(
3535  Sym->getVariableValue(/*SetUsed=*/false))
3536  : nullptr;
3537  if (Var.IsText || !PrevValue || PrevValue->getValue() != Value) {
3538  switch (Var.Redefinable) {
3539  case Variable::NOT_REDEFINABLE:
3540  return Error(getTok().getLoc(), "invalid variable redefinition");
3541  case Variable::WARN_ON_REDEFINITION:
3542  if (Warning(NameLoc, "redefining '" + Name +
3543  "', already defined on the command line")) {
3544  return true;
3545  }
3546  break;
3547  default:
3548  break;
3549  }
3550  }
3551 
3552  Var.IsText = false;
3553  Var.TextValue.clear();
3554  Var.Redefinable = (DirKind == DK_ASSIGN) ? Variable::REDEFINABLE
3555  : Variable::NOT_REDEFINABLE;
3556 
3557  Sym->setRedefinable(Var.Redefinable != Variable::NOT_REDEFINABLE);
3558  Sym->setVariableValue(Expr);
3559  Sym->setExternal(false);
3560 
3561  return false;
3562 }
3563 
3564 bool MasmParser::parseEscapedString(std::string &Data) {
3565  if (check(getTok().isNot(AsmToken::String), "expected string"))
3566  return true;
3567 
3568  Data = "";
3569  char Quote = getTok().getString().front();
3570  StringRef Str = getTok().getStringContents();
3571  Data.reserve(Str.size());
3572  for (size_t i = 0, e = Str.size(); i != e; ++i) {
3573  Data.push_back(Str[i]);
3574  if (Str[i] == Quote) {
3575  // MASM treats doubled delimiting quotes as an escaped delimiting quote.
3576  // If we're escaping the string's trailing delimiter, we're definitely
3577  // missing a quotation mark.
3578  if (i + 1 == Str.size())
3579  return Error(getTok().getLoc(), "missing quotation mark in string");
3580  if (Str[i + 1] == Quote)
3581  ++i;
3582  }
3583  }
3584 
3585  Lex();
3586  return false;
3587 }
3588 
3589 bool MasmParser::parseAngleBracketString(std::string &Data) {
3590  SMLoc EndLoc, StartLoc = getTok().getLoc();
3591  if (isAngleBracketString(StartLoc, EndLoc)) {
3592  const char *StartChar = StartLoc.getPointer() + 1;
3593  const char *EndChar = EndLoc.getPointer() - 1;
3594  jumpToLoc(EndLoc, CurBuffer, EndStatementAtEOFStack.back());
3595  // Eat from '<' to '>'.
3596  Lex();
3597 
3598  Data = angleBracketString(StringRef(StartChar, EndChar - StartChar));
3599  return false;
3600  }
3601  return true;
3602 }
3603 
3604 /// textItem ::= textLiteral | textMacroID | % constExpr
3605 bool MasmParser::parseTextItem(std::string &Data) {
3606  switch (getTok().getKind()) {
3607  default:
3608  return true;
3609  case AsmToken::Percent: {
3610  int64_t Res;
3611  if (parseToken(AsmToken::Percent) || parseAbsoluteExpression(Res))
3612  return true;
3613  Data = std::to_string(Res);
3614  return false;
3615  }
3616  case AsmToken::Less:
3617  case AsmToken::LessEqual:
3618  case AsmToken::LessLess:
3619  case AsmToken::LessGreater:
3620  return parseAngleBracketString(Data);
3621  case AsmToken::Identifier: {
3622  // This must be a text macro; we need to expand it accordingly.
3623  StringRef ID;
3624  SMLoc StartLoc = getTok().getLoc();
3625  if (parseIdentifier(ID))
3626  return true;
3627  Data = ID.str();
3628 
3629  bool Expanded = false;
3630  while (true) {
3631  // Try to resolve as a built-in text macro
3632  auto BuiltinIt = BuiltinSymbolMap.find(ID.lower());
3633  if (BuiltinIt != BuiltinSymbolMap.end()) {
3634  llvm::Optional<std::string> BuiltinText =
3635  evaluateBuiltinTextMacro(BuiltinIt->getValue(), StartLoc);
3636  if (!BuiltinText.hasValue()) {
3637  // Not a text macro; break without substituting
3638  break;
3639  }
3640  Data = std::move(*BuiltinText);
3641  ID = StringRef(Data);
3642  Expanded = true;
3643  continue;
3644  }
3645 
3646  // Try to resolve as a variable text macro
3647  auto VarIt = Variables.find(ID.lower());
3648  if (VarIt != Variables.end()) {
3649  const Variable &Var = VarIt->getValue();
3650  if (!Var.IsText) {
3651  // Not a text macro; break without substituting
3652  break;
3653  }
3654  Data = Var.TextValue;
3655  ID = StringRef(Data);
3656  Expanded = true;
3657  continue;
3658  }
3659 
3660  break;
3661  }
3662 
3663  if (!Expanded) {
3664  // Not a text macro; not usable in TextItem context. Since we haven't used
3665  // the token, put it back for better error recovery.
3666  getLexer().UnLex(AsmToken(AsmToken::Identifier, ID));
3667  return true;
3668  }
3669  return false;
3670  }
3671  }
3672  llvm_unreachable("unhandled token kind");
3673 }
3674 
3675 /// parseDirectiveAscii:
3676 /// ::= ( .ascii | .asciz | .string ) [ "string" ( , "string" )* ]
3677 bool MasmParser::parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) {
3678  auto parseOp = [&]() -> bool {
3679  std::string Data;
3680  if (checkForValidSection() || parseEscapedString(Data))
3681  return true;
3682  getStreamer().emitBytes(Data);
3683  if (ZeroTerminated)
3684  getStreamer().emitBytes(StringRef("\0", 1));
3685  return false;
3686  };
3687 
3688  if (parseMany(parseOp))
3689  return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3690  return false;
3691 }
3692 
3693 bool MasmParser::emitIntValue(const MCExpr *Value, unsigned Size) {
3694  // Special case constant expressions to match code generator.
3695  if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
3696  assert(Size <= 8 && "Invalid size");
3697  int64_t IntValue = MCE->getValue();
3698  if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
3699  return Error(MCE->getLoc(), "out of range literal value");
3700  getStreamer().emitIntValue(IntValue, Size);
3701  } else {
3702  const MCSymbolRefExpr *MSE = dyn_cast<MCSymbolRefExpr>(Value);
3703  if (MSE && MSE->getSymbol().getName() == "?") {
3704  // ? initializer; treat as 0.
3705  getStreamer().emitIntValue(0, Size);
3706  } else {
3707  getStreamer().emitValue(Value, Size, Value->getLoc());
3708  }
3709  }
3710  return false;
3711 }
3712 
3713 bool MasmParser::parseScalarInitializer(unsigned Size,
3715  unsigned StringPadLength) {
3716  if (Size == 1 && getTok().is(AsmToken::String)) {
3717  std::string Value;
3718  if (parseEscapedString(Value))
3719  return true;
3720  // Treat each character as an initializer.
3721  for (const unsigned char CharVal : Value)
3722  Values.push_back(MCConstantExpr::create(CharVal, getContext()));
3723 
3724  // Pad the string with spaces to the specified length.
3725  for (size_t i = Value.size(); i < StringPadLength; ++i)
3726  Values.push_back(MCConstantExpr::create(' ', getContext()));
3727  } else {
3728  const MCExpr *Value;
3729  if (parseExpression(Value))
3730  return true;
3731  if (getTok().is(AsmToken::Identifier) &&
3732  getTok().getString().equals_insensitive("dup")) {
3733  Lex(); // Eat 'dup'.
3734  const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
3735  if (!MCE)
3736  return Error(Value->getLoc(),
3737  "cannot repeat value a non-constant number of times");
3738  const int64_t Repetitions = MCE->getValue();
3739  if (Repetitions < 0)
3740  return Error(Value->getLoc(),
3741  "cannot repeat value a negative number of times");
3742 
3743  SmallVector<const MCExpr *, 1> DuplicatedValues;
3744  if (parseToken(AsmToken::LParen,
3745  "parentheses required for 'dup' contents") ||
3746  parseScalarInstList(Size, DuplicatedValues) ||
3747  parseToken(AsmToken::RParen, "unmatched parentheses"))
3748  return true;
3749 
3750  for (int i = 0; i < Repetitions; ++i)
3751  Values.append(DuplicatedValues.begin(), DuplicatedValues.end());
3752  } else {
3753  Values.push_back(Value);
3754  }
3755  }
3756  return false;
3757 }
3758 
3759 bool MasmParser::parseScalarInstList(unsigned Size,
3761  const AsmToken::TokenKind EndToken) {
3762  while (getTok().isNot(EndToken) &&
3763  (EndToken != AsmToken::Greater ||
3764  getTok().isNot(AsmToken::GreaterGreater))) {
3765  parseScalarInitializer(Size, Values);
3766 
3767  // If we see a comma, continue, and allow line continuation.
3768  if (!parseOptionalToken(AsmToken::Comma))
3769  break;
3770  parseOptionalToken(AsmToken::EndOfStatement);
3771  }
3772  return false;
3773 }
3774 
3775 bool MasmParser::emitIntegralValues(unsigned Size, unsigned *Count) {
3777  if (checkForValidSection() || parseScalarInstList(Size, Values))
3778  return true;
3779 
3780  for (auto Value : Values) {
3781  emitIntValue(Value, Size);
3782  }
3783  if (Count)
3784  *Count = Values.size();
3785  return false;
3786 }
3787 
3788 // Add a field to the current structure.
3789 bool MasmParser::addIntegralField(StringRef Name, unsigned Size) {
3790  StructInfo &Struct = StructInProgress.back();
3791  FieldInfo &Field = Struct.addField(Name, FT_INTEGRAL, Size);
3792  IntFieldInfo &IntInfo = Field.Contents.IntInfo;
3793 
3794  Field.Type = Size;
3795 
3796  if (parseScalarInstList(Size, IntInfo.Values))
3797  return true;
3798 
3799  Field.SizeOf = Field.Type * IntInfo.Values.size();
3800  Field.LengthOf = IntInfo.Values.size();
3801  const unsigned FieldEnd = Field.Offset + Field.SizeOf;
3802  if (!Struct.IsUnion) {
3803  Struct.NextOffset = FieldEnd;
3804  }
3805  Struct.Size = std::max(Struct.Size, FieldEnd);
3806  return false;
3807 }
3808 
3809 /// parseDirectiveValue
3810 /// ::= (byte | word | ... ) [ expression (, expression)* ]
3811 bool MasmParser::parseDirectiveValue(StringRef IDVal, unsigned Size) {
3812  if (StructInProgress.empty()) {
3813  // Initialize data value.
3814  if (emitIntegralValues(Size))
3815  return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3816  } else if (addIntegralField("", Size)) {
3817  return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3818  }
3819 
3820  return false;
3821 }
3822 
3823 /// parseDirectiveNamedValue
3824 /// ::= name (byte | word | ... ) [ expression (, expression)* ]
3825 bool MasmParser::parseDirectiveNamedValue(StringRef TypeName, unsigned Size,
3826  StringRef Name, SMLoc NameLoc) {
3827  if (StructInProgress.empty()) {
3828  // Initialize named data value.
3829  MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
3830  getStreamer().emitLabel(Sym);
3831  unsigned Count;
3832  if (emitIntegralValues(Size, &Count))
3833  return addErrorSuffix(" in '" + Twine(TypeName) + "' directive");
3834 
3835  AsmTypeInfo Type;
3836  Type.Name = TypeName;
3837  Type.Size = Size * Count;
3838  Type.ElementSize = Size;
3839  Type.Length = Count;
3840  KnownType[Name.lower()] = Type;
3841  } else if (addIntegralField(Name, Size)) {
3842  return addErrorSuffix(" in '" + Twine(TypeName) + "' directive");
3843  }
3844 
3845  return false;
3846 }
3847 
3848 static bool parseHexOcta(MasmParser &Asm, uint64_t &hi, uint64_t &lo) {
3849  if (Asm.getTok().isNot(AsmToken::Integer) &&
3850  Asm.getTok().isNot(AsmToken::BigNum))
3851  return Asm.TokError("unknown token in expression");
3852  SMLoc ExprLoc = Asm.getTok().getLoc();
3853  APInt IntValue = Asm.getTok().getAPIntVal();
3854  Asm.Lex();
3855  if (!IntValue.isIntN(128))
3856  return Asm.Error(ExprLoc, "out of range literal value");
3857  if (!IntValue.isIntN(64)) {
3858  hi = IntValue.getHiBits(IntValue.getBitWidth() - 64).getZExtValue();
3859  lo = IntValue.getLoBits(64).getZExtValue();
3860  } else {
3861  hi = 0;
3862  lo = IntValue.getZExtValue();
3863  }
3864  return false;
3865 }
3866 
3867 bool MasmParser::parseRealValue(const fltSemantics &Semantics, APInt &Res) {
3868  // We don't truly support arithmetic on floating point expressions, so we
3869  // have to manually parse unary prefixes.
3870  bool IsNeg = false;
3871  SMLoc SignLoc;
3872  if (getLexer().is(AsmToken::Minus)) {
3873  SignLoc = getLexer().getLoc();
3874  Lexer.Lex();
3875  IsNeg = true;
3876  } else if (getLexer().is(AsmToken::Plus)) {
3877  SignLoc = getLexer().getLoc();
3878  Lexer.Lex();
3879  }
3880 
3881  if (Lexer.is(AsmToken::Error))
3882  return TokError(Lexer.getErr());
3883  if (Lexer.isNot(AsmToken::Integer) && Lexer.isNot(AsmToken::Real) &&
3884  Lexer.isNot(AsmToken::Identifier))
3885  return TokError("unexpected token in directive");
3886 
3887  // Convert to an APFloat.
3888  APFloat Value(Semantics);
3889  StringRef IDVal = getTok().getString();
3890  if (getLexer().is(AsmToken::Identifier)) {
3891  if (IDVal.equals_insensitive("infinity") || IDVal.equals_insensitive("inf"))
3892  Value = APFloat::getInf(Semantics);
3893  else if (IDVal.equals_insensitive("nan"))
3894  Value = APFloat::getNaN(Semantics, false, ~0);
3895  else if (IDVal.equals_insensitive("?"))
3896  Value = APFloat::getZero(Semantics);
3897  else
3898  return TokError("invalid floating point literal");
3899  } else if (IDVal.consume_back("r") || IDVal.consume_back("R")) {
3900  // MASM hexadecimal floating-point literal; no APFloat conversion needed.
3901  // To match ML64.exe, ignore the initial sign.
3902  unsigned SizeInBits = Value.getSizeInBits(Semantics);
3903  if (SizeInBits != (IDVal.size() << 2))
3904  return TokError("invalid floating point literal");
3905 
3906  // Consume the numeric token.
3907  Lex();
3908 
3909  Res = APInt(SizeInBits, IDVal, 16);
3910  if (SignLoc.isValid())
3911  return Warning(SignLoc, "MASM-style hex floats ignore explicit sign");
3912  return false;
3913  } else if (errorToBool(
3914  Value.convertFromString(IDVal, APFloat::rmNearestTiesToEven)
3915  .takeError())) {
3916  return TokError("invalid floating point literal");
3917  }
3918  if (IsNeg)
3919  Value.changeSign();
3920 
3921  // Consume the numeric token.
3922  Lex();
3923 
3924  Res = Value.bitcastToAPInt();
3925 
3926  return false;
3927 }
3928 
3929 bool MasmParser::parseRealInstList(const fltSemantics &Semantics,
3930  SmallVectorImpl<APInt> &ValuesAsInt,
3931  const AsmToken::TokenKind EndToken) {
3932  while (getTok().isNot(EndToken) ||
3933  (EndToken == AsmToken::Greater &&
3934  getTok().isNot(AsmToken::GreaterGreater))) {
3935  const AsmToken NextTok = peekTok();
3936  if (NextTok.is(AsmToken::Identifier) &&
3937  NextTok.getString().equals_insensitive("dup")) {
3938  const MCExpr *Value;
3939  if (parseExpression(Value) || parseToken(AsmToken::Identifier))
3940  return true;
3941  const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
3942  if (!MCE)
3943  return Error(Value->getLoc(),
3944  "cannot repeat value a non-constant number of times");
3945  const int64_t Repetitions = MCE->getValue();
3946  if (Repetitions < 0)
3947  return Error(Value->getLoc(),
3948  "cannot repeat value a negative number of times");
3949 
3950  SmallVector<APInt, 1> DuplicatedValues;
3951  if (parseToken(AsmToken::LParen,
3952  "parentheses required for 'dup' contents") ||
3953  parseRealInstList(Semantics, DuplicatedValues) ||
3954  parseToken(AsmToken::RParen, "unmatched parentheses"))
3955  return true;
3956 
3957  for (int i = 0; i < Repetitions; ++i)
3958  ValuesAsInt.append(DuplicatedValues.begin(), DuplicatedValues.end());
3959  } else {
3960  APInt AsInt;
3961  if (parseRealValue(Semantics, AsInt))
3962  return true;
3963  ValuesAsInt.push_back(AsInt);
3964  }
3965 
3966  // Continue if we see a comma. (Also, allow line continuation.)
3967  if (!parseOptionalToken(AsmToken::Comma))
3968  break;
3969  parseOptionalToken(AsmToken::EndOfStatement);
3970  }
3971 
3972  return false;
3973 }
3974 
3975 // Initialize real data values.
3976 bool MasmParser::emitRealValues(const fltSemantics &Semantics,
3977  unsigned *Count) {
3978  if (checkForValidSection())
3979  return true;
3980 
3981  SmallVector<APInt, 1> ValuesAsInt;
3982  if (parseRealInstList(Semantics, ValuesAsInt))
3983  return true;
3984 
3985  for (const APInt &AsInt : ValuesAsInt) {
3986  getStreamer().emitIntValue(AsInt);
3987  }
3988  if (Count)
3989  *Count = ValuesAsInt.size();
3990  return false;
3991 }
3992 
3993 // Add a real field to the current struct.
3994 bool MasmParser::addRealField(StringRef Name, const fltSemantics &Semantics,
3995  size_t Size) {
3996  StructInfo &Struct = StructInProgress.back();
3997  FieldInfo &Field = Struct.addField(Name, FT_REAL, Size);
3998  RealFieldInfo &RealInfo = Field.Contents.RealInfo;
3999 
4000  Field.SizeOf = 0;
4001 
4002  if (parseRealInstList(Semantics, RealInfo.AsIntValues))
4003  return true;
4004 
4005  Field.Type = RealInfo.AsIntValues.back().getBitWidth() / 8;
4006  Field.LengthOf = RealInfo.AsIntValues.size();
4007  Field.SizeOf = Field.Type * Field.LengthOf;
4008 
4009  const unsigned FieldEnd = Field.Offset + Field.SizeOf;
4010  if (!Struct.IsUnion) {
4011  Struct.NextOffset = FieldEnd;
4012  }
4013  Struct.Size = std::max(Struct.Size, FieldEnd);
4014  return false;
4015 }
4016 
4017 /// parseDirectiveRealValue
4018 /// ::= (real4 | real8 | real10) [ expression (, expression)* ]
4019 bool MasmParser::parseDirectiveRealValue(StringRef IDVal,
4020  const fltSemantics &Semantics,
4021  size_t Size) {
4022  if (StructInProgress.empty()) {
4023  // Initialize data value.
4024  if (emitRealValues(Semantics))
4025  return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
4026  } else if (addRealField("", Semantics, Size)) {
4027  return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
4028  }
4029  return false;
4030 }
4031 
4032 /// parseDirectiveNamedRealValue
4033 /// ::= name (real4 | real8 | real10) [ expression (, expression)* ]
4034 bool MasmParser::parseDirectiveNamedRealValue(StringRef TypeName,
4035  const fltSemantics &Semantics,
4036  unsigned Size, StringRef Name,
4037  SMLoc NameLoc) {
4038  if (StructInProgress.empty()) {
4039  // Initialize named data value.
4040  MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
4041  getStreamer().emitLabel(Sym);
4042  unsigned Count;
4043  if (emitRealValues(Semantics, &Count))
4044  return addErrorSuffix(" in '" + TypeName + "' directive");
4045 
4046  AsmTypeInfo Type;
4047  Type.Name = TypeName;
4048  Type.Size = Size * Count;
4049  Type.ElementSize = Size;
4050  Type.Length = Count;
4051  KnownType[Name.lower()] = Type;
4052  } else if (addRealField(Name, Semantics, Size)) {
4053  return addErrorSuffix(" in '" + TypeName + "' directive");
4054  }
4055  return false;
4056 }
4057 
4058 bool MasmParser::parseOptionalAngleBracketOpen() {
4059  const AsmToken Tok = getTok();
4060  if (parseOptionalToken(AsmToken::LessLess)) {
4061  AngleBracketDepth++;
4062  Lexer.UnLex(AsmToken(AsmToken::Less, Tok.getString().substr(1)));
4063  return true;
4064  } else if (parseOptionalToken(AsmToken::LessGreater)) {
4065  AngleBracketDepth++;
4066  Lexer.UnLex(AsmToken(AsmToken::Greater, Tok.getString().substr(1)));
4067  return true;
4068  } else if (parseOptionalToken(AsmToken::Less)) {
4069  AngleBracketDepth++;
4070  return true;
4071  }
4072 
4073  return false;
4074 }
4075 
4076 bool MasmParser::parseAngleBracketClose(const Twine &Msg) {
4077  const AsmToken Tok = getTok();
4078  if (parseOptionalToken(AsmToken::GreaterGreater)) {
4079  Lexer.UnLex(AsmToken(AsmToken::Greater, Tok.getString().substr(1)));
4080  } else if (parseToken(AsmToken::Greater, Msg)) {
4081  return true;
4082  }
4083  AngleBracketDepth--;
4084  return false;
4085 }
4086 
4087 bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
4088  const IntFieldInfo &Contents,
4089  FieldInitializer &Initializer) {
4090  SMLoc Loc = getTok().getLoc();
4091 
4093  if (parseOptionalToken(AsmToken::LCurly)) {
4094  if (Field.LengthOf == 1 && Field.Type > 1)
4095  return Error(Loc, "Cannot initialize scalar field with array value");
4096  if (parseScalarInstList(Field.Type, Values, AsmToken::RCurly) ||
4097  parseToken(AsmToken::RCurly))
4098  return true;
4099  } else if (parseOptionalAngleBracketOpen()) {
4100  if (Field.LengthOf == 1 && Field.Type > 1)
4101  return Error(Loc, "Cannot initialize scalar field with array value");
4102  if (parseScalarInstList(Field.Type, Values, AsmToken::Greater) ||
4103  parseAngleBracketClose())
4104  return true;
4105  } else if (Field.LengthOf > 1 && Field.Type > 1) {
4106  return Error(Loc, "Cannot initialize array field with scalar value");
4107  } else if (parseScalarInitializer(Field.Type, Values,
4108  /*StringPadLength=*/Field.LengthOf)) {
4109  return true;
4110  }
4111 
4112  if (Values.size() > Field.LengthOf) {
4113  return Error(Loc, "Initializer too long for field; expected at most " +
4114  std::to_string(Field.LengthOf) + " elements, got " +
4115  std::to_string(Values.size()));
4116  }
4117  // Default-initialize all remaining values.
4118  Values.append(Contents.Values.begin() + Values.size(), Contents.Values.end());
4119 
4120  Initializer = FieldInitializer(std::move(Values));
4121  return false;
4122 }
4123 
4124 bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
4125  const RealFieldInfo &Contents,
4126  FieldInitializer &Initializer) {
4127  const fltSemantics *Semantics;
4128  switch (Field.Type) {
4129  case 4:
4130  Semantics = &APFloat::IEEEsingle();
4131  break;
4132  case 8:
4133  Semantics = &APFloat::IEEEdouble();
4134  break;
4135  case 10:
4136  Semantics = &APFloat::x87DoubleExtended();
4137  break;
4138  default:
4139  llvm_unreachable("unknown real field type");
4140  }
4141 
4142  SMLoc Loc = getTok().getLoc();
4143 
4144  SmallVector<APInt, 1> AsIntValues;
4145  if (parseOptionalToken(AsmToken::LCurly)) {
4146  if (Field.LengthOf == 1)
4147  return Error(Loc, "Cannot initialize scalar field with array value");
4148  if (parseRealInstList(*Semantics, AsIntValues, AsmToken::RCurly) ||
4149  parseToken(AsmToken::RCurly))
4150  return true;
4151  } else if (parseOptionalAngleBracketOpen()) {
4152  if (Field.LengthOf == 1)
4153  return Error(Loc, "Cannot initialize scalar field with array value");
4154  if (parseRealInstList(*Semantics, AsIntValues, AsmToken::Greater) ||
4155  parseAngleBracketClose())
4156  return true;
4157  } else if (Field.LengthOf > 1) {
4158  return Error(Loc, "Cannot initialize array field with scalar value");
4159  } else {
4160  AsIntValues.emplace_back();
4161  if (parseRealValue(*Semantics, AsIntValues.back()))
4162  return true;
4163  }
4164 
4165  if (AsIntValues.size() > Field.LengthOf) {
4166  return Error(Loc, "Initializer too long for field; expected at most " +
4167  std::to_string(Field.LengthOf) + " elements, got " +
4168  std::to_string(AsIntValues.size()));
4169  }
4170  // Default-initialize all remaining values.
4171  AsIntValues.append(Contents.AsIntValues.begin() + AsIntValues.size(),
4172  Contents.AsIntValues.end());
4173 
4174  Initializer = FieldInitializer(std::move(AsIntValues));
4175  return false;
4176 }
4177 
4178 bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
4179  const StructFieldInfo &Contents,
4180  FieldInitializer &Initializer) {
4181  SMLoc Loc = getTok().getLoc();
4182 
4183  std::vector<StructInitializer> Initializers;
4184  if (Field.LengthOf > 1) {
4185  if (parseOptionalToken(AsmToken::LCurly)) {
4186  if (parseStructInstList(Contents.Structure, Initializers,
4187  AsmToken::RCurly) ||
4188  parseToken(AsmToken::RCurly))
4189  return true;
4190  } else if (parseOptionalAngleBracketOpen()) {
4191  if (parseStructInstList(Contents.Structure, Initializers,
4192  AsmToken::Greater) ||
4193  parseAngleBracketClose())
4194  return true;
4195  } else {
4196  return Error(Loc, "Cannot initialize array field with scalar value");
4197  }
4198  } else {
4199  Initializers.emplace_back();
4200  if (parseStructInitializer(Contents.Structure, Initializers.back()))
4201  return true;
4202  }
4203 
4204  if (Initializers.size() > Field.LengthOf) {
4205  return Error(Loc, "Initializer too long for field; expected at most " +
4206  std::to_string(Field.LengthOf) + " elements, got " +
4207  std::to_string(Initializers.size()));
4208  }
4209  // Default-initialize all remaining values.
4210  Initializers.insert(Initializers.end(),
4211  Contents.Initializers.begin() + Initializers.size(),
4212  Contents.Initializers.end());
4213 
4214  Initializer = FieldInitializer(std::move(Initializers), Contents.Structure);
4215  return false;
4216 }
4217 
4218 bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
4219  FieldInitializer &Initializer) {
4220  switch (Field.Contents.FT) {
4221  case FT_INTEGRAL:
4222  return parseFieldInitializer(Field, Field.Contents.IntInfo, Initializer);
4223  case FT_REAL:
4224  return parseFieldInitializer(Field, Field.Contents.RealInfo, Initializer);
4225  case FT_STRUCT:
4226  return parseFieldInitializer(Field, Field.Contents.StructInfo, Initializer);
4227  }
4228  llvm_unreachable("Unhandled FieldType enum");
4229 }
4230 
4231 bool MasmParser::parseStructInitializer(const StructInfo &Structure,
4232  StructInitializer &Initializer) {
4233  const AsmToken FirstToken = getTok();
4234 
4236  if (parseOptionalToken(AsmToken::LCurly)) {
4237  EndToken = AsmToken::RCurly;
4238  } else if (parseOptionalAngleBracketOpen()) {
4239  EndToken = AsmToken::Greater;
4240  AngleBracketDepth++;
4241  } else if (FirstToken.is(AsmToken::Identifier) &&
4242  FirstToken.getString() == "?") {
4243  // ? initializer; leave EndToken uninitialized to treat as empty.
4244  if (parseToken(AsmToken::Identifier))
4245  return true;
4246  } else {
4247  return Error(FirstToken.getLoc(), "Expected struct initializer");
4248  }
4249 
4250  auto &FieldInitializers = Initializer.FieldInitializers;
4251  size_t FieldIndex = 0;
4252  if (EndToken.hasValue()) {
4253  // Initialize all fields with given initializers.
4254  while (getTok().isNot(EndToken.getValue()) &&
4255  FieldIndex < Structure.Fields.size()) {
4256  const FieldInfo &Field = Structure.Fields[FieldIndex++];
4257  if (parseOptionalToken(AsmToken::Comma)) {
4258  // Empty initializer; use the default and continue. (Also, allow line
4259  // continuation.)
4260  FieldInitializers.push_back(Field.Contents);
4261  parseOptionalToken(AsmToken::EndOfStatement);
4262  continue;
4263  }
4264  FieldInitializers.emplace_back(Field.Contents.FT);
4265  if (parseFieldInitializer(Field, FieldInitializers.back()))
4266  return true;
4267 
4268  // Continue if we see a comma. (Also, allow line continuation.)
4269  SMLoc CommaLoc = getTok().getLoc();
4270  if (!parseOptionalToken(AsmToken::Comma))
4271  break;
4272  if (FieldIndex == Structure.Fields.size())
4273  return Error(CommaLoc, "'" + Structure.Name +
4274  "' initializer initializes too many fields");
4275  parseOptionalToken(AsmToken::EndOfStatement);
4276  }
4277  }
4278  // Default-initialize all remaining fields.
4279  for (auto It = Structure.Fields.begin() + FieldIndex;
4280  It != Structure.Fields.end(); ++It) {
4281  const FieldInfo &Field = *It;
4282  FieldInitializers.push_back(Field.Contents);
4283  }
4284 
4285  if (EndToken.hasValue()) {
4286  if (EndToken.getValue() == AsmToken::Greater)
4287  return parseAngleBracketClose();
4288 
4289  return parseToken(EndToken.getValue());
4290  }
4291 
4292  return false;
4293 }
4294 
4295 bool MasmParser::parseStructInstList(
4296  const StructInfo &Structure, std::vector<StructInitializer> &Initializers,
4297  const AsmToken::TokenKind EndToken) {
4298  while (getTok().isNot(EndToken) ||
4299  (EndToken == AsmToken::Greater &&
4300  getTok().isNot(AsmToken::GreaterGreater))) {
4301  const AsmToken NextTok = peekTok();
4302  if (NextTok.is(AsmToken::Identifier) &&
4303  NextTok.getString().equals_insensitive("dup")) {
4304  const MCExpr *Value;
4305  if (parseExpression(Value) || parseToken(AsmToken::Identifier))
4306  return true;
4307  const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
4308  if (!MCE)
4309  return Error(Value->getLoc(),
4310  "cannot repeat value a non-constant number of times");
4311  const int64_t Repetitions = MCE->getValue();
4312  if (Repetitions < 0)
4313  return Error(Value->getLoc(),
4314  "cannot repeat value a negative number of times");
4315 
4316  std::vector<StructInitializer> DuplicatedValues;
4317  if (parseToken(AsmToken::LParen,
4318  "parentheses required for 'dup' contents") ||
4319  parseStructInstList(Structure, DuplicatedValues) ||
4320  parseToken(AsmToken::RParen, "unmatched parentheses"))
4321  return true;
4322 
4323  for (int i = 0; i < Repetitions; ++i)
4324  llvm::append_range(Initializers, DuplicatedValues);
4325  } else {
4326  Initializers.emplace_back();
4327  if (parseStructInitializer(Structure, Initializers.back()))
4328  return true;
4329  }
4330 
4331  // Continue if we see a comma. (Also, allow line continuation.)
4332  if (!parseOptionalToken(AsmToken::Comma))
4333  break;
4334  parseOptionalToken(AsmToken::EndOfStatement);
4335  }
4336 
4337  return false;
4338 }
4339 
4340 bool MasmParser::emitFieldValue(const FieldInfo &Field,
4341  const IntFieldInfo &Contents) {
4342  // Default-initialize all values.
4343  for (const MCExpr *Value : Contents.Values) {
4344  if (emitIntValue(Value, Field.Type))
4345  return true;
4346  }
4347  return false;
4348 }
4349 
4350 bool MasmParser::emitFieldValue(const FieldInfo &Field,
4351  const RealFieldInfo &Contents) {
4352  for (const APInt &AsInt : Contents.AsIntValues) {
4353  getStreamer().emitIntValue(AsInt.getLimitedValue(),
4354  AsInt.getBitWidth() / 8);
4355  }
4356  return false;
4357 }
4358 
4359 bool MasmParser::emitFieldValue(const FieldInfo &Field,
4360  const StructFieldInfo &Contents) {
4361  for (const auto &Initializer : Contents.Initializers) {
4362  size_t Index = 0, Offset = 0;
4363  for (const auto &SubField : Contents.Structure.Fields) {
4364  getStreamer().emitZeros(SubField.Offset - Offset);
4365  Offset = SubField.Offset + SubField.SizeOf;
4366  emitFieldInitializer(SubField, Initializer.FieldInitializers[Index++]);
4367  }
4368  }
4369  return false;
4370 }
4371 
4372 bool MasmParser::emitFieldValue(const FieldInfo &Field) {
4373  switch (Field.Contents.FT) {
4374  case FT_INTEGRAL:
4375  return emitFieldValue(Field, Field.Contents.IntInfo);
4376  case FT_REAL:
4377  return emitFieldValue(Field, Field.Contents.RealInfo);
4378  case FT_STRUCT:
4379  return emitFieldValue(Field, Field.Contents.StructInfo);
4380  }
4381  llvm_unreachable("Unhandled FieldType enum");
4382 }
4383 
4384 bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4385  const IntFieldInfo &Contents,
4386  const IntFieldInfo &Initializer) {
4387  for (const auto &Value : Initializer.Values) {
4388  if (emitIntValue(Value, Field.Type))
4389  return true;
4390  }
4391  // Default-initialize all remaining values.
4392  for (auto it = Contents.Values.begin() + Initializer.Values.size();
4393  it != Contents.Values.end(); ++it) {
4394  const auto &Value = *it;
4395  if (emitIntValue(Value, Field.Type))
4396  return true;
4397  }
4398  return false;
4399 }
4400 
4401 bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4402  const RealFieldInfo &Contents,
4403  const RealFieldInfo &Initializer) {
4404  for (const auto &AsInt : Initializer.AsIntValues) {
4405  getStreamer().emitIntValue(AsInt.getLimitedValue(),
4406  AsInt.getBitWidth() / 8);
4407  }
4408  // Default-initialize all remaining values.
4409  for (auto It = Contents.AsIntValues.begin() + Initializer.AsIntValues.size();
4410  It != Contents.AsIntValues.end(); ++It) {
4411  const auto &AsInt = *It;
4412  getStreamer().emitIntValue(AsInt.getLimitedValue(),
4413  AsInt.getBitWidth() / 8);
4414  }
4415  return false;
4416 }
4417 
4418 bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4419  const StructFieldInfo &Contents,
4420  const StructFieldInfo &Initializer) {
4421  for (const auto &Init : Initializer.Initializers) {
4422  if (emitStructInitializer(Contents.Structure, Init))
4423  return true;
4424  }
4425  // Default-initialize all remaining values.
4426  for (auto It =
4427  Contents.Initializers.begin() + Initializer.Initializers.size();
4428  It != Contents.Initializers.end(); ++It) {
4429  const auto &Init = *It;
4430  if (emitStructInitializer(Contents.Structure, Init))
4431  return true;
4432  }
4433  return false;
4434 }
4435 
4436 bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4437  const FieldInitializer &Initializer) {
4438  switch (Field.Contents.FT) {
4439  case FT_INTEGRAL:
4440  return emitFieldInitializer(Field, Field.Contents.IntInfo,
4441  Initializer.IntInfo);
4442  case FT_REAL:
4443  return emitFieldInitializer(Field, Field.Contents.RealInfo,
4444  Initializer.RealInfo);
4445  case FT_STRUCT:
4446  return emitFieldInitializer(Field, Field.Contents.StructInfo,
4447  Initializer.StructInfo);
4448  }
4449  llvm_unreachable("Unhandled FieldType enum");
4450 }
4451 
4452 bool MasmParser::emitStructInitializer(const StructInfo &Structure,
4453  const StructInitializer &Initializer) {
4454  if (!Structure.Initializable)
4455  return Error(getLexer().getLoc(),
4456  "cannot initialize a value of type '" + Structure.Name +
4457  "'; 'org' was used in the type's declaration");
4458  size_t Index = 0, Offset = 0;
4459  for (const auto &Init : Initializer.FieldInitializers) {
4460  const auto &Field = Structure.Fields[Index++];
4461  getStreamer().emitZeros(Field.Offset - Offset);
4462  Offset = Field.Offset + Field.SizeOf;
4463  if (emitFieldInitializer(Field, Init))
4464  return true;
4465  }
4466  // Default-initialize all remaining fields.
4467  for (auto It =
4468  Structure.Fields.begin() + Initializer.FieldInitializers.size();
4469  It != Structure.Fields.end(); ++It) {
4470  const auto &Field = *It;
4471  getStreamer().emitZeros(Field.Offset - Offset);
4472  Offset = Field.Offset + Field.SizeOf;
4473  if (emitFieldValue(Field))
4474  return true;
4475  }
4476  // Add final padding.
4477  if (Offset != Structure.Size)
4478  getStreamer().emitZeros(Structure.Size - Offset);
4479  return false;
4480 }
4481 
4482 // Set data values from initializers.
4483 bool MasmParser::emitStructValues(const StructInfo &Structure,
4484  unsigned *Count) {
4485  std::vector<StructInitializer> Initializers;
4486  if (parseStructInstList(Structure, Initializers))
4487  return true;
4488 
4489  for (const auto &Initializer : Initializers) {
4490  if (emitStructInitializer(Structure, Initializer))
4491  return true;
4492  }
4493 
4494  if (Count)
4495  *Count = Initializers.size();
4496  return false;
4497 }
4498 
4499 // Declare a field in the current struct.
4500 bool MasmParser::addStructField(StringRef Name, const StructInfo &Structure) {
4501  StructInfo &OwningStruct = StructInProgress.back();
4502  FieldInfo &Field =
4503  OwningStruct.addField(Name, FT_STRUCT, Structure.AlignmentSize);
4504  StructFieldInfo &StructInfo = Field.Contents.StructInfo;
4505 
4506  StructInfo.Structure = Structure;
4507  Field.Type = Structure.Size;
4508 
4509  if (parseStructInstList(Structure, StructInfo.Initializers))
4510  return true;
4511 
4512  Field.LengthOf = StructInfo.Initializers.size();
4513  Field.SizeOf = Field.Type * Field.LengthOf;
4514 
4515  const unsigned FieldEnd = Field.Offset + Field.SizeOf;
4516  if (!OwningStruct.IsUnion) {
4517  OwningStruct.NextOffset = FieldEnd;
4518  }
4519  OwningStruct.Size = std::max(OwningStruct.Size, FieldEnd);
4520 
4521  return false;
4522 }
4523 
4524 /// parseDirectiveStructValue
4525 /// ::= struct-id (<struct-initializer> | {struct-initializer})
4526 /// [, (<struct-initializer> | {struct-initializer})]*
4527 bool MasmParser::parseDirectiveStructValue(const StructInfo &Structure,
4528  StringRef Directive, SMLoc DirLoc) {
4529  if (StructInProgress.empty()) {
4530  if (emitStructValues(Structure))
4531  return true;
4532  } else if (addStructField("", Structure)) {
4533  return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4534  }
4535 
4536  return false;
4537 }
4538 
4539 /// parseDirectiveNamedValue
4540 /// ::= name (byte | word | ... ) [ expression (, expression)* ]
4541 bool MasmParser::parseDirectiveNamedStructValue(const StructInfo &Structure,
4543  SMLoc DirLoc, StringRef Name) {
4544  if (StructInProgress.empty()) {
4545  // Initialize named data value.
4546  MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
4547  getStreamer().emitLabel(Sym);
4548  unsigned Count;
4549  if (emitStructValues(Structure, &Count))
4550  return true;
4551  AsmTypeInfo Type;
4552  Type.Name = Structure.Name;
4553  Type.Size = Structure.Size * Count;
4554  Type.ElementSize = Structure.Size;
4555  Type.Length = Count;
4556  KnownType[Name.lower()] = Type;
4557  } else if (addStructField(Name, Structure)) {
4558  return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4559  }
4560 
4561  return false;
4562 }
4563 
4564 /// parseDirectiveStruct
4565 /// ::= <name> (STRUC | STRUCT | UNION) [fieldAlign] [, NONUNIQUE]
4566 /// (dataDir | generalDir | offsetDir | nestedStruct)+
4567 /// <name> ENDS
4568 ////// dataDir = data declaration
4569 ////// offsetDir = EVEN, ORG, ALIGN
4570 bool MasmParser::parseDirectiveStruct(StringRef Directive,
4571  DirectiveKind DirKind, StringRef Name,
4572  SMLoc NameLoc) {
4573  // We ignore NONUNIQUE; we do not support OPTION M510 or OPTION OLDSTRUCTS
4574  // anyway, so all field accesses must be qualified.
4575  AsmToken NextTok = getTok();
4576  int64_t AlignmentValue = 1;
4577  if (NextTok.isNot(AsmToken::Comma) &&
4578  NextTok.isNot(AsmToken::EndOfStatement) &&
4579  parseAbsoluteExpression(AlignmentValue)) {
4580  return addErrorSuffix(" in alignment value for '" + Twine(Directive) +
4581  "' directive");
4582  }
4583  if (!isPowerOf2_64(AlignmentValue)) {
4584  return Error(NextTok.getLoc(), "alignment must be a power of two; was " +
4585  std::to_string(AlignmentValue));
4586  }
4587 
4588  StringRef Qualifier;
4589  SMLoc QualifierLoc;
4590  if (parseOptionalToken(AsmToken::Comma)) {
4591  QualifierLoc = getTok().getLoc();
4592  if (parseIdentifier(Qualifier))
4593  return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4594  if (!Qualifier.equals_insensitive("nonunique"))
4595  return Error(QualifierLoc, "Unrecognized qualifier for '" +
4596  Twine(Directive) +
4597  "' directive; expected none or NONUNIQUE");
4598  }
4599 
4600  if (parseToken(AsmToken::EndOfStatement))
4601  return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4602 
4603  StructInProgress.emplace_back(Name, DirKind == DK_UNION, AlignmentValue);
4604  return false;
4605 }
4606 
4607 /// parseDirectiveNestedStruct
4608 /// ::= (STRUC | STRUCT | UNION) [name]
4609 /// (dataDir | generalDir | offsetDir | nestedStruct)+
4610 /// ENDS
4611 bool MasmParser::parseDirectiveNestedStruct(StringRef Directive,
4612  DirectiveKind DirKind) {
4613  if (StructInProgress.empty())
4614  return TokError("missing name in top-level '" + Twine(Directive) +
4615  "' directive");
4616 
4617  StringRef Name;
4618  if (getTok().is(AsmToken::Identifier)) {
4619  Name = getTok().getIdentifier();
4620  parseToken(AsmToken::Identifier);
4621  }
4622  if (parseToken(AsmToken::EndOfStatement))
4623  return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4624 
4625  // Reserve space to ensure Alignment doesn't get invalidated when
4626  // StructInProgress grows.
4627  StructInProgress.reserve(StructInProgress.size() + 1);
4628  StructInProgress.emplace_back(Name, DirKind == DK_UNION,
4629  StructInProgress.back().Alignment);
4630  return false;
4631 }
4632 
4633 bool MasmParser::parseDirectiveEnds(StringRef Name, SMLoc NameLoc) {
4634  if (StructInProgress.empty())
4635  return Error(NameLoc, "ENDS directive without matching STRUC/STRUCT/UNION");
4636  if (StructInProgress.size() > 1)
4637  return Error(NameLoc, "unexpected name in nested ENDS directive");
4638  if (StructInProgress.back().Name.compare_insensitive(Name))
4639  return Error(NameLoc, "mismatched name in ENDS directive; expected '" +
4640  StructInProgress.back().Name + "'");
4641  StructInfo Structure = StructInProgress.pop_back_val();
4642  // Pad to make the structure's size divisible by the smaller of its alignment
4643  // and the size of its largest field.
4644  Structure.Size = llvm::alignTo(
4645  Structure.Size, std::min(Structure.Alignment, Structure.AlignmentSize));
4646  Structs[Name.lower()] = Structure;
4647 
4648  if (parseToken(AsmToken::EndOfStatement))
4649  return addErrorSuffix(" in ENDS directive");
4650 
4651  return false;
4652 }
4653 
4654 bool MasmParser::parseDirectiveNestedEnds() {
4655  if (StructInProgress.empty())
4656  return TokError("ENDS directive without matching STRUC/STRUCT/UNION");
4657  if (StructInProgress.size() == 1)
4658  return TokError("missing name in top-level ENDS directive");
4659 
4660  if (parseToken(AsmToken::EndOfStatement))
4661  return addErrorSuffix(" in nested ENDS directive");
4662 
4663  StructInfo Structure = StructInProgress.pop_back_val();
4664  // Pad to make the structure's size divisible by its alignment.
4665  Structure.Size = llvm::alignTo(Structure.Size, Structure.Alignment);
4666 
4667  StructInfo &ParentStruct = StructInProgress.back();
4668  if (Structure.Name.empty()) {
4669  // Anonymous substructures' fields are addressed as if they belong to the
4670  // parent structure - so we transfer them to the parent here.
4671  const size_t OldFields = ParentStruct.Fields.size();
4672  ParentStruct.Fields.insert(
4673  ParentStruct.Fields.end(),
4674  std::make_move_iterator(Structure.Fields.begin()),
4675  std::make_move_iterator(Structure.Fields.end()));
4676  for (const auto &FieldByName : Structure.FieldsByName) {
4677  ParentStruct.FieldsByName[FieldByName.getKey()] =
4678  FieldByName.getValue() + OldFields;
4679  }
4680 
4681  unsigned FirstFieldOffset = 0;
4682  if (!Structure.Fields.empty() && !ParentStruct.IsUnion) {
4683  FirstFieldOffset = llvm::alignTo(
4684  ParentStruct.NextOffset,
4685  std::min(ParentStruct.Alignment, Structure.AlignmentSize));
4686  }
4687 
4688  if (ParentStruct.IsUnion) {
4689  ParentStruct.Size = std::max(ParentStruct.Size, Structure.Size);
4690  } else {
4691  for (auto FieldIter = ParentStruct.Fields.begin() + OldFields;
4692  FieldIter != ParentStruct.Fields.end(); ++FieldIter) {
4693  FieldIter->Offset += FirstFieldOffset;
4694  }
4695 
4696  const unsigned StructureEnd = FirstFieldOffset + Structure.Size;
4697  if (!ParentStruct.IsUnion) {
4698  ParentStruct.NextOffset = StructureEnd;
4699  }
4700  ParentStruct.Size = std::max(ParentStruct.Size, StructureEnd);
4701  }
4702  } else {
4703  FieldInfo &Field = ParentStruct.addField(Structure.Name, FT_STRUCT,
4704  Structure.AlignmentSize);
4705  StructFieldInfo &StructInfo = Field.Contents.StructInfo;
4706  Field.Type = Structure.Size;
4707  Field.LengthOf = 1;
4708  Field.SizeOf = Structure.Size;
4709 
4710  const unsigned StructureEnd = Field.Offset + Field.SizeOf;
4711  if (!ParentStruct.IsUnion) {
4712  ParentStruct.NextOffset = StructureEnd;
4713  }
4714  ParentStruct.Size = std::max(ParentStruct.Size, StructureEnd);
4715 
4716  StructInfo.Structure = Structure;
4717  StructInfo.Initializers.emplace_back();
4718  auto &FieldInitializers = StructInfo.Initializers.back().FieldInitializers;
4719  for (const auto &SubField : Structure.Fields) {
4720  FieldInitializers.push_back(SubField.Contents);
4721  }
4722  }
4723 
4724  return false;
4725 }
4726 
4727 /// parseDirectiveOrg
4728 /// ::= org expression
4729 bool MasmParser::parseDirectiveOrg() {
4730  const MCExpr *Offset;
4731  SMLoc OffsetLoc = Lexer.getLoc();
4732  if (checkForValidSection() || parseExpression(Offset))
4733  return true;
4734  if (parseToken(AsmToken::EndOfStatement))
4735  return addErrorSuffix(" in 'org' directive");
4736 
4737  if (StructInProgress.empty()) {
4738  // Not in a struct; change the offset for the next instruction or data
4739  if (checkForValidSection())
4740  return addErrorSuffix(" in 'org' directive");
4741 
4742  getStreamer().emitValueToOffset(Offset, 0, OffsetLoc);
4743  } else {
4744  // Offset the next field of this struct
4745  StructInfo &Structure = StructInProgress.back();
4746  int64_t OffsetRes;
4747  if (!Offset->evaluateAsAbsolute(OffsetRes, getStreamer().getAssemblerPtr()))
4748  return Error(OffsetLoc,
4749  "expected absolute expression in 'org' directive");
4750  if (OffsetRes < 0)
4751  return Error(
4752  OffsetLoc,
4753  "expected non-negative value in struct's 'org' directive; was " +
4754  std::to_string(OffsetRes));
4755  Structure.NextOffset = static_cast<unsigned>(OffsetRes);
4756 
4757  // ORG-affected structures cannot be initialized
4758  Structure.Initializable = false;
4759  }
4760 
4761  return false;
4762 }
4763 
4764 bool MasmParser::emitAlignTo(int64_t Alignment) {
4765  if (StructInProgress.empty()) {
4766  // Not in a struct; align the next instruction or data
4767  if (checkForValidSection())
4768  return true;
4769 
4770  // Check whether we should use optimal code alignment for this align
4771  // directive.
4772  const MCSection *Section = getStreamer().getCurrentSectionOnly();
4773  assert(Section && "must have section to emit alignment");
4774  if (Section->UseCodeAlign()) {
4775  getStreamer().emitCodeAlignment(Alignment, &getTargetParser().getSTI(),
4776  /*MaxBytesToEmit=*/0);
4777  } else {
4778  // FIXME: Target specific behavior about how the "extra" bytes are filled.
4779  getStreamer().emitValueToAlignment(Alignment, /*Value=*/0,
4780  /*ValueSize=*/1,
4781  /*MaxBytesToEmit=*/0);
4782  }
4783  } else {
4784  // Align the next field of this struct
4785  StructInfo &Structure = StructInProgress.back();
4786  Structure.NextOffset = llvm::alignTo(Structure.NextOffset, Alignment);
4787  }
4788 
4789  return false;
4790 }
4791 
4792 /// parseDirectiveAlign
4793 /// ::= align expression
4794 bool MasmParser::parseDirectiveAlign() {
4795  SMLoc AlignmentLoc = getLexer().getLoc();
4796  int64_t Alignment;
4797 
4798  // Ignore empty 'align' directives.
4799  if (getTok().is(AsmToken::EndOfStatement)) {
4800  return Warning(AlignmentLoc,
4801  "align directive with no operand is ignored") &&
4802  parseToken(AsmToken::EndOfStatement);
4803  }
4804  if (parseAbsoluteExpression(Alignment) ||
4805  parseToken(AsmToken::EndOfStatement))
4806  return addErrorSuffix(" in align directive");
4807 
4808  // Always emit an alignment here even if we throw an error.
4809  bool ReturnVal = false;
4810 
4811  // Reject alignments that aren't either a power of two or zero, for ML.exe
4812  // compatibility. Alignment of zero is silently rounded up to one.
4813  if (Alignment == 0)
4814  Alignment = 1;
4815  if (!isPowerOf2_64(Alignment))
4816  ReturnVal |= Error(AlignmentLoc, "alignment must be a power of 2; was " +
4817  std::to_string(Alignment));
4818 
4819  if (emitAlignTo(Alignment))
4820  ReturnVal |= addErrorSuffix(" in align directive");
4821 
4822  return ReturnVal;
4823 }
4824 
4825 /// parseDirectiveEven
4826 /// ::= even
4827 bool MasmParser::parseDirectiveEven() {
4828  if (parseToken(AsmToken::EndOfStatement) || emitAlignTo(2))
4829  return addErrorSuffix(" in even directive");
4830 
4831  return false;
4832 }
4833 
4834 /// parseDirectiveFile
4835 /// ::= .file filename
4836 /// ::= .file number [directory] filename [md5 checksum] [source source-text]
4837 bool MasmParser::parseDirectiveFile(SMLoc DirectiveLoc) {
4838  // FIXME: I'm not sure what this is.
4839  int64_t FileNumber = -1;
4840  if (getLexer().is(AsmToken::Integer)) {
4841  FileNumber = getTok().getIntVal();
4842  Lex();
4843 
4844  if (FileNumber < 0)
4845  return TokError("negative file number");
4846  }
4847 
4848  std::string Path;
4849 
4850  // Usually the directory and filename together, otherwise just the directory.
4851  // Allow the strings to have escaped octal character sequence.
4852  if (check(getTok().isNot(AsmToken::String),
4853  "unexpected token in '.file' directive") ||
4854  parseEscapedString(Path))
4855  return true;
4856 
4857  StringRef Directory;
4858  StringRef Filename;
4859  std::string FilenameData;
4860  if (getLexer().is(AsmToken::String)) {
4861  if (check(FileNumber == -1,
4862  "explicit path specified, but no file number") ||
4863  parseEscapedString(FilenameData))
4864  return true;
4865  Filename = FilenameData;
4866  Directory = Path;
4867  } else {
4868  Filename = Path;
4869  }
4870 
4871  uint64_t MD5Hi, MD5Lo;
4872  bool HasMD5 = false;
4873 
4875  bool HasSource = false;
4876  std::string SourceString;
4877 
4878  while (!parseOptionalToken(AsmToken::EndOfStatement)) {
4879  StringRef Keyword;
4880  if (check(getTok().isNot(AsmToken::Identifier),
4881  "unexpected token in '.file' directive") ||
4882  parseIdentifier(Keyword))
4883  return true;
4884  if (Keyword == "md5") {
4885  HasMD5 = true;
4886  if (check(FileNumber == -1,
4887  "MD5 checksum specified, but no file number") ||
4888  parseHexOcta(*this, MD5Hi, MD5Lo))
4889  return true;
4890  } else if (Keyword == "source") {
4891  HasSource = true;
4892  if (check(FileNumber == -1,
4893  "source specified, but no file number") ||
4894  check(getTok().isNot(AsmToken::String),
4895  "unexpected token in '.file' directive") ||
4896  parseEscapedString(SourceString))
4897  return true;
4898  } else {
4899  return TokError("unexpected token in '.file' directive");
4900  }
4901  }
4902 
4903  if (FileNumber == -1) {
4904  // Ignore the directive if there is no number and the target doesn't support
4905  // numberless .file directives. This allows some portability of assembler
4906  // between different object file formats.
4907  if (getContext().getAsmInfo()->hasSingleParameterDotFile())
4908  getStreamer().emitFileDirective(Filename);
4909  } else {
4910  // In case there is a -g option as well as debug info from directive .file,
4911  // we turn off the -g option, directly use the existing debug info instead.
4912  // Throw away any implicit file table for the assembler source.
4913  if (Ctx.getGenDwarfForAssembly()) {
4915  Ctx.setGenDwarfForAssembly(false);
4916  }
4917 
4919  if (HasMD5) {
4920  MD5::MD5Result Sum;
4921  for (unsigned i = 0; i != 8; ++i) {
4922  Sum.Bytes[i] = uint8_t(MD5Hi >> ((7 - i) * 8));
4923  Sum.Bytes[i + 8] = uint8_t(MD5Lo >> ((7 - i) * 8));
4924  }
4925  CKMem = Sum;
4926  }
4927  if (HasSource) {
4928  char *SourceBuf = static_cast<char *>(Ctx.allocate(SourceString.size()));
4929  memcpy(SourceBuf, SourceString.data(), SourceString.size());
4930  Source = StringRef(SourceBuf, SourceString.size());
4931  }
4932  if (FileNumber == 0) {
4933  if (Ctx.getDwarfVersion() < 5)
4934  return Warning(DirectiveLoc, "file 0 not supported prior to DWARF-5");
4935  getStreamer().emitDwarfFile0Directive(Directory, Filename, CKMem, Source);
4936  } else {
4937  Expected<unsigned> FileNumOrErr = getStreamer().tryEmitDwarfFileDirective(
4938  FileNumber, Directory, Filename, CKMem, Source);
4939  if (!FileNumOrErr)
4940  return Error(DirectiveLoc, toString(FileNumOrErr.takeError()));
4941  }
4942  // Alert the user if there are some .file directives with MD5 and some not.
4943  // But only do that once.
4944  if (!ReportedInconsistentMD5 && !Ctx.isDwarfMD5UsageConsistent(0)) {
4945  ReportedInconsistentMD5 = true;
4946  return Warning(DirectiveLoc, "inconsistent use of MD5 checksums");
4947  }
4948  }
4949 
4950  return false;
4951 }
4952 
4953 /// parseDirectiveLine
4954 /// ::= .line [number]
4955 bool MasmParser::parseDirectiveLine() {
4956  int64_t LineNumber;
4957  if (getLexer().is(AsmToken::Integer)) {
4958  if (parseIntToken(LineNumber, "unexpected token in '.line' directive"))
4959  return true;
4960  (void)LineNumber;
4961  // FIXME: Do something with the .line.
4962  }
4963  if (parseToken(AsmToken::EndOfStatement,
4964  "unexpected token in '.line' directive"))
4965  return true;
4966 
4967  return false;
4968 }
4969 
4970 /// parseDirectiveLoc
4971 /// ::= .loc FileNumber [LineNumber] [ColumnPos] [basic_block] [prologue_end]
4972 /// [epilogue_begin] [is_stmt VALUE] [isa VALUE]
4973 /// The first number is a file number, must have been previously assigned with
4974 /// a .file directive, the second number is the line number and optionally the
4975 /// third number is a column position (zero if not specified). The remaining
4976 /// optional items are .loc sub-directives.
4977 bool MasmParser::parseDirectiveLoc() {
4978  int64_t FileNumber = 0, LineNumber = 0;
4979  SMLoc Loc = getTok().getLoc();
4980  if (parseIntToken(FileNumber, "unexpected token in '.loc' directive") ||
4981  check(FileNumber < 1 && Ctx.getDwarfVersion() < 5, Loc,
4982  "file number less than one in '.loc' directive") ||
4983  check(!getContext().isValidDwarfFileNumber(FileNumber), Loc,
4984  "unassigned file number in '.loc' directive"))
4985  return true;
4986 
4987  // optional
4988  if (getLexer().is(AsmToken::Integer)) {
4989  LineNumber = getTok().getIntVal();
4990  if (LineNumber < 0)
4991  return TokError("line number less than zero in '.loc' directive");
4992  Lex();
4993  }
4994 
4995  int64_t ColumnPos = 0;
4996  if (getLexer().is(AsmToken::Integer)) {
4997  ColumnPos = getTok().getIntVal();
4998  if (ColumnPos < 0)
4999  return TokError("column position less than zero in '.loc' directive");
5000  Lex();
5001  }
5002 
5003  auto PrevFlags = getContext().getCurrentDwarfLoc().getFlags();
5004  unsigned Flags = PrevFlags & DWARF2_FLAG_IS_STMT;
5005  unsigned Isa = 0;
5006  int64_t Discriminator = 0;
5007 
5008  auto parseLocOp = [&]() -> bool {
5009  StringRef Name;
5010  SMLoc Loc = getTok().getLoc();
5011  if (parseIdentifier(Name))
5012  return TokError("unexpected token in '.loc' directive");
5013 
5014  if (Name == "basic_block")
5015  Flags |= DWARF2_FLAG_BASIC_BLOCK;
5016  else if (Name == "prologue_end")
5017  Flags |= DWARF2_FLAG_PROLOGUE_END;
5018  else if (Name == "epilogue_begin")
5019  Flags |= DWARF2_FLAG_EPILOGUE_BEGIN;
5020  else if (Name == "is_stmt") {
5021  Loc = getTok().getLoc();
5022  const MCExpr *Value;
5023  if (parseExpression(Value))
5024  return true;
5025  // The expression must be the constant 0 or 1.
5026  if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
5027  int Value = MCE->getValue();
5028  if (Value == 0)
5029  Flags &= ~DWARF2_FLAG_IS_STMT;
5030  else if (Value == 1)
5031  Flags |= DWARF2_FLAG_IS_STMT;
5032  else
5033  return Error(Loc, "is_stmt value not 0 or 1");
5034  } else {
5035  return Error(Loc, "is_stmt value not the constant value of 0 or 1");
5036  }
5037  } else if (Name == "isa") {
5038  Loc = getTok().getLoc();
5039  const MCExpr *Value;
5040  if (parseExpression(Value))
5041  return true;
5042  // The expression must be a constant greater or equal to 0.
5043  if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
5044  int Value = MCE->getValue();
5045  if (Value < 0)
5046  return Error(Loc, "isa number less than zero");
5047  Isa = Value;
5048  } else {
5049  return Error(Loc, "isa number not a constant value");
5050  }
5051  } else if (Name == "discriminator") {
5052  if (parseAbsoluteExpression(Discriminator))
5053  return true;
5054  } else {
5055  return Error(Loc, "unknown sub-directive in '.loc' directive");
5056  }
5057  return false;
5058  };
5059 
5060  if (parseMany(parseLocOp, false /*hasComma*/))
5061  return true;
5062 
5063  getStreamer().emitDwarfLocDirective(FileNumber, LineNumber, ColumnPos, Flags,
5064  Isa, Discriminator, StringRef());
5065 
5066  return false;
5067 }
5068 
5069 /// parseDirectiveStabs
5070 /// ::= .stabs string, number, number, number
5071 bool MasmParser::parseDirectiveStabs() {
5072  return TokError("unsupported directive '.stabs'");
5073 }
5074 
5075 /// parseDirectiveCVFile
5076 /// ::= .cv_file number filename [checksum] [checksumkind]
5077 bool MasmParser::parseDirectiveCVFile() {
5078  SMLoc FileNumberLoc = getTok().getLoc();
5079  int64_t FileNumber;
5080  std::string Filename;
5081  std::string Checksum;
5082  int64_t ChecksumKind = 0;
5083 
5084  if (parseIntToken(FileNumber,
5085  "expected file number in '.cv_file' directive") ||
5086  check(FileNumber < 1, FileNumberLoc, "file number less than one") ||
5087  check(getTok().isNot(AsmToken::String),
5088  "unexpected token in '.cv_file' directive") ||
5089  parseEscapedString(Filename))
5090  return true;
5091  if (!parseOptionalToken(AsmToken::EndOfStatement)) {
5092  if (check(getTok().isNot(AsmToken::String),
5093  "unexpected token in '.cv_file' directive") ||
5094  parseEscapedString(Checksum) ||
5095  parseIntToken(ChecksumKind,
5096  "expected checksum kind in '.cv_file' directive") ||
5097  parseToken(AsmToken::EndOfStatement,
5098  "unexpected token in '.cv_file' directive"))
5099  return true;
5100  }
5101 
5102  Checksum = fromHex(Checksum);
5103  void *CKMem = Ctx.allocate(Checksum.size(), 1);
5104  memcpy(CKMem, Checksum.data(), Checksum.size());
5105  ArrayRef<uint8_t> ChecksumAsBytes(reinterpret_cast<const uint8_t *>(CKMem),
5106  Checksum.size());
5107 
5108  if (!getStreamer().EmitCVFileDirective(FileNumber, Filename, ChecksumAsBytes,
5109  static_cast<uint8_t>(ChecksumKind)))
5110  return Error(FileNumberLoc, "file number already allocated");
5111 
5112  return false;
5113 }
5114 
5115 bool MasmParser::parseCVFunctionId(int64_t &FunctionId,
5116  StringRef DirectiveName) {
5117  SMLoc Loc;
5118  return parseTokenLoc(Loc) ||
5119  parseIntToken(FunctionId, "expected function id in '" + DirectiveName +
5120  "' directive") ||
5121  check(FunctionId < 0 || FunctionId >= UINT_MAX, Loc,
5122  "expected function id within range [0, UINT_MAX)");
5123 }
5124 
5125 bool MasmParser::parseCVFileId(int64_t &FileNumber, StringRef DirectiveName) {
5126  SMLoc Loc;
5127  return parseTokenLoc(Loc) ||
5128  parseIntToken(FileNumber, "expected integer in '" + DirectiveName +
5129  "' directive") ||
5130  check(FileNumber < 1, Loc, "file number less than one in '" +
5131  DirectiveName + "' directive") ||
5132  check(!getCVContext().isValidFileNumber(FileNumber), Loc,
5133  "unassigned file number in '" + DirectiveName + "' directive");
5134 }
5135 
5136 /// parseDirectiveCVFuncId
5137 /// ::= .cv_func_id FunctionId
5138 ///
5139 /// Introduces a function ID that can be used with .cv_loc.
5140 bool MasmParser::parseDirectiveCVFuncId() {
5141  SMLoc FunctionIdLoc = getTok().getLoc();
5142  int64_t FunctionId;
5143 
5144  if (parseCVFunctionId(FunctionId, ".cv_func_id") ||
5145  parseToken(AsmToken::EndOfStatement,
5146  "unexpected token in '.cv_func_id' directive"))
5147  return true;
5148 
5149  if (!getStreamer().EmitCVFuncIdDirective(FunctionId))
5150  return Error(FunctionIdLoc, "function id already allocated");
5151 
5152  return false;
5153 }
5154 
5155 /// parseDirectiveCVInlineSiteId
5156 /// ::= .cv_inline_site_id FunctionId
5157 /// "within" IAFunc
5158 /// "inlined_at" IAFile IALine [IACol]
5159 ///
5160 /// Introduces a function ID that can be used with .cv_loc. Includes "inlined
5161 /// at" source location information for use in the line table of the caller,
5162 /// whether the caller is a real function or another inlined call site.
5163 bool MasmParser::parseDirectiveCVInlineSiteId() {
5164  SMLoc FunctionIdLoc = getTok().getLoc();
5165  int64_t FunctionId;
5166  int64_t IAFunc;
5167  int64_t IAFile;
5168  int64_t IALine;
5169  int64_t IACol = 0;
5170 
5171  // FunctionId
5172  if (parseCVFunctionId(FunctionId, ".cv_inline_site_id"))
5173  return true;
5174 
5175  // "within"
5176  if (check((getLexer().isNot(AsmToken::Identifier) ||
5177  getTok().getIdentifier() != "within"),
5178  "expected 'within' identifier in '.cv_inline_site_id' directive"))
5179  return true;
5180  Lex();
5181 
5182  // IAFunc
5183  if (parseCVFunctionId(IAFunc, ".cv_inline_site_id"))
5184  return true;
5185 
5186  // "inlined_at"
5187  if (check((getLexer().isNot(AsmToken::Identifier) ||
5188  getTok().getIdentifier() != "inlined_at"),
5189  "expected 'inlined_at' identifier in '.cv_inline_site_id' "
5190  "directive") )
5191  return true;
5192  Lex();
5193 
5194  // IAFile IALine
5195  if (parseCVFileId(IAFile, ".cv_inline_site_id") ||
5196  parseIntToken(IALine, "expected line number after 'inlined_at'"))
5197  return true;
5198 
5199  // [IACol]
5200  if (getLexer().is(AsmToken::Integer)) {
5201  IACol = getTok().getIntVal();
5202  Lex();
5203  }
5204 
5205  if (parseToken(AsmToken::EndOfStatement,
5206  "unexpected token in '.cv_inline_site_id' directive"))
5207  return true;
5208 
5209  if (!getStreamer().EmitCVInlineSiteIdDirective(FunctionId, IAFunc, IAFile,
5210  IALine, IACol, FunctionIdLoc))
5211  return Error(FunctionIdLoc, "function id already allocated");
5212 
5213  return false;
5214 }
5215 
5216 /// parseDirectiveCVLoc
5217 /// ::= .cv_loc FunctionId FileNumber [LineNumber] [ColumnPos] [prologue_end]
5218 /// [is_stmt VALUE]
5219 /// The first number is a file number, must have been previously assigned with
5220 /// a .file directive, the second number is the line number and optionally the
5221 /// third number is a column position (zero if not specified). The remaining
5222 /// optional items are .loc sub-directives.
5223 bool MasmParser::parseDirectiveCVLoc() {
5224  SMLoc DirectiveLoc = getTok().getLoc();
5225  int64_t FunctionId, FileNumber;
5226  if (parseCVFunctionId(FunctionId, ".cv_loc") ||
5227  parseCVFileId(FileNumber, ".cv_loc"))
5228  return true;
5229 
5230  int64_t LineNumber = 0;
5231  if (getLexer().is(AsmToken::Integer)) {
5232  LineNumber = getTok().getIntVal();
5233  if (LineNumber < 0)
5234  return TokError("line number less than zero in '.cv_loc' directive");
5235  Lex();
5236  }
5237 
5238  int64_t ColumnPos = 0;
5239  if (getLexer().is(AsmToken::Integer)) {
5240  ColumnPos = getTok().getIntVal();
5241  if (ColumnPos < 0)
5242  return TokError("column position less than zero in '.cv_loc' directive");
5243  Lex();
5244  }
5245 
5246  bool PrologueEnd = false;
5247  uint64_t IsStmt = 0;
5248 
5249  auto parseOp = [&]() -> bool {
5250  StringRef Name;
5251  SMLoc Loc = getTok().getLoc();
5252  if (parseIdentifier(Name))
5253  return TokError("unexpected token in '.cv_loc' directive");
5254  if (Name == "prologue_end")
5255  PrologueEnd = true;
5256  else if (Name == "is_stmt") {
5257  Loc = getTok().getLoc();
5258  const MCExpr *Value;
5259  if (parseExpression(Value))
5260  return true;
5261  // The expression must be the constant 0 or 1.
5262  IsStmt = ~0ULL;
5263  if (const auto *MCE = dyn_cast<MCConstantExpr>(Value))
5264  IsStmt = MCE->getValue();
5265 
5266  if (IsStmt > 1)
5267  return Error(Loc, "is_stmt value not 0 or 1");
5268  } else {
5269  return Error(Loc, "unknown sub-directive in '.cv_loc' directive");
5270  }
5271  return false;
5272  };
5273 
5274  if (parseMany(parseOp, false /*hasComma*/))
5275  return true;
5276 
5277  getStreamer().emitCVLocDirective(FunctionId, FileNumber, LineNumber,
5278  ColumnPos, PrologueEnd, IsStmt, StringRef(),
5279  DirectiveLoc);
5280  return false;
5281 }
5282 
5283 /// parseDirectiveCVLinetable
5284 /// ::= .cv_linetable FunctionId, FnStart, FnEnd
5285 bool MasmParser::parseDirectiveCVLinetable() {
5286  int64_t FunctionId;
5287  StringRef FnStartName, FnEndName;
5288  SMLoc Loc = getTok().getLoc();
5289  if (parseCVFunctionId(FunctionId, ".cv_linetable") ||
5290  parseToken(AsmToken::Comma,
5291  "unexpected token in '.cv_linetable' directive") ||
5292  parseTokenLoc(Loc) || check(parseIdentifier(FnStartName), Loc,
5293  "expected identifier in directive") ||
5294  parseToken(AsmToken::Comma,
5295  "unexpected token in '.cv_linetable' directive") ||
5296  parseTokenLoc(Loc) || check(parseIdentifier(FnEndName), Loc,
5297  "expected identifier in directive"))
5298  return true;
5299 
5300  MCSymbol *FnStartSym = getContext().getOrCreateSymbol(FnStartName);
5301  MCSymbol *FnEndSym = getContext().getOrCreateSymbol(FnEndName);
5302 
5303  getStreamer().emitCVLinetableDirective(FunctionId, FnStartSym, FnEndSym);
5304  return false;
5305 }
5306 
5307 /// parseDirectiveCVInlineLinetable
5308 /// ::= .cv_inline_linetable PrimaryFunctionId FileId LineNum FnStart FnEnd
5309 bool MasmParser::parseDirectiveCVInlineLinetable() {
5310  int64_t PrimaryFunctionId, SourceFileId, SourceLineNum;
5311  StringRef FnStartName, FnEndName;
5312  SMLoc Loc = getTok().getLoc();
5313  if (parseCVFunctionId(PrimaryFunctionId, ".cv_inline_linetable") ||
5314  parseTokenLoc(Loc) ||
5315  parseIntToken(
5316  SourceFileId,
5317  "expected SourceField in '.cv_inline_linetable' directive") ||
5318  check(SourceFileId <= 0, Loc,
5319  "File id less than zero in '.cv_inline_linetable' directive") ||
5320  parseTokenLoc(Loc) ||
5321  parseIntToken(
5322  SourceLineNum,
5323  "expected SourceLineNum in '.cv_inline_linetable' directive") ||
5324  check(SourceLineNum < 0, Loc,
5325  "Line number less than zero in '.cv_inline_linetable' directive") ||
5326  parseTokenLoc(Loc) || check(parseIdentifier(FnStartName), Loc,
5327  "expected identifier in directive") ||
5328  parseTokenLoc(Loc) || check(parseIdentifier(FnEndName), Loc,
5329  "expected identifier in directive"))
5330  return true;
5331 
5332  if (parseToken(AsmToken::EndOfStatement, "Expected End of Statement"))
5333  return true;
5334 
5335  MCSymbol *FnStartSym = getContext().getOrCreateSymbol(FnStartName);
5336  MCSymbol *FnEndSym = getContext().getOrCreateSymbol(FnEndName);
5337  getStreamer().emitCVInlineLinetableDirective(PrimaryFunctionId, SourceFileId,
5338  SourceLineNum, FnStartSym,
5339  FnEndSym);
5340  return false;
5341 }
5342 
5343 void MasmParser::initializeCVDefRangeTypeMap() {
5344  CVDefRangeTypeMap["reg"] = CVDR_DEFRANGE_REGISTER;
5345  CVDefRangeTypeMap["frame_ptr_rel"] = CVDR_DEFRANGE_FRAMEPOINTER_REL;
5346  CVDefRangeTypeMap["subfield_reg"] = CVDR_DEFRANGE_SUBFIELD_REGISTER;
5347  CVDefRangeTypeMap["reg_rel"] = CVDR_DEFRANGE_REGISTER_REL;
5348 }
5349 
5350 /// parseDirectiveCVDefRange
5351 /// ::= .cv_def_range RangeStart RangeEnd (GapStart GapEnd)*, bytes*
5352 bool MasmParser::parseDirectiveCVDefRange() {
5353  SMLoc Loc;
5354  std::vector<std::pair<const MCSymbol *, const MCSymbol *>> Ranges;
5355  while (getLexer().is(AsmToken::Identifier)) {
5356  Loc = getLexer().getLoc();
5357  StringRef GapStartName;
5358  if (parseIdentifier(GapStartName))
5359  return Error(Loc, "expected identifier in directive");
5360  MCSymbol *GapStartSym = getContext().getOrCreateSymbol(GapStartName);
5361 
5362  Loc = getLexer().getLoc();
5363  StringRef GapEndName;
5364  if (parseIdentifier(GapEndName))
5365  return Error(Loc, "expected identifier in directive");
5366  MCSymbol *GapEndSym = getContext().getOrCreateSymbol(GapEndName);
5367 
5368  Ranges.push_back({GapStartSym, GapEndSym});
5369  }
5370 
5371  StringRef CVDefRangeTypeStr;
5372  if (parseToken(
5374  "expected comma before def_range type in .cv_def_range directive") ||
5375  parseIdentifier(CVDefRangeTypeStr))
5376  return Error(Loc, "expected def_range type in directive");
5377 
5379  CVDefRangeTypeMap.find(CVDefRangeTypeStr);
5380  CVDefRangeType CVDRType = (CVTypeIt == CVDefRangeTypeMap.end())
5381  ? CVDR_DEFRANGE
5382  : CVTypeIt->getValue();
5383  switch (CVDRType) {
5384  case CVDR_DEFRANGE_REGISTER: {
5385  int64_t DRRegister;
5386  if (parseToken(AsmToken::Comma, "expected comma before register number in "
5387  ".cv_def_range directive") ||
5388  parseAbsoluteExpression(DRRegister))
5389  return Error(Loc, "expected register number");
5390 
5392  DRHdr.Register = DRRegister;
5393  DRHdr.MayHaveNoName = 0;
5394  getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5395  break;
5396  }
5397  case CVDR_DEFRANGE_FRAMEPOINTER_REL: {
5398  int64_t DROffset;
5399  if (parseToken(AsmToken::Comma,
5400  "expected comma before offset in .cv_def_range directive") ||
5401  parseAbsoluteExpression(DROffset))
5402  return Error(Loc, "expected offset value");
5403 
5405  DRHdr.Offset = DROffset;
5406  getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5407  break;
5408  }
5409  case CVDR_DEFRANGE_SUBFIELD_REGISTER: {
5410  int64_t DRRegister;
5411  int64_t DROffsetInParent;
5412  if (parseToken(AsmToken::Comma, "expected comma before register number in "
5413  ".cv_def_range directive") ||
5414  parseAbsoluteExpression(DRRegister))
5415  return Error(Loc, "expected register number");
5416  if (parseToken(AsmToken::Comma,
5417  "expected comma before offset in .cv_def_range directive") ||
5418  parseAbsoluteExpression(DROffsetInParent))
5419  return Error(Loc, "expected offset value");
5420 
5422  DRHdr.Register = DRRegister;
5423  DRHdr.MayHaveNoName = 0;
5424  DRHdr.OffsetInParent = DROffsetInParent;
5425  getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5426  break;
5427  }
5428  case CVDR_DEFRANGE_REGISTER_REL: {
5429  int64_t DRRegister;
5430  int64_t DRFlags;
5431  int64_t DRBasePointerOffset;
5432  if (parseToken(AsmToken::Comma, "expected comma before register number in "
5433  ".cv_def_range directive") ||
5434  parseAbsoluteExpression(DRRegister))
5435  return Error(Loc, "expected register value");
5436  if (parseToken(
5438  "expected comma before flag value in .cv_def_range directive") ||
5439  parseAbsoluteExpression(DRFlags))
5440  return Error(Loc, "expected flag value");
5441  if (parseToken(AsmToken::Comma, "expected comma before base pointer offset "
5442  "in .cv_def_range directive") ||
5443  parseAbsoluteExpression(DRBasePointerOffset))
5444  return Error(Loc, "expected base pointer offset value");
5445 
5447  DRHdr.Register = DRRegister;
5448  DRHdr.Flags = DRFlags;
5449  DRHdr.BasePointerOffset = DRBasePointerOffset;
5450  getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5451  break;
5452  }
5453  default:
5454  return Error(Loc, "unexpected def_range type in .cv_def_range directive");
5455  }
5456  return true;
5457 }
5458 
5459 /// parseDirectiveCVString
5460 /// ::= .cv_stringtable "string"
5461 bool MasmParser::parseDirectiveCVString() {
5462  std::string Data;
5463  if (checkForValidSection() || parseEscapedString(Data))
5464  return addErrorSuffix(" in '.cv_string' directive");
5465 
5466  // Put the string in the table and emit the offset.
5467  std::pair<StringRef, unsigned> Insertion =
5468  getCVContext().addToStringTable(Data);
5469  getStreamer().emitIntValue(Insertion.second, 4);
5470  return false;
5471 }
5472 
5473 /// parseDirectiveCVStringTable
5474 /// ::= .cv_stringtable
5475 bool MasmParser::parseDirectiveCVStringTable() {
5476  getStreamer().emitCVStringTableDirective();
5477  return false;
5478 }
5479 
5480 /// parseDirectiveCVFileChecksums
5481 /// ::= .cv_filechecksums
5482 bool MasmParser::parseDirectiveCVFileChecksums() {
5483  getStreamer().emitCVFileChecksumsDirective();
5484  return false;
5485 }
5486 
5487 /// parseDirectiveCVFileChecksumOffset
5488 /// ::= .cv_filechecksumoffset fileno
5489 bool MasmParser::parseDirectiveCVFileChecksumOffset() {
5490  int64_t FileNo;
5491  if (parseIntToken(FileNo, "expected identifier in directive"))
5492  return true;
5493  if (parseToken(AsmToken::EndOfStatement, "Expected End of Statement"))
5494  return true;
5495  getStreamer().emitCVFileChecksumOffsetDirective(FileNo);
5496  return false;
5497 }
5498 
5499 /// parseDirectiveCVFPOData
5500 /// ::= .cv_fpo_data procsym
5501 bool MasmParser::parseDirectiveCVFPOData() {
5502  SMLoc DirLoc = getLexer().getLoc();
5503  StringRef ProcName;
5504  if (parseIdentifier(ProcName))
5505  return TokError("expected symbol name");
5506  if (parseEOL("unexpected tokens"))
5507  return addErrorSuffix(" in '.cv_fpo_data' directive");
5508  MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName);
5509  getStreamer().EmitCVFPOData(ProcSym, DirLoc);
5510  return false;
5511 }
5512 
5513 /// parseDirectiveCFISections
5514 /// ::= .cfi_sections section [, section]
5515 bool MasmParser::parseDirectiveCFISections() {
5516  StringRef Name;
5517  bool EH = false;
5518  bool Debug = false;
5519 
5520  if (parseIdentifier(Name))
5521  return TokError("Expected an identifier");
5522 
5523  if (Name == ".eh_frame")
5524  EH = true;
5525  else if (Name == ".debug_frame")
5526  Debug = true;
5527 
5528  if (getLexer().is(AsmToken::Comma)) {
5529  Lex();
5530 
5531  if (parseIdentifier(Name))
5532  return TokError("Expected an identifier");
5533 
5534  if (Name == ".eh_frame")
5535  EH = true;
5536  else if (Name == ".debug_frame")
5537  Debug = true;
5538  }
5539 
5540  getStreamer().emitCFISections(EH, Debug);
5541  return false;
5542 }
5543 
5544 /// parseDirectiveCFIStartProc
5545 /// ::= .cfi_startproc [simple]
5546 bool MasmParser::parseDirectiveCFIStartProc() {
5547  StringRef Simple;
5548  if (!parseOptionalToken(AsmToken::EndOfStatement)) {
5549  if (check(parseIdentifier(Simple) || Simple != "simple",
5550  "unexpected token") ||
5551  parseToken(AsmToken::EndOfStatement))
5552  return addErrorSuffix(" in '.cfi_startproc' directive");
5553  }
5554 
5555  // TODO(kristina): Deal with a corner case of incorrect diagnostic context
5556  // being produced if this directive is emitted as part of preprocessor macro
5557  // expansion which can *ONLY* happen if Clang's cc1as is the API consumer.
5558  // Tools like llvm-mc on the other hand are not affected by it, and report
5559  // correct context information.
5560  getStreamer().emitCFIStartProc(!Simple.empty(), Lexer.getLoc());
5561  return false;
5562 }
5563 
5564 /// parseDirectiveCFIEndProc
5565 /// ::= .cfi_endproc
5566 bool MasmParser::parseDirectiveCFIEndProc() {
5567  getStreamer().emitCFIEndProc();
5568  return false;
5569 }
5570 
5571 /// parse register name or number.
5572 bool MasmParser::parseRegisterOrRegisterNumber(int64_t &Register,
5573  SMLoc DirectiveLoc) {
5574  unsigned RegNo;
5575 
5576  if (getLexer().isNot(AsmToken::Integer)) {
5577  if (getTargetParser().ParseRegister(RegNo, DirectiveLoc, DirectiveLoc))
5578  return true;
5579  Register = getContext().getRegisterInfo()->getDwarfRegNum(RegNo, true);
5580  } else
5581  return parseAbsoluteExpression(Register);
5582 
5583  return false;
5584 }
5585 
5586 /// parseDirectiveCFIDefCfa
5587 /// ::= .cfi_def_cfa register, offset
5588 bool MasmParser::parseDirectiveCFIDefCfa(SMLoc DirectiveLoc) {
5589  int64_t Register = 0, Offset = 0;
5590  if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) ||
5591  parseToken(AsmToken::Comma, "unexpected token in directive") ||
5592  parseAbsoluteExpression(Offset))
5593  return true;
5594 
5595  getStreamer().emitCFIDefCfa(Register, Offset);
5596  return false;
5597 }
5598 
5599 /// parseDirectiveCFIDefCfaOffset
5600 /// ::= .cfi_def_cfa_offset offset
5601 bool MasmParser::parseDirectiveCFIDefCfaOffset() {
5602  int64_t Offset = 0;
5603  if (parseAbsoluteExpression(Offset))
5604  return true;
5605 
5606  getStreamer().emitCFIDefCfaOffset(Offset);
5607  return false;
5608 }
5609 
5610 /// parseDirectiveCFIRegister
5611 /// ::= .cfi_register register, register
5612 bool MasmParser::parseDirectiveCFIRegister(SMLoc DirectiveLoc) {
5613  int64_t Register1 = 0, Register2 = 0;
5614  if (parseRegisterOrRegisterNumber(Register1, DirectiveLoc) ||
5615  parseToken(AsmToken::Comma, "unexpected token in directive") ||
5616  parseRegisterOrRegisterNumber(Register2, DirectiveLoc))
5617  return true;
5618 
5619  getStreamer().emitCFIRegister(Register1, Register2);
5620  return false;
5621 }
5622 
5623 /// parseDirectiveCFIWindowSave
5624 /// ::= .cfi_window_save
5625 bool MasmParser::parseDirectiveCFIWindowSave() {
5626  getStreamer().emitCFIWindowSave();
5627  return false;
5628 }
5629 
5630 /// parseDirectiveCFIAdjustCfaOffset
5631 /// ::= .cfi_adjust_cfa_offset adjustment
5632 bool MasmParser::parseDirectiveCFIAdjustCfaOffset() {
5633  int64_t Adjustment = 0;
5634  if (parseAbsoluteExpression(Adjustment))
5635  return true;
5636 
5637  getStreamer().emitCFIAdjustCfaOffset(Adjustment);
5638  return false;
5639 }
5640 
5641 /// parseDirectiveCFIDefCfaRegister
5642 /// ::= .cfi_def_cfa_register register
5643 bool MasmParser::parseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc) {
5644  int64_t Register = 0;
5645  if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5646  return true;
5647 
5648  getStreamer().emitCFIDefCfaRegister(Register);
5649  return false;
5650 }
5651 
5652 /// parseDirectiveCFIOffset
5653 /// ::= .cfi_offset register, offset
5654 bool MasmParser::parseDirectiveCFIOffset(SMLoc DirectiveLoc) {
5655  int64_t Register = 0;
5656  int64_t Offset = 0;
5657 
5658  if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) ||
5659  parseToken(AsmToken::Comma, "unexpected token in directive") ||
5660  parseAbsoluteExpression(Offset))
5661  return true;
5662 
5663  getStreamer().emitCFIOffset(Register, Offset);
5664  return false;
5665 }
5666 
5667 /// parseDirectiveCFIRelOffset
5668 /// ::= .cfi_rel_offset register, offset
5669 bool MasmParser::parseDirectiveCFIRelOffset(SMLoc DirectiveLoc) {
5670  int64_t Register = 0, Offset = 0;
5671 
5672  if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) ||
5673  parseToken(AsmToken::Comma, "unexpected token in directive") ||
5674  parseAbsoluteExpression(Offset))
5675  return true;
5676 
5677  getStreamer().emitCFIRelOffset(Register, Offset);
5678  return false;
5679 }
5680 
5681 static bool isValidEncoding(int64_t Encoding) {
5682  if (Encoding & ~0xff)
5683  return false;
5684 
5685  if (Encoding == dwarf::DW_EH_PE_omit)
5686  return true;
5687 
5688  const unsigned Format = Encoding & 0xf;
5693  return false;
5694 
5695  const unsigned Application = Encoding & 0x70;
5696  if (Application != dwarf::DW_EH_PE_absptr &&
5697  Application != dwarf::DW_EH_PE_pcrel)
5698  return false;
5699 
5700  return true;
5701 }
5702 
5703 /// parseDirectiveCFIPersonalityOrLsda
5704 /// IsPersonality true for cfi_personality, false for cfi_lsda
5705 /// ::= .cfi_personality encoding, [symbol_name]
5706 /// ::= .cfi_lsda encoding, [symbol_name]
5707 bool MasmParser::parseDirectiveCFIPersonalityOrLsda(bool IsPersonality) {
5708  int64_t Encoding = 0;
5709  if (parseAbsoluteExpression(Encoding))
5710  return true;
5711  if (Encoding == dwarf::DW_EH_PE_omit)
5712  return false;
5713 
5714  StringRef Name;
5715  if (check(!isValidEncoding(Encoding), "unsupported encoding.") ||
5716  parseToken(AsmToken::Comma, "unexpected token in directive") ||
5717  check(parseIdentifier(Name), "expected identifier in directive"))
5718  return true;
5719 
5720  MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
5721 
5722  if (IsPersonality)
5723  getStreamer().emitCFIPersonality(Sym, Encoding);
5724  else
5725  getStreamer().emitCFILsda(Sym, Encoding);
5726  return false;
5727 }
5728 
5729 /// parseDirectiveCFIRememberState
5730 /// ::= .cfi_remember_state
5731 bool MasmParser::parseDirectiveCFIRememberState() {
5732  getStreamer().emitCFIRememberState();
5733  return false;
5734 }
5735 
5736 /// parseDirectiveCFIRestoreState
5737 /// ::= .cfi_remember_state
5738 bool MasmParser::parseDirectiveCFIRestoreState() {
5739  getStreamer().emitCFIRestoreState();
5740  return false;
5741 }
5742 
5743 /// parseDirectiveCFISameValue
5744 /// ::= .cfi_same_value register
5745 bool MasmParser::parseDirectiveCFISameValue(SMLoc DirectiveLoc) {
5746  int64_t Register = 0;
5747 
5748  if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5749  return true;
5750 
5751  getStreamer().emitCFISameValue(Register);
5752  return false;
5753 }
5754 
5755 /// parseDirectiveCFIRestore
5756 /// ::= .cfi_restore register
5757 bool MasmParser::parseDirectiveCFIRestore(SMLoc DirectiveLoc) {
5758  int64_t Register = 0;
5759  if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5760  return true;
5761 
5762  getStreamer().emitCFIRestore(Register);
5763  return false;
5764 }
5765 
5766 /// parseDirectiveCFIEscape
5767 /// ::= .cfi_escape expression[,...]
5768 bool MasmParser::parseDirectiveCFIEscape() {
5769  std::string Values;
5770  int64_t CurrValue;
5771  if (parseAbsoluteExpression(CurrValue))
5772  return true;
5773 
5774  Values.push_back((uint8_t)CurrValue);
5775 
5776  while (getLexer().is(AsmToken::Comma)) {
5777  Lex();
5778 
5779  if (parseAbsoluteExpression(CurrValue))
5780  return true;
5781 
5782  Values.push_back((uint8_t)CurrValue);
5783  }
5784 
5785  getStreamer().emitCFIEscape(Values);
5786  return false;
5787 }
5788 
5789 /// parseDirectiveCFIReturnColumn
5790 /// ::= .cfi_return_column register
5791 bool MasmParser::parseDirectiveCFIReturnColumn(SMLoc DirectiveLoc) {
5792  int64_t Register = 0;
5793  if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5794  return true;
5795  getStreamer().emitCFIReturnColumn(Register);
5796  return false;
5797 }
5798 
5799 /// parseDirectiveCFISignalFrame
5800 /// ::= .cfi_signal_frame
5801 bool MasmParser::parseDirectiveCFISignalFrame() {
5802  if (parseToken(AsmToken::EndOfStatement,
5803  "unexpected token in '.cfi_signal_frame'"))
5804  return true;
5805 
5806  getStreamer().emitCFISignalFrame();
5807  return false;
5808 }
5809 
5810 /// parseDirectiveCFIUndefined
5811 /// ::= .cfi_undefined register
5812 bool MasmParser::parseDirectiveCFIUndefined(SMLoc DirectiveLoc) {
5813  int64_t Register = 0;
5814 
5815  if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5816  return true;
5817 
5818  getStreamer().emitCFIUndefined(Register);
5819  return false;
5820 }
5821 
5822 /// parseDirectiveMacro
5823 /// ::= name macro [parameters]
5824 /// ["LOCAL" identifiers]
5825 /// parameters ::= parameter [, parameter]*
5826 /// parameter ::= name ":" qualifier
5827 /// qualifier ::= "req" | "vararg" | "=" macro_argument
5828 bool MasmParser::parseDirectiveMacro(StringRef Name, SMLoc NameLoc) {
5829  MCAsmMacroParameters Parameters;
5830  while (getLexer().isNot(AsmToken::EndOfStatement)) {
5831  if (!Parameters.empty() && Parameters.back().Vararg)
5832  return Error(Lexer.getLoc(),
5833  "Vararg parameter '" + Parameters.back().Name +
5834  "' should be last in the list of parameters");
5835 
5836  MCAsmMacroParameter Parameter;
5837  if (parseIdentifier(Parameter.Name))
5838  return TokError("expected identifier in 'macro' directive");
5839 
5840  // Emit an error if two (or more) named parameters share the same name.
5841  for (const MCAsmMacroParameter& CurrParam : Parameters)
5842  if (CurrParam.Name.equals_insensitive(Parameter.Name))
5843  return TokError("macro '" + Name + "' has multiple parameters"
5844  " named '" + Parameter.Name + "'");
5845 
5846  if (Lexer.is(AsmToken::Colon)) {
5847  Lex(); // consume ':'
5848 
5849  if (parseOptionalToken(AsmToken::Equal)) {
5850  // Default value
5851  SMLoc ParamLoc;
5852 
5853  ParamLoc = Lexer.getLoc();
5854  if (parseMacroArgument(nullptr, Parameter.Value))
5855  return true;
5856  } else {
5857  SMLoc QualLoc;
5858  StringRef Qualifier;
5859 
5860  QualLoc = Lexer.getLoc();
5861  if (parseIdentifier(Qualifier))
5862  return Error(QualLoc, "missing parameter qualifier for "
5863  "'" +
5864  Parameter.Name + "' in macro '" + Name +
5865  "'");
5866 
5867  if (Qualifier.equals_insensitive("req"))
5868  Parameter.Required = true;
5869  else if (Qualifier.equals_insensitive("vararg"))
5870  Parameter.Vararg = true;
5871  else
5872  return Error(QualLoc,
5873  Qualifier + " is not a valid parameter qualifier for '" +
5874  Parameter.Name + "' in macro '" + Name + "'");
5875  }
5876  }
5877 
5878  Parameters.push_back(std::move(Parameter));
5879 
5880  if (getLexer().is(AsmToken::Comma))