LLVM  10.0.0svn
COFFModuleDefinition.cpp
Go to the documentation of this file.
1 //===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Windows-specific.
10 // A parser for the module-definition file (.def file).
11 //
12 // The format of module-definition files is described in this document:
13 // https://msdn.microsoft.com/en-us/library/28d6s79h.aspx
14 //
15 //===----------------------------------------------------------------------===//
16 
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/Object/COFF.h"
22 #include "llvm/Object/Error.h"
23 #include "llvm/Support/Error.h"
24 #include "llvm/Support/Path.h"
26 
27 using namespace llvm::COFF;
28 using namespace llvm;
29 
30 namespace llvm {
31 namespace object {
32 
// Token categories; the Lexer and Parser below tag and compare tokens with
// these values.
// NOTE(review): only the first two enumerators are visible in this listing
// (the embedded numbering jumps from 35 to 51). The code below also uses
// Identifier, Comma, Equal, EqualEqual and the Kw* values as Kind, so they are
// presumably declared in the elided lines - confirm against the real file.
33 enum Kind {
 // Default kind of a default-constructed Token.
34  Unknown,
 // Returned by the lexer when the input is exhausted.
35  Eof,
51 };
52 
// A single lexed token: its Kind (K) plus the associated text (Value).
// NOTE(review): the declarations of K and Value are not visible in this
// listing (the embedded numbering skips 55-56); the constructor below
// initializes both.
53 struct Token {
 // Builds a token of kind T with text S; defaults to Unknown with empty text.
54  explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {}
57 };
58 
59 static bool isDecorated(StringRef Sym, bool MingwDef) {
60  // In def files, the symbols can either be listed decorated or undecorated.
61  //
62  // - For cdecl symbols, only the undecorated form is allowed.
63  // - For fastcall and vectorcall symbols, both fully decorated or
64  // undecorated forms can be present.
65  // - For stdcall symbols in non-MinGW environments, the decorated form is
66  // fully decorated with leading underscore and trailing stack argument
67  // size - like "_Func@0".
68  // - In MinGW def files, a decorated stdcall symbol does not include the
69  // leading underscore though, like "Func@0".
70 
71  // This function controls whether a leading underscore should be added to
72  // the given symbol name or not. For MinGW, treat a stdcall symbol name such
73  // as "Func@0" as undecorated, i.e. a leading underscore must be added.
74  // For non-MinGW, look for '@' in the whole string and consider "_Func@0"
75  // as decorated, i.e. don't add any more leading underscores.
76  // We can't check for a leading underscore here, since function names
77  // themselves can start with an underscore, while a second one still needs
78  // to be added.
79  return Sym.startswith("@") || Sym.contains("@@") || Sym.startswith("?") ||
80  (!MingwDef && Sym.contains('@'));
81 }
82 
83 static Error createError(const Twine &Err) {
84  return make_error<StringError>(StringRef(Err.str()),
85  object_error::parse_failed);
86 }
87 
// Tokenizer for module-definition text. Each call to lex() removes one token
// from the front of Buf and returns it.
88 class Lexer {
89 public:
 // Starts lexing at the beginning of S.
90  Lexer(StringRef S) : Buf(S) {}
91 
 // Returns the next token, or an Eof token once the input is exhausted.
92  Token lex() {
 // Whitespace around the remaining input is never significant.
93  Buf = Buf.trim();
94  if (Buf.empty())
95  return Token(Eof);
96 
 // Dispatch on the first remaining character.
97  switch (Buf[0]) {
 // A NUL character is treated like end of input.
98  case '\0':
99  return Token(Eof);
 // ';' starts a comment: skip up to the next newline (or to the end of the
 // buffer if there is none) and lex again from there.
100  case ';': {
101  size_t End = Buf.find('\n');
102  Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
103  return lex();
104  }
 // '=' is either the two-character "==" token or a plain "=".
105  case '=':
106  Buf = Buf.drop_front();
107  if (Buf.startswith("=")) {
108  Buf = Buf.drop_front();
109  return Token(EqualEqual, "==");
110  }
111  return Token(Equal, "=");
112  case ',':
113  Buf = Buf.drop_front();
114  return Token(Comma, ",");
 // Quoted text: everything after the opening quote is split at the next
 // '"'. The first part becomes an Identifier token (it may be empty, e.g.
 // for `""`); the second part is the new remaining input.
115  case '"': {
116  StringRef S;
117  std::tie(S, Buf) = Buf.substr(1).split('"');
118  return Token(Identifier, S);
119  }
 // Bare word: runs up to the first '=', ',', ';' or whitespace character.
120  default: {
121  size_t End = Buf.find_first_of("=,;\r\n \t\v");
122  StringRef Word = Buf.substr(0, End);
 // NOTE(review): the statement that declares K and opens the .Case chain
 // below, and the line that terminates the chain, are not visible in this
 // listing (the embedded numbering skips 123 and 135). Presumably a
 // StringSwitch over Word with a fallback kind - confirm against the real
 // file. The visible cases map the upper-case keyword spellings to Kw* kinds.
124  .Case("BASE", KwBase)
125  .Case("CONSTANT", KwConstant)
126  .Case("DATA", KwData)
127  .Case("EXPORTS", KwExports)
128  .Case("HEAPSIZE", KwHeapsize)
129  .Case("LIBRARY", KwLibrary)
130  .Case("NAME", KwName)
131  .Case("NONAME", KwNoname)
132  .Case("PRIVATE", KwPrivate)
133  .Case("STACKSIZE", KwStacksize)
134  .Case("VERSION", KwVersion)
 // Consume the word; the delimiter itself stays in Buf for the next call.
136  Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
137  return Token(K, Word);
138  }
139  }
140  }
141 
142 private:
 // The not-yet-consumed tail of the input.
143  StringRef Buf;
144 };
145 
146 class Parser {
147 public:
148  explicit Parser(StringRef S, MachineTypes M, bool B)
149  : Lex(S), Machine(M), MingwDef(B) {}
150 
 // NOTE(review): the line that opens this function (its signature) is not
 // visible in this listing (the embedded numbering skips 151).
 //
 // Parses directives one at a time until the current token is Eof. The first
 // error from parseOne() is returned; otherwise the accumulated Info is.
152  do {
153  if (Error Err = parseOne())
154  return std::move(Err);
155  } while (Tok.K != Eof);
156  return Info;
157  }
158 
159 private:
160  void read() {
161  if (Stack.empty()) {
162  Tok = Lex.lex();
163  return;
164  }
165  Tok = Stack.back();
166  Stack.pop_back();
167  }
168 
169  Error readAsInt(uint64_t *I) {
170  read();
171  if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I))
172  return createError("integer expected");
173  return Error::success();
174  }
175 
176  Error expect(Kind Expected, StringRef Msg) {
177  read();
178  if (Tok.K != Expected)
179  return createError(Msg);
180  return Error::success();
181  }
182 
183  void unget() { Stack.push_back(Tok); }
184 
185  Error parseOne() {
186  read();
187  switch (Tok.K) {
188  case Eof:
189  return Error::success();
190  case KwExports:
191  for (;;) {
192  read();
193  if (Tok.K != Identifier) {
194  unget();
195  return Error::success();
196  }
197  if (Error Err = parseExport())
198  return Err;
199  }
200  case KwHeapsize:
201  return parseNumbers(&Info.HeapReserve, &Info.HeapCommit);
202  case KwStacksize:
203  return parseNumbers(&Info.StackReserve, &Info.StackCommit);
204  case KwLibrary:
205  case KwName: {
206  bool IsDll = Tok.K == KwLibrary; // Check before parseName.
207  std::string Name;
208  if (Error Err = parseName(&Name, &Info.ImageBase))
209  return Err;
210 
211  Info.ImportName = Name;
212 
213  // Set the output file, but don't override /out if it was already passed.
214  if (Info.OutputFile.empty()) {
215  Info.OutputFile = Name;
216  // Append the appropriate file extension if not already present.
217  if (!sys::path::has_extension(Name))
218  Info.OutputFile += IsDll ? ".dll" : ".exe";
219  }
220 
221  return Error::success();
222  }
223  case KwVersion:
224  return parseVersion(&Info.MajorImageVersion, &Info.MinorImageVersion);
225  default:
226  return createError("unknown directive: " + Tok.Value);
227  }
228  }
229 
 // Parses one entry of an EXPORTS list and appends it to Info.Exports.
 // On entry Tok holds the identifier that starts the entry (parseOne() reads
 // it before calling). Accepted shape, as handled below:
 //   name | extname = name, followed by any number of modifiers:
 //   "@ordinal" / "@ ordinal" (optionally followed by NONAME), DATA,
 //   CONSTANT, PRIVATE, and "== target".
 // Returns an error only when '=' is not followed by an identifier.
230  Error parseExport() {
 // NOTE(review): the declaration of E is not visible in this listing (the
 // embedded numbering skips 231).
232  E.Name = Tok.Value;
233  read();
 // "ext = name": the first identifier becomes the external name and the
 // identifier after '=' becomes Name.
234  if (Tok.K == Equal) {
235  read();
236  if (Tok.K != Identifier)
237  return createError("identifier expected, but got " + Tok.Value);
238  E.ExtName = E.Name;
239  E.Name = Tok.Value;
240  } else {
241  unget();
242  }
243 
 // NOTE(review): the line that opens the block closed at 249 is not visible
 // in this listing (the embedded numbering skips 244). Presumably a machine
 // check like the one on line 289 - confirm against the real file.
 // Names that isDecorated() rejects get a leading underscore.
245  if (!isDecorated(E.Name, MingwDef))
246  E.Name = (std::string("_").append(E.Name));
247  if (!E.ExtName.empty() && !isDecorated(E.ExtName, MingwDef))
248  E.ExtName = (std::string("_").append(E.ExtName));
249  }
250 
 // Consume modifiers until a token that is none of them shows up; that token
 // is pushed back and the export is recorded.
251  for (;;) {
252  read();
 // NOTE(review): Tok.Value[0] is evaluated without an emptiness check. The
 // lexer's quoted-string case can produce an Identifier with empty text
 // (input `""`), so this index looks reachable out of range - confirm and
 // consider testing the prefix with startswith("@") instead.
253  if (Tok.K == Identifier && Tok.Value[0] == '@') {
254  if (Tok.Value == "@") {
255  // "foo @ 10"
256  read();
 // NOTE(review): the result of getAsInteger() is ignored here, so a
 // non-numeric token after a lone "@" is consumed without any error.
257  Tok.Value.getAsInteger(10, E.Ordinal);
258  } else if (Tok.Value.drop_front().getAsInteger(10, E.Ordinal)) {
259  // "foo \n @bar" - Not an ordinal modifier at all, but the next
260  // export (fastcall decorated) - complete the current one.
261  unget();
262  Info.Exports.push_back(E);
263  return Error::success();
264  }
265  // "foo @10"
 // An ordinal may be followed by NONAME; anything else is pushed back.
266  read();
267  if (Tok.K == KwNoname) {
268  E.Noname = true;
269  } else {
270  unget();
271  }
272  continue;
273  }
274  if (Tok.K == KwData) {
275  E.Data = true;
276  continue;
277  }
278  if (Tok.K == KwConstant) {
279  E.Constant = true;
280  continue;
281  }
282  if (Tok.K == KwPrivate) {
283  E.Private = true;
284  continue;
285  }
 // "== target": the next token's text becomes the alias target.
 // NOTE(review): the kind of that token is not checked, so Eof or a keyword
 // would be accepted as the target text.
286  if (Tok.K == EqualEqual) {
287  read();
288  E.AliasTarget = Tok.Value;
289  if (Machine == IMAGE_FILE_MACHINE_I386 && !isDecorated(E.AliasTarget, MingwDef))
290  E.AliasTarget = std::string("_").append(E.AliasTarget);
291  continue;
292  }
 // Not a modifier: leave the token for the caller and finish this export.
293  unget();
294  Info.Exports.push_back(E);
295  return Error::success();
296  }
297  }
298 
299  // HEAPSIZE/STACKSIZE reserve[,commit]
300  Error parseNumbers(uint64_t *Reserve, uint64_t *Commit) {
301  if (Error Err = readAsInt(Reserve))
302  return Err;
303  read();
304  if (Tok.K != Comma) {
305  unget();
306  Commit = nullptr;
307  return Error::success();
308  }
309  if (Error Err = readAsInt(Commit))
310  return Err;
311  return Error::success();
312  }
313 
314  // NAME outputPath [BASE=address]
315  Error parseName(std::string *Out, uint64_t *Baseaddr) {
316  read();
317  if (Tok.K == Identifier) {
318  *Out = Tok.Value;
319  } else {
320  *Out = "";
321  unget();
322  return Error::success();
323  }
324  read();
325  if (Tok.K == KwBase) {
326  if (Error Err = expect(Equal, "'=' expected"))
327  return Err;
328  if (Error Err = readAsInt(Baseaddr))
329  return Err;
330  } else {
331  unget();
332  *Baseaddr = 0;
333  }
334  return Error::success();
335  }
336 
337  // VERSION major[.minor]
338  Error parseVersion(uint32_t *Major, uint32_t *Minor) {
339  read();
340  if (Tok.K != Identifier)
341  return createError("identifier expected, but got " + Tok.Value);
342  StringRef V1, V2;
343  std::tie(V1, V2) = Tok.Value.split('.');
344  if (V1.getAsInteger(10, *Major))
345  return createError("integer expected, but got " + Tok.Value);
346  if (V2.empty())
347  *Minor = 0;
348  else if (V2.getAsInteger(10, *Minor))
349  return createError("integer expected, but got " + Tok.Value);
350  return Error::success();
351  }
352 
 // Source of fresh tokens.
353  Lexer Lex;
 // The token most recently loaded by read().
354  Token Tok;
 // Tokens pushed back by unget(); read() pops from the back before asking
 // the lexer.
355  std::vector<Token> Stack;
 // NOTE(review): the declarations of Machine and Info, both used by the
 // methods above, are not visible in this listing (the embedded numbering
 // skips 356-357).
 // Forwarded to isDecorated() when deciding whether to add an underscore.
358  bool MingwDef;
359 };
360 
363  bool MingwDef) {
 // NOTE(review): the beginning of this function's signature is not visible
 // in this listing (the embedded numbering skips 361-362).
 // Builds a one-shot Parser over the buffer's text and returns its result.
364  return Parser(MB.getBuffer(), Machine, MingwDef).parse();
365 }
366 
367 } // namespace object
368 } // namespace llvm
This class represents lattice values for constants.
Definition: AllocatorList.h:23
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
LLVM_NODISCARD bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:270
std::string Name
The name of the export as specified in the .def file or on the command line, i.e. ...
bool has_extension(const Twine &path, Style style=Style::native)
Has extension?
Definition: Path.cpp:656
static bool isDecorated(StringRef Sym, bool MingwDef)
Parser(StringRef S, MachineTypes M, bool B)
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:67
Expected< COFFModuleDefinition > parseCOFFModuleDefinition(MemoryBufferRef MB, COFF::MachineTypes Machine, bool MingwDef=false)
MachineTypes
Definition: COFF.h:93
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
StringRef getBuffer() const
Definition: MemoryBuffer.h:272
support::ulittle32_t Word
Definition: IRSymtab.h:50
LLVM_NODISCARD R Default(T Value)
Definition: StringSwitch.h:181
Tagged union holding either a T or a Error.
Definition: yaml2obj.h:21
LLVM_NODISCARD StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:592
LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:140
static Error createError(const Twine &Err)
std::string AliasTarget
Creates a weak alias.
COFF::MachineTypes Machine
Definition: COFFYAML.cpp:365
Analysis containing CSE Info
Definition: CSEInfo.cpp:20
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:42
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
LLVM_NODISCARD bool contains(StringRef Other) const
Return true if the given string is a substring of *this, and false otherwise.
Definition: StringRef.h:446
Token(Kind T=Unknown, StringRef S="")
std::enable_if< std::numeric_limits< T >::is_signed, bool >::type getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:492
static ErrorSuccess success()
Create a success value.
Definition: Error.h:326
std::string ExtName
The external, exported name.
LLVM_NODISCARD std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:710
Expected< COFFModuleDefinition > parse()
value_type read(const void *memory, endianness endian)
Read a value of a particular endianness from memory.
Definition: Endian.h:65
#define I(x, y, z)
Definition: MD5.cpp:58
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
static Version parseVersion(StringRef Name)
LLVM Value Representation.
Definition: Value.h:74
Lightweight error class with error context and mandatory checking.
Definition: Error.h:157
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
static llvm::Error parse(DataExtractor &Data, uint64_t BaseAddr, LineEntryCallback const &Callback)
Definition: LineTable.cpp:54