LLVM  6.0.0svn
COFFModuleDefinition.cpp
Go to the documentation of this file.
1 //===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Windows-specific.
11 // A parser for the module-definition file (.def file).
12 //
13 // The format of module-definition files is described in this document:
14 // https://msdn.microsoft.com/en-us/library/28d6s79h.aspx
15 //
16 //===----------------------------------------------------------------------===//
17 
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/ADT/StringSwitch.h"
21 #include "llvm/Object/COFF.h"
23 #include "llvm/Object/Error.h"
24 #include "llvm/Support/Error.h"
25 #include "llvm/Support/Path.h"
27 
28 using namespace llvm::COFF;
29 using namespace llvm;
30 
31 namespace llvm {
32 namespace object {
33 
34 enum Kind {
35  Unknown,
36  Eof,
51 };
52 
53 struct Token {
54  explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {}
57 };
58 
59 static bool isDecorated(StringRef Sym, bool MingwDef) {
60  // In def files, the symbols can either be listed decorated or undecorated.
61  //
62  // - For cdecl symbols, only the undecorated form is allowed.
63  // - For fastcall and vectorcall symbols, both fully decorated or
64  // undecorated forms can be present.
65  // - For stdcall symbols in non-MinGW environments, the decorated form is
66  // fully decorated with leading underscore and trailing stack argument
67  // size - like "_Func@0".
68  // - In MinGW def files, a decorated stdcall symbol does not include the
69  // leading underscore though, like "Func@0".
70 
71  // This function controls whether a leading underscore should be added to
72  // the given symbol name or not. For MinGW, treat a stdcall symbol name such
73  // as "Func@0" as undecorated, i.e. a leading underscore must be added.
74  // For non-MinGW, look for '@' in the whole string and consider "_Func@0"
75  // as decorated, i.e. don't add any more leading underscores.
76  // We can't check for a leading underscore here, since function names
77  // themselves can start with an underscore, while a second one still needs
78  // to be added.
79  return Sym.startswith("@") || Sym.contains("@@") || Sym.startswith("?") ||
80  (!MingwDef && Sym.contains('@'));
81 }
82 
83 static Error createError(const Twine &Err) {
84  return make_error<StringError>(StringRef(Err.str()),
85  object_error::parse_failed);
86 }
87 
88 class Lexer {
89 public:
90  Lexer(StringRef S) : Buf(S) {}
91 
92  Token lex() {
93  Buf = Buf.trim();
94  if (Buf.empty())
95  return Token(Eof);
96 
97  switch (Buf[0]) {
98  case '\0':
99  return Token(Eof);
100  case ';': {
101  size_t End = Buf.find('\n');
102  Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
103  return lex();
104  }
105  case '=':
106  Buf = Buf.drop_front();
107  // GNU dlltool accepts both = and ==.
108  if (Buf.startswith("="))
109  Buf = Buf.drop_front();
110  return Token(Equal, "=");
111  case ',':
112  Buf = Buf.drop_front();
113  return Token(Comma, ",");
114  case '"': {
115  StringRef S;
116  std::tie(S, Buf) = Buf.substr(1).split('"');
117  return Token(Identifier, S);
118  }
119  default: {
120  size_t End = Buf.find_first_of("=,\r\n \t\v");
121  StringRef Word = Buf.substr(0, End);
123  .Case("BASE", KwBase)
124  .Case("CONSTANT", KwConstant)
125  .Case("DATA", KwData)
126  .Case("EXPORTS", KwExports)
127  .Case("HEAPSIZE", KwHeapsize)
128  .Case("LIBRARY", KwLibrary)
129  .Case("NAME", KwName)
130  .Case("NONAME", KwNoname)
131  .Case("PRIVATE", KwPrivate)
132  .Case("STACKSIZE", KwStacksize)
133  .Case("VERSION", KwVersion)
135  Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
136  return Token(K, Word);
137  }
138  }
139  }
140 
141 private:
142  StringRef Buf;
143 };
144 
145 class Parser {
146 public:
147  explicit Parser(StringRef S, MachineTypes M, bool B)
148  : Lex(S), Machine(M), MingwDef(B) {}
149 
151  do {
152  if (Error Err = parseOne())
153  return std::move(Err);
154  } while (Tok.K != Eof);
155  return Info;
156  }
157 
158 private:
159  void read() {
160  if (Stack.empty()) {
161  Tok = Lex.lex();
162  return;
163  }
164  Tok = Stack.back();
165  Stack.pop_back();
166  }
167 
168  Error readAsInt(uint64_t *I) {
169  read();
170  if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I))
171  return createError("integer expected");
172  return Error::success();
173  }
174 
175  Error expect(Kind Expected, StringRef Msg) {
176  read();
177  if (Tok.K != Expected)
178  return createError(Msg);
179  return Error::success();
180  }
181 
182  void unget() { Stack.push_back(Tok); }
183 
184  Error parseOne() {
185  read();
186  switch (Tok.K) {
187  case Eof:
188  return Error::success();
189  case KwExports:
190  for (;;) {
191  read();
192  if (Tok.K != Identifier) {
193  unget();
194  return Error::success();
195  }
196  if (Error Err = parseExport())
197  return Err;
198  }
199  case KwHeapsize:
200  return parseNumbers(&Info.HeapReserve, &Info.HeapCommit);
201  case KwStacksize:
202  return parseNumbers(&Info.StackReserve, &Info.StackCommit);
203  case KwLibrary:
204  case KwName: {
205  bool IsDll = Tok.K == KwLibrary; // Check before parseName.
206  std::string Name;
207  if (Error Err = parseName(&Name, &Info.ImageBase))
208  return Err;
209 
210  Info.ImportName = Name;
211 
212  // Set the output file, but don't override /out if it was already passed.
213  if (Info.OutputFile.empty()) {
214  Info.OutputFile = Name;
215  // Append the appropriate file extension if not already present.
216  if (!sys::path::has_extension(Name))
217  Info.OutputFile += IsDll ? ".dll" : ".exe";
218  }
219 
220  return Error::success();
221  }
222  case KwVersion:
223  return parseVersion(&Info.MajorImageVersion, &Info.MinorImageVersion);
224  default:
225  return createError("unknown directive: " + Tok.Value);
226  }
227  }
228 
229  Error parseExport() {
231  E.Name = Tok.Value;
232  read();
233  if (Tok.K == Equal) {
234  read();
235  if (Tok.K != Identifier)
236  return createError("identifier expected, but got " + Tok.Value);
237  E.ExtName = E.Name;
238  E.Name = Tok.Value;
239  } else {
240  unget();
241  }
242 
244  if (!isDecorated(E.Name, MingwDef))
245  E.Name = (std::string("_").append(E.Name));
246  if (!E.ExtName.empty() && !isDecorated(E.ExtName, MingwDef))
247  E.ExtName = (std::string("_").append(E.ExtName));
248  }
249 
250  for (;;) {
251  read();
252  if (Tok.K == Identifier && Tok.Value[0] == '@') {
253  if (Tok.Value == "@") {
254  // "foo @ 10"
255  read();
256  Tok.Value.getAsInteger(10, E.Ordinal);
257  } else if (Tok.Value.drop_front().getAsInteger(10, E.Ordinal)) {
258  // "foo \n @bar" - Not an ordinal modifier at all, but the next
259  // export (fastcall decorated) - complete the current one.
260  unget();
261  Info.Exports.push_back(E);
262  return Error::success();
263  }
264  // "foo @10"
265  read();
266  if (Tok.K == KwNoname) {
267  E.Noname = true;
268  } else {
269  unget();
270  }
271  continue;
272  }
273  if (Tok.K == KwData) {
274  E.Data = true;
275  continue;
276  }
277  if (Tok.K == KwConstant) {
278  E.Constant = true;
279  continue;
280  }
281  if (Tok.K == KwPrivate) {
282  E.Private = true;
283  continue;
284  }
285  unget();
286  Info.Exports.push_back(E);
287  return Error::success();
288  }
289  }
290 
291  // HEAPSIZE/STACKSIZE reserve[,commit]
292  Error parseNumbers(uint64_t *Reserve, uint64_t *Commit) {
293  if (Error Err = readAsInt(Reserve))
294  return Err;
295  read();
296  if (Tok.K != Comma) {
297  unget();
298  Commit = nullptr;
299  return Error::success();
300  }
301  if (Error Err = readAsInt(Commit))
302  return Err;
303  return Error::success();
304  }
305 
306  // NAME outputPath [BASE=address]
307  Error parseName(std::string *Out, uint64_t *Baseaddr) {
308  read();
309  if (Tok.K == Identifier) {
310  *Out = Tok.Value;
311  } else {
312  *Out = "";
313  unget();
314  return Error::success();
315  }
316  read();
317  if (Tok.K == KwBase) {
318  if (Error Err = expect(Equal, "'=' expected"))
319  return Err;
320  if (Error Err = readAsInt(Baseaddr))
321  return Err;
322  } else {
323  unget();
324  *Baseaddr = 0;
325  }
326  return Error::success();
327  }
328 
329  // VERSION major[.minor]
330  Error parseVersion(uint32_t *Major, uint32_t *Minor) {
331  read();
332  if (Tok.K != Identifier)
333  return createError("identifier expected, but got " + Tok.Value);
334  StringRef V1, V2;
335  std::tie(V1, V2) = Tok.Value.split('.');
336  if (V1.getAsInteger(10, *Major))
337  return createError("integer expected, but got " + Tok.Value);
338  if (V2.empty())
339  *Minor = 0;
340  else if (V2.getAsInteger(10, *Minor))
341  return createError("integer expected, but got " + Tok.Value);
342  return Error::success();
343  }
344 
345  Lexer Lex;
346  Token Tok;
347  std::vector<Token> Stack;
350  bool MingwDef;
351 };
352 
355  bool MingwDef) {
356  return Parser(MB.getBuffer(), Machine, MingwDef).parse();
357 }
358 
359 } // namespace object
360 } // namespace llvm
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool contains(StringRef Other) const
Return true if the given string is a substring of *this, and false otherwise.
Definition: StringRef.h:448
bool has_extension(const Twine &path, Style style=Style::native)
Has extension?
Definition: Path.cpp:666
static bool isDecorated(StringRef Sym, bool MingwDef)
Parser(StringRef S, MachineTypes M, bool B)
Expected< COFFModuleDefinition > parseCOFFModuleDefinition(MemoryBufferRef MB, COFF::MachineTypes Machine, bool MingwDef=false)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
StringRef getBuffer() const
Definition: MemoryBuffer.h:170
support::ulittle32_t Word
Definition: IRSymtab.h:51
Tagged union holding either a T or a Error.
Definition: CachePruning.h:23
Definition: regcomp.c:196
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:267
LLVM_ATTRIBUTE_ALWAYS_INLINE StringSwitch & Case(const char(&S)[N], const T &Value)
Definition: StringSwitch.h:74
static Error createError(const Twine &Err)
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:133
COFF::MachineTypes Machine
Definition: COFFYAML.cpp:363
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:598
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:43
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static const unsigned End
LLVM_ATTRIBUTE_ALWAYS_INLINE R Default(const T &Value) const
Definition: StringSwitch.h:244
Token(Kind T=Unknown, StringRef S="")
std::enable_if< std::numeric_limits< T >::is_signed, bool >::type getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:497
static ErrorSuccess success()
Create a success value.
Definition: Error.h:313
LLVM_NODISCARD std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:727
Expected< COFFModuleDefinition > parse()
value_type read(const void *memory, endianness endian)
Read a value of a particular endianness from memory.
Definition: Endian.h:66
#define I(x, y, z)
Definition: MD5.cpp:58
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
MachineTypes
Definition: COFF.h:94
static Version parseVersion(StringRef Name)
LLVM Value Representation.
Definition: Value.h:73
Lightweight error class with error context and mandatory checking.
Definition: Error.h:156
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49