LLVM  14.0.0git
COFFModuleDefinition.cpp
Go to the documentation of this file.
1 //===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Windows-specific.
10 // A parser for the module-definition file (.def file).
11 //
12 // The format of module-definition files is described in this document:
13 // https://msdn.microsoft.com/en-us/library/28d6s79h.aspx
14 //
15 //===----------------------------------------------------------------------===//
16 
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/Object/COFF.h"
22 #include "llvm/Object/Error.h"
23 #include "llvm/Support/Error.h"
24 #include "llvm/Support/Path.h"
26 
27 using namespace llvm::COFF;
28 using namespace llvm;
29 
30 namespace llvm {
31 namespace object {
32 
// Token categories returned by Lexer::lex() and switched on by Parser.
// NOTE(review): the embedded listing numbers jump from 35 to 51 here, so the
// remaining enumerators (Identifier, Comma, Equal, EqualEqual and the Kw*
// keywords used further down) are not visible in this view - confirm against
// the original file.
33 enum Kind {
34  Unknown,
35  Eof,
51 };
52 
// A single lexed token: its category (K) and the text it was lexed from
// (Value), both set by the constructor below.
// NOTE(review): listing lines 55-56, which presumably declare the K and Value
// members, are missing from this view - confirm against the original file.
53 struct Token {
 // Defaults to an Unknown token with empty text.
54  explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {}
57 };
58 
59 static bool isDecorated(StringRef Sym, bool MingwDef) {
60  // In def files, the symbols can either be listed decorated or undecorated.
61  //
62  // - For cdecl symbols, only the undecorated form is allowed.
63  // - For fastcall and vectorcall symbols, both fully decorated or
64  // undecorated forms can be present.
65  // - For stdcall symbols in non-MinGW environments, the decorated form is
66  // fully decorated with leading underscore and trailing stack argument
67  // size - like "_Func@0".
68  // - In MinGW def files, a decorated stdcall symbol does not include the
69  // leading underscore though, like "Func@0".
70 
71  // This function controls whether a leading underscore should be added to
72  // the given symbol name or not. For MinGW, treat a stdcall symbol name such
73  // as "Func@0" as undecorated, i.e. a leading underscore must be added.
74  // For non-MinGW, look for '@' in the whole string and consider "_Func@0"
75  // as decorated, i.e. don't add any more leading underscores.
76  // We can't check for a leading underscore here, since function names
77  // themselves can start with an underscore, while a second one still needs
78  // to be added.
79  return Sym.startswith("@") || Sym.contains("@@") || Sym.startswith("?") ||
80  (!MingwDef && Sym.contains('@'));
81 }
82 
// Splits def-file text into Tokens. Buf always holds the not-yet-consumed
// remainder of the input; each lex() call removes one token from its front.
83 class Lexer {
84 public:
85  Lexer(StringRef S) : Buf(S) {}
86 
 // Returns the next token, or an Eof token once the input is exhausted.
87  Token lex() {
 // Surrounding whitespace is dropped before looking at the next character.
88  Buf = Buf.trim();
89  if (Buf.empty())
90  return Token(Eof);
91 
92  switch (Buf[0]) {
 // A NUL character is treated the same as running out of input.
93  case '\0':
94  return Token(Eof);
 // ';' starts a comment: skip up to the next newline (or to the end of the
 // input if there is none) and lex whatever follows.
95  case ';': {
96  size_t End = Buf.find('\n');
97  Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
98  return lex();
99  }
 // "==" is recognised as one token; a single '=' otherwise.
100  case '=':
101  Buf = Buf.drop_front();
102  if (Buf.startswith("=")) {
103  Buf = Buf.drop_front();
104  return Token(EqualEqual, "==");
105  }
106  return Token(Equal, "=");
107  case ',':
108  Buf = Buf.drop_front();
109  return Token(Comma, ",");
 // Quoted text: everything after the opening quote up to the next '"' is
 // returned as an Identifier; Buf continues after that closing quote.
110  case '"': {
111  StringRef S;
112  std::tie(S, Buf) = Buf.substr(1).split('"');
113  return Token(Identifier, S);
114  }
 // Bare word: runs up to the first delimiter character listed below and is
 // then mapped to a keyword kind where the spelling matches one.
 // NOTE(review): listing lines 118 and 130 - the start of the lookup that
 // defines K and its terminating default case - are missing from this view;
 // confirm against the original file.
115  default: {
116  size_t End = Buf.find_first_of("=,;\r\n \t\v");
117  StringRef Word = Buf.substr(0, End);
119  .Case("BASE", KwBase)
120  .Case("CONSTANT", KwConstant)
121  .Case("DATA", KwData)
122  .Case("EXPORTS", KwExports)
123  .Case("HEAPSIZE", KwHeapsize)
124  .Case("LIBRARY", KwLibrary)
125  .Case("NAME", KwName)
126  .Case("NONAME", KwNoname)
127  .Case("PRIVATE", KwPrivate)
128  .Case("STACKSIZE", KwStacksize)
129  .Case("VERSION", KwVersion)
 // Consume the word; if no delimiter was found the word was the last thing
 // in the input.
131  Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
132  return Token(K, Word);
133  }
134  }
135  }
136 
137 private:
 // Unconsumed remainder of the input text.
138  StringRef Buf;
139 };
140 
141 class Parser {
142 public:
 // Sets up a parser over the def-file text S. M is stored as the target
 // machine type and B as the MinGW-flavour flag that is later passed on to
 // isDecorated().
143  explicit Parser(StringRef S, MachineTypes M, bool B)
144  : Lex(S), Machine(M), MingwDef(B) {}
145 
 // NOTE(review): the signature line of this method (listing line 146) is
 // missing from this view; the cross-reference index at the end of the page
 // lists it as `Expected< COFFModuleDefinition > parse()` - confirm against
 // the original file.
 //
 // Calls parseOne() repeatedly until the current token is Eof. The first
 // error stops parsing and is returned; otherwise the accumulated Info is
 // returned.
147  do {
148  if (Error Err = parseOne())
149  return std::move(Err);
150  } while (Tok.K != Eof);
151  return Info;
152  }
153 
154 private:
155  void read() {
156  if (Stack.empty()) {
157  Tok = Lex.lex();
158  return;
159  }
160  Tok = Stack.back();
161  Stack.pop_back();
162  }
163 
164  Error readAsInt(uint64_t *I) {
165  read();
166  if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I))
167  return createError("integer expected");
168  return Error::success();
169  }
170 
171  Error expect(Kind Expected, StringRef Msg) {
172  read();
173  if (Tok.K != Expected)
174  return createError(Msg);
175  return Error::success();
176  }
177 
178  void unget() { Stack.push_back(Tok); }
179 
 // Parses one top-level directive, dispatching on the kind of the next token.
 // Returns success at end of input and an "unknown directive" error for any
 // token that does not start a known directive.
180  Error parseOne() {
181  read();
182  switch (Tok.K) {
183  case Eof:
184  return Error::success();
 // EXPORTS: consume export entries for as long as the next token is an
 // Identifier; the first other token is pushed back and ends the section.
185  case KwExports:
186  for (;;) {
187  read();
188  if (Tok.K != Identifier) {
189  unget();
190  return Error::success();
191  }
192  if (Error Err = parseExport())
193  return Err;
194  }
195  case KwHeapsize:
196  return parseNumbers(&Info.HeapReserve, &Info.HeapCommit);
197  case KwStacksize:
198  return parseNumbers(&Info.StackReserve, &Info.StackCommit);
 // LIBRARY and NAME share one code path and differ only in the default
 // file extension chosen below.
199  case KwLibrary:
200  case KwName: {
201  bool IsDll = Tok.K == KwLibrary; // Check before parseName.
202  std::string Name;
203  if (Error Err = parseName(&Name, &Info.ImageBase))
204  return Err;
205 
206  Info.ImportName = Name;
207 
208  // Set the output file, but don't override /out if it was already passed.
209  if (Info.OutputFile.empty()) {
210  Info.OutputFile = Name;
211  // Append the appropriate file extension if not already present.
 // NOTE(review): listing line 212 is missing from this view. The comment
 // above implies the append below is guarded by an "already has an
 // extension" check (the cross-reference index lists
 // llvm::sys::path::has_extension) - confirm against the original file.
213  Info.OutputFile += IsDll ? ".dll" : ".exe";
214  }
215 
216  return Error::success();
217  }
218  case KwVersion:
219  return parseVersion(&Info.MajorImageVersion, &Info.MinorImageVersion);
220  default:
221  return createError("unknown directive: " + Tok.Value);
222  }
223  }
224 
 // Parses a single entry of an EXPORTS section and appends it to
 // Info.Exports. On entry Tok is the entry's first identifier. Returns an
 // error only when '=' is not followed by an identifier.
225  Error parseExport() {
226  COFFShortExport E;
227  E.Name = std::string(Tok.Value);
228  read();
 // "A = B": the first identifier (A) becomes ExtName and the identifier
 // after '=' (B) becomes Name.
229  if (Tok.K == Equal) {
230  read();
231  if (Tok.K != Identifier)
232  return createError("identifier expected, but got " + Tok.Value);
233  E.ExtName = E.Name;
234  E.Name = std::string(Tok.Value);
235  } else {
236  unget();
237  }
238 
 // Undecorated names get a leading underscore prepended.
 // NOTE(review): listing line 239 is missing from this view; it presumably
 // opens the block closed at listing line 244 (the alias handling further
 // down guards the same operation with Machine == IMAGE_FILE_MACHINE_I386) -
 // confirm against the original file.
240  if (!isDecorated(E.Name, MingwDef))
241  E.Name = (std::string("_").append(E.Name));
242  if (!E.ExtName.empty() && !isDecorated(E.ExtName, MingwDef))
243  E.ExtName = (std::string("_").append(E.ExtName));
244  }
245 
 // Optional modifiers, in any order: an "@ordinal" (optionally followed by
 // NONAME), DATA, CONSTANT, PRIVATE, and "== alias". The first token that is
 // none of these is pushed back and completes the entry.
246  for (;;) {
247  read();
 // NOTE(review): the lexer can return an Identifier with empty text (a
 // quoted ""), in which case Tok.Value[0] looks like an out-of-range read -
 // confirm whether that input is reachable here.
248  if (Tok.K == Identifier && Tok.Value[0] == '@') {
249  if (Tok.Value == "@") {
250  // "foo @ 10"
251  read();
 // NOTE(review): the result of getAsInteger() is ignored here, so a
 // non-numeric token after a lone '@' is silently accepted - confirm this
 // is intended.
252  Tok.Value.getAsInteger(10, E.Ordinal);
253  } else if (Tok.Value.drop_front().getAsInteger(10, E.Ordinal)) {
254  // "foo \n @bar" - Not an ordinal modifier at all, but the next
255  // export (fastcall decorated) - complete the current one.
256  unget();
257  Info.Exports.push_back(E);
258  return Error::success();
259  }
260  // "foo @10"
261  read();
262  if (Tok.K == KwNoname) {
263  E.Noname = true;
264  } else {
265  unget();
266  }
267  continue;
268  }
269  if (Tok.K == KwData) {
270  E.Data = true;
271  continue;
272  }
273  if (Tok.K == KwConstant) {
274  E.Constant = true;
275  continue;
276  }
277  if (Tok.K == KwPrivate) {
278  E.Private = true;
279  continue;
280  }
 // "== target": the text of the next token is taken as the alias target; on
 // i386 an undecorated target gets a leading underscore.
281  if (Tok.K == EqualEqual) {
282  read();
283  E.AliasTarget = std::string(Tok.Value);
284  if (Machine == IMAGE_FILE_MACHINE_I386 && !isDecorated(E.AliasTarget, MingwDef))
285  E.AliasTarget = std::string("_").append(E.AliasTarget);
286  continue;
287  }
288  unget();
289  Info.Exports.push_back(E);
290  return Error::success();
291  }
292  }
293 
294  // HEAPSIZE/STACKSIZE reserve[,commit]
295  Error parseNumbers(uint64_t *Reserve, uint64_t *Commit) {
296  if (Error Err = readAsInt(Reserve))
297  return Err;
298  read();
299  if (Tok.K != Comma) {
300  unget();
301  Commit = nullptr;
302  return Error::success();
303  }
304  if (Error Err = readAsInt(Commit))
305  return Err;
306  return Error::success();
307  }
308 
309  // NAME outputPath [BASE=address]
310  Error parseName(std::string *Out, uint64_t *Baseaddr) {
311  read();
312  if (Tok.K == Identifier) {
313  *Out = std::string(Tok.Value);
314  } else {
315  *Out = "";
316  unget();
317  return Error::success();
318  }
319  read();
320  if (Tok.K == KwBase) {
321  if (Error Err = expect(Equal, "'=' expected"))
322  return Err;
323  if (Error Err = readAsInt(Baseaddr))
324  return Err;
325  } else {
326  unget();
327  *Baseaddr = 0;
328  }
329  return Error::success();
330  }
331 
332  // VERSION major[.minor]
333  Error parseVersion(uint32_t *Major, uint32_t *Minor) {
334  read();
335  if (Tok.K != Identifier)
336  return createError("identifier expected, but got " + Tok.Value);
337  StringRef V1, V2;
338  std::tie(V1, V2) = Tok.Value.split('.');
339  if (V1.getAsInteger(10, *Major))
340  return createError("integer expected, but got " + Tok.Value);
341  if (V2.empty())
342  *Minor = 0;
343  else if (V2.getAsInteger(10, *Minor))
344  return createError("integer expected, but got " + Tok.Value);
345  return Error::success();
346  }
347 
 // Source of fresh tokens.
348  Lexer Lex;
 // The current token, i.e. the one most recently produced by read().
349  Token Tok;
 // Tokens handed back via unget(); read() drains these before lexing.
350  std::vector<Token> Stack;
 // NOTE(review): listing line 351 is missing from this view; it presumably
 // declares the Machine member that the constructor initialises - confirm
 // against the original file.
 // Parse result, filled in by the parse* methods and returned by parse().
352  COFFModuleDefinition Info;
 // MinGW-flavour flag, forwarded to isDecorated().
353  bool MingwDef;
354 };
355 
 // NOTE(review): the start of this function's signature (listing lines
 // 356-357) is missing from this view; the cross-reference index lists it as
 // parseCOFFModuleDefinition(MemoryBufferRef MB, COFF::MachineTypes Machine,
 // bool MingwDef=false) - confirm against the original file.
 // Runs a temporary Parser over MB's buffer and returns the result of
 // parse().
358  bool MingwDef) {
359  return Parser(MB.getBuffer(), Machine, MingwDef).parse();
360 }
361 
362 } // namespace object
363 } // namespace llvm
llvm::StringSwitch::Case
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:67
llvm::StringRef::startswith
LLVM_NODISCARD bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:286
llvm::object::Lexer::Lexer
Lexer(StringRef S)
Definition: COFFModuleDefinition.cpp:85
llvm::object::Equal
@ Equal
Definition: COFFModuleDefinition.cpp:38
llvm::object::Kind
Kind
Definition: COFFModuleDefinition.cpp:33
llvm
This file implements support for optimizing divisions by a constant.
Definition: AllocatorList.h:23
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
COFFModuleDefinition.h
T
StringRef.h
llvm::StringSwitch::Default
LLVM_NODISCARD R Default(T Value)
Definition: StringSwitch.h:181
Path.h
llvm::Error::success
static ErrorSuccess success()
Create a success value.
Definition: Error.h:331
Error.h
COFFImportFile.h
llvm::object::Eof
@ Eof
Definition: COFFModuleDefinition.cpp:35
llvm::object::KwData
@ KwData
Definition: COFFModuleDefinition.cpp:42
llvm::Expected
Tagged union holding either a T or a Error.
Definition: APFloat.h:42
llvm::MemoryBufferRef
Definition: MemoryBufferRef.h:22
llvm::object::KwLibrary
@ KwLibrary
Definition: COFFModuleDefinition.cpp:45
llvm::object::Parser
Definition: COFFModuleDefinition.cpp:141
llvm::object::Token::K
Kind K
Definition: COFFModuleDefinition.cpp:55
llvm::COFF::IMAGE_FILE_MACHINE_I386
@ IMAGE_FILE_MACHINE_I386
Definition: COFF.h:103
Error.h
llvm::sys::path::has_extension
bool has_extension(const Twine &path, Style style=Style::native)
Has extension?
Definition: Path.cpp:660
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::createError
static Error createError(const Twine &Err)
Definition: APFloat.cpp:232
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::StringRef::contains
LLVM_NODISCARD bool contains(StringRef Other) const
Return true if the given string is a substring of *this, and false otherwise.
Definition: StringRef.h:462
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::StringRef::getAsInteger
std::enable_if_t< std::numeric_limits< T >::is_signed, bool > getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:510
llvm::object::Lexer::lex
Token lex()
Definition: COFFModuleDefinition.cpp:87
object
bar al al movzbl eax ret Missed when stored in a memory object
Definition: README.txt:1411
llvm::object::KwExports
@ KwExports
Definition: COFFModuleDefinition.cpp:43
uint64_t
llvm::object::Token::Token
Token(Kind T=Unknown, StringRef S="")
Definition: COFFModuleDefinition.cpp:54
parseVersion
static Version parseVersion(StringRef Name)
Definition: CodeViewDebug.cpp:780
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::object::Parser::parse
Expected< COFFModuleDefinition > parse()
Definition: COFFModuleDefinition.cpp:146
llvm::object::KwNoname
@ KwNoname
Definition: COFFModuleDefinition.cpp:47
llvm::pdb::PDB_MemoryType::Stack
@ Stack
llvm::object::Token
Definition: COFFModuleDefinition.cpp:53
llvm::MemoryBufferRef::getBuffer
StringRef getBuffer() const
Definition: MemoryBufferRef.h:32
llvm::irsymtab::storage::Word
support::ulittle32_t Word
Definition: IRSymtab.h:52
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
uint32_t
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::object::Comma
@ Comma
Definition: COFFModuleDefinition.cpp:37
llvm::object::Parser::Parser
Parser(StringRef S, MachineTypes M, bool B)
Definition: COFFModuleDefinition.cpp:143
llvm::NVPTX::PTXLdStInstCode::V2
@ V2
Definition: NVPTX.h:123
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:52
llvm::support::endian::read
value_type read(const void *memory, endianness endian)
Read a value of a particular endianness from memory.
Definition: Endian.h:63
llvm::object::EqualEqual
@ EqualEqual
Definition: COFFModuleDefinition.cpp:39
llvm::Error
Lightweight error class with error context and mandatory checking.
Definition: Error.h:157
llvm::COFF::MachineTypes
MachineTypes
Definition: COFF.h:93
llvm::object::Identifier
@ Identifier
Definition: COFFModuleDefinition.cpp:36
StringSwitch.h
Machine
COFF::MachineTypes Machine
Definition: COFFYAML.cpp:366
COFF.h
llvm::object::KwHeapsize
@ KwHeapsize
Definition: COFFModuleDefinition.cpp:44
llvm::object::KwBase
@ KwBase
Definition: COFFModuleDefinition.cpp:40
llvm::object::isDecorated
static bool isDecorated(StringRef Sym, bool MingwDef)
Definition: COFFModuleDefinition.cpp:59
llvm::object::KwPrivate
@ KwPrivate
Definition: COFFModuleDefinition.cpp:48
llvm::object::KwConstant
@ KwConstant
Definition: COFFModuleDefinition.cpp:41
llvm::StringSwitch
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:42
llvm::object::Token::Value
StringRef Value
Definition: COFFModuleDefinition.cpp:56
raw_ostream.h
llvm::COFF
Definition: COFF.h:30
llvm::object::Lexer
Definition: COFFModuleDefinition.cpp:83
llvm::object::parseCOFFModuleDefinition
Expected< COFFModuleDefinition > parseCOFFModuleDefinition(MemoryBufferRef MB, COFF::MachineTypes Machine, bool MingwDef=false)
Definition: COFFModuleDefinition.cpp:356
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::object::KwVersion
@ KwVersion
Definition: COFFModuleDefinition.cpp:50
llvm::object::KwStacksize
@ KwStacksize
Definition: COFFModuleDefinition.cpp:49
llvm::object::KwName
@ KwName
Definition: COFFModuleDefinition.cpp:46