LLVM  16.0.0git
COFFModuleDefinition.cpp
Go to the documentation of this file.
1 //===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Windows-specific.
10 // A parser for the module-definition file (.def file).
11 //
12 // The format of module-definition files is described in this document:
13 // https://msdn.microsoft.com/en-us/library/28d6s79h.aspx
14 //
15 //===----------------------------------------------------------------------===//
16 
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/ADT/StringSwitch.h"
21 #include "llvm/Object/Error.h"
22 #include "llvm/Support/Error.h"
23 #include "llvm/Support/Path.h"
24 
25 using namespace llvm::COFF;
26 using namespace llvm;
27 
28 namespace llvm {
29 namespace object {
30 
// Kinds of token the .def lexer can produce. The enumerators between Eof and
// the closing brace were lost from this listing; they are restored here in
// their original declaration order because the lexer and parser refer to
// every one of them.
enum Kind {
  Unknown,
  Eof,        // End of the input buffer (or an embedded NUL character).
  Identifier, // A quoted string or any word that is not a keyword.
  Comma,      // ","
  Equal,      // "="
  EqualEqual, // "=="
  KwBase,     // "BASE"
  KwConstant, // "CONSTANT"
  KwData,     // "DATA"
  KwExports,  // "EXPORTS"
  KwHeapsize, // "HEAPSIZE"
  KwLibrary,  // "LIBRARY"
  KwName,     // "NAME"
  KwNoname,   // "NONAME"
  KwPrivate,  // "PRIVATE"
  KwStacksize, // "STACKSIZE"
  KwVersion,  // "VERSION"
};
50 
51 struct Token {
52  explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {}
55 };
56 
57 static bool isDecorated(StringRef Sym, bool MingwDef) {
58  // In def files, the symbols can either be listed decorated or undecorated.
59  //
60  // - For cdecl symbols, only the undecorated form is allowed.
61  // - For fastcall and vectorcall symbols, both fully decorated or
62  // undecorated forms can be present.
63  // - For stdcall symbols in non-MinGW environments, the decorated form is
64  // fully decorated with leading underscore and trailing stack argument
65  // size - like "_Func@0".
66  // - In MinGW def files, a decorated stdcall symbol does not include the
67  // leading underscore though, like "Func@0".
68 
69  // This function controls whether a leading underscore should be added to
70  // the given symbol name or not. For MinGW, treat a stdcall symbol name such
71  // as "Func@0" as undecorated, i.e. a leading underscore must be added.
72  // For non-MinGW, look for '@' in the whole string and consider "_Func@0"
73  // as decorated, i.e. don't add any more leading underscores.
74  // We can't check for a leading underscore here, since function names
75  // themselves can start with an underscore, while a second one still needs
76  // to be added.
77  return Sym.startswith("@") || Sym.contains("@@") || Sym.startswith("?") ||
78  (!MingwDef && Sym.contains('@'));
79 }
80 
81 class Lexer {
82 public:
83  Lexer(StringRef S) : Buf(S) {}
84 
85  Token lex() {
86  Buf = Buf.trim();
87  if (Buf.empty())
88  return Token(Eof);
89 
90  switch (Buf[0]) {
91  case '\0':
92  return Token(Eof);
93  case ';': {
94  size_t End = Buf.find('\n');
95  Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
96  return lex();
97  }
98  case '=':
99  Buf = Buf.drop_front();
100  if (Buf.startswith("=")) {
101  Buf = Buf.drop_front();
102  return Token(EqualEqual, "==");
103  }
104  return Token(Equal, "=");
105  case ',':
106  Buf = Buf.drop_front();
107  return Token(Comma, ",");
108  case '"': {
109  StringRef S;
110  std::tie(S, Buf) = Buf.substr(1).split('"');
111  return Token(Identifier, S);
112  }
113  default: {
114  size_t End = Buf.find_first_of("=,;\r\n \t\v");
115  StringRef Word = Buf.substr(0, End);
117  .Case("BASE", KwBase)
118  .Case("CONSTANT", KwConstant)
119  .Case("DATA", KwData)
120  .Case("EXPORTS", KwExports)
121  .Case("HEAPSIZE", KwHeapsize)
122  .Case("LIBRARY", KwLibrary)
123  .Case("NAME", KwName)
124  .Case("NONAME", KwNoname)
125  .Case("PRIVATE", KwPrivate)
126  .Case("STACKSIZE", KwStacksize)
127  .Case("VERSION", KwVersion)
129  Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
130  return Token(K, Word);
131  }
132  }
133  }
134 
135 private:
136  StringRef Buf;
137 };
138 
139 class Parser {
140 public:
141  explicit Parser(StringRef S, MachineTypes M, bool B)
142  : Lex(S), Machine(M), MingwDef(B) {}
143 
145  do {
146  if (Error Err = parseOne())
147  return std::move(Err);
148  } while (Tok.K != Eof);
149  return Info;
150  }
151 
152 private:
153  void read() {
154  if (Stack.empty()) {
155  Tok = Lex.lex();
156  return;
157  }
158  Tok = Stack.back();
159  Stack.pop_back();
160  }
161 
162  Error readAsInt(uint64_t *I) {
163  read();
164  if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I))
165  return createError("integer expected");
166  return Error::success();
167  }
168 
169  Error expect(Kind Expected, StringRef Msg) {
170  read();
171  if (Tok.K != Expected)
172  return createError(Msg);
173  return Error::success();
174  }
175 
176  void unget() { Stack.push_back(Tok); }
177 
178  Error parseOne() {
179  read();
180  switch (Tok.K) {
181  case Eof:
182  return Error::success();
183  case KwExports:
184  for (;;) {
185  read();
186  if (Tok.K != Identifier) {
187  unget();
188  return Error::success();
189  }
190  if (Error Err = parseExport())
191  return Err;
192  }
193  case KwHeapsize:
194  return parseNumbers(&Info.HeapReserve, &Info.HeapCommit);
195  case KwStacksize:
196  return parseNumbers(&Info.StackReserve, &Info.StackCommit);
197  case KwLibrary:
198  case KwName: {
199  bool IsDll = Tok.K == KwLibrary; // Check before parseName.
200  std::string Name;
201  if (Error Err = parseName(&Name, &Info.ImageBase))
202  return Err;
203 
204  Info.ImportName = Name;
205 
206  // Set the output file, but don't override /out if it was already passed.
207  if (Info.OutputFile.empty()) {
208  Info.OutputFile = Name;
209  // Append the appropriate file extension if not already present.
211  Info.OutputFile += IsDll ? ".dll" : ".exe";
212  }
213 
214  return Error::success();
215  }
216  case KwVersion:
217  return parseVersion(&Info.MajorImageVersion, &Info.MinorImageVersion);
218  default:
219  return createError("unknown directive: " + Tok.Value);
220  }
221  }
222 
223  Error parseExport() {
224  COFFShortExport E;
225  E.Name = std::string(Tok.Value);
226  read();
227  if (Tok.K == Equal) {
228  read();
229  if (Tok.K != Identifier)
230  return createError("identifier expected, but got " + Tok.Value);
231  E.ExtName = E.Name;
232  E.Name = std::string(Tok.Value);
233  } else {
234  unget();
235  }
236 
238  if (!isDecorated(E.Name, MingwDef))
239  E.Name = (std::string("_").append(E.Name));
240  if (!E.ExtName.empty() && !isDecorated(E.ExtName, MingwDef))
241  E.ExtName = (std::string("_").append(E.ExtName));
242  }
243 
244  for (;;) {
245  read();
246  if (Tok.K == Identifier && Tok.Value[0] == '@') {
247  if (Tok.Value == "@") {
248  // "foo @ 10"
249  read();
250  Tok.Value.getAsInteger(10, E.Ordinal);
251  } else if (Tok.Value.drop_front().getAsInteger(10, E.Ordinal)) {
252  // "foo \n @bar" - Not an ordinal modifier at all, but the next
253  // export (fastcall decorated) - complete the current one.
254  unget();
255  Info.Exports.push_back(E);
256  return Error::success();
257  }
258  // "foo @10"
259  read();
260  if (Tok.K == KwNoname) {
261  E.Noname = true;
262  } else {
263  unget();
264  }
265  continue;
266  }
267  if (Tok.K == KwData) {
268  E.Data = true;
269  continue;
270  }
271  if (Tok.K == KwConstant) {
272  E.Constant = true;
273  continue;
274  }
275  if (Tok.K == KwPrivate) {
276  E.Private = true;
277  continue;
278  }
279  if (Tok.K == EqualEqual) {
280  read();
281  E.AliasTarget = std::string(Tok.Value);
282  if (Machine == IMAGE_FILE_MACHINE_I386 && !isDecorated(E.AliasTarget, MingwDef))
283  E.AliasTarget = std::string("_").append(E.AliasTarget);
284  continue;
285  }
286  unget();
287  Info.Exports.push_back(E);
288  return Error::success();
289  }
290  }
291 
292  // HEAPSIZE/STACKSIZE reserve[,commit]
293  Error parseNumbers(uint64_t *Reserve, uint64_t *Commit) {
294  if (Error Err = readAsInt(Reserve))
295  return Err;
296  read();
297  if (Tok.K != Comma) {
298  unget();
299  Commit = nullptr;
300  return Error::success();
301  }
302  if (Error Err = readAsInt(Commit))
303  return Err;
304  return Error::success();
305  }
306 
307  // NAME outputPath [BASE=address]
308  Error parseName(std::string *Out, uint64_t *Baseaddr) {
309  read();
310  if (Tok.K == Identifier) {
311  *Out = std::string(Tok.Value);
312  } else {
313  *Out = "";
314  unget();
315  return Error::success();
316  }
317  read();
318  if (Tok.K == KwBase) {
319  if (Error Err = expect(Equal, "'=' expected"))
320  return Err;
321  if (Error Err = readAsInt(Baseaddr))
322  return Err;
323  } else {
324  unget();
325  *Baseaddr = 0;
326  }
327  return Error::success();
328  }
329 
330  // VERSION major[.minor]
331  Error parseVersion(uint32_t *Major, uint32_t *Minor) {
332  read();
333  if (Tok.K != Identifier)
334  return createError("identifier expected, but got " + Tok.Value);
335  StringRef V1, V2;
336  std::tie(V1, V2) = Tok.Value.split('.');
337  if (V1.getAsInteger(10, *Major))
338  return createError("integer expected, but got " + Tok.Value);
339  if (V2.empty())
340  *Minor = 0;
341  else if (V2.getAsInteger(10, *Minor))
342  return createError("integer expected, but got " + Tok.Value);
343  return Error::success();
344  }
345 
346  Lexer Lex;
347  Token Tok;
348  std::vector<Token> Stack;
350  COFFModuleDefinition Info;
351  bool MingwDef;
352 };
353 
356  bool MingwDef) {
357  return Parser(MB.getBuffer(), Machine, MingwDef).parse();
358 }
359 
360 } // namespace object
361 } // namespace llvm
llvm::StringSwitch::Case
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
llvm::object::Lexer::Lexer
Lexer(StringRef S)
Definition: COFFModuleDefinition.cpp:83
llvm::object::Equal
@ Equal
Definition: COFFModuleDefinition.cpp:36
llvm::object::Kind
Kind
Definition: COFFModuleDefinition.cpp:31
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
COFFModuleDefinition.h
T
StringRef.h
Path.h
llvm::Error::success
static ErrorSuccess success()
Create a success value.
Definition: Error.h:329
Error.h
COFFImportFile.h
llvm::object::Eof
@ Eof
Definition: COFFModuleDefinition.cpp:33
llvm::object::KwData
@ KwData
Definition: COFFModuleDefinition.cpp:40
llvm::Expected
Tagged union holding either a T or a Error.
Definition: APFloat.h:41
llvm::MemoryBufferRef
Definition: MemoryBufferRef.h:22
llvm::object::KwLibrary
@ KwLibrary
Definition: COFFModuleDefinition.cpp:43
llvm::object::Parser
Definition: COFFModuleDefinition.cpp:139
llvm::object::Token::K
Kind K
Definition: COFFModuleDefinition.cpp:53
llvm::COFF::IMAGE_FILE_MACHINE_I386
@ IMAGE_FILE_MACHINE_I386
Definition: COFF.h:102
Error.h
llvm::StringRef::startswith
bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:248
llvm::sys::path::has_extension
bool has_extension(const Twine &path, Style style=Style::native)
Has extension?
Definition: Path.cpp:664
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::createError
static Error createError(const Twine &Err)
Definition: APFloat.cpp:232
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::StringRef::getAsInteger
std::enable_if_t< std::numeric_limits< T >::is_signed, bool > getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:455
llvm::object::Lexer::lex
Token lex()
Definition: COFFModuleDefinition.cpp:85
object
bar al al movzbl eax ret Missed when stored in a memory object
Definition: README.txt:1411
llvm::object::KwExports
@ KwExports
Definition: COFFModuleDefinition.cpp:41
uint64_t
llvm::object::Token::Token
Token(Kind T=Unknown, StringRef S="")
Definition: COFFModuleDefinition.cpp:52
parseVersion
static Version parseVersion(StringRef Name)
Definition: CodeViewDebug.cpp:814
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::object::Parser::parse
Expected< COFFModuleDefinition > parse()
Definition: COFFModuleDefinition.cpp:144
llvm::object::KwNoname
@ KwNoname
Definition: COFFModuleDefinition.cpp:45
llvm::pdb::PDB_MemoryType::Stack
@ Stack
llvm::object::Token
Definition: COFFModuleDefinition.cpp:51
llvm::MemoryBufferRef::getBuffer
StringRef getBuffer() const
Definition: MemoryBufferRef.h:32
llvm::irsymtab::storage::Word
support::ulittle32_t Word
Definition: IRSymtab.h:52
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
uint32_t
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::object::Comma
@ Comma
Definition: COFFModuleDefinition.cpp:35
llvm::AMDGPU::SendMsg::Msg
const CustomOperand< const MCSubtargetInfo & > Msg[]
Definition: AMDGPUAsmUtils.cpp:39
llvm::object::Parser::Parser
Parser(StringRef S, MachineTypes M, bool B)
Definition: COFFModuleDefinition.cpp:141
llvm::NVPTX::PTXLdStInstCode::V2
@ V2
Definition: NVPTX.h:123
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:50
llvm::support::endian::read
value_type read(const void *memory, endianness endian)
Read a value of a particular endianness from memory.
Definition: Endian.h:63
llvm::object::EqualEqual
@ EqualEqual
Definition: COFFModuleDefinition.cpp:37
llvm::Error
Lightweight error class with error context and mandatory checking.
Definition: Error.h:155
llvm::COFF::MachineTypes
MachineTypes
Definition: COFF.h:92
llvm::object::Identifier
@ Identifier
Definition: COFFModuleDefinition.cpp:34
StringSwitch.h
llvm::StringRef::contains
bool contains(StringRef Other) const
Return true if the given string is a substring of *this, and false otherwise.
Definition: StringRef.h:407
Machine
COFF::MachineTypes Machine
Definition: COFFYAML.cpp:369
llvm::object::KwHeapsize
@ KwHeapsize
Definition: COFFModuleDefinition.cpp:42
llvm::object::KwBase
@ KwBase
Definition: COFFModuleDefinition.cpp:38
llvm::object::isDecorated
static bool isDecorated(StringRef Sym, bool MingwDef)
Definition: COFFModuleDefinition.cpp:57
llvm::object::KwPrivate
@ KwPrivate
Definition: COFFModuleDefinition.cpp:46
llvm::object::KwConstant
@ KwConstant
Definition: COFFModuleDefinition.cpp:39
llvm::StringSwitch::Default
R Default(T Value)
Definition: StringSwitch.h:182
llvm::StringSwitch
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
llvm::object::Token::Value
StringRef Value
Definition: COFFModuleDefinition.cpp:54
llvm::COFF
Definition: COFF.h:29
llvm::object::Lexer
Definition: COFFModuleDefinition.cpp:81
llvm::object::parseCOFFModuleDefinition
Expected< COFFModuleDefinition > parseCOFFModuleDefinition(MemoryBufferRef MB, COFF::MachineTypes Machine, bool MingwDef=false)
Definition: COFFModuleDefinition.cpp:354
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::object::KwVersion
@ KwVersion
Definition: COFFModuleDefinition.cpp:48
llvm::object::KwStacksize
@ KwStacksize
Definition: COFFModuleDefinition.cpp:47
llvm::object::KwName
@ KwName
Definition: COFFModuleDefinition.cpp:44