LLVM 17.0.0git
COFFModuleDefinition.cpp
Go to the documentation of this file.
1//===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Windows-specific.
10// A parser for the module-definition file (.def file).
11//
12// The format of module-definition files is described in this document:
13// https://msdn.microsoft.com/en-us/library/28d6s79h.aspx
14//
15//===----------------------------------------------------------------------===//
16
18#include "llvm/ADT/StringRef.h"
21#include "llvm/Object/Error.h"
22#include "llvm/Support/Error.h"
23#include "llvm/Support/Path.h"
24
25using namespace llvm::COFF;
26using namespace llvm;
27
28namespace llvm {
29namespace object {
30
31enum Kind {
49};
50
51struct Token {
52 explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {}
55};
56
57static bool isDecorated(StringRef Sym, bool MingwDef) {
58 // In def files, the symbols can either be listed decorated or undecorated.
59 //
60 // - For cdecl symbols, only the undecorated form is allowed.
61 // - For fastcall and vectorcall symbols, both fully decorated or
62 // undecorated forms can be present.
63 // - For stdcall symbols in non-MinGW environments, the decorated form is
64 // fully decorated with leading underscore and trailing stack argument
65 // size - like "_Func@0".
66 // - In MinGW def files, a decorated stdcall symbol does not include the
67 // leading underscore though, like "Func@0".
68
69 // This function controls whether a leading underscore should be added to
70 // the given symbol name or not. For MinGW, treat a stdcall symbol name such
71 // as "Func@0" as undecorated, i.e. a leading underscore must be added.
72 // For non-MinGW, look for '@' in the whole string and consider "_Func@0"
73 // as decorated, i.e. don't add any more leading underscores.
74 // We can't check for a leading underscore here, since function names
75 // themselves can start with an underscore, while a second one still needs
76 // to be added.
77 return Sym.startswith("@") || Sym.contains("@@") || Sym.startswith("?") ||
78 (!MingwDef && Sym.contains('@'));
79}
80
81class Lexer {
82public:
83 Lexer(StringRef S) : Buf(S) {}
84
86 Buf = Buf.trim();
87 if (Buf.empty())
88 return Token(Eof);
89
90 switch (Buf[0]) {
91 case '\0':
92 return Token(Eof);
93 case ';': {
94 size_t End = Buf.find('\n');
95 Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
96 return lex();
97 }
98 case '=':
99 Buf = Buf.drop_front();
100 if (Buf.startswith("=")) {
101 Buf = Buf.drop_front();
102 return Token(EqualEqual, "==");
103 }
104 return Token(Equal, "=");
105 case ',':
106 Buf = Buf.drop_front();
107 return Token(Comma, ",");
108 case '"': {
109 StringRef S;
110 std::tie(S, Buf) = Buf.substr(1).split('"');
111 return Token(Identifier, S);
112 }
113 default: {
114 size_t End = Buf.find_first_of("=,;\r\n \t\v");
115 StringRef Word = Buf.substr(0, End);
117 .Case("BASE", KwBase)
118 .Case("CONSTANT", KwConstant)
119 .Case("DATA", KwData)
120 .Case("EXPORTS", KwExports)
121 .Case("HEAPSIZE", KwHeapsize)
122 .Case("LIBRARY", KwLibrary)
123 .Case("NAME", KwName)
124 .Case("NONAME", KwNoname)
125 .Case("PRIVATE", KwPrivate)
126 .Case("STACKSIZE", KwStacksize)
127 .Case("VERSION", KwVersion)
129 Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
130 return Token(K, Word);
131 }
132 }
133 }
134
135private:
136 StringRef Buf;
137};
138
139class Parser {
140public:
141 explicit Parser(StringRef S, MachineTypes M, bool B)
142 : Lex(S), Machine(M), MingwDef(B) {}
143
145 do {
146 if (Error Err = parseOne())
147 return std::move(Err);
148 } while (Tok.K != Eof);
149 return Info;
150 }
151
152private:
153 void read() {
154 if (Stack.empty()) {
155 Tok = Lex.lex();
156 return;
157 }
158 Tok = Stack.back();
159 Stack.pop_back();
160 }
161
162 Error readAsInt(uint64_t *I) {
163 read();
164 if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I))
165 return createError("integer expected");
166 return Error::success();
167 }
168
169 Error expect(Kind Expected, StringRef Msg) {
170 read();
171 if (Tok.K != Expected)
172 return createError(Msg);
173 return Error::success();
174 }
175
176 void unget() { Stack.push_back(Tok); }
177
178 Error parseOne() {
179 read();
180 switch (Tok.K) {
181 case Eof:
182 return Error::success();
183 case KwExports:
184 for (;;) {
185 read();
186 if (Tok.K != Identifier) {
187 unget();
188 return Error::success();
189 }
190 if (Error Err = parseExport())
191 return Err;
192 }
193 case KwHeapsize:
194 return parseNumbers(&Info.HeapReserve, &Info.HeapCommit);
195 case KwStacksize:
196 return parseNumbers(&Info.StackReserve, &Info.StackCommit);
197 case KwLibrary:
198 case KwName: {
199 bool IsDll = Tok.K == KwLibrary; // Check before parseName.
200 std::string Name;
201 if (Error Err = parseName(&Name, &Info.ImageBase))
202 return Err;
203
204 Info.ImportName = Name;
205
206 // Set the output file, but don't override /out if it was already passed.
207 if (Info.OutputFile.empty()) {
208 Info.OutputFile = Name;
209 // Append the appropriate file extension if not already present.
211 Info.OutputFile += IsDll ? ".dll" : ".exe";
212 }
213
214 return Error::success();
215 }
216 case KwVersion:
217 return parseVersion(&Info.MajorImageVersion, &Info.MinorImageVersion);
218 default:
219 return createError("unknown directive: " + Tok.Value);
220 }
221 }
222
223 Error parseExport() {
224 COFFShortExport E;
225 E.Name = std::string(Tok.Value);
226 read();
227 if (Tok.K == Equal) {
228 read();
229 if (Tok.K != Identifier)
230 return createError("identifier expected, but got " + Tok.Value);
231 E.ExtName = E.Name;
232 E.Name = std::string(Tok.Value);
233 } else {
234 unget();
235 }
236
237 if (Machine == IMAGE_FILE_MACHINE_I386) {
238 if (!isDecorated(E.Name, MingwDef))
239 E.Name = (std::string("_").append(E.Name));
240 if (!E.ExtName.empty() && !isDecorated(E.ExtName, MingwDef))
241 E.ExtName = (std::string("_").append(E.ExtName));
242 }
243
244 for (;;) {
245 read();
246 if (Tok.K == Identifier && Tok.Value[0] == '@') {
247 if (Tok.Value == "@") {
248 // "foo @ 10"
249 read();
250 Tok.Value.getAsInteger(10, E.Ordinal);
251 } else if (Tok.Value.drop_front().getAsInteger(10, E.Ordinal)) {
252 // "foo \n @bar" - Not an ordinal modifier at all, but the next
253 // export (fastcall decorated) - complete the current one.
254 unget();
255 Info.Exports.push_back(E);
256 return Error::success();
257 }
258 // "foo @10"
259 read();
260 if (Tok.K == KwNoname) {
261 E.Noname = true;
262 } else {
263 unget();
264 }
265 continue;
266 }
267 if (Tok.K == KwData) {
268 E.Data = true;
269 continue;
270 }
271 if (Tok.K == KwConstant) {
272 E.Constant = true;
273 continue;
274 }
275 if (Tok.K == KwPrivate) {
276 E.Private = true;
277 continue;
278 }
279 if (Tok.K == EqualEqual) {
280 read();
281 E.AliasTarget = std::string(Tok.Value);
282 if (Machine == IMAGE_FILE_MACHINE_I386 && !isDecorated(E.AliasTarget, MingwDef))
283 E.AliasTarget = std::string("_").append(E.AliasTarget);
284 continue;
285 }
286 unget();
287 Info.Exports.push_back(E);
288 return Error::success();
289 }
290 }
291
292 // HEAPSIZE/STACKSIZE reserve[,commit]
293 Error parseNumbers(uint64_t *Reserve, uint64_t *Commit) {
294 if (Error Err = readAsInt(Reserve))
295 return Err;
296 read();
297 if (Tok.K != Comma) {
298 unget();
299 Commit = nullptr;
300 return Error::success();
301 }
302 if (Error Err = readAsInt(Commit))
303 return Err;
304 return Error::success();
305 }
306
307 // NAME outputPath [BASE=address]
308 Error parseName(std::string *Out, uint64_t *Baseaddr) {
309 read();
310 if (Tok.K == Identifier) {
311 *Out = std::string(Tok.Value);
312 } else {
313 *Out = "";
314 unget();
315 return Error::success();
316 }
317 read();
318 if (Tok.K == KwBase) {
319 if (Error Err = expect(Equal, "'=' expected"))
320 return Err;
321 if (Error Err = readAsInt(Baseaddr))
322 return Err;
323 } else {
324 unget();
325 *Baseaddr = 0;
326 }
327 return Error::success();
328 }
329
330 // VERSION major[.minor]
331 Error parseVersion(uint32_t *Major, uint32_t *Minor) {
332 read();
333 if (Tok.K != Identifier)
334 return createError("identifier expected, but got " + Tok.Value);
335 StringRef V1, V2;
336 std::tie(V1, V2) = Tok.Value.split('.');
337 if (V1.getAsInteger(10, *Major))
338 return createError("integer expected, but got " + Tok.Value);
339 if (V2.empty())
340 *Minor = 0;
341 else if (V2.getAsInteger(10, *Minor))
342 return createError("integer expected, but got " + Tok.Value);
343 return Error::success();
344 }
345
346 Lexer Lex;
347 Token Tok;
348 std::vector<Token> Stack;
349 MachineTypes Machine;
350 COFFModuleDefinition Info;
351 bool MingwDef;
352};
353
355 MachineTypes Machine,
356 bool MingwDef) {
357 return Parser(MB.getBuffer(), Machine, MingwDef).parse();
358}
359
360} // namespace object
361} // namespace llvm
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
std::string Name
bool End
Definition: ELF_riscv.cpp:464
Symbol * Sym
Definition: ELF_riscv.cpp:463
#define I(x, y, z)
Definition: MD5.cpp:58
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
Lightweight error class with error context and mandatory checking.
Definition: Error.h:156
static ErrorSuccess success()
Create a success value.
Definition: Error.h:330
Tagged union holding either a T or a Error.
Definition: Error.h:470
StringRef getBuffer() const
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:704
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:474
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:575
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:134
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition: StringRef.h:613
bool startswith(StringRef Prefix) const
Definition: StringRef.h:261
size_t find_first_of(char C, size_t From=0) const
Find the first character in the string that is C, or npos if not found.
Definition: StringRef.h:381
size_t find(char C, size_t From=0) const
Search for the first character C in the string.
Definition: StringRef.h:301
StringRef trim(char Char) const
Return string with consecutive Char characters starting from the left and right removed.
Definition: StringRef.h:819
static constexpr size_t npos
Definition: StringRef.h:52
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
LLVM Value Representation.
Definition: Value.h:74
Parser(StringRef S, MachineTypes M, bool B)
Expected< COFFModuleDefinition > parse()
MachineTypes
Definition: COFF.h:92
@ IMAGE_FILE_MACHINE_I386
Definition: COFF.h:104
Error createError(const Twine &Err)
Definition: Error.h:84
static bool isDecorated(StringRef Sym, bool MingwDef)
Expected< COFFModuleDefinition > parseCOFFModuleDefinition(MemoryBufferRef MB, COFF::MachineTypes Machine, bool MingwDef=false)
bool has_extension(const Twine &path, Style style=Style::native)
Has extension?
Definition: Path.cpp:665
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
std::vector< COFFShortExport > Exports
Token(Kind T=Unknown, StringRef S="")