LLVM 17.0.0git
DLangDemangle.cpp
Go to the documentation of this file.
1//===--- DLangDemangle.cpp ------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file defines a demangler for the D programming language as specified
11/// in the ABI specification, available at:
12/// https://dlang.org/spec/abi.html#name_mangling
13///
14//===----------------------------------------------------------------------===//
15
19
20#include <cctype>
21#include <cstring>
22#include <limits>
23
24using namespace llvm;
25using llvm::itanium_demangle::OutputBuffer;
26using llvm::itanium_demangle::StringView;
27
28namespace {
29
30/// Demangle information structure.
31struct Demangler {
32 /// Initialize the information structure we use to pass around information.
33 ///
34 /// \param Mangled String to demangle.
35 Demangler(const char *Mangled);
36
37 /// Extract and demangle the mangled symbol and append it to the output
38 /// string.
39 ///
40 /// \param Demangled Output buffer to write the demangled name.
41 ///
42 /// \return The remaining string on success or nullptr on failure.
43 ///
44 /// \see https://dlang.org/spec/abi.html#name_mangling .
45 /// \see https://dlang.org/spec/abi.html#MangledName .
46 const char *parseMangle(OutputBuffer *Demangled);
47
48private:
49 /// Extract and demangle a given mangled symbol and append it to the output
50 /// string.
51 ///
52 /// \param Demangled output buffer to write the demangled name.
53 /// \param Mangled mangled symbol to be demangled.
54 ///
55 /// \return The remaining string on success or nullptr on failure.
56 ///
57 /// \see https://dlang.org/spec/abi.html#name_mangling .
58 /// \see https://dlang.org/spec/abi.html#MangledName .
59 const char *parseMangle(OutputBuffer *Demangled, const char *Mangled);
60
61 /// Extract the number from a given string.
62 ///
63 /// \param Mangled string to extract the number.
64 /// \param Ret assigned result value.
65 ///
66 /// \return The remaining string on success or nullptr on failure.
67 ///
68 /// \note A result larger than UINT_MAX is considered a failure.
69 ///
70 /// \see https://dlang.org/spec/abi.html#Number .
71 const char *decodeNumber(const char *Mangled, unsigned long &Ret);
72
73 /// Extract the back reference position from a given string.
74 ///
75 /// \param Mangled string to extract the back reference position.
76 /// \param Ret assigned result value.
77 ///
78 /// \return the remaining string on success or nullptr on failure.
79 ///
80 /// \note Ret is always >= 0 on success, and unspecified on failure
81 ///
82 /// \see https://dlang.org/spec/abi.html#back_ref .
83 /// \see https://dlang.org/spec/abi.html#NumberBackRef .
84 const char *decodeBackrefPos(const char *Mangled, long &Ret);
85
86 /// Extract the symbol pointed by the back reference form a given string.
87 ///
88 /// \param Mangled string to extract the back reference position.
89 /// \param Ret assigned result value.
90 ///
91 /// \return the remaining string on success or nullptr on failure.
92 ///
93 /// \see https://dlang.org/spec/abi.html#back_ref .
94 const char *decodeBackref(const char *Mangled, const char *&Ret);
95
96 /// Extract and demangle backreferenced symbol from a given mangled symbol
97 /// and append it to the output string.
98 ///
99 /// \param Demangled output buffer to write the demangled name.
100 /// \param Mangled mangled symbol to be demangled.
101 ///
102 /// \return the remaining string on success or nullptr on failure.
103 ///
104 /// \see https://dlang.org/spec/abi.html#back_ref .
105 /// \see https://dlang.org/spec/abi.html#IdentifierBackRef .
106 const char *parseSymbolBackref(OutputBuffer *Demangled, const char *Mangled);
107
108 /// Extract and demangle backreferenced type from a given mangled symbol
109 /// and append it to the output string.
110 ///
111 /// \param Mangled mangled symbol to be demangled.
112 ///
113 /// \return the remaining string on success or nullptr on failure.
114 ///
115 /// \see https://dlang.org/spec/abi.html#back_ref .
116 /// \see https://dlang.org/spec/abi.html#TypeBackRef .
117 const char *parseTypeBackref(const char *Mangled);
118
119 /// Check whether it is the beginning of a symbol name.
120 ///
121 /// \param Mangled string to extract the symbol name.
122 ///
123 /// \return true on success, false otherwise.
124 ///
125 /// \see https://dlang.org/spec/abi.html#SymbolName .
126 bool isSymbolName(const char *Mangled);
127
128 /// Extract and demangle an identifier from a given mangled symbol append it
129 /// to the output string.
130 ///
131 /// \param Demangled Output buffer to write the demangled name.
132 /// \param Mangled Mangled symbol to be demangled.
133 ///
134 /// \return The remaining string on success or nullptr on failure.
135 ///
136 /// \see https://dlang.org/spec/abi.html#SymbolName .
137 const char *parseIdentifier(OutputBuffer *Demangled, const char *Mangled);
138
139 /// Extract and demangle the plain identifier from a given mangled symbol and
140 /// prepend/append it to the output string, with a special treatment for some
141 /// magic compiler generated symbols.
142 ///
143 /// \param Demangled Output buffer to write the demangled name.
144 /// \param Mangled Mangled symbol to be demangled.
145 /// \param Len Length of the mangled symbol name.
146 ///
147 /// \return The remaining string on success or nullptr on failure.
148 ///
149 /// \see https://dlang.org/spec/abi.html#LName .
150 const char *parseLName(OutputBuffer *Demangled, const char *Mangled,
151 unsigned long Len);
152
153 /// Extract and demangle the qualified symbol from a given mangled symbol
154 /// append it to the output string.
155 ///
156 /// \param Demangled Output buffer to write the demangled name.
157 /// \param Mangled Mangled symbol to be demangled.
158 ///
159 /// \return The remaining string on success or nullptr on failure.
160 ///
161 /// \see https://dlang.org/spec/abi.html#QualifiedName .
162 const char *parseQualified(OutputBuffer *Demangled, const char *Mangled);
163
164 /// Extract and demangle a type from a given mangled symbol append it to
165 /// the output string.
166 ///
167 /// \param Mangled mangled symbol to be demangled.
168 ///
169 /// \return the remaining string on success or nullptr on failure.
170 ///
171 /// \see https://dlang.org/spec/abi.html#Type .
172 const char *parseType(const char *Mangled);
173
174 /// The string we are demangling.
175 const char *Str;
176 /// The index of the last back reference.
177 int LastBackref;
178};
179
180} // namespace
181
182const char *Demangler::decodeNumber(const char *Mangled, unsigned long &Ret) {
183 // Return nullptr if trying to extract something that isn't a digit.
184 if (Mangled == nullptr || !std::isdigit(*Mangled))
185 return nullptr;
186
187 unsigned long Val = 0;
188
189 do {
190 unsigned long Digit = Mangled[0] - '0';
191
192 // Check for overflow.
193 if (Val > (std::numeric_limits<unsigned int>::max() - Digit) / 10)
194 return nullptr;
195
196 Val = Val * 10 + Digit;
197 ++Mangled;
198 } while (std::isdigit(*Mangled));
199
200 if (*Mangled == '\0')
201 return nullptr;
202
203 Ret = Val;
204 return Mangled;
205}
206
207const char *Demangler::decodeBackrefPos(const char *Mangled, long &Ret) {
208 // Return nullptr if trying to extract something that isn't a digit
209 if (Mangled == nullptr || !std::isalpha(*Mangled))
210 return nullptr;
211
212 // Any identifier or non-basic type that has been emitted to the mangled
213 // symbol before will not be emitted again, but is referenced by a special
214 // sequence encoding the relative position of the original occurrence in the
215 // mangled symbol name.
216 // Numbers in back references are encoded with base 26 by upper case letters
217 // A-Z for higher digits but lower case letters a-z for the last digit.
218 // NumberBackRef:
219 // [a-z]
220 // [A-Z] NumberBackRef
221 // ^
222 unsigned long Val = 0;
223
224 while (std::isalpha(*Mangled)) {
225 // Check for overflow
226 if (Val > (std::numeric_limits<unsigned long>::max() - 25) / 26)
227 break;
228
229 Val *= 26;
230
231 if (Mangled[0] >= 'a' && Mangled[0] <= 'z') {
232 Val += Mangled[0] - 'a';
233 if ((long)Val <= 0)
234 break;
235 Ret = Val;
236 return Mangled + 1;
237 }
238
239 Val += Mangled[0] - 'A';
240 ++Mangled;
241 }
242
243 return nullptr;
244}
245
246const char *Demangler::decodeBackref(const char *Mangled, const char *&Ret) {
247 assert(Mangled != nullptr && *Mangled == 'Q' && "Invalid back reference!");
248 Ret = nullptr;
249
250 // Position of 'Q'
251 const char *Qpos = Mangled;
252 long RefPos;
253 ++Mangled;
254
255 Mangled = decodeBackrefPos(Mangled, RefPos);
256 if (Mangled == nullptr)
257 return nullptr;
258
259 if (RefPos > Qpos - Str)
260 return nullptr;
261
262 // Set the position of the back reference.
263 Ret = Qpos - RefPos;
264
265 return Mangled;
266}
267
268const char *Demangler::parseSymbolBackref(OutputBuffer *Demangled,
269 const char *Mangled) {
270 // An identifier back reference always points to a digit 0 to 9.
271 // IdentifierBackRef:
272 // Q NumberBackRef
273 // ^
274 const char *Backref;
275 unsigned long Len;
276
277 // Get position of the back reference
278 Mangled = decodeBackref(Mangled, Backref);
279
280 // Must point to a simple identifier
281 Backref = decodeNumber(Backref, Len);
282 if (Backref == nullptr || strlen(Backref) < Len)
283 return nullptr;
284
285 Backref = parseLName(Demangled, Backref, Len);
286 if (Backref == nullptr)
287 return nullptr;
288
289 return Mangled;
290}
291
292const char *Demangler::parseTypeBackref(const char *Mangled) {
293 // A type back reference always points to a letter.
294 // TypeBackRef:
295 // Q NumberBackRef
296 // ^
297 const char *Backref;
298
299 // If we appear to be moving backwards through the mangle string, then
300 // bail as this may be a recursive back reference.
301 if (Mangled - Str >= LastBackref)
302 return nullptr;
303
304 int SaveRefPos = LastBackref;
305 LastBackref = Mangled - Str;
306
307 // Get position of the back reference.
308 Mangled = decodeBackref(Mangled, Backref);
309
310 // Can't decode back reference.
311 if (Backref == nullptr)
312 return nullptr;
313
314 // TODO: Add support for function type back references.
315 Backref = parseType(Backref);
316
317 LastBackref = SaveRefPos;
318
319 if (Backref == nullptr)
320 return nullptr;
321
322 return Mangled;
323}
324
325bool Demangler::isSymbolName(const char *Mangled) {
326 long Ret;
327 const char *Qref = Mangled;
328
329 if (std::isdigit(*Mangled))
330 return true;
331
332 // TODO: Handle template instances.
333
334 if (*Mangled != 'Q')
335 return false;
336
337 Mangled = decodeBackrefPos(Mangled + 1, Ret);
338 if (Mangled == nullptr || Ret > Qref - Str)
339 return false;
340
341 return std::isdigit(Qref[-Ret]);
342}
343
344const char *Demangler::parseMangle(OutputBuffer *Demangled,
345 const char *Mangled) {
346 // A D mangled symbol is comprised of both scope and type information.
347 // MangleName:
348 // _D QualifiedName Type
349 // _D QualifiedName Z
350 // ^
351 // The caller should have guaranteed that the start pointer is at the
352 // above location.
353 // Note that type is never a function type, but only the return type of
354 // a function or the type of a variable.
355 Mangled += 2;
356
357 Mangled = parseQualified(Demangled, Mangled);
358
359 if (Mangled != nullptr) {
360 // Artificial symbols end with 'Z' and have no type.
361 if (*Mangled == 'Z')
362 ++Mangled;
363 else {
364 Mangled = parseType(Mangled);
365 }
366 }
367
368 return Mangled;
369}
370
371const char *Demangler::parseQualified(OutputBuffer *Demangled,
372 const char *Mangled) {
373 // Qualified names are identifiers separated by their encoded length.
374 // Nested functions also encode their argument types without specifying
375 // what they return.
376 // QualifiedName:
377 // SymbolFunctionName
378 // SymbolFunctionName QualifiedName
379 // ^
380 // SymbolFunctionName:
381 // SymbolName
382 // SymbolName TypeFunctionNoReturn
383 // SymbolName M TypeFunctionNoReturn
384 // SymbolName M TypeModifiers TypeFunctionNoReturn
385 // The start pointer should be at the above location.
386
387 // Whether it has more than one symbol
388 size_t NotFirst = false;
389 do {
390 // Skip over anonymous symbols.
391 if (*Mangled == '0') {
392 do
393 ++Mangled;
394 while (*Mangled == '0');
395
396 continue;
397 }
398
399 if (NotFirst)
400 *Demangled << '.';
401 NotFirst = true;
402
403 Mangled = parseIdentifier(Demangled, Mangled);
404
405 } while (Mangled && isSymbolName(Mangled));
406
407 return Mangled;
408}
409
410const char *Demangler::parseIdentifier(OutputBuffer *Demangled,
411 const char *Mangled) {
412 unsigned long Len;
413
414 if (Mangled == nullptr || *Mangled == '\0')
415 return nullptr;
416
417 if (*Mangled == 'Q')
418 return parseSymbolBackref(Demangled, Mangled);
419
420 // TODO: Parse lengthless template instances.
421
422 const char *Endptr = decodeNumber(Mangled, Len);
423
424 if (Endptr == nullptr || Len == 0)
425 return nullptr;
426
427 if (strlen(Endptr) < Len)
428 return nullptr;
429
430 Mangled = Endptr;
431
432 // TODO: Parse template instances with a length prefix.
433
434 // There can be multiple different declarations in the same function that
435 // have the same mangled name. To make the mangled names unique, a fake
436 // parent in the form `__Sddd' is added to the symbol.
437 if (Len >= 4 && Mangled[0] == '_' && Mangled[1] == '_' && Mangled[2] == 'S') {
438 const char *NumPtr = Mangled + 3;
439 while (NumPtr < (Mangled + Len) && std::isdigit(*NumPtr))
440 ++NumPtr;
441
442 if (Mangled + Len == NumPtr) {
443 // Skip over the fake parent.
444 Mangled += Len;
445 return parseIdentifier(Demangled, Mangled);
446 }
447
448 // Else demangle it as a plain identifier.
449 }
450
451 return parseLName(Demangled, Mangled, Len);
452}
453
454const char *Demangler::parseType(const char *Mangled) {
455 if (*Mangled == '\0')
456 return nullptr;
457
458 switch (*Mangled) {
459 // TODO: Parse type qualifiers.
460 // TODO: Parse function types.
461 // TODO: Parse compound types.
462 // TODO: Parse delegate types.
463 // TODO: Parse tuple types.
464
465 // Basic types.
466 case 'i':
467 ++Mangled;
468 // TODO: Add type name dumping
469 return Mangled;
470
471 // TODO: Add support for the rest of the basic types.
472
473 // Back referenced type.
474 case 'Q':
475 return parseTypeBackref(Mangled);
476
477 default: // unhandled.
478 return nullptr;
479 }
480}
481
482const char *Demangler::parseLName(OutputBuffer *Demangled, const char *Mangled,
483 unsigned long Len) {
484 switch (Len) {
485 case 6:
486 if (strncmp(Mangled, "__initZ", Len + 1) == 0) {
487 // The static initializer for a given symbol.
488 Demangled->prepend("initializer for ");
489 Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
490 Mangled += Len;
491 return Mangled;
492 }
493 if (strncmp(Mangled, "__vtblZ", Len + 1) == 0) {
494 // The vtable symbol for a given class.
495 Demangled->prepend("vtable for ");
496 Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
497 Mangled += Len;
498 return Mangled;
499 }
500 break;
501
502 case 7:
503 if (strncmp(Mangled, "__ClassZ", Len + 1) == 0) {
504 // The classinfo symbol for a given class.
505 Demangled->prepend("ClassInfo for ");
506 Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
507 Mangled += Len;
508 return Mangled;
509 }
510 break;
511
512 case 11:
513 if (strncmp(Mangled, "__InterfaceZ", Len + 1) == 0) {
514 // The interface symbol for a given class.
515 Demangled->prepend("Interface for ");
516 Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
517 Mangled += Len;
518 return Mangled;
519 }
520 break;
521
522 case 12:
523 if (strncmp(Mangled, "__ModuleInfoZ", Len + 1) == 0) {
524 // The ModuleInfo symbol for a given module.
525 Demangled->prepend("ModuleInfo for ");
526 Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
527 Mangled += Len;
528 return Mangled;
529 }
530 break;
531 }
532
533 *Demangled << StringView(Mangled, Len);
534 Mangled += Len;
535
536 return Mangled;
537}
538
539Demangler::Demangler(const char *Mangled)
540 : Str(Mangled), LastBackref(strlen(Mangled)) {}
541
542const char *Demangler::parseMangle(OutputBuffer *Demangled) {
543 return parseMangle(Demangled, this->Str);
544}
545
546char *llvm::dlangDemangle(const char *MangledName) {
547 if (MangledName == nullptr || strncmp(MangledName, "_D", 2) != 0)
548 return nullptr;
549
550 OutputBuffer Demangled;
551 if (strcmp(MangledName, "_Dmain") == 0) {
552 Demangled << "D main";
553 } else {
554
555 Demangler D = Demangler(MangledName);
556 MangledName = D.parseMangle(&Demangled);
557
558 // Check that the entire symbol was successfully demangled.
559 if (MangledName == nullptr || *MangledName != '\0') {
560 std::free(Demangled.getBuffer());
561 return nullptr;
562 }
563 }
564
565 // OutputBuffer's internal buffer is not null terminated and therefore we need
566 // to add it to comply with C null terminated strings.
567 if (Demangled.getCurrentPosition() > 0) {
568 Demangled << '\0';
569 Demangled.setCurrentPosition(Demangled.getCurrentPosition() - 1);
570 return Demangled.getBuffer();
571 }
572
573 std::free(Demangled.getBuffer());
574 return nullptr;
575}
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
itanium_demangle::ManglingParser< DefaultAllocator > Demangler
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
OutputBuffer & prepend(StringView R)
Definition: Utility.h:117
void setCurrentPosition(size_t NewPos)
Definition: Utility.h:167
size_t getCurrentPosition() const
Definition: Utility.h:166
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
char * dlangDemangle(const char *MangledName)
Type * parseType(StringRef Asm, SMDiagnostic &Err, const Module &M, const SlotMapping *Slots=nullptr)
Parse a type in the given string.
Definition: Parser.cpp:199