LLVM 22.0.0git
MicrosoftDemangle.cpp
Go to the documentation of this file.
1//===- MicrosoftDemangle.cpp ----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines a demangler for MSVC-style mangled symbols.
10//
11// This file has no dependencies on the rest of LLVM so that it can be
12// easily reused in other programs such as libcxxabi.
13//
14//===----------------------------------------------------------------------===//
15
17
23
24#include <cctype>
25#include <cstdio>
26#include <optional>
27#include <string_view>
28#include <tuple>
29
30using namespace llvm;
31using namespace ms_demangle;
32
/// Returns true if \p S is non-empty and its first character is one of the
/// ASCII decimal digits '0'..'9'.
///
/// The check is a plain range comparison rather than std::isdigit: handing
/// std::isdigit a `char` with a negative value (any byte >= 0x80 where `char`
/// is signed) is undefined behavior, and its answer depends on the C locale.
static bool startsWithDigit(std::string_view S) {
  if (S.empty())
    return false;
  const char First = S.front();
  return First >= '0' && First <= '9';
}
36
37struct NodeList {
38 Node *N = nullptr;
39 NodeList *Next = nullptr;
40};
41
42static bool consumeFront(std::string_view &S, char C) {
43 if (!llvm::itanium_demangle::starts_with(S, C))
44 return false;
45 S.remove_prefix(1);
46 return true;
47}
48
49static bool consumeFront(std::string_view &S, std::string_view C) {
50 if (!llvm::itanium_demangle::starts_with(S, C))
51 return false;
52 S.remove_prefix(C.size());
53 return true;
54}
55
56static bool consumeFront(std::string_view &S, std::string_view PrefixA,
57 std::string_view PrefixB, bool A) {
58 const std::string_view &Prefix = A ? PrefixA : PrefixB;
59 return consumeFront(S, Prefix);
60}
61
62static bool startsWith(std::string_view S, std::string_view PrefixA,
63 std::string_view PrefixB, bool A) {
64 const std::string_view &Prefix = A ? PrefixA : PrefixB;
65 return llvm::itanium_demangle::starts_with(S, Prefix);
66}
67
// Reports whether the pointer-like type encoding at the front of MangledName
// denotes a pointer to member (true) or not (false).
//
// MangledName is passed by value, so nothing is consumed from the caller's
// view. Error is cleared on entry and set to true when the characters that
// follow the pointer prefix match none of the encodings checked below.
68bool Demangler::isMemberPointer(std::string_view MangledName, bool &Error) {
69 Error = false;
// NOTE(review): front() is read without an emptiness check; per the comment in
// the default case this relies on the caller having checked isPointerType().
70 const char F = MangledName.front();
71 MangledName.remove_prefix(1);
72 switch (F) {
73 case '$':
74 // This is probably an rvalue reference (e.g. $$Q), and you cannot have an
75 // rvalue reference to a member.
76 return false;
77 case 'A':
78 // 'A' indicates a reference, and you cannot have a reference to a member
79 // function or member.
80 return false;
81 case 'P':
82 case 'Q':
83 case 'R':
84 case 'S':
85 // These 4 values indicate some kind of pointer, but we still don't know
86 // what.
87 break;
88 default:
89 // isMemberPointer() is called only if isPointerType() returns true,
90 // and it rejects other prefixes.
// NOTE(review): original line 91 is absent from this listing, so the statement
// that terminates the default case is not visible here.
92 }
93
94 // If it starts with a number, then 6 indicates a non-member function
95 // pointer, and 8 indicates a member function pointer.
96 if (startsWithDigit(MangledName)) {
97 if (MangledName[0] != '6' && MangledName[0] != '8') {
98 Error = true;
99 return false;
100 }
101 return (MangledName[0] == '8');
102 }
103
104 // Remove ext qualifiers since those can appear on either type and are
105 // therefore not indicative.
// The boolean results are deliberately ignored: each qualifier is optional.
106 consumeFront(MangledName, 'E'); // 64-bit
107 consumeFront(MangledName, 'I'); // restrict
108 consumeFront(MangledName, 'F'); // unaligned
// The result is discarded; presumably called only to skip over the qualifier
// -- confirm against its definition.
109 demanglePointerAuthQualifier(MangledName);
110
// Nothing left to classify after the optional qualifiers.
111 if (MangledName.empty()) {
112 Error = true;
113 return false;
114 }
115
116 // The next value should be either ABCD (non-member) or QRST (member).
117 switch (MangledName.front()) {
118 case 'A':
119 case 'B':
120 case 'C':
121 case 'D':
122 return false;
123 case 'Q':
124 case 'R':
125 case 'S':
126 case 'T':
127 return true;
128 default:
129 Error = true;
130 return false;
131 }
132}
133
// Probes MangledName for each special-intrinsic prefix in the order listed
// below; consumeFront() removes the prefix from MangledName when it matches.
//
// NOTE(review): the return-type line and the statement that follows every `if`
// (as well as the final statement of the function) are absent from this
// listing -- the embedded line numbers skip them -- so the value produced for
// each prefix cannot be confirmed from here.
135consumeSpecialIntrinsicKind(std::string_view &MangledName) {
136 if (consumeFront(MangledName, "?_7"))
138 if (consumeFront(MangledName, "?_8"))
140 if (consumeFront(MangledName, "?_9"))
142 if (consumeFront(MangledName, "?_A"))
144 if (consumeFront(MangledName, "?_B"))
146 if (consumeFront(MangledName, "?_C"))
148 if (consumeFront(MangledName, "?_P"))
150 if (consumeFront(MangledName, "?_R0"))
152 if (consumeFront(MangledName, "?_R1"))
154 if (consumeFront(MangledName, "?_R2"))
156 if (consumeFront(MangledName, "?_R3"))
158 if (consumeFront(MangledName, "?_R4"))
160 if (consumeFront(MangledName, "?_S"))
162 if (consumeFront(MangledName, "?__E"))
164 if (consumeFront(MangledName, "?__F"))
166 if (consumeFront(MangledName, "?__J"))
169}
170
171static bool startsWithLocalScopePattern(std::string_view S) {
172 if (!consumeFront(S, '?'))
173 return false;
174
175 size_t End = S.find('?');
176 if (End == std::string_view::npos)
177 return false;
178 std::string_view Candidate = S.substr(0, End);
179 if (Candidate.empty())
180 return false;
181
182 // \?[0-9]\?
183 // ?@? is the discriminator 0.
184 if (Candidate.size() == 1)
185 return Candidate[0] == '@' || (Candidate[0] >= '0' && Candidate[0] <= '9');
186
187 // If it's not 0-9, then it's an encoded number terminated with an @
188 if (Candidate.back() != '@')
189 return false;
190 Candidate.remove_suffix(1);
191
192 // An encoded number starts with B-P and all subsequent digits are in A-P.
193 // Note that the reason the first digit cannot be A is two fold. First, it
194 // would create an ambiguity with ?A which delimits the beginning of an
195 // anonymous namespace. Second, A represents 0, and you don't start a multi
196 // digit number with a leading 0. Presumably the anonymous namespace
197 // ambiguity is also why single digit encoded numbers use 0-9 rather than A-J.
198 if (Candidate[0] < 'B' || Candidate[0] > 'P')
199 return false;
200 Candidate.remove_prefix(1);
201 while (!Candidate.empty()) {
202 if (Candidate[0] < 'A' || Candidate[0] > 'P')
203 return false;
204 Candidate.remove_prefix(1);
205 }
206
207 return true;
208}
209
/// Returns true if \p S starts with one of the tag-type codes: 'T' (union),
/// 'U' (struct), 'V' (class) or 'W' (enum).
///
/// An empty view yields false instead of reading front() of an empty
/// string_view, which is undefined behavior.
static bool isTagType(std::string_view S) {
  if (S.empty())
    return false;

  switch (S.front()) {
  case 'T': // union
  case 'U': // struct
  case 'V': // class
  case 'W': // enum
    return true;
  }
  return false;
}
220
/// Returns true if \p S starts with '?', the marker tested for custom types.
/// An empty view yields false rather than indexing past the end of the view.
static bool isCustomType(std::string_view S) {
  return !S.empty() && S.front() == '?';
}
222
223static bool isPointerType(std::string_view S) {
224 if (llvm::itanium_demangle::starts_with(S, "$$Q")) // foo &&
225 return true;
226
227 switch (S.front()) {
228 case 'A': // foo &
229 case 'P': // foo *
230 case 'Q': // foo *const
231 case 'R': // foo *volatile
232 case 'S': // foo *const volatile
233 return true;
234 }
235 return false;
236}
237
/// Returns true if \p S starts with 'Y', the marker tested for array types.
/// An empty view yields false rather than indexing past the end of the view.
static bool isArrayType(std::string_view S) {
  return !S.empty() && S.front() == 'Y';
}
239
240static bool isFunctionType(std::string_view S) {
241 return llvm::itanium_demangle::starts_with(S, "$$A8@@") ||
242 llvm::itanium_demangle::starts_with(S, "$$A6");
243}
244
// Consumes an optional leading 'G' or 'H' from MangledName.
//
// NOTE(review): the return-type line and every return statement are absent
// from this listing (the embedded line numbers skip them), so the value
// produced for 'G', for 'H', and for neither cannot be confirmed from here.
246demangleFunctionRefQualifier(std::string_view &MangledName) {
247 if (consumeFront(MangledName, 'G'))
249 else if (consumeFront(MangledName, 'H'))
252}
253
// Consumes the pointer/reference prefix at the front of MangledName and
// returns the pair (qualifiers, affinity) it encodes:
//   "$$Q" -> (Q_None, RValueReference)
//   'A'   -> (Q_None, Reference)
//   'P'   -> (Q_None, Pointer)
//   'Q'   -> (Q_Const, Pointer)
//   'R'   -> (Q_Volatile, Pointer)
//   'S'   -> (Q_Const | Q_Volatile, ...)  -- see the note on case 'S' below
254static std::pair<Qualifiers, PointerAffinity>
255demanglePointerCVQualifiers(std::string_view &MangledName) {
256 if (consumeFront(MangledName, "$$Q"))
257 return std::make_pair(Q_None, PointerAffinity::RValueReference);
258
// front() is read without an emptiness check; the trailing comment states the
// precondition that makes this safe.
259 const char F = MangledName.front();
260 MangledName.remove_prefix(1);
261 switch (F) {
262 case 'A':
263 return std::make_pair(Q_None, PointerAffinity::Reference);
264 case 'P':
265 return std::make_pair(Q_None, PointerAffinity::Pointer);
266 case 'Q':
267 return std::make_pair(Q_Const, PointerAffinity::Pointer);
268 case 'R':
269 return std::make_pair(Q_Volatile, PointerAffinity::Pointer);
270 case 'S':
// NOTE(review): original line 272, which finishes this return expression, is
// absent from this listing.
271 return std::make_pair(Qualifiers(Q_Const | Q_Volatile),
273 }
274 // This function is only called if isPointerType() returns true,
275 // and it only returns true for the six cases listed above.
// NOTE(review): original line 276, the statement that follows this comment, is
// absent from this listing.
277}
278
// Copies the first Count payloads of a NodeList chain, starting at Head, into
// a freshly arena-allocated array stored in N->Nodes, and records Count in
// N->Count.
//
// NOTE(review): the first line(s) of the signature and the line that creates N
// (original lines 279 and 281) are absent from this listing. The loop
// dereferences Head once per element, so the chain must hold at least Count
// cells.
280 size_t Count) {
282 N->Count = Count;
283 N->Nodes = Arena.allocArray<Node *>(Count);
284 for (size_t I = 0; I < Count; ++I) {
285 N->Nodes[I] = Head->N;
286 Head = Head->Next;
287 }
288 return N;
289}
290
291std::string_view Demangler::copyString(std::string_view Borrowed) {
292 char *Stable = Arena.allocUnalignedBuffer(Borrowed.size());
293 // This is not a micro-optimization, it avoids UB, should Borrowed be an null
294 // buffer.
295 if (Borrowed.size())
296 std::memcpy(Stable, Borrowed.data(), Borrowed.size());
297
298 return {Stable, Borrowed.size()};
299}
300
// Builds a SpecialTableSymbolNode for one of the special table symbols.
//
// Steps visible below: a NamedIdentifierNode receives one of the
// backtick-quoted names selected by K; that identifier is qualified through
// demangleNameScopeChain(); a single '6' or '7' character is required next;
// qualifiers are parsed; finally a list of fully qualified type names,
// terminated by '@', is collected into STSN->TargetNames.
// On malformed input Error is set and nullptr is returned.
//
// NOTE(review): the return-type line, the second parameter line, every `case`
// label of the switch and the body of its default case are absent from this
// listing (the embedded line numbers skip them).
302Demangler::demangleSpecialTableSymbolNode(std::string_view &MangledName,
304 NamedIdentifierNode *NI = Arena.alloc<NamedIdentifierNode>();
305 switch (K) {
307 NI->Name = "`vftable'";
308 break;
310 NI->Name = "`vbtable'";
311 break;
313 NI->Name = "`local vftable'";
314 break;
316 NI->Name = "`RTTI Complete Object Locator'";
317 break;
318 default:
320 }
321 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, NI);
322 SpecialTableSymbolNode *STSN = Arena.alloc<SpecialTableSymbolNode>();
323 STSN->Name = QN;
324 bool IsMember = false;
325 if (MangledName.empty()) {
326 Error = true;
327 return nullptr;
328 }
// The character following the name must be '6' or '7'; it is consumed either
// way before being validated.
329 char Front = MangledName.front();
330 MangledName.remove_prefix(1);
331 if (Front != '6' && Front != '7') {
332 Error = true;
333 return nullptr;
334 }
335
// IsMember is assigned here but not read again in this function.
336 std::tie(STSN->Quals, IsMember) = demangleQualifiers(MangledName);
337
// Collect target names into a linked list until the terminating '@'.
338 NodeList *TargetCurrent = nullptr;
339 NodeList *TargetHead = nullptr;
340 size_t Count = 0;
341 while (!consumeFront(MangledName, '@')) {
342 ++Count;
343
// Append a new cell at the tail (or start the list on the first iteration).
344 NodeList *Next = Arena.alloc<NodeList>();
345 if (TargetCurrent)
346 TargetCurrent->Next = Next;
347 else
348 TargetHead = Next;
349
350 TargetCurrent = Next;
351 QualifiedNameNode *QN = demangleFullyQualifiedTypeName(MangledName);
352 if (Error)
353 return nullptr;
354 assert(QN);
355 TargetCurrent->N = QN;
356 }
357
// TargetNames is only assigned when at least one name was parsed.
358 if (Count > 0)
359 STSN->TargetNames = nodeListToNodeArray(Arena, TargetHead, Count);
360
361 return STSN;
362}
363
// Parses a local static guard variable.
//
// A LocalStaticGuardIdentifierNode (tagged with IsThread) is qualified through
// demangleNameScopeChain() and stored as the name of a new
// LocalStaticGuardVariableNode. The text that follows must be either "4IA"
// (IsVisible = false) or "5" (IsVisible = true); anything else sets Error and
// returns nullptr. If characters remain afterwards, an unsigned number is
// parsed into the identifier's ScopeIndex.
//
// NOTE(review): the return-type line (original line 364) is absent from this
// listing.
365Demangler::demangleLocalStaticGuard(std::string_view &MangledName,
366 bool IsThread) {
367 LocalStaticGuardIdentifierNode *LSGI =
368 Arena.alloc<LocalStaticGuardIdentifierNode>();
369 LSGI->IsThread = IsThread;
370 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, LSGI);
371 LocalStaticGuardVariableNode *LSGVN =
372 Arena.alloc<LocalStaticGuardVariableNode>();
373 LSGVN->Name = QN;
374
375 if (consumeFront(MangledName, "4IA"))
376 LSGVN->IsVisible = false;
377 else if (consumeFront(MangledName, "5"))
378 LSGVN->IsVisible = true;
379 else {
380 Error = true;
381 return nullptr;
382 }
383
// The scope index is optional: it is only parsed when input remains.
384 if (!MangledName.empty())
385 LSGI->ScopeIndex = demangleUnsigned(MangledName);
386 return LSGVN;
387}
388
// Tail of a helper that stores Name in Id->Name and returns Id.
//
// NOTE(review): the first signature line and the line that creates Id
// (original lines 389 and 391) are absent from this listing; presumably this
// is the synthesizeNamedIdentifier() helper called further down -- confirm.
390 std::string_view Name) {
392 Id->Name = Name;
393 return Id;
394}
395
// Tail of a helper that wraps a single identifier in a qualified name: QN
// receives a one-element NodeArrayNode whose only entry is Identifier, and QN
// is returned.
//
// NOTE(review): the first signature line and the line that creates QN
// (original lines 396 and 398) are absent from this listing; presumably this
// is the IdentifierNode overload of synthesizeQualifiedName() -- confirm.
397 IdentifierNode *Identifier) {
399 QN->Components = Arena.alloc<NodeArrayNode>();
400 QN->Components->Count = 1;
401 QN->Components->Nodes = Arena.allocArray<Node *>(1);
402 QN->Components->Nodes[0] = Identifier;
403 return QN;
404}
405
// Tail of a helper that forwards an identifier Id to
// synthesizeQualifiedName(Arena, Id) and returns the result.
//
// NOTE(review): the first signature line and the line that defines Id
// (original lines 406 and 408) are absent from this listing; presumably Id is
// built from Name -- confirm.
407 std::string_view Name) {
409 return synthesizeQualifiedName(Arena, Id);
410}
411
// Tail of a helper that fills in a variable symbol VSN: its Type is set to
// Type and its Name to a qualified name synthesized from VariableName; VSN is
// returned.
//
// NOTE(review): the first signature line and the line that creates VSN
// (original lines 412 and 415) are absent from this listing; presumably this
// is the synthesizeVariable() helper called elsewhere in the file -- confirm.
413 TypeNode *Type,
414 std::string_view VariableName) {
416 VSN->Type = Type;
417 VSN->Name = synthesizeQualifiedName(Arena, VariableName);
418 return VSN;
419}
420
// Parses a variable symbol that carries a name but no type.
//
// VariableName becomes the innermost identifier, which is then qualified by
// the scope chain parsed from MangledName. The symbol is accepted only if an
// "8" follows the name; otherwise Error is set and nullptr is returned.
//
// NOTE(review): the return-type line (original line 421) is absent from this
// listing.
422Demangler::demangleUntypedVariable(ArenaAllocator &Arena,
423 std::string_view &MangledName,
424 std::string_view VariableName) {
425 NamedIdentifierNode *NI = synthesizeNamedIdentifier(Arena, VariableName);
426 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, NI);
427 VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>();
428 VSN->Name = QN;
429 if (consumeFront(MangledName, "8"))
430 return VSN;
431
432 Error = true;
433 return nullptr;
434}
435
// Parses an RTTI base class descriptor.
//
// Four numbers are read in order (NVOffset, VBPtrOffset, VBTableOffset,
// Flags); Error is checked once after all four. The descriptor node is then
// used as the innermost identifier of the name of a new VariableSymbolNode,
// which is returned.
//
// NOTE(review): the return-type line (original line 436) is absent from this
// listing.
437Demangler::demangleRttiBaseClassDescriptorNode(ArenaAllocator &Arena,
438 std::string_view &MangledName) {
439 RttiBaseClassDescriptorNode *RBCDN =
440 Arena.alloc<RttiBaseClassDescriptorNode>();
441 RBCDN->NVOffset = demangleUnsigned(MangledName);
// VBPtrOffset is the only field parsed as a signed number.
442 RBCDN->VBPtrOffset = demangleSigned(MangledName);
443 RBCDN->VBTableOffset = demangleUnsigned(MangledName);
444 RBCDN->Flags = demangleUnsigned(MangledName);
445 if (Error)
446 return nullptr;
447
448 VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>();
449 VSN->Name = demangleNameScopeChain(MangledName, RBCDN);
// The result is ignored, so a missing '8' is not reported as an error here.
450 consumeFront(MangledName, '8');
451 return VSN;
452}
453
// Parses a dynamic initializer / destructor stub and returns it as a function
// symbol named by a DynamicStructorIdentifierNode.
//
// The declarator that follows is either a variable (the stub then refers to
// that variable, and a function encoding follows after one or two '@') or a
// function (the stub then takes over that function's name). IsDestructor is
// recorded on the identifier node. On malformed input Error is set and nullptr
// is returned.
//
// NOTE(review): the return-type line (original line 454) is absent from this
// listing.
455Demangler::demangleInitFiniStub(std::string_view &MangledName,
456 bool IsDestructor) {
457 DynamicStructorIdentifierNode *DSIN =
458 Arena.alloc<DynamicStructorIdentifierNode>();
459 DSIN->IsDestructor = IsDestructor;
460
// A leading '?' marks the form that must resolve to a static data member.
461 bool IsKnownStaticDataMember = false;
462 if (consumeFront(MangledName, '?'))
463 IsKnownStaticDataMember = true;
464
465 SymbolNode *Symbol = demangleDeclarator(MangledName);
466 if (Error)
467 return nullptr;
468
469 FunctionSymbolNode *FSN = nullptr;
470
471 if (Symbol->kind() == NodeKind::VariableSymbol) {
472 DSIN->Variable = static_cast<VariableSymbolNode *>(Symbol);
473
474 // Older versions of clang mangled this type of symbol incorrectly. They
475 // would omit the leading ? and they would only emit a single @ at the end.
476 // The correct mangling is a leading ? and 2 trailing @ signs. Handle
477 // both cases.
478 int AtCount = IsKnownStaticDataMember ? 2 : 1;
479 for (int I = 0; I < AtCount; ++I) {
480 if (consumeFront(MangledName, '@'))
481 continue;
482 Error = true;
483 return nullptr;
484 }
485
// FSN may be null here; it is returned unchanged in that case.
486 FSN = demangleFunctionEncoding(MangledName);
487 if (FSN)
488 FSN->Name = synthesizeQualifiedName(Arena, DSIN);
489 } else {
490 if (IsKnownStaticDataMember) {
491 // This was supposed to be a static data member, but we got a function.
492 Error = true;
493 return nullptr;
494 }
495
// The original function name moves onto the identifier node, and the function
// symbol is renamed to the synthesized stub name.
496 FSN = static_cast<FunctionSymbolNode *>(Symbol);
497 DSIN->Name = Symbol->Name;
498 FSN->Name = synthesizeQualifiedName(Arena, DSIN);
499 }
500
501 return FSN;
502}
503
// Dispatches on the special-intrinsic kind SIK and hands MangledName to the
// matching specialised parser. Paths that leave the switch through `break`
// set Error and return nullptr.
//
// NOTE(review): the line that defines SIK and every `case` label of the switch
// are absent from this listing (the embedded line numbers skip them), so which
// kind selects which statement cannot be confirmed from here.
504SymbolNode *Demangler::demangleSpecialIntrinsic(std::string_view &MangledName) {
506
507 switch (SIK) {
509 return nullptr;
511 return demangleStringLiteral(MangledName);
516 return demangleSpecialTableSymbolNode(MangledName, SIK);
518 return demangleVcallThunkNode(MangledName);
520 return demangleLocalStaticGuard(MangledName, /*IsThread=*/false);
522 return demangleLocalStaticGuard(MangledName, /*IsThread=*/true);
// This case parses a type, requires "@8" and then the end of the input; any
// failure breaks out to the common error path below.
524 TypeNode *T = demangleType(MangledName, QualifierMangleMode::Result);
525 if (Error)
526 break;
527 if (!consumeFront(MangledName, "@8"))
528 break;
529 if (!MangledName.empty())
530 break;
531 return synthesizeVariable(Arena, T, "`RTTI Type Descriptor'");
532 }
534 return demangleUntypedVariable(Arena, MangledName,
535 "`RTTI Base Class Array'");
537 return demangleUntypedVariable(Arena, MangledName,
538 "`RTTI Class Hierarchy Descriptor'");
540 return demangleRttiBaseClassDescriptorNode(Arena, MangledName);
542 return demangleInitFiniStub(MangledName, /*IsDestructor=*/false);
544 return demangleInitFiniStub(MangledName, /*IsDestructor=*/true);
547 // It's unclear which tools produces these manglings, so demangling
548 // support is not (yet?) implemented.
549 break;
551 DEMANGLE_UNREACHABLE; // Never returned by consumeSpecialIntrinsicKind.
552 }
// Common failure exit for every `break` above.
553 Error = true;
554 return nullptr;
555}
556
// Entry point for "?<code>" function identifiers. Asserts that MangledName
// starts with '?', strips it, and then forwards to the two-argument overload,
// choosing among three calls depending on whether the remainder starts with
// "__", with "_", or with neither (the prefix is consumed when present).
// An input consisting of only "?" sets Error and returns nullptr.
//
// NOTE(review): the return-type line and the lines carrying the second
// argument of each forwarded call (original lines 557, 568, 571, 573) are
// absent from this listing.
558Demangler::demangleFunctionIdentifierCode(std::string_view &MangledName) {
559 assert(llvm::itanium_demangle::starts_with(MangledName, '?'));
560 MangledName.remove_prefix(1);
561 if (MangledName.empty()) {
562 Error = true;
563 return nullptr;
564 }
565
// "__" must be tested before "_", since the latter is a prefix of the former.
566 if (consumeFront(MangledName, "__"))
567 return demangleFunctionIdentifierCode(
569 if (consumeFront(MangledName, "_"))
570 return demangleFunctionIdentifierCode(MangledName,
572 return demangleFunctionIdentifierCode(MangledName,
574}
575
// Allocates a StructorIdentifierNode and records whether it denotes a
// destructor. MangledName is not read or modified here.
//
// NOTE(review): the return-type line (original line 576) is absent from this
// listing.
577Demangler::demangleStructorIdentifier(std::string_view &MangledName,
578 bool IsDestructor) {
579 StructorIdentifierNode *N = Arena.alloc<StructorIdentifierNode>();
580 N->IsDestructor = IsDestructor;
581 return N;
582}
583
// Allocates an empty ConversionOperatorIdentifierNode and returns it.
// MangledName is not read or modified here, and no field of the node is set.
//
// NOTE(review): the return-type line (original line 584) is absent from this
// listing.
585Demangler::demangleConversionOperatorIdentifier(std::string_view &MangledName) {
586 ConversionOperatorIdentifierNode *N =
587 Arena.alloc<ConversionOperatorIdentifierNode>();
588 return N;
589}
590
// Allocates a LiteralOperatorIdentifierNode whose Name is the simple string
// parsed from MangledName; the string is parsed with Memorize=false.
//
// NOTE(review): the return-type line (original line 591) is absent from this
// listing. Error is not inspected here after demangleSimpleString().
592Demangler::demangleLiteralOperatorIdentifier(std::string_view &MangledName) {
593 LiteralOperatorIdentifierNode *N =
594 Arena.alloc<LiteralOperatorIdentifierNode>();
595 N->Name = demangleSimpleString(MangledName, /*Memorize=*/false);
596 return N;
597}
598
// Maps an operator code character CH, within the group selected by Group, to
// an IntrinsicFunctionKind via three 36-entry lookup tables (Basic, Under,
// DoubleUnder). CH must be in '0'-'9' or 'A'-'Z'; any other character sets
// Error and yields IFK::None.
//
// NOTE(review): the return-type line, the second parameter line, the `case`
// labels of the final switch and the statement after it are absent from this
// listing (the embedded line numbers skip them).
600Demangler::translateIntrinsicFunctionCode(char CH,
602 using IFK = IntrinsicFunctionKind;
603 if (!(CH >= '0' && CH <= '9') && !(CH >= 'A' && CH <= 'Z')) {
604 Error = true;
605 return IFK::None;
606 }
607
608 // Not all ? identifiers are intrinsics *functions*. This function only maps
609 // operator codes for the special functions, all others are handled elsewhere,
610 // hence the IFK::None entries in the table.
611 static IFK Basic[36] = {
612 IFK::None, // ?0 # Foo::Foo()
613 IFK::None, // ?1 # Foo::~Foo()
614 IFK::New, // ?2 # operator new
615 IFK::Delete, // ?3 # operator delete
616 IFK::Assign, // ?4 # operator=
617 IFK::RightShift, // ?5 # operator>>
618 IFK::LeftShift, // ?6 # operator<<
619 IFK::LogicalNot, // ?7 # operator!
620 IFK::Equals, // ?8 # operator==
621 IFK::NotEquals, // ?9 # operator!=
622 IFK::ArraySubscript, // ?A # operator[]
623 IFK::None, // ?B # Foo::operator <type>()
624 IFK::Pointer, // ?C # operator->
625 IFK::Dereference, // ?D # operator*
626 IFK::Increment, // ?E # operator++
627 IFK::Decrement, // ?F # operator--
628 IFK::Minus, // ?G # operator-
629 IFK::Plus, // ?H # operator+
630 IFK::BitwiseAnd, // ?I # operator&
631 IFK::MemberPointer, // ?J # operator->*
632 IFK::Divide, // ?K # operator/
633 IFK::Modulus, // ?L # operator%
634 IFK::LessThan, // ?M operator<
635 IFK::LessThanEqual, // ?N operator<=
636 IFK::GreaterThan, // ?O operator>
637 IFK::GreaterThanEqual, // ?P operator>=
638 IFK::Comma, // ?Q operator,
639 IFK::Parens, // ?R operator()
640 IFK::BitwiseNot, // ?S operator~
641 IFK::BitwiseXor, // ?T operator^
642 IFK::BitwiseOr, // ?U operator|
643 IFK::LogicalAnd, // ?V operator&&
644 IFK::LogicalOr, // ?W operator||
645 IFK::TimesEqual, // ?X operator*=
646 IFK::PlusEqual, // ?Y operator+=
647 IFK::MinusEqual, // ?Z operator-=
648 };
649 static IFK Under[36] = {
650 IFK::DivEqual, // ?_0 operator/=
651 IFK::ModEqual, // ?_1 operator%=
652 IFK::RshEqual, // ?_2 operator>>=
653 IFK::LshEqual, // ?_3 operator<<=
654 IFK::BitwiseAndEqual, // ?_4 operator&=
655 IFK::BitwiseOrEqual, // ?_5 operator|=
656 IFK::BitwiseXorEqual, // ?_6 operator^=
657 IFK::None, // ?_7 # vftable
658 IFK::None, // ?_8 # vbtable
659 IFK::None, // ?_9 # vcall
660 IFK::None, // ?_A # typeof
661 IFK::None, // ?_B # local static guard
662 IFK::None, // ?_C # string literal
663 IFK::VbaseDtor, // ?_D # vbase destructor
664 IFK::VecDelDtor, // ?_E # vector deleting destructor
665 IFK::DefaultCtorClosure, // ?_F # default constructor closure
666 IFK::ScalarDelDtor, // ?_G # scalar deleting destructor
667 IFK::VecCtorIter, // ?_H # vector constructor iterator
668 IFK::VecDtorIter, // ?_I # vector destructor iterator
669 IFK::VecVbaseCtorIter, // ?_J # vector vbase constructor iterator
670 IFK::VdispMap, // ?_K # virtual displacement map
671 IFK::EHVecCtorIter, // ?_L # eh vector constructor iterator
672 IFK::EHVecDtorIter, // ?_M # eh vector destructor iterator
673 IFK::EHVecVbaseCtorIter, // ?_N # eh vector vbase constructor iterator
674 IFK::CopyCtorClosure, // ?_O # copy constructor closure
675 IFK::None, // ?_P<name> # udt returning <name>
676 IFK::None, // ?_Q # <unknown>
677 IFK::None, // ?_R0 - ?_R4 # RTTI Codes
678 IFK::None, // ?_S # local vftable
679 IFK::LocalVftableCtorClosure, // ?_T # local vftable constructor closure
680 IFK::ArrayNew, // ?_U operator new[]
681 IFK::ArrayDelete, // ?_V operator delete[]
682 IFK::None, // ?_W <unused>
683 IFK::None, // ?_X <unused>
684 IFK::None, // ?_Y <unused>
685 IFK::None, // ?_Z <unused>
686 };
687 static IFK DoubleUnder[36] = {
688 IFK::None, // ?__0 <unused>
689 IFK::None, // ?__1 <unused>
690 IFK::None, // ?__2 <unused>
691 IFK::None, // ?__3 <unused>
692 IFK::None, // ?__4 <unused>
693 IFK::None, // ?__5 <unused>
694 IFK::None, // ?__6 <unused>
695 IFK::None, // ?__7 <unused>
696 IFK::None, // ?__8 <unused>
697 IFK::None, // ?__9 <unused>
698 IFK::ManVectorCtorIter, // ?__A managed vector ctor iterator
699 IFK::ManVectorDtorIter, // ?__B managed vector dtor iterator
700 IFK::EHVectorCopyCtorIter, // ?__C EH vector copy ctor iterator
701 IFK::EHVectorVbaseCopyCtorIter, // ?__D EH vector vbase copy ctor iter
702 IFK::None, // ?__E dynamic initializer for `T'
703 IFK::None, // ?__F dynamic atexit destructor for `T'
704 IFK::VectorCopyCtorIter, // ?__G vector copy constructor iter
705 IFK::VectorVbaseCopyCtorIter, // ?__H vector vbase copy ctor iter
706 IFK::ManVectorVbaseCopyCtorIter, // ?__I managed vector vbase copy ctor
707 // iter
708 IFK::None, // ?__J local static thread guard
709 IFK::None, // ?__K operator ""_name
710 IFK::CoAwait, // ?__L operator co_await
711 IFK::Spaceship, // ?__M operator<=>
712 IFK::None, // ?__N <unused>
713 IFK::None, // ?__O <unused>
714 IFK::None, // ?__P <unused>
715 IFK::None, // ?__Q <unused>
716 IFK::None, // ?__R <unused>
717 IFK::None, // ?__S <unused>
718 IFK::None, // ?__T <unused>
719 IFK::None, // ?__U <unused>
720 IFK::None, // ?__V <unused>
721 IFK::None, // ?__W <unused>
722 IFK::None, // ?__X <unused>
723 IFK::None, // ?__Y <unused>
724 IFK::None, // ?__Z <unused>
725 };
726
// '0'-'9' map to slots 0-9 and 'A'-'Z' to slots 10-35, matching the 36-entry
// tables; the range check at the top of the function keeps Index in bounds.
727 int Index = (CH >= '0' && CH <= '9') ? (CH - '0') : (CH - 'A' + 10);
728 switch (Group) {
730 return Basic[Index];
732 return Under[Index];
734 return DoubleUnder[Index];
735 }
737}
738
// Parses the single code character that follows a "?", "?_" or "?__" prefix
// (selected by Group) and returns the matching identifier node.
//
// In every visible branch the code character is consumed first. Codes '0' and
// '1' produce a structor identifier ('1' being the destructor), 'B' produces a
// conversion operator identifier and 'K' a literal operator identifier; all
// other codes become an IntrinsicFunctionIdentifierNode built from
// translateIntrinsicFunctionCode(). Empty input sets Error and returns
// nullptr.
//
// NOTE(review): the return-type line, the second parameter line, the `case`
// labels of the outer switch and the statement after it are absent from this
// listing, so which Group value owns which inner switch cannot be confirmed
// from here.
740Demangler::demangleFunctionIdentifierCode(std::string_view &MangledName,
742 if (MangledName.empty()) {
743 Error = true;
744 return nullptr;
745 }
746 const char CH = MangledName.front();
747 switch (Group) {
749 MangledName.remove_prefix(1);
750 switch (CH) {
751 case '0':
752 case '1':
753 return demangleStructorIdentifier(MangledName, CH == '1');
754 case 'B':
755 return demangleConversionOperatorIdentifier(MangledName);
756 default:
757 return Arena.alloc<IntrinsicFunctionIdentifierNode>(
758 translateIntrinsicFunctionCode(CH, Group));
759 }
761 MangledName.remove_prefix(1);
762 return Arena.alloc<IntrinsicFunctionIdentifierNode>(
763 translateIntrinsicFunctionCode(CH, Group));
765 MangledName.remove_prefix(1);
766 switch (CH) {
767 case 'K':
768 return demangleLiteralOperatorIdentifier(MangledName);
769 default:
770 return Arena.alloc<IntrinsicFunctionIdentifierNode>(
771 translateIntrinsicFunctionCode(CH, Group));
772 }
773 }
774
776}
777
// Parses the encoding that follows a symbol's qualified name.
//
// A leading '0'..'4' selects a variable: its storage class is parsed and the
// rest is handled by demangleVariableEncoding(). Anything else is parsed as a
// function encoding. Empty input sets Error and returns nullptr.
778SymbolNode *Demangler::demangleEncodedSymbol(std::string_view &MangledName,
779 QualifiedNameNode *Name) {
780 if (MangledName.empty()) {
781 Error = true;
782 return nullptr;
783 }
784
785 // Read a variable.
786 switch (MangledName.front()) {
787 case '0':
788 case '1':
789 case '2':
790 case '3':
791 case '4': {
792 StorageClass SC = demangleVariableStorageClass(MangledName);
793 return demangleVariableEncoding(MangledName, SC);
794 }
795 }
796 FunctionSymbolNode *FSN = demangleFunctionEncoding(MangledName);
797
798 IdentifierNode *UQN = Name->getUnqualifiedIdentifier();
// NOTE(review): original line 799, which opens the block closed on line 804,
// is absent from this listing. Judging by the cast below it presumably tests
// that UQN is a conversion operator identifier -- confirm.
800 ConversionOperatorIdentifierNode *COIN =
801 static_cast<ConversionOperatorIdentifierNode *>(UQN);
// The conversion operator's target type is taken from the parsed function's
// return type, when a function was parsed at all.
802 if (FSN)
803 COIN->TargetType = FSN->Signature->ReturnType;
804 }
805 return FSN;
806}
807
// Parses a fully qualified symbol name followed by its encoding and returns
// the resulting symbol with Name set to the parsed qualified name. Returns
// nullptr whenever Error is set by one of the sub-parsers.
808SymbolNode *Demangler::demangleDeclarator(std::string_view &MangledName) {
809 // What follows is a main symbol name. This may include namespaces or class
810 // back references.
811 QualifiedNameNode *QN = demangleFullyQualifiedSymbolName(MangledName);
812 if (Error)
813 return nullptr;
814
815 SymbolNode *Symbol = demangleEncodedSymbol(MangledName, QN);
816 if (Error)
817 return nullptr;
818 Symbol->Name = QN;
819
820 IdentifierNode *UQN = QN->getUnqualifiedIdentifier();
// NOTE(review): original line 821, which opens the block closed on line 828,
// is absent from this listing. Judging by the cast below it presumably tests
// that UQN is a conversion operator identifier -- confirm.
822 ConversionOperatorIdentifierNode *COIN =
823 static_cast<ConversionOperatorIdentifierNode *>(UQN);
// A conversion operator whose target type was never filled in is rejected.
824 if (!COIN->TargetType) {
825 Error = true;
826 return nullptr;
827 }
828 }
829 return Symbol;
830}
831
832SymbolNode *Demangler::demangleMD5Name(std::string_view &MangledName) {
833 assert(llvm::itanium_demangle::starts_with(MangledName, "??@"));
834 // This is an MD5 mangled name. We can't demangle it, just return the
835 // mangled name.
836 // An MD5 mangled name is ??@ followed by 32 characters and a terminating @.
837 size_t MD5Last = MangledName.find('@', strlen("??@"));
838 if (MD5Last == std::string_view::npos) {
839 Error = true;
840 return nullptr;
841 }
842 const char *Start = MangledName.data();
843 const size_t StartSize = MangledName.size();
844 MangledName.remove_prefix(MD5Last + 1);
845
846 // There are two additional special cases for MD5 names:
847 // 1. For complete object locators where the object name is long enough
848 // for the object to have an MD5 name, the complete object locator is
849 // called ??@...@??_R4@ (with a trailing "??_R4@" instead of the usual
850 // leading "??_R4". This is handled here.
851 // 2. For catchable types, in versions of MSVC before 2015 (<1900) or after
852 // 2017.2 (>= 1914), the catchable type mangling is _CT??@...@??@...@8
853 // instead of_CT??@...@8 with just one MD5 name. Since we don't yet
854 // demangle catchable types anywhere, this isn't handled for MD5 names
855 // either.
856 consumeFront(MangledName, "??_R4@");
857
858 assert(MangledName.size() < StartSize);
859 const size_t Count = StartSize - MangledName.size();
860 std::string_view MD5(Start, Count);
861 SymbolNode *S = Arena.alloc<SymbolNode>(NodeKind::Md5Symbol);
862 S->Name = synthesizeQualifiedName(Arena, MD5);
863
864 return S;
865}
866
867SymbolNode *Demangler::demangleTypeinfoName(std::string_view &MangledName) {
868 assert(llvm::itanium_demangle::starts_with(MangledName, '.'));
869 consumeFront(MangledName, '.');
870
871 TypeNode *T = demangleType(MangledName, QualifierMangleMode::Result);
872 if (Error || !MangledName.empty()) {
873 Error = true;
874 return nullptr;
875 }
876 return synthesizeVariable(Arena, T, "`RTTI Type Descriptor Name'");
877}
878
879// Parser entry point.
880SymbolNode *Demangler::parse(std::string_view &MangledName) {
881 // Typeinfo names are strings stored in RTTI data. They're not symbol names.
882 // It's still useful to demangle them. They're the only demangled entity
883 // that doesn't start with a "?" but a ".".
884 if (llvm::itanium_demangle::starts_with(MangledName, '.'))
885 return demangleTypeinfoName(MangledName);
886
887 if (llvm::itanium_demangle::starts_with(MangledName, "??@"))
888 return demangleMD5Name(MangledName);
889
890 // MSVC-style mangled symbols must start with '?'.
891 if (!llvm::itanium_demangle::starts_with(MangledName, '?')) {
892 Error = true;
893 return nullptr;
894 }
895
896 consumeFront(MangledName, '?');
897
898 // ?$ is a template instantiation, but all other names that start with ? are
899 // operators / special names.
900 if (SymbolNode *SI = demangleSpecialIntrinsic(MangledName))
901 return SI;
902
903 return demangleDeclarator(MangledName);
904}
905
906TagTypeNode *Demangler::parseTagUniqueName(std::string_view &MangledName) {
907 if (!consumeFront(MangledName, ".?A")) {
908 Error = true;
909 return nullptr;
910 }
911 consumeFront(MangledName, ".?A");
912 if (MangledName.empty()) {
913 Error = true;
914 return nullptr;
915 }
916
917 return demangleClassType(MangledName);
918}
919
920// <type-encoding> ::= <storage-class> <variable-type>
921// <storage-class> ::= 0 # private static member
922// ::= 1 # protected static member
923// ::= 2 # public static member
924// ::= 3 # global
925// ::= 4 # static local
926
// Parses the type and trailing qualifiers of a variable whose storage class SC
// has already been read, filling in VSN->Type and VSN->SC. Returns nullptr if
// the type parse sets Error.
//
// NOTE(review): the return-type line, the line that creates VSN and the `case`
// label that opens the pointer branch (original lines 927, 930, 941) are
// absent from this listing.
928Demangler::demangleVariableEncoding(std::string_view &MangledName,
929 StorageClass SC) {
931
932 VSN->Type = demangleType(MangledName, QualifierMangleMode::Drop);
933 VSN->SC = SC;
934
935 if (Error)
936 return nullptr;
937
938 // <variable-type> ::= <type> <cvr-qualifiers>
939 // ::= <type> <pointee-cvr-qualifiers> # pointers, references
940 switch (VSN->Type->kind()) {
942 PointerTypeNode *PTN = static_cast<PointerTypeNode *>(VSN->Type);
943
// Extended qualifiers are merged into the pointer itself ...
944 Qualifiers ExtraChildQuals = Q_None;
945 PTN->Quals = Qualifiers(VSN->Type->Quals |
946 demanglePointerExtQualifiers(MangledName));
947
// ... while the ordinary qualifiers parsed next are merged into the pointee.
// IsMember is assigned here but not read again in this function.
948 bool IsMember = false;
949 std::tie(ExtraChildQuals, IsMember) = demangleQualifiers(MangledName);
950
// For pointers with a class parent a qualified type name is parsed and its
// result discarded.
951 if (PTN->ClassParent) {
952 QualifiedNameNode *BackRefName =
953 demangleFullyQualifiedTypeName(MangledName);
954 (void)BackRefName;
955 }
956 PTN->Pointee->Quals = Qualifiers(PTN->Pointee->Quals | ExtraChildQuals);
957
958 break;
959 }
960 default:
// Every other type simply takes the parsed qualifiers, replacing its own.
961 VSN->Type->Quals = demangleQualifiers(MangledName).first;
962 break;
963 }
964
965 return VSN;
966}
967
968// Sometimes numbers are encoded in mangled symbols. For example,
969// "int (*x)[20]" is a valid C type (x is a pointer to an array of
970// length 20), so we need some way to embed numbers as part of symbols.
971// This function parses it.
972//
973// <number> ::= [?] <non-negative integer>
974//
975// <non-negative integer> ::= <decimal digit> # when 1 <= Number <= 10
976// ::= <hex digit>+ @ # when Number == 0 or >= 10
977//
978// <hex-digit> ::= [A-P] # A = 0, B = 1, ...
979std::pair<uint64_t, bool>
980Demangler::demangleNumber(std::string_view &MangledName) {
981 bool IsNegative = consumeFront(MangledName, '?');
982
983 if (startsWithDigit(MangledName)) {
984 uint64_t Ret = MangledName[0] - '0' + 1;
985 MangledName.remove_prefix(1);
986 return {Ret, IsNegative};
987 }
988
989 uint64_t Ret = 0;
990 for (size_t i = 0; i < MangledName.size(); ++i) {
991 char C = MangledName[i];
992 if (C == '@') {
993 MangledName.remove_prefix(i + 1);
994 return {Ret, IsNegative};
995 }
996 if ('A' <= C && C <= 'P') {
997 Ret = (Ret << 4) + (C - 'A');
998 continue;
999 }
1000 break;
1001 }
1002
1003 Error = true;
1004 return {0ULL, false};
1005}
1006
1007uint64_t Demangler::demangleUnsigned(std::string_view &MangledName) {
1008 bool IsNegative = false;
1009 uint64_t Number = 0;
1010 std::tie(Number, IsNegative) = demangleNumber(MangledName);
1011 if (IsNegative)
1012 Error = true;
1013 return Number;
1014}
1015
1016int64_t Demangler::demangleSigned(std::string_view &MangledName) {
1017 bool IsNegative = false;
1018 uint64_t Number = 0;
1019 std::tie(Number, IsNegative) = demangleNumber(MangledName);
1020 if (Number > INT64_MAX)
1021 Error = true;
1022 int64_t I = static_cast<int64_t>(Number);
1023 return IsNegative ? -I : I;
1024}
1025
1026// First 10 strings can be referenced by special BackReferences ?0, ?1, ..., ?9.
1027// Memorize it.
1028void Demangler::memorizeString(std::string_view S) {
1029 if (Backrefs.NamesCount >= BackrefContext::Max)
1030 return;
1031 for (size_t i = 0; i < Backrefs.NamesCount; ++i)
1032 if (S == Backrefs.Names[i]->Name)
1033 return;
1034 NamedIdentifierNode *N = Arena.alloc<NamedIdentifierNode>();
1035 N->Name = S;
1036 Backrefs.Names[Backrefs.NamesCount++] = N;
1037}
1038
// Resolves a single-digit name back-reference. The leading digit selects an
// entry of Backrefs.Names; the digit is consumed only when the index is valid.
// An index at or beyond Backrefs.NamesCount sets Error and returns nullptr.
//
// NOTE(review): the return-type line (original line 1039) is absent from this
// listing.
1040Demangler::demangleBackRefName(std::string_view &MangledName) {
1041 assert(startsWithDigit(MangledName));
1042
1043 size_t I = MangledName[0] - '0';
1044 if (I >= Backrefs.NamesCount) {
1045 Error = true;
1046 return nullptr;
1047 }
1048
1049 MangledName.remove_prefix(1);
1050 return Backrefs.Names[I];
1051}
1052
1053void Demangler::memorizeIdentifier(IdentifierNode *Identifier) {
1054 // Render this class template name into a string buffer so that we can
1055 // memorize it for the purpose of back-referencing.
1056 OutputBuffer OB;
1057 Identifier->output(OB, OF_Default);
1058 std::string_view Owned = copyString(OB);
1059 memorizeString(Owned);
1060 std::free(OB.getBuffer());
1061}
1062
// Parses a template instantiation name ("?$" followed by the template name and
// its parameter list) and returns the identifier with TemplateParams attached.
// Returns nullptr when Error is set.
//
// NOTE(review): the return-type line and the condition guarding the error path
// inside the NBB_Template branch (original lines 1063, 1085-1086) are absent
// from this listing.
1064Demangler::demangleTemplateInstantiationName(std::string_view &MangledName,
1065 NameBackrefBehavior NBB) {
1066 assert(llvm::itanium_demangle::starts_with(MangledName, "?$"));
1067 consumeFront(MangledName, "?$");
1068
// The template name and its parameters are parsed against a fresh back-
// reference context; the outer context is swapped back in afterwards,
// regardless of whether parsing succeeded.
1069 BackrefContext OuterContext;
1070 std::swap(OuterContext, Backrefs);
1071
1072 IdentifierNode *Identifier =
1073 demangleUnqualifiedSymbolName(MangledName, NBB_Simple);
1074 if (!Error)
1075 Identifier->TemplateParams = demangleTemplateParameterList(MangledName);
1076
1077 std::swap(OuterContext, Backrefs);
1078 if (Error)
1079 return nullptr;
1080
1081 if (NBB & NBB_Template) {
1082 // NBB_Template is only set for types and non-leaf names ("a::" in "a::b").
1083 // Structors and conversion operators only makes sense in a leaf name, so
1084 // reject them in NBB_Template contexts.
1087 Error = true;
1088 return nullptr;
1089 }
1090
// Memorized in the outer context, which has been restored by this point.
1091 memorizeIdentifier(Identifier);
1092 }
1093
1094 return Identifier;
1095}
1096
1098Demangler::demangleSimpleName(std::string_view &MangledName, bool Memorize) {
1099 std::string_view S = demangleSimpleString(MangledName, Memorize);
1100 if (Error)
1101 return nullptr;
1102
1103 NamedIdentifierNode *Name = Arena.alloc<NamedIdentifierNode>();
1104 Name->Name = S;
1105 return Name;
1106}
1107
// Returns true if C is one of the sixteen letters 'A'..'P' used as rebased
// hex digits.
static bool isRebasedHexDigit(char C) {
  if (C < 'A')
    return false;
  return C <= 'P';
}
1109
// Converts a rebased hex digit to its numeric value: 'A'..'J' map to 0..9 and
// 'K'..'P' map to 10..15. C must be in the range 'A'..'P'.
static uint8_t rebasedHexDigitToNumber(char C) {
  assert(C >= 'A' && C <= 'P');
  return (C <= 'J') ? (C - 'A') : (10 + C - 'K');
}
1114
1115uint8_t Demangler::demangleCharLiteral(std::string_view &MangledName) {
1116 assert(!MangledName.empty());
1117 if (!llvm::itanium_demangle::starts_with(MangledName, '?')) {
1118 const uint8_t F = MangledName.front();
1119 MangledName.remove_prefix(1);
1120 return F;
1121 }
1122
1123 MangledName.remove_prefix(1);
1124 if (MangledName.empty())
1125 goto CharLiteralError;
1126
1127 if (consumeFront(MangledName, '$')) {
1128 // Two hex digits
1129 if (MangledName.size() < 2)
1130 goto CharLiteralError;
1131 std::string_view Nibbles = MangledName.substr(0, 2);
1132 if (!isRebasedHexDigit(Nibbles[0]) || !isRebasedHexDigit(Nibbles[1]))
1133 goto CharLiteralError;
1134 // Don't append the null terminator.
1135 uint8_t C1 = rebasedHexDigitToNumber(Nibbles[0]);
1136 uint8_t C2 = rebasedHexDigitToNumber(Nibbles[1]);
1137 MangledName.remove_prefix(2);
1138 return (C1 << 4) | C2;
1139 }
1140
1141 if (startsWithDigit(MangledName)) {
1142 const char *Lookup = ",/\\:. \n\t'-";
1143 char C = Lookup[MangledName[0] - '0'];
1144 MangledName.remove_prefix(1);
1145 return C;
1146 }
1147
1148 if (MangledName[0] >= 'a' && MangledName[0] <= 'z') {
1149 char Lookup[26] = {'\xE1', '\xE2', '\xE3', '\xE4', '\xE5', '\xE6', '\xE7',
1150 '\xE8', '\xE9', '\xEA', '\xEB', '\xEC', '\xED', '\xEE',
1151 '\xEF', '\xF0', '\xF1', '\xF2', '\xF3', '\xF4', '\xF5',
1152 '\xF6', '\xF7', '\xF8', '\xF9', '\xFA'};
1153 char C = Lookup[MangledName[0] - 'a'];
1154 MangledName.remove_prefix(1);
1155 return C;
1156 }
1157
1158 if (MangledName[0] >= 'A' && MangledName[0] <= 'Z') {
1159 char Lookup[26] = {'\xC1', '\xC2', '\xC3', '\xC4', '\xC5', '\xC6', '\xC7',
1160 '\xC8', '\xC9', '\xCA', '\xCB', '\xCC', '\xCD', '\xCE',
1161 '\xCF', '\xD0', '\xD1', '\xD2', '\xD3', '\xD4', '\xD5',
1162 '\xD6', '\xD7', '\xD8', '\xD9', '\xDA'};
1163 char C = Lookup[MangledName[0] - 'A'];
1164 MangledName.remove_prefix(1);
1165 return C;
1166 }
1167
1168CharLiteralError:
1169 Error = true;
1170 return '\0';
1171}
1172
1173wchar_t Demangler::demangleWcharLiteral(std::string_view &MangledName) {
1174 uint8_t C1, C2;
1175
1176 C1 = demangleCharLiteral(MangledName);
1177 if (Error || MangledName.empty())
1178 goto WCharLiteralError;
1179 C2 = demangleCharLiteral(MangledName);
1180 if (Error)
1181 goto WCharLiteralError;
1182
1183 return ((wchar_t)C1 << 8) | (wchar_t)C2;
1184
1185WCharLiteralError:
1186 Error = true;
1187 return L'\0';
1188}
1189
// Stores the upper-case hexadecimal character for Digit (0..15) in *Buffer.
static void writeHexDigit(char *Buffer, uint8_t Digit) {
  assert(Digit <= 15);
  if (Digit < 10)
    *Buffer = '0' + Digit;
  else
    *Buffer = 'A' + Digit - 10;
}
1194
1195static void outputHex(OutputBuffer &OB, unsigned C) {
1196 assert (C != 0);
1197
1198 // It's easier to do the math if we can work from right to left, but we need
1199 // to print the numbers from left to right. So render this into a temporary
1200 // buffer first, then output the temporary buffer. Each byte is of the form
1201 // \xAB, which means that each byte needs 4 characters. Since there are at
1202 // most 4 bytes, we need a 4*4+1 = 17 character temporary buffer.
1203 char TempBuffer[17];
1204
1205 ::memset(TempBuffer, 0, sizeof(TempBuffer));
1206 constexpr int MaxPos = sizeof(TempBuffer) - 1;
1207
1208 int Pos = MaxPos - 1; // TempBuffer[MaxPos] is the terminating \0.
1209 while (C != 0) {
1210 for (int I = 0; I < 2; ++I) {
1211 writeHexDigit(&TempBuffer[Pos--], C % 16);
1212 C /= 16;
1213 }
1214 }
1215 TempBuffer[Pos--] = 'x';
1216 assert(Pos >= 0);
1217 TempBuffer[Pos--] = '\\';
1218 OB << std::string_view(&TempBuffer[Pos + 1]);
1219}
1220
1221static void outputEscapedChar(OutputBuffer &OB, unsigned C) {
1222 switch (C) {
1223 case '\0': // nul
1224 OB << "\\0";
1225 return;
1226 case '\'': // single quote
1227 OB << "\\\'";
1228 return;
1229 case '\"': // double quote
1230 OB << "\\\"";
1231 return;
1232 case '\\': // backslash
1233 OB << "\\\\";
1234 return;
1235 case '\a': // bell
1236 OB << "\\a";
1237 return;
1238 case '\b': // backspace
1239 OB << "\\b";
1240 return;
1241 case '\f': // form feed
1242 OB << "\\f";
1243 return;
1244 case '\n': // new line
1245 OB << "\\n";
1246 return;
1247 case '\r': // carriage return
1248 OB << "\\r";
1249 return;
1250 case '\t': // tab
1251 OB << "\\t";
1252 return;
1253 case '\v': // vertical tab
1254 OB << "\\v";
1255 return;
1256 default:
1257 break;
1258 }
1259
1260 if (C > 0x1F && C < 0x7F) {
1261 // Standard ascii char.
1262 OB << (char)C;
1263 return;
1264 }
1265
1266 outputHex(OB, C);
1267}
1268
// Counts the zero bytes at the end of StringBytes[0..Length).
//
// Returns 0 when Length is not positive. The bytes are indexed rather than
// walked with an end pointer so that an empty range never forms a pointer in
// front of the buffer.
static unsigned countTrailingNullBytes(const uint8_t *StringBytes, int Length) {
  unsigned Count = 0;
  while (Length > 0 && StringBytes[Length - 1] == 0) {
    --Length;
    ++Count;
  }
  return Count;
}
1279
// Returns how many of the first Length bytes of StringBytes are zero.
static unsigned countEmbeddedNulls(const uint8_t *StringBytes,
                                   unsigned Length) {
  unsigned Zeros = 0;
  const uint8_t *const End = StringBytes + Length;
  for (const uint8_t *Cur = StringBytes; Cur != End; ++Cur)
    Zeros += (*Cur == 0) ? 1 : 0;
  return Zeros;
}
1289
1290// A mangled (non-wide) string literal stores the total length of the string it
1291// refers to (passed in NumBytes), and it contains up to 32 bytes of actual text
1292// (passed in StringBytes, NumChars).
1293static unsigned guessCharByteSize(const uint8_t *StringBytes, unsigned NumChars,
1294 uint64_t NumBytes) {
1295 assert(NumBytes > 0);
1296
1297 // If the number of bytes is odd, this is guaranteed to be a char string.
1298 if (NumBytes % 2 == 1)
1299 return 1;
1300
1301 // All strings can encode at most 32 bytes of data. If it's less than that,
1302 // then we encoded the entire string. In this case we check for a 1-byte,
1303 // 2-byte, or 4-byte null terminator.
1304 if (NumBytes < 32) {
1305 unsigned TrailingNulls = countTrailingNullBytes(StringBytes, NumChars);
1306 if (TrailingNulls >= 4 && NumBytes % 4 == 0)
1307 return 4;
1308 if (TrailingNulls >= 2)
1309 return 2;
1310 return 1;
1311 }
1312
1313 // The whole string was not able to be encoded. Try to look at embedded null
1314 // terminators to guess. The heuristic is that we count all embedded null
1315 // terminators. If more than 2/3 are null, it's a char32. If more than 1/3
1316 // are null, it's a char16. Otherwise it's a char8. This obviously isn't
1317 // perfect and is biased towards languages that have ascii alphabets, but this
1318 // was always going to be best effort since the encoding is lossy.
1319 unsigned Nulls = countEmbeddedNulls(StringBytes, NumChars);
1320 if (Nulls >= 2 * NumChars / 3 && NumBytes % 4 == 0)
1321 return 4;
1322 if (Nulls >= NumChars / 3)
1323 return 2;
1324 return 1;
1325}
1326
// Assembles the CharIndex-th character of StringBytes, where every character
// occupies CharBytes (1, 2 or 4) bytes stored least-significant byte first.
static unsigned decodeMultiByteChar(const uint8_t *StringBytes,
                                    unsigned CharIndex, unsigned CharBytes) {
  assert(CharBytes == 1 || CharBytes == 2 || CharBytes == 4);
  const uint8_t *Src = StringBytes + CharIndex * CharBytes;
  const uint8_t *const End = Src + CharBytes;
  unsigned Value = 0;
  for (unsigned Shift = 0; Src != End; ++Src, Shift += 8)
    Value |= static_cast<unsigned>(*Src) << Shift;
  return Value;
}
1339
// Demangles a vcall thunk: a name scope chain (with a
// VcallThunkIdentifierNode as the unqualified name), the marker "$B", the
// unsigned offset into the vtable, the letter 'A', and a calling convention.
// Every step is skipped once Error is set; returns nullptr on error and the
// function symbol node otherwise.
// NOTE(review): the embedded listing numbers skip 1340 (presumably the return
// type line; the function returns a FunctionSymbolNode *) and 1345 (a
// statement following the signature allocation) — restore both from upstream.
1341Demangler::demangleVcallThunkNode(std::string_view &MangledName) {
1342 FunctionSymbolNode *FSN = Arena.alloc<FunctionSymbolNode>();
1343 VcallThunkIdentifierNode *VTIN = Arena.alloc<VcallThunkIdentifierNode>();
1344 FSN->Signature = Arena.alloc<ThunkSignatureNode>();
1346
1347 FSN->Name = demangleNameScopeChain(MangledName, VTIN);
// consumeFront() returns false when the expected marker is absent, which is
// turned into an error here.
1348 if (!Error)
1349 Error = !consumeFront(MangledName, "$B");
1350 if (!Error)
1351 VTIN->OffsetInVTable = demangleUnsigned(MangledName);
1352 if (!Error)
1353 Error = !consumeFront(MangledName, 'A');
1354 if (!Error)
1355 FSN->Signature->CallConvention = demangleCallingConvention(MangledName);
1356 return (Error) ? nullptr : FSN;
1357}
1358
1360Demangler::demangleStringLiteral(std::string_view &MangledName) {
1361 // This function uses goto, so declare all variables up front.
1362 OutputBuffer OB;
1363 std::string_view CRC;
1364 uint64_t StringByteSize;
1365 bool IsWcharT = false;
1366 bool IsNegative = false;
1367 size_t CrcEndPos = 0;
1368 char F;
1369
1370 EncodedStringLiteralNode *Result = Arena.alloc<EncodedStringLiteralNode>();
1371
1372 // Prefix indicating the beginning of a string literal
1373 if (!consumeFront(MangledName, "@_"))
1374 goto StringLiteralError;
1375 if (MangledName.empty())
1376 goto StringLiteralError;
1377
1378 // Char Type (regular or wchar_t)
1379 F = MangledName.front();
1380 MangledName.remove_prefix(1);
1381 switch (F) {
1382 case '1':
1383 IsWcharT = true;
1385 case '0':
1386 break;
1387 default:
1388 goto StringLiteralError;
1389 }
1390
1391 // Encoded Length
1392 std::tie(StringByteSize, IsNegative) = demangleNumber(MangledName);
1393 if (Error || IsNegative || StringByteSize < (IsWcharT ? 2 : 1))
1394 goto StringLiteralError;
1395
1396 // CRC 32 (always 8 characters plus a terminator)
1397 CrcEndPos = MangledName.find('@');
1398 if (CrcEndPos == std::string_view::npos)
1399 goto StringLiteralError;
1400 CRC = MangledName.substr(0, CrcEndPos);
1401 MangledName.remove_prefix(CrcEndPos + 1);
1402 if (MangledName.empty())
1403 goto StringLiteralError;
1404
1405 if (IsWcharT) {
1406 Result->Char = CharKind::Wchar;
1407 if (StringByteSize > 64)
1408 Result->IsTruncated = true;
1409
1410 while (!consumeFront(MangledName, '@')) {
1411 // For a wide string StringByteSize has to have an even length.
1412 if (StringByteSize % 2 != 0)
1413 goto StringLiteralError;
1414 if (StringByteSize == 0)
1415 goto StringLiteralError;
1416 if (MangledName.size() < 2)
1417 goto StringLiteralError;
1418 wchar_t W = demangleWcharLiteral(MangledName);
1419 if (StringByteSize != 2 || Result->IsTruncated)
1420 outputEscapedChar(OB, W);
1421 StringByteSize -= 2;
1422 if (Error)
1423 goto StringLiteralError;
1424 }
1425 } else {
1426 // The max byte length is actually 32, but some compilers mangled strings
1427 // incorrectly, so we have to assume it can go higher.
1428 constexpr unsigned MaxStringByteLength = 32 * 4;
1429 uint8_t StringBytes[MaxStringByteLength];
1430
1431 unsigned BytesDecoded = 0;
1432 while (!consumeFront(MangledName, '@')) {
1433 if (MangledName.size() < 1 || BytesDecoded >= MaxStringByteLength)
1434 goto StringLiteralError;
1435 StringBytes[BytesDecoded++] = demangleCharLiteral(MangledName);
1436 }
1437
1438 if (StringByteSize > BytesDecoded)
1439 Result->IsTruncated = true;
1440
1441 unsigned CharBytes =
1442 guessCharByteSize(StringBytes, BytesDecoded, StringByteSize);
1443 assert(StringByteSize % CharBytes == 0);
1444 switch (CharBytes) {
1445 case 1:
1446 Result->Char = CharKind::Char;
1447 break;
1448 case 2:
1449 Result->Char = CharKind::Char16;
1450 break;
1451 case 4:
1452 Result->Char = CharKind::Char32;
1453 break;
1454 default:
1456 }
1457 const unsigned NumChars = BytesDecoded / CharBytes;
1458 for (unsigned CharIndex = 0; CharIndex < NumChars; ++CharIndex) {
1459 unsigned NextChar =
1460 decodeMultiByteChar(StringBytes, CharIndex, CharBytes);
1461 if (CharIndex + 1 < NumChars || Result->IsTruncated)
1462 outputEscapedChar(OB, NextChar);
1463 }
1464 }
1465
1466 Result->DecodedString = copyString(OB);
1467 std::free(OB.getBuffer());
1468 return Result;
1469
1470StringLiteralError:
1471 Error = true;
1472 std::free(OB.getBuffer());
1473 return nullptr;
1474}
1475
1476// Returns MangledName's prefix before the first '@', or an error if
1477// MangledName contains no '@' or the prefix has length 0.
1478std::string_view Demangler::demangleSimpleString(std::string_view &MangledName,
1479 bool Memorize) {
1480 std::string_view S;
1481 for (size_t i = 0; i < MangledName.size(); ++i) {
1482 if (MangledName[i] != '@')
1483 continue;
1484 if (i == 0)
1485 break;
1486 S = MangledName.substr(0, i);
1487 MangledName.remove_prefix(i + 1);
1488
1489 if (Memorize)
1490 memorizeString(S);
1491 return S;
1492 }
1493
1494 Error = true;
1495 return {};
1496}
1497
1499Demangler::demangleAnonymousNamespaceName(std::string_view &MangledName) {
1500 assert(llvm::itanium_demangle::starts_with(MangledName, "?A"));
1501 consumeFront(MangledName, "?A");
1502
1503 NamedIdentifierNode *Node = Arena.alloc<NamedIdentifierNode>();
1504 Node->Name = "`anonymous namespace'";
1505 size_t EndPos = MangledName.find('@');
1506 if (EndPos == std::string_view::npos) {
1507 Error = true;
1508 return nullptr;
1509 }
1510 std::string_view NamespaceKey = MangledName.substr(0, EndPos);
1511 memorizeString(NamespaceKey);
1512 MangledName = MangledName.substr(EndPos + 1);
1513 return Node;
1514}
1515
1517Demangler::demangleLocallyScopedNamePiece(std::string_view &MangledName) {
1518 assert(startsWithLocalScopePattern(MangledName));
1519
1520 NamedIdentifierNode *Identifier = Arena.alloc<NamedIdentifierNode>();
1521 consumeFront(MangledName, '?');
1522 uint64_t Number = 0;
1523 bool IsNegative = false;
1524 std::tie(Number, IsNegative) = demangleNumber(MangledName);
1525 assert(!IsNegative);
1526
1527 // One ? to terminate the number
1528 consumeFront(MangledName, '?');
1529
1530 assert(!Error);
1531 Node *Scope = parse(MangledName);
1532 if (Error)
1533 return nullptr;
1534
1535 // Render the parent symbol's name into a buffer.
1536 OutputBuffer OB;
1537 OB << '`';
1538 Scope->output(OB, OF_Default);
1539 OB << '\'';
1540 OB << "::`" << Number << "'";
1541
1542 Identifier->Name = copyString(OB);
1543 std::free(OB.getBuffer());
1544 return Identifier;
1545}
1546
1547// Parses a type name in the form of A@B@C@@ which represents C::B::A.
1549Demangler::demangleFullyQualifiedTypeName(std::string_view &MangledName) {
1550 IdentifierNode *Identifier =
1551 demangleUnqualifiedTypeName(MangledName, /*Memorize=*/true);
1552 if (Error)
1553 return nullptr;
1554 assert(Identifier);
1555
1556 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, Identifier);
1557 if (Error)
1558 return nullptr;
1559 assert(QN);
1560 return QN;
1561}
1562
1563// Parses a symbol name in the form of A@B@C@@ which represents C::B::A.
1564// Symbol names have slightly different rules regarding what can appear
1565// so we separate out the implementations for flexibility.
// Returns nullptr whenever Error is set.
// NOTE(review): the embedded listing numbers skip 1566 (presumably the return
// type line; the function returns a QualifiedNameNode *) and 1582 (the
// condition opening the block that is closed at 1591) — restore both from
// upstream.
1567Demangler::demangleFullyQualifiedSymbolName(std::string_view &MangledName) {
1568 // This is the final component of a symbol name (i.e. the leftmost component
1569 // of a mangled name). Since the only possible template instantiation that
1570 // can appear in this context is a function template, and since those are
1571 // not saved for the purposes of name backreferences, only backref simple
1572 // names.
1573 IdentifierNode *Identifier =
1574 demangleUnqualifiedSymbolName(MangledName, NBB_Simple);
1575 if (Error)
1576 return nullptr;
1577
1578 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, Identifier);
1579 if (Error)
1580 return nullptr;
1581
// The identifier is treated as a StructorIdentifierNode below and linked to
// the second-to-last name component as its class, so at least two components
// are required.
1583 if (QN->Components->Count < 2) {
1584 Error = true;
1585 return nullptr;
1586 }
1587 StructorIdentifierNode *SIN =
1588 static_cast<StructorIdentifierNode *>(Identifier);
1589 Node *ClassNode = QN->Components->Nodes[QN->Components->Count - 2];
1590 SIN->Class = static_cast<IdentifierNode *>(ClassNode);
1591 }
1592 assert(QN);
1593 return QN;
1594}
1595
1597Demangler::demangleUnqualifiedTypeName(std::string_view &MangledName,
1598 bool Memorize) {
1599 // An inner-most name can be a back-reference, because a fully-qualified name
1600 // (e.g. Scope + Inner) can contain other fully qualified names inside of
1601 // them (for example template parameters), and these nested parameters can
1602 // refer to previously mangled types.
1603 if (startsWithDigit(MangledName))
1604 return demangleBackRefName(MangledName);
1605
1606 if (llvm::itanium_demangle::starts_with(MangledName, "?$"))
1607 return demangleTemplateInstantiationName(MangledName, NBB_Template);
1608
1609 return demangleSimpleName(MangledName, Memorize);
1610}
1611
1613Demangler::demangleUnqualifiedSymbolName(std::string_view &MangledName,
1614 NameBackrefBehavior NBB) {
1615 if (startsWithDigit(MangledName))
1616 return demangleBackRefName(MangledName);
1617 if (llvm::itanium_demangle::starts_with(MangledName, "?$"))
1618 return demangleTemplateInstantiationName(MangledName, NBB);
1619 if (llvm::itanium_demangle::starts_with(MangledName, '?'))
1620 return demangleFunctionIdentifierCode(MangledName);
1621 return demangleSimpleName(MangledName, /*Memorize=*/(NBB & NBB_Simple) != 0);
1622}
1623
1625Demangler::demangleNameScopePiece(std::string_view &MangledName) {
1626 if (startsWithDigit(MangledName))
1627 return demangleBackRefName(MangledName);
1628
1629 if (llvm::itanium_demangle::starts_with(MangledName, "?$"))
1630 return demangleTemplateInstantiationName(MangledName, NBB_Template);
1631
1632 if (llvm::itanium_demangle::starts_with(MangledName, "?A"))
1633 return demangleAnonymousNamespaceName(MangledName);
1634
1635 if (startsWithLocalScopePattern(MangledName))
1636 return demangleLocallyScopedNamePiece(MangledName);
1637
1638 return demangleSimpleName(MangledName, /*Memorize=*/true);
1639}
1640
1642Demangler::demangleNameScopeChain(std::string_view &MangledName,
1643 IdentifierNode *UnqualifiedName) {
1644 NodeList *Head = Arena.alloc<NodeList>();
1645
1646 Head->N = UnqualifiedName;
1647
1648 size_t Count = 1;
1649 while (!consumeFront(MangledName, "@")) {
1650 ++Count;
1651 NodeList *NewHead = Arena.alloc<NodeList>();
1652 NewHead->Next = Head;
1653 Head = NewHead;
1654
1655 if (MangledName.empty()) {
1656 Error = true;
1657 return nullptr;
1658 }
1659
1660 assert(!Error);
1661 IdentifierNode *Elem = demangleNameScopePiece(MangledName);
1662 if (Error)
1663 return nullptr;
1664
1665 Head->N = Elem;
1666 }
1667
1668 QualifiedNameNode *QN = Arena.alloc<QualifiedNameNode>();
1669 QN->Components = nodeListToNodeArray(Arena, Head, Count);
1670 return QN;
1671}
1672
// Decodes the function-class code at the front of MangledName into a set of
// FuncClass flags. A leading '$' introduces the two-character (or, with 'R',
// three-character) forms handled in the nested switch. An unrecognized code
// sets Error and returns FC_Public. The caller must pass a non-empty
// MangledName, since front() is read unconditionally.
// NOTE(review): the embedded listing numbers skip one line after many case
// labels (after '9', 'D'-'H', 'K'-'P' and 'V'-'X') and skip 1732 inside the
// '$' case, where VFlag must be declared. As listed, those cases would fall
// through to the next return — restore the missing lines from upstream before
// relying on this table.
1673FuncClass Demangler::demangleFunctionClass(std::string_view &MangledName) {
1674 const char F = MangledName.front();
1675 MangledName.remove_prefix(1);
1676 switch (F) {
1677 case '9':
1679 case 'A':
1680 return FC_Private;
1681 case 'B':
1682 return FuncClass(FC_Private | FC_Far);
1683 case 'C':
1684 return FuncClass(FC_Private | FC_Static);
1685 case 'D':
1687 case 'E':
1689 case 'F':
1691 case 'G':
1693 case 'H':
1695 case 'I':
1696 return FuncClass(FC_Protected);
1697 case 'J':
1698 return FuncClass(FC_Protected | FC_Far);
1699 case 'K':
1701 case 'L':
1703 case 'M':
1705 case 'N':
1707 case 'O':
1709 case 'P':
1711 case 'Q':
1712 return FuncClass(FC_Public);
1713 case 'R':
1714 return FuncClass(FC_Public | FC_Far);
1715 case 'S':
1716 return FuncClass(FC_Public | FC_Static);
1717 case 'T':
1718 return FuncClass(FC_Public | FC_Static | FC_Far);
1719 case 'U':
1720 return FuncClass(FC_Public | FC_Virtual);
1721 case 'V':
1723 case 'W':
1725 case 'X':
1727 case 'Y':
1728 return FuncClass(FC_Global);
1729 case 'Z':
1730 return FuncClass(FC_Global | FC_Far);
1731 case '$': {
// An optional 'R' adds FC_VirtualThisAdjustEx to VFlag.
1733 if (consumeFront(MangledName, 'R'))
1734 VFlag = FuncClass(VFlag | FC_VirtualThisAdjustEx);
1735 if (MangledName.empty())
1736 break;
1737 const char F = MangledName.front();
1738 MangledName.remove_prefix(1);
// '0'..'5': private/protected/public virtual, odd digits additionally FC_Far.
1739 switch (F) {
1740 case '0':
1741 return FuncClass(FC_Private | FC_Virtual | VFlag);
1742 case '1':
1743 return FuncClass(FC_Private | FC_Virtual | VFlag | FC_Far);
1744 case '2':
1745 return FuncClass(FC_Protected | FC_Virtual | VFlag);
1746 case '3':
1747 return FuncClass(FC_Protected | FC_Virtual | VFlag | FC_Far);
1748 case '4':
1749 return FuncClass(FC_Public | FC_Virtual | VFlag);
1750 case '5':
1751 return FuncClass(FC_Public | FC_Virtual | VFlag | FC_Far);
1752 }
1753 }
1754 }
1755
1756 Error = true;
1757 return FC_Public;
1758}
1759
// Decodes the one-letter calling-convention code at the front of MangledName.
// An empty MangledName sets Error and returns CallingConv::None. A letter that
// matches no case also returns CallingConv::None, but does not set Error.
// NOTE(review): the embedded listing numbers skip 1760 (presumably the return
// type line), 1792 (the body of case 'Q') and 1796 (the body of case 'W') —
// restore them from upstream; as listed, 'Q' would fall through to 'S'.
1761Demangler::demangleCallingConvention(std::string_view &MangledName) {
1762 if (MangledName.empty()) {
1763 Error = true;
1764 return CallingConv::None;
1765 }
1766
1767 const char F = MangledName.front();
1768 MangledName.remove_prefix(1);
// Adjacent letters are accepted in pairs for the same convention.
1769 switch (F) {
1770 case 'A':
1771 case 'B':
1772 return CallingConv::Cdecl;
1773 case 'C':
1774 case 'D':
1775 return CallingConv::Pascal;
1776 case 'E':
1777 case 'F':
1778 return CallingConv::Thiscall;
1779 case 'G':
1780 case 'H':
1781 return CallingConv::Stdcall;
1782 case 'I':
1783 case 'J':
1784 return CallingConv::Fastcall;
1785 case 'M':
1786 case 'N':
1787 return CallingConv::Clrcall;
1788 case 'O':
1789 case 'P':
1790 return CallingConv::Eabi;
1791 case 'Q':
1793 case 'S':
1794 return CallingConv::Swift;
1795 case 'W':
1797 }
1798
1799 return CallingConv::None;
1800}
1801
// Decodes the single-digit storage-class code ('0'..'4') at the front of
// MangledName; the caller must guarantee that such a digit is present.
// NOTE(review): the embedded listing numbers skip 1802 (presumably the return
// type line), the bodies of cases '0', '1', '2' and '4' (1810, 1812, 1814,
// 1818) and the statement at 1820 after the switch. Only case '3' ->
// StorageClass::Global is visible here — restore the rest from upstream.
1803Demangler::demangleVariableStorageClass(std::string_view &MangledName) {
1804 assert(MangledName.front() >= '0' && MangledName.front() <= '4');
1805
1806 const char F = MangledName.front();
1807 MangledName.remove_prefix(1);
1808 switch (F) {
1809 case '0':
1811 case '1':
1813 case '2':
1815 case '3':
1816 return StorageClass::Global;
1817 case '4':
1819 }
1821}
1822
1823std::pair<Qualifiers, bool>
1824Demangler::demangleQualifiers(std::string_view &MangledName) {
1825 if (MangledName.empty()) {
1826 Error = true;
1827 return std::make_pair(Q_None, false);
1828 }
1829
1830 const char F = MangledName.front();
1831 MangledName.remove_prefix(1);
1832 switch (F) {
1833 // Member qualifiers
1834 case 'Q':
1835 return std::make_pair(Q_None, true);
1836 case 'R':
1837 return std::make_pair(Q_Const, true);
1838 case 'S':
1839 return std::make_pair(Q_Volatile, true);
1840 case 'T':
1841 return std::make_pair(Qualifiers(Q_Const | Q_Volatile), true);
1842 // Non-Member qualifiers
1843 case 'A':
1844 return std::make_pair(Q_None, false);
1845 case 'B':
1846 return std::make_pair(Q_Const, false);
1847 case 'C':
1848 return std::make_pair(Q_Volatile, false);
1849 case 'D':
1850 return std::make_pair(Qualifiers(Q_Const | Q_Volatile), false);
1851 }
1852 Error = true;
1853 return std::make_pair(Q_None, false);
1854}
1855
1856// <variable-type> ::= <type> <cvr-qualifiers>
1857// ::= <type> <pointee-cvr-qualifiers> # pointers, references
1858TypeNode *Demangler::demangleType(std::string_view &MangledName,
1859 QualifierMangleMode QMM) {
1860 Qualifiers Quals = Q_None;
1861 bool IsMember = false;
1862 if (QMM == QualifierMangleMode::Mangle) {
1863 std::tie(Quals, IsMember) = demangleQualifiers(MangledName);
1864 } else if (QMM == QualifierMangleMode::Result) {
1865 if (consumeFront(MangledName, '?'))
1866 std::tie(Quals, IsMember) = demangleQualifiers(MangledName);
1867 }
1868
1869 if (MangledName.empty()) {
1870 Error = true;
1871 return nullptr;
1872 }
1873
1874 TypeNode *Ty = nullptr;
1875 if (isTagType(MangledName))
1876 Ty = demangleClassType(MangledName);
1877 else if (isPointerType(MangledName)) {
1878 if (isMemberPointer(MangledName, Error))
1879 Ty = demangleMemberPointerType(MangledName);
1880 else if (!Error)
1881 Ty = demanglePointerType(MangledName);
1882 else
1883 return nullptr;
1884 } else if (isArrayType(MangledName))
1885 Ty = demangleArrayType(MangledName);
1886 else if (isFunctionType(MangledName)) {
1887 if (consumeFront(MangledName, "$$A8@@"))
1888 Ty = demangleFunctionType(MangledName, true);
1889 else {
1890 assert(llvm::itanium_demangle::starts_with(MangledName, "$$A6"));
1891 consumeFront(MangledName, "$$A6");
1892 Ty = demangleFunctionType(MangledName, false);
1893 }
1894 } else if (isCustomType(MangledName)) {
1895 Ty = demangleCustomType(MangledName);
1896 } else {
1897 Ty = demanglePrimitiveType(MangledName);
1898 }
1899
1900 if (!Ty || Error)
1901 return Ty;
1902 Ty->Quals = Qualifiers(Ty->Quals | Quals);
1903 return Ty;
1904}
1905
1906bool Demangler::demangleThrowSpecification(std::string_view &MangledName) {
1907 if (consumeFront(MangledName, "_E"))
1908 return true;
1909 if (consumeFront(MangledName, 'Z'))
1910 return false;
1911
1912 Error = true;
1913 return false;
1914}
1915
1917Demangler::demangleFunctionType(std::string_view &MangledName,
1918 bool HasThisQuals) {
1919 FunctionSignatureNode *FTy = Arena.alloc<FunctionSignatureNode>();
1920
1921 if (HasThisQuals) {
1922 FTy->Quals = demanglePointerExtQualifiers(MangledName);
1923 FTy->RefQualifier = demangleFunctionRefQualifier(MangledName);
1924 FTy->Quals = Qualifiers(FTy->Quals | demangleQualifiers(MangledName).first);
1925 }
1926
1927 // Fields that appear on both member and non-member functions.
1928 FTy->CallConvention = demangleCallingConvention(MangledName);
1929
1930 // <return-type> ::= <type>
1931 // ::= @ # structors (they have no declared return type)
1932 bool IsStructor = consumeFront(MangledName, '@');
1933 if (!IsStructor)
1934 FTy->ReturnType = demangleType(MangledName, QualifierMangleMode::Result);
1935
1936 FTy->Params = demangleFunctionParameterList(MangledName, FTy->IsVariadic);
1937
1938 FTy->IsNoexcept = demangleThrowSpecification(MangledName);
1939
1940 return FTy;
1941}
1942
1944Demangler::demangleFunctionEncoding(std::string_view &MangledName) {
1945 FuncClass ExtraFlags = FC_None;
1946 if (consumeFront(MangledName, "$$J0"))
1947 ExtraFlags = FC_ExternC;
1948
1949 if (MangledName.empty()) {
1950 Error = true;
1951 return nullptr;
1952 }
1953
1954 FuncClass FC = demangleFunctionClass(MangledName);
1955 FC = FuncClass(ExtraFlags | FC);
1956
1957 FunctionSignatureNode *FSN = nullptr;
1958 ThunkSignatureNode *TTN = nullptr;
1959 if (FC & FC_StaticThisAdjust) {
1960 TTN = Arena.alloc<ThunkSignatureNode>();
1961 TTN->ThisAdjust.StaticOffset = demangleSigned(MangledName);
1962 } else if (FC & FC_VirtualThisAdjust) {
1963 TTN = Arena.alloc<ThunkSignatureNode>();
1964 if (FC & FC_VirtualThisAdjustEx) {
1965 TTN->ThisAdjust.VBPtrOffset = demangleSigned(MangledName);
1966 TTN->ThisAdjust.VBOffsetOffset = demangleSigned(MangledName);
1967 }
1968 TTN->ThisAdjust.VtordispOffset = demangleSigned(MangledName);
1969 TTN->ThisAdjust.StaticOffset = demangleSigned(MangledName);
1970 }
1971
1972 if (FC & FC_NoParameterList) {
1973 // This is an extern "C" function whose full signature hasn't been mangled.
1974 // This happens when we need to mangle a local symbol inside of an extern
1975 // "C" function.
1976 FSN = Arena.alloc<FunctionSignatureNode>();
1977 } else {
1978 bool HasThisQuals = !(FC & (FC_Global | FC_Static));
1979 FSN = demangleFunctionType(MangledName, HasThisQuals);
1980 }
1981
1982 if (Error)
1983 return nullptr;
1984
1985 if (TTN) {
1986 *static_cast<FunctionSignatureNode *>(TTN) = *FSN;
1987 FSN = TTN;
1988 }
1989 FSN->FunctionClass = FC;
1990
1991 FunctionSymbolNode *Symbol = Arena.alloc<FunctionSymbolNode>();
1992 Symbol->Signature = FSN;
1993 return Symbol;
1994}
1995
1996CustomTypeNode *Demangler::demangleCustomType(std::string_view &MangledName) {
1997 assert(llvm::itanium_demangle::starts_with(MangledName, '?'));
1998 MangledName.remove_prefix(1);
1999
2000 CustomTypeNode *CTN = Arena.alloc<CustomTypeNode>();
2001 CTN->Identifier = demangleUnqualifiedTypeName(MangledName, /*Memorize=*/true);
2002 if (!consumeFront(MangledName, '@'))
2003 Error = true;
2004 if (Error)
2005 return nullptr;
2006 return CTN;
2007}
2008
2009// Reads a primitive type.
2011Demangler::demanglePrimitiveType(std::string_view &MangledName) {
2012 if (consumeFront(MangledName, "$$T"))
2013 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Nullptr);
2014
2015 const char F = MangledName.front();
2016 MangledName.remove_prefix(1);
2017 switch (F) {
2018 case 'X':
2019 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Void);
2020 case 'D':
2021 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char);
2022 case 'C':
2023 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Schar);
2024 case 'E':
2025 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uchar);
2026 case 'F':
2027 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Short);
2028 case 'G':
2029 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ushort);
2030 case 'H':
2031 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Int);
2032 case 'I':
2033 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uint);
2034 case 'J':
2035 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Long);
2036 case 'K':
2037 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ulong);
2038 case 'M':
2039 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Float);
2040 case 'N':
2041 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Double);
2042 case 'O':
2043 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ldouble);
2044 case '_': {
2045 if (MangledName.empty()) {
2046 Error = true;
2047 return nullptr;
2048 }
2049 const char F = MangledName.front();
2050 MangledName.remove_prefix(1);
2051 switch (F) {
2052 case 'N':
2053 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Bool);
2054 case 'J':
2055 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Int64);
2056 case 'K':
2057 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uint64);
2058 case 'W':
2059 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Wchar);
2060 case 'Q':
2061 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char8);
2062 case 'S':
2063 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char16);
2064 case 'U':
2065 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char32);
2066 case 'P':
2067 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Auto);
2068 case 'T':
2069 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::DecltypeAuto);
2070 }
2071 break;
2072 }
2073 }
2074 Error = true;
2075 return nullptr;
2076}
2077
2078TagTypeNode *Demangler::demangleClassType(std::string_view &MangledName) {
2079 TagTypeNode *TT = nullptr;
2080
2081 const char F = MangledName.front();
2082 MangledName.remove_prefix(1);
2083 switch (F) {
2084 case 'T':
2085 TT = Arena.alloc<TagTypeNode>(TagKind::Union);
2086 break;
2087 case 'U':
2088 TT = Arena.alloc<TagTypeNode>(TagKind::Struct);
2089 break;
2090 case 'V':
2091 TT = Arena.alloc<TagTypeNode>(TagKind::Class);
2092 break;
2093 case 'W':
2094 if (!consumeFront(MangledName, '4')) {
2095 Error = true;
2096 return nullptr;
2097 }
2098 TT = Arena.alloc<TagTypeNode>(TagKind::Enum);
2099 break;
2100 default:
2101 assert(false);
2102 }
2103
2104 TT->QualifiedName = demangleFullyQualifiedTypeName(MangledName);
2105 return TT;
2106}
2107
2108// <pointer-type> ::= E? <pointer-cvr-qualifiers> <ext-qualifiers> <type>
2109// # the E is required for 64-bit non-static pointers
2110PointerTypeNode *Demangler::demanglePointerType(std::string_view &MangledName) {
2111 PointerTypeNode *Pointer = Arena.alloc<PointerTypeNode>();
2112
2113 std::tie(Pointer->Quals, Pointer->Affinity) =
2114 demanglePointerCVQualifiers(MangledName);
2115
2116 if (consumeFront(MangledName, "6")) {
2117 Pointer->Pointee = demangleFunctionType(MangledName, false);
2118 return Pointer;
2119 }
2120
2121 Qualifiers ExtQuals = demanglePointerExtQualifiers(MangledName);
2122 Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals);
2123
2124 Pointer->PointerAuthQualifier = createPointerAuthQualifier(MangledName);
2125
2126 Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Mangle);
2127 return Pointer;
2128}
2129
// Demangles a pointer-to-member type: pointer cv-qualifiers, ext-qualifiers,
// then either '8' + class name + member function type, or pointee qualifiers
// + class name + pointee type.
// NOTE(review): the embedded listing numbers skip 2130 (presumably the return
// type line; the function returns a PointerTypeNode *) and 2136 (a line
// directly after the cv-qualifier parse) — restore both from upstream.
2131Demangler::demangleMemberPointerType(std::string_view &MangledName) {
2132 PointerTypeNode *Pointer = Arena.alloc<PointerTypeNode>();
2133
2134 std::tie(Pointer->Quals, Pointer->Affinity) =
2135 demanglePointerCVQualifiers(MangledName);
2137
2138 Qualifiers ExtQuals = demanglePointerExtQualifiers(MangledName);
2139 Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals);
2140
2141 // isMemberPointer() only returns true if there is at least one character
2142 // after the qualifiers.
2143 if (consumeFront(MangledName, "8")) {
// Pointer to member function: the pointee is a function type with
// this-qualifiers.
2144 Pointer->ClassParent = demangleFullyQualifiedTypeName(MangledName);
2145 Pointer->Pointee = demangleFunctionType(MangledName, true);
2146 } else {
2147 Qualifiers PointeeQuals = Q_None;
2148 bool IsMember = false;
2149 std::tie(PointeeQuals, IsMember) = demangleQualifiers(MangledName);
2150 assert(IsMember || Error);
2151 Pointer->ClassParent = demangleFullyQualifiedTypeName(MangledName);
2152
// The pointee is parsed in Drop mode; its qualifiers are the ones read above.
2153 Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Drop);
2154 if (Pointer->Pointee)
2155 Pointer->Pointee->Quals = PointeeQuals;
2156 }
2157
2158 return Pointer;
2159}
2160
2162Demangler::demanglePointerExtQualifiers(std::string_view &MangledName) {
2163 Qualifiers Quals = Q_None;
2164 if (consumeFront(MangledName, 'E'))
2165 Quals = Qualifiers(Quals | Q_Pointer64);
2166 if (consumeFront(MangledName, 'I'))
2167 Quals = Qualifiers(Quals | Q_Restrict);
2168 if (consumeFront(MangledName, 'F'))
2169 Quals = Qualifiers(Quals | Q_Unaligned);
2170
2171 return Quals;
2172}
2173
2174std::optional<PointerAuthQualifierNode::ArgArray>
2175Demangler::demanglePointerAuthQualifier(std::string_view &MangledName) {
2176 if (!consumeFront(MangledName, "__ptrauth"))
2177 return std::nullopt;
2178
2179 constexpr unsigned NumArgs = PointerAuthQualifierNode::NumArgs;
2181
2182 for (unsigned I = 0; I < NumArgs; ++I) {
2183 bool IsNegative = false;
2184 uint64_t Value = 0;
2185 std::tie(Value, IsNegative) = demangleNumber(MangledName);
2186 if (IsNegative)
2187 return std::nullopt;
2188
2189 Array[I] = Value;
2190 }
2191
2192 return Array;
2193}
2194
2196Demangler::createPointerAuthQualifier(std::string_view &MangledName) {
2197 constexpr unsigned NumArgs = PointerAuthQualifierNode::NumArgs;
2198 std::optional<PointerAuthQualifierNode::ArgArray> Vals =
2199 demanglePointerAuthQualifier(MangledName);
2200
2201 if (!Vals)
2202 return nullptr;
2203
2204 PointerAuthQualifierNode *PtrAuthQual =
2205 Arena.alloc<PointerAuthQualifierNode>();
2206 NodeArrayNode *Array = Arena.alloc<NodeArrayNode>();
2207 PtrAuthQual->Components = Array;
2208 Array->Count = NumArgs;
2209 Array->Nodes = Arena.allocArray<Node *>(NumArgs);
2210
2211 for (unsigned I = 0; I < NumArgs; ++I)
2212 Array->Nodes[I] = Arena.alloc<IntegerLiteralNode>((*Vals)[I], false);
2213
2214 return PtrAuthQual;
2215}
2216
2217ArrayTypeNode *Demangler::demangleArrayType(std::string_view &MangledName) {
2218 assert(MangledName.front() == 'Y');
2219 MangledName.remove_prefix(1);
2220
2221 uint64_t Rank = 0;
2222 bool IsNegative = false;
2223 std::tie(Rank, IsNegative) = demangleNumber(MangledName);
2224 if (IsNegative || Rank == 0) {
2225 Error = true;
2226 return nullptr;
2227 }
2228
2229 ArrayTypeNode *ATy = Arena.alloc<ArrayTypeNode>();
2230 NodeList *Head = Arena.alloc<NodeList>();
2231 NodeList *Tail = Head;
2232
2233 for (uint64_t I = 0; I < Rank; ++I) {
2234 uint64_t D = 0;
2235 std::tie(D, IsNegative) = demangleNumber(MangledName);
2236 if (Error || IsNegative) {
2237 Error = true;
2238 return nullptr;
2239 }
2240 Tail->N = Arena.alloc<IntegerLiteralNode>(D, IsNegative);
2241 if (I + 1 < Rank) {
2242 Tail->Next = Arena.alloc<NodeList>();
2243 Tail = Tail->Next;
2244 }
2245 }
2246 ATy->Dimensions = nodeListToNodeArray(Arena, Head, Rank);
2247
2248 if (consumeFront(MangledName, "$$C")) {
2249 bool IsMember = false;
2250 std::tie(ATy->Quals, IsMember) = demangleQualifiers(MangledName);
2251 if (IsMember) {
2252 Error = true;
2253 return nullptr;
2254 }
2255 }
2256
2257 ATy->ElementType = demangleType(MangledName, QualifierMangleMode::Drop);
2258 return ATy;
2259}
2260
2261// Reads a function's parameters.
2263Demangler::demangleFunctionParameterList(std::string_view &MangledName,
2264 bool &IsVariadic) {
2265 // Empty parameter list.
2266 if (consumeFront(MangledName, 'X'))
2267 return nullptr;
2268
2269 NodeList *Head = Arena.alloc<NodeList>();
2270 NodeList **Current = &Head;
2271 size_t Count = 0;
2272 while (!Error && !llvm::itanium_demangle::starts_with(MangledName, '@') &&
2273 !llvm::itanium_demangle::starts_with(MangledName, 'Z')) {
2274 ++Count;
2275
2276 if (startsWithDigit(MangledName)) {
2277 size_t N = MangledName[0] - '0';
2278 if (N >= Backrefs.FunctionParamCount) {
2279 Error = true;
2280 return nullptr;
2281 }
2282 MangledName.remove_prefix(1);
2283
2284 *Current = Arena.alloc<NodeList>();
2285 (*Current)->N = Backrefs.FunctionParams[N];
2286 Current = &(*Current)->Next;
2287 continue;
2288 }
2289
2290 size_t OldSize = MangledName.size();
2291
2292 *Current = Arena.alloc<NodeList>();
2293 TypeNode *TN = demangleType(MangledName, QualifierMangleMode::Drop);
2294 if (!TN || Error)
2295 return nullptr;
2296
2297 (*Current)->N = TN;
2298
2299 size_t CharsConsumed = OldSize - MangledName.size();
2300 assert(CharsConsumed != 0);
2301
2302 // Single-letter types are ignored for backreferences because memorizing
2303 // them doesn't save anything.
2304 if (Backrefs.FunctionParamCount <= 9 && CharsConsumed > 1)
2305 Backrefs.FunctionParams[Backrefs.FunctionParamCount++] = TN;
2306
2307 Current = &(*Current)->Next;
2308 }
2309
2310 if (Error)
2311 return nullptr;
2312
2313 NodeArrayNode *NA = nodeListToNodeArray(Arena, Head, Count);
2314 // A non-empty parameter list is terminated by either 'Z' (variadic) parameter
2315 // list or '@' (non variadic). Careful not to consume "@Z", as in that case
2316 // the following Z could be a throw specifier.
2317 if (consumeFront(MangledName, '@'))
2318 return NA;
2319
2320 if (consumeFront(MangledName, 'Z')) {
2321 IsVariadic = true;
2322 return NA;
2323 }
2324
2326}
2327
2329Demangler::demangleTemplateParameterList(std::string_view &MangledName) {
2330 NodeList *Head = nullptr;
2331 NodeList **Current = &Head;
2332 size_t Count = 0;
2333
2334 while (!llvm::itanium_demangle::starts_with(MangledName, '@')) {
2335 if (consumeFront(MangledName, "$S") || consumeFront(MangledName, "$$V") ||
2336 consumeFront(MangledName, "$$$V") || consumeFront(MangledName, "$$Z")) {
2337 // parameter pack separator
2338 continue;
2339 }
2340
2341 ++Count;
2342
2343 // Template parameter lists don't participate in back-referencing.
2344 *Current = Arena.alloc<NodeList>();
2345
2346 NodeList &TP = **Current;
2347
2348 // <auto-nttp> ::= $ M <type> <nttp>
2349 const bool IsAutoNTTP = consumeFront(MangledName, "$M");
2350 if (IsAutoNTTP) {
2351 // The deduced type of the auto NTTP parameter isn't printed so
2352 // we want to ignore the AST created from demangling the type.
2353 //
2354 // TODO: Avoid the extra allocations to the bump allocator in this case.
2355 (void)demangleType(MangledName, QualifierMangleMode::Drop);
2356 if (Error)
2357 return nullptr;
2358 }
2359
2360 TemplateParameterReferenceNode *TPRN = nullptr;
2361 if (consumeFront(MangledName, "$$Y")) {
2362 // Template alias
2363 TP.N = demangleFullyQualifiedTypeName(MangledName);
2364 } else if (consumeFront(MangledName, "$$B")) {
2365 // Array
2366 TP.N = demangleType(MangledName, QualifierMangleMode::Drop);
2367 } else if (consumeFront(MangledName, "$$C")) {
2368 // Type has qualifiers.
2369 TP.N = demangleType(MangledName, QualifierMangleMode::Mangle);
2370 } else if (startsWith(MangledName, "$1", "1", !IsAutoNTTP) ||
2371 startsWith(MangledName, "$H", "H", !IsAutoNTTP) ||
2372 startsWith(MangledName, "$I", "I", !IsAutoNTTP) ||
2373 startsWith(MangledName, "$J", "J", !IsAutoNTTP)) {
2374 // Pointer to member
2375 TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>();
2376 TPRN->IsMemberPointer = true;
2377
2378 if (!IsAutoNTTP)
2379 MangledName.remove_prefix(1); // Remove leading '$'
2380
2381 // 1 - single inheritance <name>
2382 // H - multiple inheritance <name> <number>
2383 // I - virtual inheritance <name> <number> <number>
2384 // J - unspecified inheritance <name> <number> <number> <number>
2385 char InheritanceSpecifier = MangledName.front();
2386 MangledName.remove_prefix(1);
2387 SymbolNode *S = nullptr;
2388 if (llvm::itanium_demangle::starts_with(MangledName, '?')) {
2389 S = parse(MangledName);
2390 if (Error || !S->Name) {
2391 Error = true;
2392 return nullptr;
2393 }
2394 memorizeIdentifier(S->Name->getUnqualifiedIdentifier());
2395 }
2396
2397 switch (InheritanceSpecifier) {
2398 case 'J':
2399 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
2400 demangleSigned(MangledName);
2402 case 'I':
2403 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
2404 demangleSigned(MangledName);
2406 case 'H':
2407 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
2408 demangleSigned(MangledName);
2410 case '1':
2411 break;
2412 default:
2414 }
2416 TPRN->Symbol = S;
2417 } else if (llvm::itanium_demangle::starts_with(MangledName, "$E?")) {
2418 consumeFront(MangledName, "$E");
2419 // Reference to symbol
2420 TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>();
2421 TPRN->Symbol = parse(MangledName);
2423 } else if (startsWith(MangledName, "$F", "F", !IsAutoNTTP) ||
2424 startsWith(MangledName, "$G", "G", !IsAutoNTTP)) {
2425 TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>();
2426
2427 // Data member pointer.
2428 if (!IsAutoNTTP)
2429 MangledName.remove_prefix(1); // Remove leading '$'
2430 char InheritanceSpecifier = MangledName.front();
2431 MangledName.remove_prefix(1);
2432
2433 switch (InheritanceSpecifier) {
2434 case 'G':
2435 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
2436 demangleSigned(MangledName);
2438 case 'F':
2439 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
2440 demangleSigned(MangledName);
2441 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
2442 demangleSigned(MangledName);
2443 break;
2444 default:
2446 }
2447 TPRN->IsMemberPointer = true;
2448
2449 } else if (consumeFront(MangledName, "$0", "0", !IsAutoNTTP)) {
2450 // Integral non-type template parameter
2451 bool IsNegative = false;
2452 uint64_t Value = 0;
2453 std::tie(Value, IsNegative) = demangleNumber(MangledName);
2454
2455 TP.N = Arena.alloc<IntegerLiteralNode>(Value, IsNegative);
2456 } else {
2457 TP.N = demangleType(MangledName, QualifierMangleMode::Drop);
2458 }
2459 if (Error)
2460 return nullptr;
2461
2462 Current = &TP.Next;
2463 }
2464
2465 // The loop above returns nullptr on Error.
2466 assert(!Error);
2467
2468 // Template parameter lists cannot be variadic, so it can only be terminated
2469 // by @ (as opposed to 'Z' in the function parameter case).
2470 assert(llvm::itanium_demangle::starts_with(
2471 MangledName, '@')); // The above loop exits only on '@'.
2472 consumeFront(MangledName, '@');
2473 return nodeListToNodeArray(Arena, Head, Count);
2474}
2475
2476void Demangler::dumpBackReferences() {
2477 std::printf("%d function parameter backreferences\n",
2478 (int)Backrefs.FunctionParamCount);
2479
2480 // Create an output stream so we can render each type.
2481 OutputBuffer OB;
2482 for (size_t I = 0; I < Backrefs.FunctionParamCount; ++I) {
2483 OB.setCurrentPosition(0);
2484
2485 TypeNode *T = Backrefs.FunctionParams[I];
2486 T->output(OB, OF_Default);
2487
2488 std::string_view B = OB;
2489 std::printf(" [%d] - %.*s\n", (int)I, (int)B.size(), B.data());
2490 }
2491 std::free(OB.getBuffer());
2492
2493 if (Backrefs.FunctionParamCount > 0)
2494 std::printf("\n");
2495 std::printf("%d name backreferences\n", (int)Backrefs.NamesCount);
2496 for (size_t I = 0; I < Backrefs.NamesCount; ++I) {
2497 std::printf(" [%d] - %.*s\n", (int)I, (int)Backrefs.Names[I]->Name.size(),
2498 Backrefs.Names[I]->Name.data());
2499 }
2500 if (Backrefs.NamesCount > 0)
2501 std::printf("\n");
2502}
2503
2504std::optional<size_t>
2505llvm::getArm64ECInsertionPointInMangledName(std::string_view MangledName) {
2506 std::string_view ProcessedName{MangledName};
2507
2508 // We only support this for MSVC-style C++ symbols.
2509 if (!consumeFront(ProcessedName, '?'))
2510 return std::nullopt;
2511
2512 // The insertion point is just after the name of the symbol, so parse that to
2513 // remove it from the processed name.
2514 Demangler D;
2515 D.demangleFullyQualifiedSymbolName(ProcessedName);
2516 if (D.Error)
2517 return std::nullopt;
2518
2519 return MangledName.length() - ProcessedName.length();
2520}
2521
2522char *llvm::microsoftDemangle(std::string_view MangledName, size_t *NMangled,
2523 int *Status, MSDemangleFlags Flags) {
2524 Demangler D;
2525
2526 std::string_view Name{MangledName};
2527 SymbolNode *AST = D.parse(Name);
2528 if (!D.Error && NMangled)
2529 *NMangled = MangledName.size() - Name.size();
2530
2531 if (Flags & MSDF_DumpBackrefs)
2532 D.dumpBackReferences();
2533
2535 if (Flags & MSDF_NoCallingConvention)
2537 if (Flags & MSDF_NoAccessSpecifier)
2539 if (Flags & MSDF_NoReturnType)
2540 OF = OutputFlags(OF | OF_NoReturnType);
2541 if (Flags & MSDF_NoMemberType)
2542 OF = OutputFlags(OF | OF_NoMemberType);
2543 if (Flags & MSDF_NoVariableType)
2544 OF = OutputFlags(OF | OF_NoVariableType);
2545
2546 int InternalStatus = demangle_success;
2547 char *Buf;
2548 if (D.Error)
2549 InternalStatus = demangle_invalid_mangled_name;
2550 else {
2551 OutputBuffer OB;
2552 AST->output(OB, OF);
2553 OB += '\0';
2554 Buf = OB.getBuffer();
2555 }
2556
2557 if (Status)
2558 *Status = InternalStatus;
2559 return InternalStatus == demangle_success ? Buf : nullptr;
2560}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define DEMANGLE_FALLTHROUGH
#define DEMANGLE_UNREACHABLE
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
static bool startsWithLocalScopePattern(std::string_view S)
static bool isArrayType(std::string_view S)
static unsigned countEmbeddedNulls(const uint8_t *StringBytes, unsigned Length)
static bool startsWithDigit(std::string_view S)
static QualifiedNameNode * synthesizeQualifiedName(ArenaAllocator &Arena, IdentifierNode *Identifier)
static void outputEscapedChar(OutputBuffer &OB, unsigned C)
static bool isCustomType(std::string_view S)
static void outputHex(OutputBuffer &OB, unsigned C)
static std::pair< Qualifiers, PointerAffinity > demanglePointerCVQualifiers(std::string_view &MangledName)
static VariableSymbolNode * synthesizeVariable(ArenaAllocator &Arena, TypeNode *Type, std::string_view VariableName)
static unsigned decodeMultiByteChar(const uint8_t *StringBytes, unsigned CharIndex, unsigned CharBytes)
static void writeHexDigit(char *Buffer, uint8_t Digit)
static FunctionRefQualifier demangleFunctionRefQualifier(std::string_view &MangledName)
static bool isRebasedHexDigit(char C)
static NodeArrayNode * nodeListToNodeArray(ArenaAllocator &Arena, NodeList *Head, size_t Count)
static uint8_t rebasedHexDigitToNumber(char C)
static unsigned countTrailingNullBytes(const uint8_t *StringBytes, int Length)
static NamedIdentifierNode * synthesizeNamedIdentifier(ArenaAllocator &Arena, std::string_view Name)
static bool startsWith(std::string_view S, std::string_view PrefixA, std::string_view PrefixB, bool A)
static bool consumeFront(std::string_view &S, char C)
static bool isFunctionType(std::string_view S)
static bool isPointerType(std::string_view S)
static unsigned guessCharByteSize(const uint8_t *StringBytes, unsigned NumChars, uint64_t NumBytes)
static SpecialIntrinsicKind consumeSpecialIntrinsicKind(std::string_view &MangledName)
static bool isTagType(std::string_view S)
#define T
#define CH(x, y, z)
Definition SHA256.cpp:34
static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
T * alloc(Args &&... ConstructorArgs)
DEMANGLE_ABI SymbolNode * parse(std::string_view &MangledName)
#define INT64_MAX
Definition DataTypes.h:71
@ Tail
Attemps to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition CallingConv.h:76
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ OB
OB - OneByte - Set if this instruction has a one byte opcode.
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
SmallVector< Node, 4 > NodeList
Definition RDFGraph.h:550
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
@ Length
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
@ demangle_success
Definition Demangle.h:31
@ demangle_invalid_mangled_name
Definition Demangle.h:29
DEMANGLE_ABI std::optional< size_t > getArm64ECInsertionPointInMangledName(std::string_view MangledName)
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
DEMANGLE_ABI char * microsoftDemangle(std::string_view mangled_name, size_t *n_read, int *status, MSDemangleFlags Flags=MSDF_None)
Demangles the Microsoft symbol pointed at by mangled_name and returns it.
MSDemangleFlags
Definition Demangle.h:40
@ MSDF_NoReturnType
Definition Demangle.h:45
@ MSDF_DumpBackrefs
Definition Demangle.h:42
@ MSDF_NoMemberType
Definition Demangle.h:46
@ MSDF_NoVariableType
Definition Demangle.h:47
@ MSDF_NoCallingConvention
Definition Demangle.h:44
@ MSDF_NoAccessSpecifier
Definition Demangle.h:43
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
NodeList * Next
NamedIdentifierNode * Names[Max]
void output(OutputBuffer &OB, OutputFlags Flags) const override