LLVM  16.0.0git
RustDemangle.cpp
Go to the documentation of this file.
1 //===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a demangler for Rust v0 mangled symbols as specified in
10 // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Demangle/Demangle.h"
16 #include "llvm/Demangle/Utility.h"
17 
18 #include <algorithm>
19 #include <cassert>
20 #include <cstdint>
21 #include <cstring>
22 #include <limits>
23 
24 using namespace llvm;
25 
26 using llvm::itanium_demangle::OutputBuffer;
27 using llvm::itanium_demangle::ScopedOverride;
28 using llvm::itanium_demangle::StringView;
29 
30 namespace {
31 
32 struct Identifier {
33  StringView Name;
34  bool Punycode;
35 
36  bool empty() const { return Name.empty(); }
37 };
38 
39 enum class BasicType {
40  Bool,
41  Char,
42  I8,
43  I16,
44  I32,
45  I64,
46  I128,
47  ISize,
48  U8,
49  U16,
50  U32,
51  U64,
52  U128,
53  USize,
54  F32,
55  F64,
56  Str,
57  Placeholder,
58  Unit,
59  Variadic,
60  Never,
61 };
62 
63 enum class IsInType {
64  No,
65  Yes,
66 };
67 
68 enum class LeaveGenericsOpen {
69  No,
70  Yes,
71 };
72 
73 class Demangler {
74  // Maximum recursion level. Used to avoid stack overflow.
75  size_t MaxRecursionLevel;
76  // Current recursion level.
77  size_t RecursionLevel;
78  size_t BoundLifetimes;
79  // Input string that is being demangled with "_R" prefix removed.
80  StringView Input;
81  // Position in the input string.
82  size_t Position;
83  // When true, print methods append the output to the stream.
84  // When false, the output is suppressed.
85  bool Print;
86  // True if an error occurred.
87  bool Error;
88 
89 public:
90  // Demangled output.
91  OutputBuffer Output;
92 
93  Demangler(size_t MaxRecursionLevel = 500);
94 
95  bool demangle(StringView MangledName);
96 
97 private:
98  bool demanglePath(IsInType Type,
99  LeaveGenericsOpen LeaveOpen = LeaveGenericsOpen::No);
100  void demangleImplPath(IsInType InType);
101  void demangleGenericArg();
102  void demangleType();
103  void demangleFnSig();
104  void demangleDynBounds();
105  void demangleDynTrait();
106  void demangleOptionalBinder();
107  void demangleConst();
108  void demangleConstInt();
109  void demangleConstBool();
110  void demangleConstChar();
111 
112  template <typename Callable> void demangleBackref(Callable Demangler) {
113  uint64_t Backref = parseBase62Number();
114  if (Error || Backref >= Position) {
115  Error = true;
116  return;
117  }
118 
119  if (!Print)
120  return;
121 
122  ScopedOverride<size_t> SavePosition(Position, Position);
123  Position = Backref;
124  Demangler();
125  }
126 
127  Identifier parseIdentifier();
128  uint64_t parseOptionalBase62Number(char Tag);
129  uint64_t parseBase62Number();
130  uint64_t parseDecimalNumber();
131  uint64_t parseHexNumber(StringView &HexDigits);
132 
133  void print(char C);
134  void print(StringView S);
135  void printDecimalNumber(uint64_t N);
136  void printBasicType(BasicType);
137  void printLifetime(uint64_t Index);
138  void printIdentifier(Identifier Ident);
139 
140  char look() const;
141  char consume();
142  bool consumeIf(char Prefix);
143 
144  bool addAssign(uint64_t &A, uint64_t B);
145  bool mulAssign(uint64_t &A, uint64_t B);
146 };
147 
148 } // namespace
149 
150 char *llvm::rustDemangle(const char *MangledName) {
151  if (MangledName == nullptr)
152  return nullptr;
153 
154  // Return early if mangled name doesn't look like a Rust symbol.
155  StringView Mangled(MangledName);
156  if (!Mangled.startsWith("_R"))
157  return nullptr;
158 
159  Demangler D;
160  if (!D.demangle(Mangled)) {
161  std::free(D.Output.getBuffer());
162  return nullptr;
163  }
164 
165  D.Output += '\0';
166 
167  return D.Output.getBuffer();
168 }
169 
170 Demangler::Demangler(size_t MaxRecursionLevel)
171  : MaxRecursionLevel(MaxRecursionLevel) {}
172 
173 static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; }
174 
175 static inline bool isHexDigit(const char C) {
176  return ('0' <= C && C <= '9') || ('a' <= C && C <= 'f');
177 }
178 
179 static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; }
180 
181 static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; }
182 
183 /// Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
184 static inline bool isValid(const char C) {
185  return isDigit(C) || isLower(C) || isUpper(C) || C == '_';
186 }
187 
188 // Demangles Rust v0 mangled symbol. Returns true when successful, and false
189 // otherwise. The demangled symbol is stored in Output field. It is
190 // responsibility of the caller to free the memory behind the output stream.
191 //
192 // <symbol-name> = "_R" <path> [<instantiating-crate>]
193 bool Demangler::demangle(StringView Mangled) {
194  Position = 0;
195  Error = false;
196  Print = true;
197  RecursionLevel = 0;
198  BoundLifetimes = 0;
199 
200  if (!Mangled.consumeFront("_R")) {
201  Error = true;
202  return false;
203  }
204  size_t Dot = Mangled.find('.');
205  Input = Mangled.substr(0, Dot);
206  StringView Suffix = Mangled.dropFront(Dot);
207 
208  demanglePath(IsInType::No);
209 
210  if (Position != Input.size()) {
211  ScopedOverride<bool> SavePrint(Print, false);
212  demanglePath(IsInType::No);
213  }
214 
215  if (Position != Input.size())
216  Error = true;
217 
218  if (!Suffix.empty()) {
219  print(" (");
220  print(Suffix);
221  print(")");
222  }
223 
224  return !Error;
225 }
226 
227 // Demangles a path. InType indicates whether a path is inside a type. When
228 // LeaveOpen is true, a closing `>` after generic arguments is omitted from the
229 // output. Return value indicates whether generics arguments have been left
230 // open.
231 //
232 // <path> = "C" <identifier> // crate root
233 // | "M" <impl-path> <type> // <T> (inherent impl)
234 // | "X" <impl-path> <type> <path> // <T as Trait> (trait impl)
235 // | "Y" <type> <path> // <T as Trait> (trait definition)
236 // | "N" <ns> <path> <identifier> // ...::ident (nested path)
237 // | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args)
238 // | <backref>
239 // <identifier> = [<disambiguator>] <undisambiguated-identifier>
240 // <ns> = "C" // closure
241 // | "S" // shim
242 // | <A-Z> // other special namespaces
243 // | <a-z> // internal namespaces
244 bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) {
245  if (Error || RecursionLevel >= MaxRecursionLevel) {
246  Error = true;
247  return false;
248  }
249  ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
250 
251  switch (consume()) {
252  case 'C': {
253  parseOptionalBase62Number('s');
254  printIdentifier(parseIdentifier());
255  break;
256  }
257  case 'M': {
258  demangleImplPath(InType);
259  print("<");
260  demangleType();
261  print(">");
262  break;
263  }
264  case 'X': {
265  demangleImplPath(InType);
266  print("<");
267  demangleType();
268  print(" as ");
269  demanglePath(IsInType::Yes);
270  print(">");
271  break;
272  }
273  case 'Y': {
274  print("<");
275  demangleType();
276  print(" as ");
277  demanglePath(IsInType::Yes);
278  print(">");
279  break;
280  }
281  case 'N': {
282  char NS = consume();
283  if (!isLower(NS) && !isUpper(NS)) {
284  Error = true;
285  break;
286  }
287  demanglePath(InType);
288 
289  uint64_t Disambiguator = parseOptionalBase62Number('s');
290  Identifier Ident = parseIdentifier();
291 
292  if (isUpper(NS)) {
293  // Special namespaces
294  print("::{");
295  if (NS == 'C')
296  print("closure");
297  else if (NS == 'S')
298  print("shim");
299  else
300  print(NS);
301  if (!Ident.empty()) {
302  print(":");
303  printIdentifier(Ident);
304  }
305  print('#');
306  printDecimalNumber(Disambiguator);
307  print('}');
308  } else {
309  // Implementation internal namespaces.
310  if (!Ident.empty()) {
311  print("::");
312  printIdentifier(Ident);
313  }
314  }
315  break;
316  }
317  case 'I': {
318  demanglePath(InType);
319  // Omit "::" when in a type, where it is optional.
320  if (InType == IsInType::No)
321  print("::");
322  print("<");
323  for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
324  if (I > 0)
325  print(", ");
326  demangleGenericArg();
327  }
328  if (LeaveOpen == LeaveGenericsOpen::Yes)
329  return true;
330  else
331  print(">");
332  break;
333  }
334  case 'B': {
335  bool IsOpen = false;
336  demangleBackref([&] { IsOpen = demanglePath(InType, LeaveOpen); });
337  return IsOpen;
338  }
339  default:
340  Error = true;
341  break;
342  }
343 
344  return false;
345 }
346 
347 // <impl-path> = [<disambiguator>] <path>
348 // <disambiguator> = "s" <base-62-number>
349 void Demangler::demangleImplPath(IsInType InType) {
350  ScopedOverride<bool> SavePrint(Print, false);
351  parseOptionalBase62Number('s');
352  demanglePath(InType);
353 }
354 
355 // <generic-arg> = <lifetime>
356 // | <type>
357 // | "K" <const>
358 // <lifetime> = "L" <base-62-number>
359 void Demangler::demangleGenericArg() {
360  if (consumeIf('L'))
361  printLifetime(parseBase62Number());
362  else if (consumeIf('K'))
363  demangleConst();
364  else
365  demangleType();
366 }
367 
368 // <basic-type> = "a" // i8
369 // | "b" // bool
370 // | "c" // char
371 // | "d" // f64
372 // | "e" // str
373 // | "f" // f32
374 // | "h" // u8
375 // | "i" // isize
376 // | "j" // usize
377 // | "l" // i32
378 // | "m" // u32
379 // | "n" // i128
380 // | "o" // u128
381 // | "s" // i16
382 // | "t" // u16
383 // | "u" // ()
384 // | "v" // ...
385 // | "x" // i64
386 // | "y" // u64
387 // | "z" // !
388 // | "p" // placeholder (e.g. for generic params), shown as _
389 static bool parseBasicType(char C, BasicType &Type) {
390  switch (C) {
391  case 'a':
392  Type = BasicType::I8;
393  return true;
394  case 'b':
396  return true;
397  case 'c':
398  Type = BasicType::Char;
399  return true;
400  case 'd':
401  Type = BasicType::F64;
402  return true;
403  case 'e':
404  Type = BasicType::Str;
405  return true;
406  case 'f':
407  Type = BasicType::F32;
408  return true;
409  case 'h':
410  Type = BasicType::U8;
411  return true;
412  case 'i':
413  Type = BasicType::ISize;
414  return true;
415  case 'j':
416  Type = BasicType::USize;
417  return true;
418  case 'l':
419  Type = BasicType::I32;
420  return true;
421  case 'm':
422  Type = BasicType::U32;
423  return true;
424  case 'n':
425  Type = BasicType::I128;
426  return true;
427  case 'o':
428  Type = BasicType::U128;
429  return true;
430  case 'p':
431  Type = BasicType::Placeholder;
432  return true;
433  case 's':
434  Type = BasicType::I16;
435  return true;
436  case 't':
437  Type = BasicType::U16;
438  return true;
439  case 'u':
440  Type = BasicType::Unit;
441  return true;
442  case 'v':
444  return true;
445  case 'x':
446  Type = BasicType::I64;
447  return true;
448  case 'y':
450  return true;
451  case 'z':
452  Type = BasicType::Never;
453  return true;
454  default:
455  return false;
456  }
457 }
458 
459 void Demangler::printBasicType(BasicType Type) {
460  switch (Type) {
461  case BasicType::Bool:
462  print("bool");
463  break;
464  case BasicType::Char:
465  print("char");
466  break;
467  case BasicType::I8:
468  print("i8");
469  break;
470  case BasicType::I16:
471  print("i16");
472  break;
473  case BasicType::I32:
474  print("i32");
475  break;
476  case BasicType::I64:
477  print("i64");
478  break;
479  case BasicType::I128:
480  print("i128");
481  break;
482  case BasicType::ISize:
483  print("isize");
484  break;
485  case BasicType::U8:
486  print("u8");
487  break;
488  case BasicType::U16:
489  print("u16");
490  break;
491  case BasicType::U32:
492  print("u32");
493  break;
494  case BasicType::U64:
495  print("u64");
496  break;
497  case BasicType::U128:
498  print("u128");
499  break;
500  case BasicType::USize:
501  print("usize");
502  break;
503  case BasicType::F32:
504  print("f32");
505  break;
506  case BasicType::F64:
507  print("f64");
508  break;
509  case BasicType::Str:
510  print("str");
511  break;
512  case BasicType::Placeholder:
513  print("_");
514  break;
515  case BasicType::Unit:
516  print("()");
517  break;
518  case BasicType::Variadic:
519  print("...");
520  break;
521  case BasicType::Never:
522  print("!");
523  break;
524  }
525 }
526 
527 // <type> = | <basic-type>
528 // | <path> // named type
529 // | "A" <type> <const> // [T; N]
530 // | "S" <type> // [T]
531 // | "T" {<type>} "E" // (T1, T2, T3, ...)
532 // | "R" [<lifetime>] <type> // &T
533 // | "Q" [<lifetime>] <type> // &mut T
534 // | "P" <type> // *const T
535 // | "O" <type> // *mut T
536 // | "F" <fn-sig> // fn(...) -> ...
537 // | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a
538 // | <backref> // backref
539 void Demangler::demangleType() {
540  if (Error || RecursionLevel >= MaxRecursionLevel) {
541  Error = true;
542  return;
543  }
544  ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
545 
546  size_t Start = Position;
547  char C = consume();
548  BasicType Type;
549  if (parseBasicType(C, Type))
550  return printBasicType(Type);
551 
552  switch (C) {
553  case 'A':
554  print("[");
555  demangleType();
556  print("; ");
557  demangleConst();
558  print("]");
559  break;
560  case 'S':
561  print("[");
562  demangleType();
563  print("]");
564  break;
565  case 'T': {
566  print("(");
567  size_t I = 0;
568  for (; !Error && !consumeIf('E'); ++I) {
569  if (I > 0)
570  print(", ");
571  demangleType();
572  }
573  if (I == 1)
574  print(",");
575  print(")");
576  break;
577  }
578  case 'R':
579  case 'Q':
580  print('&');
581  if (consumeIf('L')) {
582  if (auto Lifetime = parseBase62Number()) {
583  printLifetime(Lifetime);
584  print(' ');
585  }
586  }
587  if (C == 'Q')
588  print("mut ");
589  demangleType();
590  break;
591  case 'P':
592  print("*const ");
593  demangleType();
594  break;
595  case 'O':
596  print("*mut ");
597  demangleType();
598  break;
599  case 'F':
600  demangleFnSig();
601  break;
602  case 'D':
603  demangleDynBounds();
604  if (consumeIf('L')) {
605  if (auto Lifetime = parseBase62Number()) {
606  print(" + ");
607  printLifetime(Lifetime);
608  }
609  } else {
610  Error = true;
611  }
612  break;
613  case 'B':
614  demangleBackref([&] { demangleType(); });
615  break;
616  default:
617  Position = Start;
618  demanglePath(IsInType::Yes);
619  break;
620  }
621 }
622 
623 // <fn-sig> := [<binder>] ["U"] ["K" <abi>] {<type>} "E" <type>
624 // <abi> = "C"
625 // | <undisambiguated-identifier>
626 void Demangler::demangleFnSig() {
627  ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
628  demangleOptionalBinder();
629 
630  if (consumeIf('U'))
631  print("unsafe ");
632 
633  if (consumeIf('K')) {
634  print("extern \"");
635  if (consumeIf('C')) {
636  print("C");
637  } else {
638  Identifier Ident = parseIdentifier();
639  if (Ident.Punycode)
640  Error = true;
641  for (char C : Ident.Name) {
642  // When mangling ABI string, the "-" is replaced with "_".
643  if (C == '_')
644  C = '-';
645  print(C);
646  }
647  }
648  print("\" ");
649  }
650 
651  print("fn(");
652  for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
653  if (I > 0)
654  print(", ");
655  demangleType();
656  }
657  print(")");
658 
659  if (consumeIf('u')) {
660  // Skip the unit type from the output.
661  } else {
662  print(" -> ");
663  demangleType();
664  }
665 }
666 
667 // <dyn-bounds> = [<binder>] {<dyn-trait>} "E"
668 void Demangler::demangleDynBounds() {
669  ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
670  print("dyn ");
671  demangleOptionalBinder();
672  for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
673  if (I > 0)
674  print(" + ");
675  demangleDynTrait();
676  }
677 }
678 
679 // <dyn-trait> = <path> {<dyn-trait-assoc-binding>}
680 // <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type>
681 void Demangler::demangleDynTrait() {
682  bool IsOpen = demanglePath(IsInType::Yes, LeaveGenericsOpen::Yes);
683  while (!Error && consumeIf('p')) {
684  if (!IsOpen) {
685  IsOpen = true;
686  print('<');
687  } else {
688  print(", ");
689  }
690  print(parseIdentifier().Name);
691  print(" = ");
692  demangleType();
693  }
694  if (IsOpen)
695  print(">");
696 }
697 
698 // Demangles optional binder and updates the number of bound lifetimes.
699 //
700 // <binder> = "G" <base-62-number>
701 void Demangler::demangleOptionalBinder() {
702  uint64_t Binder = parseOptionalBase62Number('G');
703  if (Error || Binder == 0)
704  return;
705 
706  // In valid inputs each bound lifetime is referenced later. Referencing a
707  // lifetime requires at least one byte of input. Reject inputs that are too
708  // short to reference all bound lifetimes. Otherwise demangling of invalid
709  // binders could generate excessive amounts of output.
710  if (Binder >= Input.size() - BoundLifetimes) {
711  Error = true;
712  return;
713  }
714 
715  print("for<");
716  for (size_t I = 0; I != Binder; ++I) {
717  BoundLifetimes += 1;
718  if (I > 0)
719  print(", ");
720  printLifetime(1);
721  }
722  print("> ");
723 }
724 
725 // <const> = <basic-type> <const-data>
726 // | "p" // placeholder
727 // | <backref>
728 void Demangler::demangleConst() {
729  if (Error || RecursionLevel >= MaxRecursionLevel) {
730  Error = true;
731  return;
732  }
733  ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
734 
735  char C = consume();
736  BasicType Type;
737  if (parseBasicType(C, Type)) {
738  switch (Type) {
739  case BasicType::I8:
740  case BasicType::I16:
741  case BasicType::I32:
742  case BasicType::I64:
743  case BasicType::I128:
744  case BasicType::ISize:
745  case BasicType::U8:
746  case BasicType::U16:
747  case BasicType::U32:
748  case BasicType::U64:
749  case BasicType::U128:
750  case BasicType::USize:
751  demangleConstInt();
752  break;
753  case BasicType::Bool:
754  demangleConstBool();
755  break;
756  case BasicType::Char:
757  demangleConstChar();
758  break;
759  case BasicType::Placeholder:
760  print('_');
761  break;
762  default:
763  Error = true;
764  break;
765  }
766  } else if (C == 'B') {
767  demangleBackref([&] { demangleConst(); });
768  } else {
769  Error = true;
770  }
771 }
772 
773 // <const-data> = ["n"] <hex-number>
774 void Demangler::demangleConstInt() {
775  if (consumeIf('n'))
776  print('-');
777 
778  StringView HexDigits;
779  uint64_t Value = parseHexNumber(HexDigits);
780  if (HexDigits.size() <= 16) {
781  printDecimalNumber(Value);
782  } else {
783  print("0x");
784  print(HexDigits);
785  }
786 }
787 
788 // <const-data> = "0_" // false
789 // | "1_" // true
790 void Demangler::demangleConstBool() {
791  StringView HexDigits;
792  parseHexNumber(HexDigits);
793  if (HexDigits == "0")
794  print("false");
795  else if (HexDigits == "1")
796  print("true");
797  else
798  Error = true;
799 }
800 
801 /// Returns true if CodePoint represents a printable ASCII character.
802 static bool isAsciiPrintable(uint64_t CodePoint) {
803  return 0x20 <= CodePoint && CodePoint <= 0x7e;
804 }
805 
806 // <const-data> = <hex-number>
807 void Demangler::demangleConstChar() {
808  StringView HexDigits;
809  uint64_t CodePoint = parseHexNumber(HexDigits);
810  if (Error || HexDigits.size() > 6) {
811  Error = true;
812  return;
813  }
814 
815  print("'");
816  switch (CodePoint) {
817  case '\t':
818  print(R"(\t)");
819  break;
820  case '\r':
821  print(R"(\r)");
822  break;
823  case '\n':
824  print(R"(\n)");
825  break;
826  case '\\':
827  print(R"(\\)");
828  break;
829  case '"':
830  print(R"(")");
831  break;
832  case '\'':
833  print(R"(\')");
834  break;
835  default:
836  if (isAsciiPrintable(CodePoint)) {
837  char C = CodePoint;
838  print(C);
839  } else {
840  print(R"(\u{)");
841  print(HexDigits);
842  print('}');
843  }
844  break;
845  }
846  print('\'');
847 }
848 
849 // <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes>
850 Identifier Demangler::parseIdentifier() {
851  bool Punycode = consumeIf('u');
852  uint64_t Bytes = parseDecimalNumber();
853 
854  // Underscore resolves the ambiguity when identifier starts with a decimal
855  // digit or another underscore.
856  consumeIf('_');
857 
858  if (Error || Bytes > Input.size() - Position) {
859  Error = true;
860  return {};
861  }
862  StringView S = Input.substr(Position, Bytes);
863  Position += Bytes;
864 
865  if (!std::all_of(S.begin(), S.end(), isValid)) {
866  Error = true;
867  return {};
868  }
869 
870  return {S, Punycode};
871 }
872 
873 // Parses optional base 62 number. The presence of a number is determined using
874 // Tag. Returns 0 when tag is absent and parsed value + 1 otherwise
875 //
876 // This function is indended for parsing disambiguators and binders which when
877 // not present have their value interpreted as 0, and otherwise as decoded
878 // value + 1. For example for binders, value for "G_" is 1, for "G0_" value is
879 // 2. When "G" is absent value is 0.
880 uint64_t Demangler::parseOptionalBase62Number(char Tag) {
881  if (!consumeIf(Tag))
882  return 0;
883 
884  uint64_t N = parseBase62Number();
885  if (Error || !addAssign(N, 1))
886  return 0;
887 
888  return N;
889 }
890 
891 // Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by
892 // "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1,
893 // "1_" encodes 2, etc.
894 //
895 // <base-62-number> = {<0-9a-zA-Z>} "_"
896 uint64_t Demangler::parseBase62Number() {
897  if (consumeIf('_'))
898  return 0;
899 
900  uint64_t Value = 0;
901 
902  while (true) {
903  uint64_t Digit;
904  char C = consume();
905 
906  if (C == '_') {
907  break;
908  } else if (isDigit(C)) {
909  Digit = C - '0';
910  } else if (isLower(C)) {
911  Digit = 10 + (C - 'a');
912  } else if (isUpper(C)) {
913  Digit = 10 + 26 + (C - 'A');
914  } else {
915  Error = true;
916  return 0;
917  }
918 
919  if (!mulAssign(Value, 62))
920  return 0;
921 
922  if (!addAssign(Value, Digit))
923  return 0;
924  }
925 
926  if (!addAssign(Value, 1))
927  return 0;
928 
929  return Value;
930 }
931 
932 // Parses a decimal number that had been encoded without any leading zeros.
933 //
934 // <decimal-number> = "0"
935 // | <1-9> {<0-9>}
936 uint64_t Demangler::parseDecimalNumber() {
937  char C = look();
938  if (!isDigit(C)) {
939  Error = true;
940  return 0;
941  }
942 
943  if (C == '0') {
944  consume();
945  return 0;
946  }
947 
948  uint64_t Value = 0;
949 
950  while (isDigit(look())) {
951  if (!mulAssign(Value, 10)) {
952  Error = true;
953  return 0;
954  }
955 
956  uint64_t D = consume() - '0';
957  if (!addAssign(Value, D))
958  return 0;
959  }
960 
961  return Value;
962 }
963 
964 // Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed
965 // value and stores hex digits in HexDigits. The return value is unspecified if
966 // HexDigits.size() > 16.
967 //
968 // <hex-number> = "0_"
969 // | <1-9a-f> {<0-9a-f>} "_"
970 uint64_t Demangler::parseHexNumber(StringView &HexDigits) {
971  size_t Start = Position;
972  uint64_t Value = 0;
973 
974  if (!isHexDigit(look()))
975  Error = true;
976 
977  if (consumeIf('0')) {
978  if (!consumeIf('_'))
979  Error = true;
980  } else {
981  while (!Error && !consumeIf('_')) {
982  char C = consume();
983  Value *= 16;
984  if (isDigit(C))
985  Value += C - '0';
986  else if ('a' <= C && C <= 'f')
987  Value += 10 + (C - 'a');
988  else
989  Error = true;
990  }
991  }
992 
993  if (Error) {
994  HexDigits = StringView();
995  return 0;
996  }
997 
998  size_t End = Position - 1;
999  assert(Start < End);
1000  HexDigits = Input.substr(Start, End - Start);
1001  return Value;
1002 }
1003 
1004 void Demangler::print(char C) {
1005  if (Error || !Print)
1006  return;
1007 
1008  Output += C;
1009 }
1010 
1011 void Demangler::print(StringView S) {
1012  if (Error || !Print)
1013  return;
1014 
1015  Output += S;
1016 }
1017 
1018 void Demangler::printDecimalNumber(uint64_t N) {
1019  if (Error || !Print)
1020  return;
1021 
1022  Output << N;
1023 }
1024 
1025 // Prints a lifetime. An index 0 always represents an erased lifetime. Indices
1026 // starting from 1, are De Bruijn indices, referring to higher-ranked lifetimes
1027 // bound by one of the enclosing binders.
1028 void Demangler::printLifetime(uint64_t Index) {
1029  if (Index == 0) {
1030  print("'_");
1031  return;
1032  }
1033 
1034  if (Index - 1 >= BoundLifetimes) {
1035  Error = true;
1036  return;
1037  }
1038 
1039  uint64_t Depth = BoundLifetimes - Index;
1040  print('\'');
1041  if (Depth < 26) {
1042  char C = 'a' + Depth;
1043  print(C);
1044  } else {
1045  print('z');
1046  printDecimalNumber(Depth - 26 + 1);
1047  }
1048 }
1049 
1050 static inline bool decodePunycodeDigit(char C, size_t &Value) {
1051  if (isLower(C)) {
1052  Value = C - 'a';
1053  return true;
1054  }
1055 
1056  if (isDigit(C)) {
1057  Value = 26 + (C - '0');
1058  return true;
1059  }
1060 
1061  return false;
1062 }
1063 
1064 static void removeNullBytes(OutputBuffer &Output, size_t StartIdx) {
1065  char *Buffer = Output.getBuffer();
1066  char *Start = Buffer + StartIdx;
1067  char *End = Buffer + Output.getCurrentPosition();
1068  Output.setCurrentPosition(std::remove(Start, End, '\0') - Buffer);
1069 }
1070 
1071 // Encodes code point as UTF-8 and stores results in Output. Returns false if
1072 // CodePoint is not a valid unicode scalar value.
1073 static inline bool encodeUTF8(size_t CodePoint, char *Output) {
1074  if (0xD800 <= CodePoint && CodePoint <= 0xDFFF)
1075  return false;
1076 
1077  if (CodePoint <= 0x7F) {
1078  Output[0] = CodePoint;
1079  return true;
1080  }
1081 
1082  if (CodePoint <= 0x7FF) {
1083  Output[0] = 0xC0 | ((CodePoint >> 6) & 0x3F);
1084  Output[1] = 0x80 | (CodePoint & 0x3F);
1085  return true;
1086  }
1087 
1088  if (CodePoint <= 0xFFFF) {
1089  Output[0] = 0xE0 | (CodePoint >> 12);
1090  Output[1] = 0x80 | ((CodePoint >> 6) & 0x3F);
1091  Output[2] = 0x80 | (CodePoint & 0x3F);
1092  return true;
1093  }
1094 
1095  if (CodePoint <= 0x10FFFF) {
1096  Output[0] = 0xF0 | (CodePoint >> 18);
1097  Output[1] = 0x80 | ((CodePoint >> 12) & 0x3F);
1098  Output[2] = 0x80 | ((CodePoint >> 6) & 0x3F);
1099  Output[3] = 0x80 | (CodePoint & 0x3F);
1100  return true;
1101  }
1102 
1103  return false;
1104 }
1105 
1106 // Decodes string encoded using punycode and appends results to Output.
1107 // Returns true if decoding was successful.
1108 static bool decodePunycode(StringView Input, OutputBuffer &Output) {
1109  size_t OutputSize = Output.getCurrentPosition();
1110  size_t InputIdx = 0;
1111 
1112  // Rust uses an underscore as a delimiter.
1113  size_t DelimiterPos = StringView::npos;
1114  for (size_t I = 0; I != Input.size(); ++I)
1115  if (Input[I] == '_')
1116  DelimiterPos = I;
1117 
1118  if (DelimiterPos != StringView::npos) {
1119  // Copy basic code points before the last delimiter to the output.
1120  for (; InputIdx != DelimiterPos; ++InputIdx) {
1121  char C = Input[InputIdx];
1122  if (!isValid(C))
1123  return false;
1124  // Code points are padded with zeros while decoding is in progress.
1125  char UTF8[4] = {C};
1126  Output += StringView(UTF8, UTF8 + 4);
1127  }
1128  // Skip over the delimiter.
1129  ++InputIdx;
1130  }
1131 
1132  size_t Base = 36;
1133  size_t Skew = 38;
1134  size_t Bias = 72;
1135  size_t N = 0x80;
1136  size_t TMin = 1;
1137  size_t TMax = 26;
1138  size_t Damp = 700;
1139 
1140  auto Adapt = [&](size_t Delta, size_t NumPoints) {
1141  Delta /= Damp;
1142  Delta += Delta / NumPoints;
1143  Damp = 2;
1144 
1145  size_t K = 0;
1146  while (Delta > (Base - TMin) * TMax / 2) {
1147  Delta /= Base - TMin;
1148  K += Base;
1149  }
1150  return K + (((Base - TMin + 1) * Delta) / (Delta + Skew));
1151  };
1152 
1153  // Main decoding loop.
1154  for (size_t I = 0; InputIdx != Input.size(); I += 1) {
1155  size_t OldI = I;
1156  size_t W = 1;
1158  for (size_t K = Base; true; K += Base) {
1159  if (InputIdx == Input.size())
1160  return false;
1161  char C = Input[InputIdx++];
1162  size_t Digit = 0;
1163  if (!decodePunycodeDigit(C, Digit))
1164  return false;
1165 
1166  if (Digit > (Max - I) / W)
1167  return false;
1168  I += Digit * W;
1169 
1170  size_t T;
1171  if (K <= Bias)
1172  T = TMin;
1173  else if (K >= Bias + TMax)
1174  T = TMax;
1175  else
1176  T = K - Bias;
1177 
1178  if (Digit < T)
1179  break;
1180 
1181  if (W > Max / (Base - T))
1182  return false;
1183  W *= (Base - T);
1184  }
1185  size_t NumPoints = (Output.getCurrentPosition() - OutputSize) / 4 + 1;
1186  Bias = Adapt(I - OldI, NumPoints);
1187 
1188  if (I / NumPoints > Max - N)
1189  return false;
1190  N += I / NumPoints;
1191  I = I % NumPoints;
1192 
1193  // Insert N at position I in the output.
1194  char UTF8[4] = {};
1195  if (!encodeUTF8(N, UTF8))
1196  return false;
1197  Output.insert(OutputSize + I * 4, UTF8, 4);
1198  }
1199 
1200  removeNullBytes(Output, OutputSize);
1201  return true;
1202 }
1203 
1204 void Demangler::printIdentifier(Identifier Ident) {
1205  if (Error || !Print)
1206  return;
1207 
1208  if (Ident.Punycode) {
1209  if (!decodePunycode(Ident.Name, Output))
1210  Error = true;
1211  } else {
1212  print(Ident.Name);
1213  }
1214 }
1215 
1216 char Demangler::look() const {
1217  if (Error || Position >= Input.size())
1218  return 0;
1219 
1220  return Input[Position];
1221 }
1222 
1223 char Demangler::consume() {
1224  if (Error || Position >= Input.size()) {
1225  Error = true;
1226  return 0;
1227  }
1228 
1229  return Input[Position++];
1230 }
1231 
1232 bool Demangler::consumeIf(char Prefix) {
1233  if (Error || Position >= Input.size() || Input[Position] != Prefix)
1234  return false;
1235 
1236  Position += 1;
1237  return true;
1238 }
1239 
1240 /// Computes A + B. When computation wraps around sets the error and returns
1241 /// false. Otherwise assigns the result to A and returns true.
1242 bool Demangler::addAssign(uint64_t &A, uint64_t B) {
1244  Error = true;
1245  return false;
1246  }
1247 
1248  A += B;
1249  return true;
1250 }
1251 
1252 /// Computes A * B. When computation wraps around sets the error and returns
1253 /// false. Otherwise assigns the result to A and returns true.
1254 bool Demangler::mulAssign(uint64_t &A, uint64_t B) {
1255  if (B != 0 && A > std::numeric_limits<uint64_t>::max() / B) {
1256  Error = true;
1257  return false;
1258  }
1259 
1260  A *= B;
1261  return true;
1262 }
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::wasm::ValType::I32
@ I32
llvm::lltok::Error
@ Error
Definition: LLToken.h:21
llvm::AMDGPU::HSAMD::ValueType::U16
@ U16
print
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
Definition: ArchiveWriter.cpp:189
llvm::pdb::PDB_BuiltinType::Char
@ Char
llvm::cl::Prefix
@ Prefix
Definition: CommandLine.h:161
T
llvm::RISCVFenceField::W
@ W
Definition: RISCVBaseInfo.h:266
U64
instcombine should handle this C2 when and C2 are unsigned Similarly for udiv and signed operands Currently InstCombine avoids this transform but will do it when the signs of the operands and the sign of the divide match See the FIXME in InstructionCombining cpp in the visitSetCondInst method after the switch case for Instruction::UDiv(around line 4447) for more details. The SingleSource/Benchmarks/Shootout-C++/hash and hash2 tests have examples of this const ruct.[LOOP OPTIMIZATION] SingleSource/Benchmarks/Misc/dt.c shows several interesting optimization opportunities in its double_array_divs_variable function typedef unsigned long long U64
Definition: README.txt:268
llvm::AMDGPU::HSAMD::ValueType::I8
@ I8
llvm::AMDGPU::HSAMD::ValueType::U32
@ U32
llvm::Depth
@ Depth
Definition: SIMachineScheduler.h:36
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
Utility.h
llvm::rustDemangle
char * rustDemangle(const char *MangledName)
Definition: RustDemangle.cpp:150
T
#define T
Definition: Mips16ISelLowering.cpp:341
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
llvm::dwarf::Tag
Tag
Definition: Dwarf.h:105
Demangler
itanium_demangle::ManglingParser< DefaultAllocator > Demangler
Definition: ItaniumDemangle.cpp:366
encodeUTF8
static void encodeUTF8(uint32_t UnicodeScalarValue, SmallVectorImpl< char > &Result)
encodeUTF8 - Encode UnicodeScalarValue in UTF-8 and append it to result.
Definition: YAMLParser.cpp:573
llvm::SwiftAsyncFramePointerMode::Never
@ Never
Never set the bit.
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1734
llvm::InlinerFunctionImportStatsOpts::No
@ No
llvm::wasm::ValType::F64
@ F64
parseBasicType
static bool parseBasicType(char C, BasicType &Type)
Definition: RustDemangle.cpp:389
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::wasm::ValType::F32
@ F32
llvm::dwarf::Index
Index
Definition: Dwarf.h:472
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
StringView::npos
static const size_t npos
Definition: StringView.h:30
isLower
static bool isLower(const char C)
Definition: RustDemangle.cpp:179
llvm::demangle
std::string demangle(const std::string &MangledName)
Attempt to demangle a string using different demangling schemes.
Definition: Demangle.cpp:29
StringView.h
Index
uint32_t Index
Definition: ELFObjHandler.cpp:83
uint64_t
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
isAsciiPrintable
static bool isAsciiPrintable(uint64_t CodePoint)
Returns true if CodePoint represents a printable ASCII character.
Definition: RustDemangle.cpp:802
llvm::wasm::ValType::I64
@ I64
I
#define I(x, y, z)
Definition: MD5.cpp:58
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::AMDGPU::HSAMD::ValueType::I16
@ I16
llvm::codeview::consume
Error consume(BinaryStreamReader &Reader)
Definition: RecordSerialization.h:45
llvm::print
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr)
Definition: GCNRegPressure.cpp:138
llvm::sys::fs::remove
std::error_code remove(const Twine &path, bool IgnoreNonExisting=true)
Remove path.
isDigit
static bool isDigit(const char C)
Definition: RustDemangle.cpp:173
Demangle.h
A
* A
Definition: README_ALTIVEC.txt:89
llvm::rdf::Print
Print(const T &, const DataFlowGraph &) -> Print< T >
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::AMDGPU::HSAMD::ValueType::U8
@ U8
llvm::pdb::Bool
@ Bool
Definition: PDBTypes.h:407
llvm::MCID::Variadic
@ Variadic
Definition: MCInstrDesc.h:149
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:50
Bool
@ Bool
Definition: TargetLibraryInfo.cpp:45
llvm::Error
Lightweight error class with error context and mandatory checking.
Definition: Error.h:155
llvm::TargetStackID::Value
Value
Definition: TargetFrameLowering.h:27
llvm::object::Identifier
@ Identifier
Definition: COFFModuleDefinition.cpp:34
llvm::PrevailingType::Yes
@ Yes
isValid
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
Definition: RustDemangle.cpp:184
llvm::pdb::DbgHeaderType::Max
@ Max
N
#define N
isHexDigit
static bool isHexDigit(const char C)
Definition: RustDemangle.cpp:175
llvm::msgpack::Type
Type
MessagePack types as defined in the standard, with the exception of Integer being divided into a sign...
Definition: MsgPackReader.h:48
consume
static bool consume(InternalInstruction *insn, T &ptr)
Definition: X86Disassembler.cpp:192
isUpper
static bool isUpper(const char C)
Definition: RustDemangle.cpp:181
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::UTF8
unsigned char UTF8
Definition: ConvertUTF.h:130
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58