49 if (Input.
size() == 0)
52 switch (uint8_t(Input[0])) {
54 if (Input.
size() >= 4) {
56 && uint8_t(Input[2]) == 0xFE
57 && uint8_t(Input[3]) == 0xFF)
59 if (Input[1] == 0 && Input[2] == 0 && Input[3] != 0)
63 if (Input.
size() >= 2 && Input[1] != 0)
67 if ( Input.
size() >= 4
68 && uint8_t(Input[1]) == 0xFE
73 if (Input.
size() >= 2 && uint8_t(Input[1]) == 0xFE)
77 if (Input.
size() >= 2 && uint8_t(Input[1]) == 0xFF)
81 if ( Input.
size() >= 3
82 && uint8_t(Input[1]) == 0xBB
83 && uint8_t(Input[2]) == 0xBF)
89 if (Input.
size() >= 4 && Input[1] == 0 && Input[2] == 0 && Input[3] == 0)
92 if (Input.
size() >= 2 && Input[1] == 0)
// Empty, out-of-line definitions of anchor() for Node, NullNode, ScalarNode,
// BlockScalarNode, KeyValueNode, MappingNode, SequenceNode and AliasNode.
// NOTE(review): presumably these act as vtable anchors so that each class's
// vtable is emitted in this translation unit only -- confirm against the
// class declarations in the corresponding header.
101 void Node::anchor() {}
102 void NullNode::anchor() {}
103 void ScalarNode::anchor() {}
104 void BlockScalarNode::anchor() {}
105 void KeyValueNode::anchor() {}
106 void MappingNode::anchor() {}
107 void SequenceNode::anchor() {}
108 void AliasNode::anchor() {}
163 mutable Token Sentinel;
169 return new (Alloc.Allocate<
Token>())
Token(V);
203 return Tok == Other.Tok;
218 if ((*Position & 0x80) == 0) {
219 return std::make_pair(*Position, 1);
223 if (Position + 1 != End &&
224 ((*Position & 0xE0) == 0xC0) &&
225 ((*(Position + 1) & 0xC0) == 0x80)) {
226 uint32_t codepoint = ((*Position & 0x1F) << 6) |
227 (*(Position + 1) & 0x3F);
228 if (codepoint >= 0x80)
229 return std::make_pair(codepoint, 2);
233 if (Position + 2 != End &&
234 ((*Position & 0xF0) == 0xE0) &&
235 ((*(Position + 1) & 0xC0) == 0x80) &&
236 ((*(Position + 2) & 0xC0) == 0x80)) {
237 uint32_t codepoint = ((*Position & 0x0F) << 12) |
238 ((*(Position + 1) & 0x3F) << 6) |
239 (*(Position + 2) & 0x3F);
242 if (codepoint >= 0x800 &&
243 (codepoint < 0xD800 || codepoint > 0xDFFF))
244 return std::make_pair(codepoint, 3);
248 if (Position + 3 != End &&
249 ((*Position & 0xF8) == 0xF0) &&
250 ((*(Position + 1) & 0xC0) == 0x80) &&
251 ((*(Position + 2) & 0xC0) == 0x80) &&
252 ((*(Position + 3) & 0xC0) == 0x80)) {
253 uint32_t codepoint = ((*Position & 0x07) << 18) |
254 ((*(Position + 1) & 0x3F) << 12) |
255 ((*(Position + 2) & 0x3F) << 6) |
256 (*(Position + 3) & 0x3F);
257 if (codepoint >= 0x10000 && codepoint <= 0x10FFFF)
258 return std::make_pair(codepoint, 4);
260 return std::make_pair(0, 0);
294 setError(Message, Current);
306 return StringRef(Current, End - Current);
391 void advanceWhile(SkipWhileFunc
Func);
404 bool consume(uint32_t Expected);
407 void skip(uint32_t Distance);
416 bool consumeLineBreakIfPresent();
427 void removeStaleSimpleKeyCandidates();
430 void removeSimpleKeyCandidatesOnFlowLevel(
unsigned Level);
434 bool unrollIndent(
int ToColumn);
438 bool rollIndent(
int ToColumn
447 void scanToNextToken();
450 bool scanStreamStart();
453 bool scanStreamEnd();
456 bool scanDirective();
459 bool scanDocumentIndicator(
bool IsStart);
462 bool scanFlowCollectionStart(
bool IsSequence);
465 bool scanFlowCollectionEnd(
bool IsSequence);
468 bool scanFlowEntry();
471 bool scanBlockEntry();
480 bool scanFlowScalar(
bool IsDoubleQuoted);
483 bool scanPlainScalar();
486 bool scanAliasOrAnchor(
bool IsAlias);
489 bool scanBlockScalar(
bool IsLiteral);
492 char scanBlockChompingIndicator();
495 unsigned scanBlockIndentationIndicator();
500 bool scanBlockScalarHeader(
char &ChompingIndicator,
unsigned &IndentIndicator,
506 bool findBlockScalarIndent(
unsigned &BlockIndent,
unsigned BlockExitIndent,
507 unsigned &LineBreaks,
bool &IsDone);
512 bool scanBlockScalarIndent(
unsigned BlockIndent,
unsigned BlockExitIndent,
519 bool fetchMoreTokens();
546 bool IsStartOfStream;
549 bool IsSimpleKeyAllowed;
575 if (UnicodeScalarValue <= 0x7F) {
576 Result.
push_back(UnicodeScalarValue & 0x7F);
577 }
else if (UnicodeScalarValue <= 0x7FF) {
578 uint8_t FirstByte = 0xC0 | ((UnicodeScalarValue & 0x7C0) >> 6);
579 uint8_t SecondByte = 0x80 | (UnicodeScalarValue & 0x3F);
582 }
else if (UnicodeScalarValue <= 0xFFFF) {
583 uint8_t FirstByte = 0xE0 | ((UnicodeScalarValue & 0xF000) >> 12);
584 uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
585 uint8_t ThirdByte = 0x80 | (UnicodeScalarValue & 0x3F);
589 }
else if (UnicodeScalarValue <= 0x10FFFF) {
590 uint8_t FirstByte = 0xF0 | ((UnicodeScalarValue & 0x1F0000) >> 18);
591 uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0x3F000) >> 12);
592 uint8_t ThirdByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
593 uint8_t FourthByte = 0x80 | (UnicodeScalarValue & 0x3F);
608 OS <<
"Stream-Start: ";
611 OS <<
"Stream-End: ";
614 OS <<
"Version-Directive: ";
617 OS <<
"Tag-Directive: ";
620 OS <<
"Document-Start: ";
623 OS <<
"Document-End: ";
626 OS <<
"Block-Entry: ";
632 OS <<
"Block-Sequence-Start: ";
635 OS <<
"Block-Mapping-Start: ";
638 OS <<
"Flow-Entry: ";
641 OS <<
"Flow-Sequence-Start: ";
644 OS <<
"Flow-Sequence-End: ";
647 OS <<
"Flow-Mapping-Start: ";
650 OS <<
"Flow-Mapping-End: ";
662 OS <<
"Block Scalar: ";
676 OS << T.
Range <<
"\n";
699 std::string EscapedInput;
702 EscapedInput +=
"\\\\";
704 EscapedInput +=
"\\\"";
706 EscapedInput +=
"\\0";
708 EscapedInput +=
"\\a";
710 EscapedInput +=
"\\b";
712 EscapedInput +=
"\\t";
714 EscapedInput +=
"\\n";
716 EscapedInput +=
"\\v";
718 EscapedInput +=
"\\f";
720 EscapedInput +=
"\\r";
722 EscapedInput +=
"\\e";
723 else if ((
unsigned char)*i < 0x20) {
725 EscapedInput +=
"\\x" + std::string(2 - HexStr.size(),
'0') + HexStr;
726 }
else if (*i & 0x80) {
729 if (UnicodeScalarValue.second == 0) {
733 EscapedInput.insert(EscapedInput.end(), Val.
begin(), Val.
end());
737 if (UnicodeScalarValue.first == 0x85)
738 EscapedInput +=
"\\N";
739 else if (UnicodeScalarValue.first == 0xA0)
740 EscapedInput +=
"\\_";
741 else if (UnicodeScalarValue.first == 0x2028)
742 EscapedInput +=
"\\L";
743 else if (UnicodeScalarValue.first == 0x2029)
744 EscapedInput +=
"\\P";
746 std::string HexStr =
utohexstr(UnicodeScalarValue.first);
747 if (HexStr.size() <= 2)
748 EscapedInput +=
"\\x" + std::string(2 - HexStr.size(),
'0') + HexStr;
749 else if (HexStr.size() <= 4)
750 EscapedInput +=
"\\u" + std::string(4 - HexStr.size(),
'0') + HexStr;
751 else if (HexStr.size() <= 8)
752 EscapedInput +=
"\\U" + std::string(8 - HexStr.size(),
'0') + HexStr;
754 i += UnicodeScalarValue.second - 1;
756 EscapedInput.push_back(*i);
762 : SM(sm), ShowColors(ShowColors) {
767 : SM(SM_), ShowColors(ShowColors) {
772 InputBuffer = Buffer;
779 IsStartOfStream =
true;
780 IsSimpleKeyAllowed =
true;
782 std::unique_ptr<MemoryBuffer> InputBufferOwner =
790 bool NeedMore =
false;
792 if (TokenQueue.
empty() || NeedMore) {
793 if (!fetchMoreTokens()) {
796 return TokenQueue.
front();
799 assert(!TokenQueue.
empty() &&
800 "fetchMoreTokens lied about getting tokens!");
802 removeStaleSimpleKeyCandidates();
804 SK.Tok = TokenQueue.
front();
805 if (std::find(SimpleKeys.
begin(), SimpleKeys.
end(), SK)
811 return TokenQueue.
front();
817 if (!TokenQueue.
empty())
822 if (TokenQueue.
empty()) {
823 TokenQueue.Alloc.Reset();
833 if ( *Position == 0x09
834 || (*Position >= 0x20 && *Position <= 0x7E))
838 if (uint8_t(*Position) & 0x80) {
841 && u8d.first != 0xFEFF
842 && ( u8d.first == 0x85
843 || ( u8d.first >= 0xA0
844 && u8d.first <= 0xD7FF)
845 || ( u8d.first >= 0xE000
846 && u8d.first <= 0xFFFD)
847 || ( u8d.first >= 0x10000
848 && u8d.first <= 0x10FFFF)))
849 return Position + u8d.second;
857 if (*Position == 0x0D) {
858 if (Position + 1 != End && *(Position + 1) == 0x0A)
863 if (*Position == 0x0A)
871 if (*Position ==
' ')
879 if (*Position ==
' ' || *Position ==
'\t')
887 if (*Position ==
' ' || *Position ==
'\t')
889 return skip_nb_char(Position);
903 void Scanner::advanceWhile(SkipWhileFunc Func) {
904 auto Final = skip_while(Func, Current);
905 Column += Final - Current;
910 return (C >=
'0' && C <=
'9')
911 || (C >=
'a' && C <=
'z')
912 || (C >=
'A' && C <=
'Z');
917 || (C >=
'a' && C <=
'z')
918 || (C >=
'A' && C <=
'Z');
926 if (( *Current ==
'%'
931 ||
StringRef(Current, 1).find_first_of(
"#;/?:@&=+$,_.!~*'()[]")
938 return StringRef(Start, Current - Start);
941 bool Scanner::consume(uint32_t Expected) {
942 if (Expected >= 0x80)
946 if (uint8_t(*Current) >= 0x80)
948 if (uint8_t(*Current) == Expected) {
956 void Scanner::skip(uint32_t Distance) {
959 assert(Current <= End &&
"Skipped past the end");
965 if ( *Position ==
' ' || *Position ==
'\t'
966 || *Position ==
'\r' || *Position ==
'\n')
971 bool Scanner::consumeLineBreakIfPresent() {
972 auto Next = skip_b_break(Current);
984 if (IsSimpleKeyAllowed) {
988 SK.Column = AtColumn;
989 SK.IsRequired = IsRequired;
990 SK.FlowLevel = FlowLevel;
995 void Scanner::removeStaleSimpleKeyCandidates() {
997 i != SimpleKeys.
end();) {
998 if (i->Line != Line || i->Column + 1024 < Column) {
1000 setError(
"Could not find expected : for simple key"
1001 , i->Tok->Range.begin());
1002 i = SimpleKeys.
erase(i);
1008 void Scanner::removeSimpleKeyCandidatesOnFlowLevel(
unsigned Level) {
1009 if (!SimpleKeys.
empty() && (SimpleKeys.
end() - 1)->FlowLevel == Level)
1013 bool Scanner::unrollIndent(
int ToColumn) {
1019 while (Indent > ToColumn) {
1029 bool Scanner::rollIndent(
int ToColumn
1034 if (Indent < ToColumn) {
1041 TokenQueue.
insert(InsertPoint, T);
1046 void Scanner::skipComment() {
1047 if (*Current !=
'#')
1060 void Scanner::scanToNextToken() {
1062 while (*Current ==
' ' || *Current ==
'\t') {
1077 IsSimpleKeyAllowed =
true;
1081 bool Scanner::scanStreamStart() {
1082 IsStartOfStream =
false;
1090 Current += EI.second;
1094 bool Scanner::scanStreamEnd() {
1103 IsSimpleKeyAllowed =
false;
1112 bool Scanner::scanDirective() {
1116 IsSimpleKeyAllowed =
false;
1121 Current = skip_while(&Scanner::skip_ns_char, Current);
1123 Current = skip_while(&Scanner::skip_s_white, Current);
1126 if (
Name ==
"YAML") {
1127 Current = skip_while(&Scanner::skip_ns_char, Current);
1132 }
else if(
Name ==
"TAG") {
1133 Current = skip_while(&Scanner::skip_ns_char, Current);
1134 Current = skip_while(&Scanner::skip_s_white, Current);
1135 Current = skip_while(&Scanner::skip_ns_char, Current);
1144 bool Scanner::scanDocumentIndicator(
bool IsStart) {
1147 IsSimpleKeyAllowed =
false;
1157 bool Scanner::scanFlowCollectionStart(
bool IsSequence) {
1166 saveSimpleKeyCandidate(TokenQueue.
back(), Column - 1,
false);
1169 IsSimpleKeyAllowed =
true;
1174 bool Scanner::scanFlowCollectionEnd(
bool IsSequence) {
1175 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1176 IsSimpleKeyAllowed =
false;
1188 bool Scanner::scanFlowEntry() {
1189 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1190 IsSimpleKeyAllowed =
true;
1199 bool Scanner::scanBlockEntry() {
1201 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1202 IsSimpleKeyAllowed =
true;
1211 bool Scanner::scanKey() {
1215 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1216 IsSimpleKeyAllowed = !FlowLevel;
1226 bool Scanner::scanValue() {
1229 if (!SimpleKeys.
empty()) {
1233 T.
Range = SK.Tok->Range;
1235 for (i = TokenQueue.
begin(), e = TokenQueue.
end(); i != e; ++i) {
1239 assert(i != e &&
"SimpleKey not in token queue!");
1240 i = TokenQueue.
insert(i, T);
1245 IsSimpleKeyAllowed =
false;
1249 IsSimpleKeyAllowed = !FlowLevel;
1269 assert(Position - 1 >= First);
1273 while (I >= First && *I ==
'\\') --
I;
1276 return (Position - 1 - I) % 2 == 1;
1279 bool Scanner::scanFlowScalar(
bool IsDoubleQuoted) {
1281 unsigned ColStart = Column;
1282 if (IsDoubleQuoted) {
1285 while (Current != End && *Current !=
'"')
1289 }
while ( Current != End
1290 && *(Current - 1) ==
'\\'
1296 if (Current + 1 < End && *Current ==
'\'' && *(Current + 1) ==
'\'') {
1299 }
else if (*Current ==
'\'')
1303 i = skip_b_break(Current);
1318 if (Current == End) {
1319 setError(
"Expected quote at end of scalar", Current);
1329 saveSimpleKeyCandidate(TokenQueue.
back(), ColStart,
false);
1331 IsSimpleKeyAllowed =
false;
1336 bool Scanner::scanPlainScalar() {
1338 unsigned ColStart = Column;
1339 unsigned LeadingBlanks = 0;
1340 assert(Indent >= -1 &&
"Indent must be >= -1 !");
1341 unsigned indent =
static_cast<unsigned>(Indent + 1);
1343 if (*Current ==
'#')
1346 while (!isBlankOrBreak(Current)) {
1347 if ( FlowLevel && *Current ==
':'
1348 && !(isBlankOrBreak(Current + 1) || *(Current + 1) ==
',')) {
1349 setError(
"Found unexpected ':' while scanning a plain scalar", Current);
1354 if ( (*Current ==
':' && isBlankOrBreak(Current + 1))
1356 && (
StringRef(Current, 1).find_first_of(
",:?[]{}")
1368 if (!isBlankOrBreak(Current))
1373 while (isBlankOrBreak(Tmp)) {
1376 if (LeadingBlanks && (Column < indent) && *Tmp ==
'\t') {
1377 setError(
"Found invalid tab character in indentation", Tmp);
1383 i = skip_b_break(Tmp);
1392 if (!FlowLevel && Column < indent)
1397 if (Start == Current) {
1398 setError(
"Got empty plain scalar", Start);
1407 saveSimpleKeyCandidate(TokenQueue.
back(), ColStart,
false);
1409 IsSimpleKeyAllowed =
false;
1414 bool Scanner::scanAliasOrAnchor(
bool IsAlias) {
1416 unsigned ColStart = Column;
1419 if ( *Current ==
'[' || *Current ==
']'
1420 || *Current ==
'{' || *Current ==
'}'
1431 if (Start == Current) {
1432 setError(
"Got empty alias or anchor", Start);
1442 saveSimpleKeyCandidate(TokenQueue.
back(), ColStart,
false);
1444 IsSimpleKeyAllowed =
false;
1449 char Scanner::scanBlockChompingIndicator() {
1450 char Indicator =
' ';
1451 if (Current != End && (*Current ==
'+' || *Current ==
'-')) {
1452 Indicator = *Current;
1464 if (ChompingIndicator ==
'-')
1466 if (ChompingIndicator ==
'+')
1469 return Str.
empty() ? 0 : 1;
1472 unsigned Scanner::scanBlockIndentationIndicator() {
1473 unsigned Indent = 0;
1474 if (Current != End && (*Current >=
'1' && *Current <=
'9')) {
1481 bool Scanner::scanBlockScalarHeader(
char &ChompingIndicator,
1482 unsigned &IndentIndicator,
bool &IsDone) {
1483 auto Start = Current;
1485 ChompingIndicator = scanBlockChompingIndicator();
1486 IndentIndicator = scanBlockIndentationIndicator();
1488 if (ChompingIndicator ==
' ')
1489 ChompingIndicator = scanBlockChompingIndicator();
1490 Current = skip_while(&Scanner::skip_s_white, Current);
1493 if (Current == End) {
1502 if (!consumeLineBreakIfPresent()) {
1503 setError(
"Expected a line break after block scalar header", Current);
1509 bool Scanner::findBlockScalarIndent(
unsigned &BlockIndent,
1510 unsigned BlockExitIndent,
1511 unsigned &LineBreaks,
bool &IsDone) {
1512 unsigned MaxAllSpaceLineCharacters = 0;
1516 advanceWhile(&Scanner::skip_s_space);
1517 if (skip_nb_char(Current) != Current) {
1519 if (Column <= BlockExitIndent) {
1524 BlockIndent = Column;
1525 if (MaxAllSpaceLineCharacters > BlockIndent) {
1527 "Leading all-spaces line must be smaller than the block indent",
1528 LongestAllSpaceLine);
1533 if (skip_b_break(Current) != Current &&
1534 Column > MaxAllSpaceLineCharacters) {
1537 MaxAllSpaceLineCharacters = Column;
1538 LongestAllSpaceLine = Current;
1542 if (Current == End) {
1547 if (!consumeLineBreakIfPresent()) {
1556 bool Scanner::scanBlockScalarIndent(
unsigned BlockIndent,
1557 unsigned BlockExitIndent,
bool &IsDone) {
1559 while (Column < BlockIndent) {
1560 auto I = skip_s_space(Current);
1567 if (skip_nb_char(Current) == Current)
1570 if (Column <= BlockExitIndent) {
1575 if (Column < BlockIndent) {
1576 if (Current != End && *Current ==
'#') {
1580 setError(
"A text line is less indented than the block scalar", Current);
1586 bool Scanner::scanBlockScalar(
bool IsLiteral) {
1588 assert(*Current ==
'|' || *Current ==
'>');
1591 char ChompingIndicator;
1592 unsigned BlockIndent;
1593 bool IsDone =
false;
1594 if (!scanBlockScalarHeader(ChompingIndicator, BlockIndent, IsDone))
1599 auto Start = Current;
1600 unsigned BlockExitIndent = Indent < 0 ? 0 : (
unsigned)Indent;
1601 unsigned LineBreaks = 0;
1602 if (BlockIndent == 0) {
1603 if (!findBlockScalarIndent(BlockIndent, BlockExitIndent, LineBreaks,
1611 if (!scanBlockScalarIndent(BlockIndent, BlockExitIndent, IsDone))
1617 auto LineStart = Current;
1618 advanceWhile(&Scanner::skip_nb_char);
1619 if (LineStart != Current) {
1620 Str.
append(LineBreaks,
'\n');
1629 if (!consumeLineBreakIfPresent())
1634 if (Current == End && !LineBreaks)
1641 IsSimpleKeyAllowed =
true;
1651 bool Scanner::scanTag() {
1653 unsigned ColStart = Column;
1655 if (Current == End || isBlankOrBreak(Current));
1656 else if (*Current ==
'<') {
1663 Current = skip_while(&Scanner::skip_ns_char, Current);
1672 saveSimpleKeyCandidate(TokenQueue.
back(), ColStart,
false);
1674 IsSimpleKeyAllowed =
false;
1679 bool Scanner::fetchMoreTokens() {
1680 if (IsStartOfStream)
1681 return scanStreamStart();
1686 return scanStreamEnd();
1688 removeStaleSimpleKeyCandidates();
1690 unrollIndent(Column);
1692 if (Column == 0 && *Current ==
'%')
1693 return scanDirective();
1695 if (Column == 0 && Current + 4 <= End
1697 && *(Current + 1) ==
'-'
1698 && *(Current + 2) ==
'-'
1699 && (Current + 3 == End || isBlankOrBreak(Current + 3)))
1700 return scanDocumentIndicator(
true);
1702 if (Column == 0 && Current + 4 <= End
1704 && *(Current + 1) ==
'.'
1705 && *(Current + 2) ==
'.'
1706 && (Current + 3 == End || isBlankOrBreak(Current + 3)))
1707 return scanDocumentIndicator(
false);
1709 if (*Current ==
'[')
1710 return scanFlowCollectionStart(
true);
1712 if (*Current ==
'{')
1713 return scanFlowCollectionStart(
false);
1715 if (*Current ==
']')
1716 return scanFlowCollectionEnd(
true);
1718 if (*Current ==
'}')
1719 return scanFlowCollectionEnd(
false);
1721 if (*Current ==
',')
1722 return scanFlowEntry();
1724 if (*Current ==
'-' && isBlankOrBreak(Current + 1))
1725 return scanBlockEntry();
1727 if (*Current ==
'?' && (FlowLevel || isBlankOrBreak(Current + 1)))
1730 if (*Current ==
':' && (FlowLevel || isBlankOrBreak(Current + 1)))
1733 if (*Current ==
'*')
1734 return scanAliasOrAnchor(
true);
1736 if (*Current ==
'&')
1737 return scanAliasOrAnchor(
false);
1739 if (*Current ==
'!')
1742 if (*Current ==
'|' && !FlowLevel)
1743 return scanBlockScalar(
true);
1745 if (*Current ==
'>' && !FlowLevel)
1746 return scanBlockScalar(
false);
1748 if (*Current ==
'\'')
1749 return scanFlowScalar(
false);
1751 if (*Current ==
'"')
1752 return scanFlowScalar(
true);
1756 if (!(isBlankOrBreak(Current)
1757 || FirstChar.find_first_of(
"-?:,[]{}#&*!|>'\"%@`") !=
StringRef::npos)
1758 || (*Current ==
'-' && !isBlankOrBreak(Current + 1))
1759 || (!FlowLevel && (*Current ==
'?' || *Current ==
':')
1760 && isBlankOrBreak(Current + 1))
1761 || (!FlowLevel && *Current ==
':'
1762 && Current + 2 < End
1763 && *(Current + 1) ==
':'
1764 && !isBlankOrBreak(Current + 2)))
1765 return scanPlainScalar();
1767 setError(
"Unrecognized character while tokenizing.");
1772 : scanner(new
Scanner(Input, SM, ShowColors)), CurrentDoc() {}
1775 : scanner(new
Scanner(InputBuffer, SM, ShowColors)), CurrentDoc() {}
1795 CurrentDoc.reset(
new Document(*
this));
1810 : Doc(D),
TypeID(Type), Anchor(A),
Tag(T) {
1817 if (!Raw.
empty() && Raw !=
"!") {
1820 Ret =
Doc->getTagMap().find(
"!")->second;
1824 Ret =
Doc->getTagMap().find(
"!!")->second;
1829 std::map<StringRef, StringRef>::const_iterator It =
1830 Doc->getTagMap().find(TagHandle);
1831 if (It !=
Doc->getTagMap().end())
1836 T.
Range = TagHandle;
1846 return "tag:yaml.org,2002:null";
1848 case NK_BlockScalar:
1850 return "tag:yaml.org,2002:str";
1852 return "tag:yaml.org,2002:map";
1854 return "tag:yaml.org,2002:seq";
1861 return Doc->peekNext();
1865 return Doc->getNext();
1873 return Doc->NodeAllocator;
1877 Doc->setError(Msg, Tok);
1880 bool Node::failed()
const {
1881 return Doc->failed();
1888 if (
Value[0] ==
'"') {
1894 return unescapeDoubleQuoted(UnquotedValue, i, Storage);
1895 return UnquotedValue;
1896 }
else if (
Value[0] ==
'\'') {
1906 Storage.
insert(Storage.
end(), Valid.begin(), Valid.end());
1908 UnquotedValue = UnquotedValue.
substr(i + 2);
1913 return UnquotedValue;
1916 return Value.rtrim(
" ");
1929 Storage.
insert(Storage.
end(), Valid.begin(), Valid.end());
1931 UnquotedValue = UnquotedValue.
substr(i);
1933 assert(!UnquotedValue.
empty() &&
"Can't be empty!");
1936 switch (UnquotedValue[0]) {
1940 if ( UnquotedValue.
size() > 1
1941 && (UnquotedValue[1] ==
'\r' || UnquotedValue[1] ==
'\n'))
1942 UnquotedValue = UnquotedValue.
substr(1);
1943 UnquotedValue = UnquotedValue.
substr(1);
1946 if (UnquotedValue.
size() == 1)
1949 UnquotedValue = UnquotedValue.
substr(1);
1950 switch (UnquotedValue[0]) {
1954 setError(
"Unrecognized escape code!", T);
1960 if ( UnquotedValue.
size() > 1
1961 && (UnquotedValue[1] ==
'\r' || UnquotedValue[1] ==
'\n'))
1962 UnquotedValue = UnquotedValue.
substr(1);
2019 if (UnquotedValue.
size() < 3)
2022 unsigned int UnicodeScalarValue;
2025 UnicodeScalarValue = 0xFFFD;
2027 UnquotedValue = UnquotedValue.
substr(2);
2031 if (UnquotedValue.
size() < 5)
2034 unsigned int UnicodeScalarValue;
2037 UnicodeScalarValue = 0xFFFD;
2039 UnquotedValue = UnquotedValue.
substr(4);
2043 if (UnquotedValue.
size() < 9)
2046 unsigned int UnicodeScalarValue;
2049 UnicodeScalarValue = 0xFFFD;
2051 UnquotedValue = UnquotedValue.
substr(8);
2055 UnquotedValue = UnquotedValue.
substr(1);
2067 Token &t = peekNext();
2068 if ( t.
Kind == Token::TK_BlockEnd
2069 || t.
Kind == Token::TK_Value
2070 || t.
Kind == Token::TK_Error) {
2071 return Key =
new (getAllocator())
NullNode(Doc);
2073 if (t.
Kind == Token::TK_Key)
2078 Token &t = peekNext();
2079 if (t.
Kind == Token::TK_BlockEnd || t.
Kind == Token::TK_Value) {
2080 return Key =
new (getAllocator())
NullNode(Doc);
2084 return Key = parseBlockNode();
2096 Token &t = peekNext();
2097 if ( t.
Kind == Token::TK_BlockEnd
2098 || t.
Kind == Token::TK_FlowMappingEnd
2099 || t.
Kind == Token::TK_Key
2100 || t.
Kind == Token::TK_FlowEntry
2101 || t.
Kind == Token::TK_Error) {
2105 if (t.
Kind != Token::TK_Value) {
2106 setError(
"Unexpected token in Key Value.", t);
2113 Token &t = peekNext();
2114 if (t.
Kind == Token::TK_BlockEnd || t.
Kind == Token::TK_Key) {
2119 return Value = parseBlockNode();
2122 void MappingNode::increment() {
2125 CurrentEntry =
nullptr;
2129 CurrentEntry->skip();
2130 if (
Type == MT_Inline) {
2132 CurrentEntry =
nullptr;
2136 Token T = peekNext();
2137 if (T.
Kind == Token::TK_Key || T.
Kind == Token::TK_Scalar) {
2140 }
else if (
Type == MT_Block) {
2142 case Token::TK_BlockEnd:
2145 CurrentEntry =
nullptr;
2148 setError(
"Unexpected token. Expected Key or Block End", T);
2149 case Token::TK_Error:
2151 CurrentEntry =
nullptr;
2155 case Token::TK_FlowEntry:
2159 case Token::TK_FlowMappingEnd:
2161 case Token::TK_Error:
2164 CurrentEntry =
nullptr;
2167 setError(
"Unexpected token. Expected Key, Flow Entry, or Flow "
2171 CurrentEntry =
nullptr;
2176 void SequenceNode::increment() {
2179 CurrentEntry =
nullptr;
2183 CurrentEntry->skip();
2184 Token T = peekNext();
2185 if (SeqType == ST_Block) {
2187 case Token::TK_BlockEntry:
2189 CurrentEntry = parseBlockNode();
2190 if (!CurrentEntry) {
2192 CurrentEntry =
nullptr;
2195 case Token::TK_BlockEnd:
2198 CurrentEntry =
nullptr;
2201 setError(
"Unexpected token. Expected Block Entry or Block End."
2203 case Token::TK_Error:
2205 CurrentEntry =
nullptr;
2207 }
else if (SeqType == ST_Indentless) {
2209 case Token::TK_BlockEntry:
2211 CurrentEntry = parseBlockNode();
2212 if (!CurrentEntry) {
2214 CurrentEntry =
nullptr;
2218 case Token::TK_Error:
2220 CurrentEntry =
nullptr;
2222 }
else if (SeqType == ST_Flow) {
2224 case Token::TK_FlowEntry:
2227 WasPreviousTokenFlowEntry =
true;
2229 case Token::TK_FlowSequenceEnd:
2231 case Token::TK_Error:
2234 CurrentEntry =
nullptr;
2236 case Token::TK_StreamEnd:
2237 case Token::TK_DocumentEnd:
2238 case Token::TK_DocumentStart:
2239 setError(
"Could not find closing ]!", T);
2242 CurrentEntry =
nullptr;
2245 if (!WasPreviousTokenFlowEntry) {
2246 setError(
"Expected , between entries!", T);
2248 CurrentEntry =
nullptr;
2252 CurrentEntry = parseBlockNode();
2253 if (!CurrentEntry) {
2256 WasPreviousTokenFlowEntry =
false;
2262 Document::Document(
Stream &S) : stream(S), Root(nullptr) {
2265 TagMap[
"!!"] =
"tag:yaml.org,2002:";
2267 if (parseDirectives())
2269 Token &T = peekNext();
2275 if (stream.scanner->failed())
2280 Token &T = peekNext();
2290 Token &Document::peekNext() {
2291 return stream.scanner->peekNext();
2294 Token Document::getNext() {
2295 return stream.scanner->getNext();
2298 void Document::setError(
const Twine &Message,
Token &Location)
const {
2299 stream.scanner->setError(Message, Location.
Range.
begin());
2302 bool Document::failed()
const {
2303 return stream.scanner->failed();
2307 Token T = peekNext();
2318 setError(
"Already encountered an anchor for this node!", T);
2321 AnchorInfo = getNext();
2323 goto parse_property;
2326 setError(
"Already encountered a tag for this node!", T);
2329 TagInfo = getNext();
2331 goto parse_property;
2341 return new (NodeAllocator)
SequenceNode( stream.CurrentDoc
2347 return new (NodeAllocator)
2354 return new (NodeAllocator)
2361 return new (NodeAllocator)
2368 return new (NodeAllocator)
2375 return new (NodeAllocator)
2384 return new (NodeAllocator)
2390 return new (NodeAllocator)
2401 return new (NodeAllocator)
NullNode(stream.CurrentDoc);
2409 bool Document::parseDirectives() {
2410 bool isDirective =
false;
2412 Token T = peekNext();
2414 parseTAGDirective();
2417 parseYAMLDirective();
2425 void Document::parseYAMLDirective() {
2429 void Document::parseTAGDirective() {
2437 TagMap[TagHandle] = TagPrefix;
2440 bool Document::expectToken(
int TK) {
2441 Token T = getNext();
2443 setError(
"Unexpected token", T);
std::enable_if< std::numeric_limits< T >::is_signed, bool >::type getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
static void destroySentinel(Token *)
Represents a range in source code.
void push_back(const T &Elt)
std::unique_ptr< Document > & Doc
std::string getVerbatimTag() const
Get the verbatim tag for a given Node.
bool operator==(const BinaryRef &LHS, const BinaryRef &RHS)
iplist< Token >::iterator iterator
size_t size() const
size - Get the string size.
Node(unsigned int Type, std::unique_ptr< Document > &, StringRef Anchor, StringRef Tag)
Not a valid Unicode encoding.
bool scanTokens(StringRef Input)
Scans all tokens in input without outputting anything.
size_t find(char C, size_t From=0) const
Search for the first character C in the string.
static std::unique_ptr< MemoryBuffer > getMemBuffer(StringRef InputData, StringRef BufferName="", bool RequiresNullTerminator=true)
Open the specified memory range as a MemoryBuffer.
std::string Value
The value of a block scalar node.
StringRef getRawTag() const
Get the tag as it was written in the document.
Represents a YAML sequence created from either a block sequence or a flow sequence.
static LLVM_ATTRIBUTE_NOINLINE bool wasEscaped(StringRef::iterator First, StringRef::iterator Position)
StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
std::string str() const
str - Get the contents as an std::string.
const char * getBufferStart() const
FunctionType * getType(LLVMContext &Context, ID id, ArrayRef< Type * > Tys=None)
Return the function type for an intrinsic.
std::pair< uint32_t, unsigned > UTF8Decoded
The Unicode scalar value of a UTF-8 minimal well-formed code unit subsequence and the subsequence's l...
document_iterator begin()
Represents an alias to a Node with an anchor.
void skip(CollectionType &C)
StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
void reserve(size_type N)
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const char *reason, bool gen_crash_diag=true)
Reports a serious error, calling any installed error handler.
void setError(const Twine &Message, Token &Location) const
static bool is_ns_hex_digit(const char C)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
T LLVM_ATTRIBUTE_UNUSED_RESULT pop_back_val()
bool failed()
Returns true if an error occurred while parsing.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
StringRef copy(Allocator &A) const
TypeID
Definitions of all of the base types for the Type system.
Token * ensureHead(Token *) const
static EncodingInfo getUnicodeEncoding(StringRef Input)
getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode encoding form of Input...
void addNodeToList(Token *)
static void noteHead(Token *, Token *)
bool LLVM_ATTRIBUTE_UNUSED_RESULT empty() const
Node * getRoot()
Parse and return the root level node.
unsigned AddNewSourceBuffer(std::unique_ptr< MemoryBuffer > F, SMLoc IncludeLoc)
Add a new source buffer to this source manager.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void append(in_iter S, in_iter E)
Append from an iterator pair.
Scanner(StringRef Input, SourceMgr &SM, bool ShowColors=true)
bool dumpTokens(StringRef Input, raw_ostream &)
Dump all the tokens in this stream to OS.
initializer< Ty > init(const Ty &Val)
Token * createSentinel() const
The instances of the Type class are immutable: once they are created, they are never changed...
void printError(Node *N, const Twine &Msg)
Allocate memory in an ever growing pool, as if by bump-pointer.
ilist_sentinel_traits - A fragment for template traits for intrusive list that provides default senti...
const char * getBufferEnd() const
iterator insert(iterator where, const NodeTy &val)
Token * createNode(const Token &V)
std::string escape(StringRef Input)
Escape Input for a double quoted scalar.
static unsigned getChompedLineBreaks(char ChompingIndicator, unsigned LineBreaks, StringRef Str)
Get the number of line breaks after chomping.
StringRef Range
A string of length 0 or more whose begin() points to the logical location of the token in the input...
void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message, ArrayRef< SMRange > Ranges=None)
Token getNext()
Parse the next token and pop it from the queue.
This owns the files read by a parser, handles include stacks, and handles diagnostic wrangling...
ilist_node_traits - A fragment for template traits for intrusive list that provides default node rela...
iterator erase(iterator I)
A scalar node is an opaque datum that can be presented as a series of zero or more Unicode scalar val...
void setError(const Twine &Message, StringRef::iterator Position)
Token * provideInitialHead() const
void removeNodeFromList(Token *)
size_t find_last_of(char C, size_t From=npos) const
Find the last character in the string that is C, or npos if not found.
void transferNodesFromList(ilist_node_traits &, ilist_iterator< Token >, ilist_iterator< Token >)
bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
bool skip()
Finish parsing the current document and return true if there are more.
enum llvm::yaml::Token::TokenKind Kind
Token & peekNext()
Parse the next token and return it without popping it.
This class represents a YAML stream potentially containing multiple documents.
#define LLVM_ATTRIBUTE_NOINLINE
LLVM_ATTRIBUTE_NOINLINE - On compilers where we have a directive to do so, mark a method "not for inl...
bool LLVM_ATTRIBUTE_UNUSED_RESULT empty() const
Stream(StringRef Input, SourceMgr &, bool ShowColors=true)
This keeps a reference to the string referenced by Input.
ilist< Token > TokenQueueT
static void encodeUTF8(uint32_t UnicodeScalarValue, SmallVectorImpl< char > &Result)
encodeUTF8 - Encode UnicodeScalarValue in UTF-8 and append it to result.
StringRef str() const
Explicit conversion to StringRef.
void setError(const Twine &Message)
iterator insert(iterator I, T &&Elt)
static UTF8Decoded decodeUTF8(StringRef Range)
static SMLoc getFromPointer(const char *Ptr)
SMRange getSourceRange() const
ilist_node - Base class that provides next/prev services for nodes that use ilist_nextprev_traits or ...
Token - A single YAML token.
A block scalar node is an opaque datum that can be presented as a series of zero or more Unicode scal...
Represents a YAML map created from either a block map or a flow map.
static void deleteNode(Token *V)
size_t find_first_of(char C, size_t From=0) const
Find the first character in the string that is C, or npos if not found.
static std::string utohexstr(uint64_t X, bool LowerCase=false)
Scans YAML tokens from a MemoryBuffer.
std::pair< UnicodeEncodingForm, unsigned > EncodingInfo
EncodingInfo - Holds the encoding type and length of the byte order mark if it exists.
Iterator abstraction for Documents over a Stream.
const ARM::ArchExtKind Kind
LLVM Value Representation.
This class implements an extremely fast bulk output stream that can only output to a stream...
StringRef - Represent a constant reference to a string, i.e.
void PrintMessage(raw_ostream &OS, SMLoc Loc, DiagKind Kind, const Twine &Msg, ArrayRef< SMRange > Ranges=None, ArrayRef< SMFixIt > FixIts=None, bool ShowColors=true) const
Emit a message about the specified location with the specified string.
Represents a location in source code.
An inline mapping node is used for "[key: value]".
Node * parseBlockNode()
Root for parsing a node. Returns a single node.
StringRef ltrim(StringRef Chars=" \t\n\v\f\r") const
Return string with consecutive characters in Chars starting from the left removed.
void push_back(const NodeTy &val)
bool empty() const
empty - Check if the string is empty.
Abstract base class for all Nodes.
static bool is_ns_word_char(const char C)