49 if (Input.
size() == 0)
52 switch (uint8_t(Input[0])) {
54 if (Input.
size() >= 4) {
56 && uint8_t(Input[2]) == 0xFE
57 && uint8_t(Input[3]) == 0xFF)
59 if (Input[1] == 0 && Input[2] == 0 && Input[3] != 0)
63 if (Input.
size() >= 2 && Input[1] != 0)
67 if ( Input.
size() >= 4
68 && uint8_t(Input[1]) == 0xFE
73 if (Input.
size() >= 2 && uint8_t(Input[1]) == 0xFE)
77 if (Input.
size() >= 2 && uint8_t(Input[1]) == 0xFF)
81 if ( Input.
size() >= 3
82 && uint8_t(Input[1]) == 0xBB
83 && uint8_t(Input[2]) == 0xBF)
89 if (Input.
size() >= 4 && Input[1] == 0 && Input[2] == 0 && Input[3] == 0)
92 if (Input.
size() >= 2 && Input[1] == 0)
101 void Node::anchor() {}
102 void NullNode::anchor() {}
103 void ScalarNode::anchor() {}
104 void BlockScalarNode::anchor() {}
105 void KeyValueNode::anchor() {}
106 void MappingNode::anchor() {}
107 void SequenceNode::anchor() {}
108 void AliasNode::anchor() {}
170 return Tok == Other.Tok;
185 if ((*Position & 0x80) == 0) {
186 return std::make_pair(*Position, 1);
190 if (Position + 1 != End &&
191 ((*Position & 0xE0) == 0xC0) &&
192 ((*(Position + 1) & 0xC0) == 0x80)) {
193 uint32_t codepoint = ((*Position & 0x1F) << 6) |
194 (*(Position + 1) & 0x3F);
195 if (codepoint >= 0x80)
196 return std::make_pair(codepoint, 2);
200 if (Position + 2 != End &&
201 ((*Position & 0xF0) == 0xE0) &&
202 ((*(Position + 1) & 0xC0) == 0x80) &&
203 ((*(Position + 2) & 0xC0) == 0x80)) {
204 uint32_t codepoint = ((*Position & 0x0F) << 12) |
205 ((*(Position + 1) & 0x3F) << 6) |
206 (*(Position + 2) & 0x3F);
209 if (codepoint >= 0x800 &&
210 (codepoint < 0xD800 || codepoint > 0xDFFF))
211 return std::make_pair(codepoint, 3);
215 if (Position + 3 != End &&
216 ((*Position & 0xF8) == 0xF0) &&
217 ((*(Position + 1) & 0xC0) == 0x80) &&
218 ((*(Position + 2) & 0xC0) == 0x80) &&
219 ((*(Position + 3) & 0xC0) == 0x80)) {
220 uint32_t codepoint = ((*Position & 0x07) << 18) |
221 ((*(Position + 1) & 0x3F) << 12) |
222 ((*(Position + 2) & 0x3F) << 6) |
223 (*(Position + 3) & 0x3F);
224 if (codepoint >= 0x10000 && codepoint <= 0x10FFFF)
225 return std::make_pair(codepoint, 4);
227 return std::make_pair(0, 0);
236 std::error_code *EC =
nullptr);
238 std::error_code *EC =
nullptr);
267 setError(Message, Current);
364 void advanceWhile(SkipWhileFunc
Func);
369 void scan_ns_uri_char();
386 bool consumeLineBreakIfPresent();
397 void removeStaleSimpleKeyCandidates();
400 void removeSimpleKeyCandidatesOnFlowLevel(
unsigned Level);
404 bool unrollIndent(
int ToColumn);
408 bool rollIndent(
int ToColumn
417 void scanToNextToken();
420 bool scanStreamStart();
423 bool scanStreamEnd();
426 bool scanDirective();
429 bool scanDocumentIndicator(
bool IsStart);
432 bool scanFlowCollectionStart(
bool IsSequence);
435 bool scanFlowCollectionEnd(
bool IsSequence);
438 bool scanFlowEntry();
441 bool scanBlockEntry();
450 bool scanFlowScalar(
bool IsDoubleQuoted);
453 bool scanPlainScalar();
456 bool scanAliasOrAnchor(
bool IsAlias);
459 bool scanBlockScalar(
bool IsLiteral);
462 char scanBlockChompingIndicator();
465 unsigned scanBlockIndentationIndicator();
470 bool scanBlockScalarHeader(
char &ChompingIndicator,
unsigned &IndentIndicator,
476 bool findBlockScalarIndent(
unsigned &BlockIndent,
unsigned BlockExitIndent,
477 unsigned &LineBreaks,
bool &IsDone);
482 bool scanBlockScalarIndent(
unsigned BlockIndent,
unsigned BlockExitIndent,
489 bool fetchMoreTokens();
516 bool IsStartOfStream;
519 bool IsSimpleKeyAllowed;
547 if (UnicodeScalarValue <= 0x7F) {
548 Result.
push_back(UnicodeScalarValue & 0x7F);
549 }
else if (UnicodeScalarValue <= 0x7FF) {
550 uint8_t FirstByte = 0xC0 | ((UnicodeScalarValue & 0x7C0) >> 6);
551 uint8_t SecondByte = 0x80 | (UnicodeScalarValue & 0x3F);
554 }
else if (UnicodeScalarValue <= 0xFFFF) {
555 uint8_t FirstByte = 0xE0 | ((UnicodeScalarValue & 0xF000) >> 12);
556 uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
557 uint8_t ThirdByte = 0x80 | (UnicodeScalarValue & 0x3F);
561 }
else if (UnicodeScalarValue <= 0x10FFFF) {
562 uint8_t FirstByte = 0xF0 | ((UnicodeScalarValue & 0x1F0000) >> 18);
563 uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0x3F000) >> 12);
564 uint8_t ThirdByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
565 uint8_t FourthByte = 0x80 | (UnicodeScalarValue & 0x3F);
580 OS <<
"Stream-Start: ";
583 OS <<
"Stream-End: ";
586 OS <<
"Version-Directive: ";
589 OS <<
"Tag-Directive: ";
592 OS <<
"Document-Start: ";
595 OS <<
"Document-End: ";
598 OS <<
"Block-Entry: ";
604 OS <<
"Block-Sequence-Start: ";
607 OS <<
"Block-Mapping-Start: ";
610 OS <<
"Flow-Entry: ";
613 OS <<
"Flow-Sequence-Start: ";
616 OS <<
"Flow-Sequence-End: ";
619 OS <<
"Flow-Mapping-Start: ";
622 OS <<
"Flow-Mapping-End: ";
634 OS <<
"Block Scalar: ";
648 OS << T.
Range <<
"\n";
671 std::string EscapedInput;
674 EscapedInput +=
"\\\\";
676 EscapedInput +=
"\\\"";
678 EscapedInput +=
"\\0";
680 EscapedInput +=
"\\a";
682 EscapedInput +=
"\\b";
684 EscapedInput +=
"\\t";
686 EscapedInput +=
"\\n";
688 EscapedInput +=
"\\v";
690 EscapedInput +=
"\\f";
692 EscapedInput +=
"\\r";
694 EscapedInput +=
"\\e";
695 else if ((
unsigned char)*
i < 0x20) {
697 EscapedInput +=
"\\x" + std::string(2 - HexStr.size(),
'0') + HexStr;
698 }
else if (*
i & 0x80) {
701 if (UnicodeScalarValue.second == 0) {
705 EscapedInput.insert(EscapedInput.end(), Val.
begin(), Val.
end());
709 if (UnicodeScalarValue.first == 0x85)
710 EscapedInput +=
"\\N";
711 else if (UnicodeScalarValue.first == 0xA0)
712 EscapedInput +=
"\\_";
713 else if (UnicodeScalarValue.first == 0x2028)
714 EscapedInput +=
"\\L";
715 else if (UnicodeScalarValue.first == 0x2029)
716 EscapedInput +=
"\\P";
718 std::string HexStr =
utohexstr(UnicodeScalarValue.first);
719 if (HexStr.size() <= 2)
720 EscapedInput +=
"\\x" + std::string(2 - HexStr.size(),
'0') + HexStr;
721 else if (HexStr.size() <= 4)
722 EscapedInput +=
"\\u" + std::string(4 - HexStr.size(),
'0') + HexStr;
723 else if (HexStr.size() <= 8)
724 EscapedInput +=
"\\U" + std::string(8 - HexStr.size(),
'0') + HexStr;
726 i += UnicodeScalarValue.second - 1;
728 EscapedInput.push_back(*
i);
735 : SM(sm), ShowColors(ShowColors), EC(EC) {
741 : SM(SM_), ShowColors(ShowColors), EC(EC) {
746 InputBuffer = Buffer;
753 IsStartOfStream =
true;
754 IsSimpleKeyAllowed =
true;
756 std::unique_ptr<MemoryBuffer> InputBufferOwner =
764 bool NeedMore =
false;
766 if (TokenQueue.
empty() || NeedMore) {
767 if (!fetchMoreTokens()) {
770 return TokenQueue.
front();
774 "fetchMoreTokens lied about getting tokens!");
776 removeStaleSimpleKeyCandidates();
778 SK.Tok = TokenQueue.
begin();
784 return TokenQueue.
front();
790 if (!TokenQueue.
empty())
795 if (TokenQueue.
empty())
805 if ( *Position == 0x09
806 || (*Position >= 0x20 && *Position <= 0x7E))
810 if (uint8_t(*Position) & 0x80) {
813 && u8d.first != 0xFEFF
814 && ( u8d.first == 0x85
815 || ( u8d.first >= 0xA0
816 && u8d.first <= 0xD7FF)
817 || ( u8d.first >= 0xE000
818 && u8d.first <= 0xFFFD)
819 || ( u8d.first >= 0x10000
820 && u8d.first <= 0x10FFFF)))
821 return Position + u8d.second;
829 if (*Position == 0x0D) {
830 if (Position + 1 != End && *(Position + 1) == 0x0A)
835 if (*Position == 0x0A)
843 if (*Position ==
' ')
851 if (*Position ==
' ' || *Position ==
'\t')
859 if (*Position ==
' ' || *Position ==
'\t')
861 return skip_nb_char(Position);
875 void Scanner::advanceWhile(SkipWhileFunc Func) {
876 auto Final = skip_while(Func, Current);
877 Column += Final - Current;
882 return (C >=
'0' && C <=
'9')
883 || (C >=
'a' && C <=
'z')
884 || (C >=
'A' && C <=
'Z');
889 || (C >=
'a' && C <=
'z')
890 || (C >=
'A' && C <=
'Z');
893 void Scanner::scan_ns_uri_char() {
897 if (( *Current ==
'%'
902 ||
StringRef(Current, 1).find_first_of(
"#;/?:@&=+$,_.!~*'()[]")
912 if (Expected >= 0x80)
916 if (uint8_t(*Current) >= 0x80)
918 if (uint8_t(*Current) == Expected) {
926 void Scanner::skip(
uint32_t Distance) {
929 assert(Current <= End &&
"Skipped past the end");
935 return *Position ==
' ' || *Position ==
'\t' || *Position ==
'\r' ||
939 bool Scanner::consumeLineBreakIfPresent() {
940 auto Next = skip_b_break(Current);
952 if (IsSimpleKeyAllowed) {
956 SK.Column = AtColumn;
957 SK.IsRequired = IsRequired;
958 SK.FlowLevel = FlowLevel;
963 void Scanner::removeStaleSimpleKeyCandidates() {
965 i != SimpleKeys.
end();) {
966 if (i->Line != Line || i->Column + 1024 < Column) {
968 setError(
"Could not find expected : for simple key"
969 , i->Tok->Range.begin());
970 i = SimpleKeys.
erase(i);
976 void Scanner::removeSimpleKeyCandidatesOnFlowLevel(
unsigned Level) {
977 if (!SimpleKeys.
empty() && (SimpleKeys.
end() - 1)->FlowLevel == Level)
981 bool Scanner::unrollIndent(
int ToColumn) {
987 while (Indent > ToColumn) {
997 bool Scanner::rollIndent(
int ToColumn
1002 if (Indent < ToColumn) {
1009 TokenQueue.
insert(InsertPoint, T);
1014 void Scanner::skipComment() {
1015 if (*Current !=
'#')
1028 void Scanner::scanToNextToken() {
1030 while (*Current ==
' ' || *Current ==
'\t') {
1045 IsSimpleKeyAllowed =
true;
1049 bool Scanner::scanStreamStart() {
1050 IsStartOfStream =
false;
1058 Current += EI.second;
1062 bool Scanner::scanStreamEnd() {
1071 IsSimpleKeyAllowed =
false;
1080 bool Scanner::scanDirective() {
1084 IsSimpleKeyAllowed =
false;
1089 Current = skip_while(&Scanner::skip_ns_char, Current);
1091 Current = skip_while(&Scanner::skip_s_white, Current);
1094 if (
Name ==
"YAML") {
1095 Current = skip_while(&Scanner::skip_ns_char, Current);
1100 }
else if(
Name ==
"TAG") {
1101 Current = skip_while(&Scanner::skip_ns_char, Current);
1102 Current = skip_while(&Scanner::skip_s_white, Current);
1103 Current = skip_while(&Scanner::skip_ns_char, Current);
1112 bool Scanner::scanDocumentIndicator(
bool IsStart) {
1115 IsSimpleKeyAllowed =
false;
1125 bool Scanner::scanFlowCollectionStart(
bool IsSequence) {
1134 saveSimpleKeyCandidate(--TokenQueue.
end(), Column - 1,
false);
1137 IsSimpleKeyAllowed =
true;
1142 bool Scanner::scanFlowCollectionEnd(
bool IsSequence) {
1143 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1144 IsSimpleKeyAllowed =
false;
1156 bool Scanner::scanFlowEntry() {
1157 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1158 IsSimpleKeyAllowed =
true;
1167 bool Scanner::scanBlockEntry() {
1169 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1170 IsSimpleKeyAllowed =
true;
1179 bool Scanner::scanKey() {
1183 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1184 IsSimpleKeyAllowed = !FlowLevel;
1194 bool Scanner::scanValue() {
1197 if (!SimpleKeys.
empty()) {
1201 T.
Range = SK.Tok->Range;
1203 for (i = TokenQueue.
begin(), e = TokenQueue.
end(); i != e; ++
i) {
1207 assert(i != e &&
"SimpleKey not in token queue!");
1208 i = TokenQueue.
insert(i, T);
1213 IsSimpleKeyAllowed =
false;
1217 IsSimpleKeyAllowed = !FlowLevel;
1237 assert(Position - 1 >= First);
1241 while (I >= First && *I ==
'\\') --
I;
1244 return (Position - 1 - I) % 2 == 1;
1247 bool Scanner::scanFlowScalar(
bool IsDoubleQuoted) {
1249 unsigned ColStart = Column;
1250 if (IsDoubleQuoted) {
1253 while (Current != End && *Current !=
'"')
1257 }
while ( Current != End
1258 && *(Current - 1) ==
'\\'
1264 if (Current + 1 < End && *Current ==
'\'' && *(Current + 1) ==
'\'') {
1267 }
else if (*Current ==
'\'')
1271 i = skip_b_break(Current);
1286 if (Current == End) {
1287 setError(
"Expected quote at end of scalar", Current);
1297 saveSimpleKeyCandidate(--TokenQueue.
end(), ColStart,
false);
1299 IsSimpleKeyAllowed =
false;
1304 bool Scanner::scanPlainScalar() {
1306 unsigned ColStart = Column;
1307 unsigned LeadingBlanks = 0;
1308 assert(Indent >= -1 &&
"Indent must be >= -1 !");
1309 unsigned indent =
static_cast<unsigned>(Indent + 1);
1311 if (*Current ==
'#')
1314 while (!isBlankOrBreak(Current)) {
1315 if ( FlowLevel && *Current ==
':'
1316 && !(isBlankOrBreak(Current + 1) || *(Current + 1) ==
',')) {
1317 setError(
"Found unexpected ':' while scanning a plain scalar", Current);
1322 if ( (*Current ==
':' && isBlankOrBreak(Current + 1))
1324 && (
StringRef(Current, 1).find_first_of(
",:?[]{}")
1336 if (!isBlankOrBreak(Current))
1341 while (isBlankOrBreak(Tmp)) {
1344 if (LeadingBlanks && (Column < indent) && *Tmp ==
'\t') {
1345 setError(
"Found invalid tab character in indentation", Tmp);
1351 i = skip_b_break(Tmp);
1360 if (!FlowLevel && Column < indent)
1365 if (Start == Current) {
1366 setError(
"Got empty plain scalar", Start);
1375 saveSimpleKeyCandidate(--TokenQueue.
end(), ColStart,
false);
1377 IsSimpleKeyAllowed =
false;
1382 bool Scanner::scanAliasOrAnchor(
bool IsAlias) {
1384 unsigned ColStart = Column;
1387 if ( *Current ==
'[' || *Current ==
']'
1388 || *Current ==
'{' || *Current ==
'}'
1399 if (Start == Current) {
1400 setError(
"Got empty alias or anchor", Start);
1410 saveSimpleKeyCandidate(--TokenQueue.
end(), ColStart,
false);
1412 IsSimpleKeyAllowed =
false;
1417 char Scanner::scanBlockChompingIndicator() {
1418 char Indicator =
' ';
1419 if (Current != End && (*Current ==
'+' || *Current ==
'-')) {
1420 Indicator = *Current;
1432 if (ChompingIndicator ==
'-')
1434 if (ChompingIndicator ==
'+')
1437 return Str.
empty() ? 0 : 1;
1440 unsigned Scanner::scanBlockIndentationIndicator() {
1441 unsigned Indent = 0;
1442 if (Current != End && (*Current >=
'1' && *Current <=
'9')) {
1449 bool Scanner::scanBlockScalarHeader(
char &ChompingIndicator,
1450 unsigned &IndentIndicator,
bool &IsDone) {
1451 auto Start = Current;
1453 ChompingIndicator = scanBlockChompingIndicator();
1454 IndentIndicator = scanBlockIndentationIndicator();
1456 if (ChompingIndicator ==
' ')
1457 ChompingIndicator = scanBlockChompingIndicator();
1458 Current = skip_while(&Scanner::skip_s_white, Current);
1461 if (Current == End) {
1470 if (!consumeLineBreakIfPresent()) {
1471 setError(
"Expected a line break after block scalar header", Current);
1477 bool Scanner::findBlockScalarIndent(
unsigned &BlockIndent,
1478 unsigned BlockExitIndent,
1479 unsigned &LineBreaks,
bool &IsDone) {
1480 unsigned MaxAllSpaceLineCharacters = 0;
1484 advanceWhile(&Scanner::skip_s_space);
1485 if (skip_nb_char(Current) != Current) {
1487 if (Column <= BlockExitIndent) {
1492 BlockIndent = Column;
1493 if (MaxAllSpaceLineCharacters > BlockIndent) {
1495 "Leading all-spaces line must be smaller than the block indent",
1496 LongestAllSpaceLine);
1501 if (skip_b_break(Current) != Current &&
1502 Column > MaxAllSpaceLineCharacters) {
1505 MaxAllSpaceLineCharacters = Column;
1506 LongestAllSpaceLine = Current;
1510 if (Current == End) {
1515 if (!consumeLineBreakIfPresent()) {
1524 bool Scanner::scanBlockScalarIndent(
unsigned BlockIndent,
1525 unsigned BlockExitIndent,
bool &IsDone) {
1527 while (Column < BlockIndent) {
1528 auto I = skip_s_space(Current);
1535 if (skip_nb_char(Current) == Current)
1538 if (Column <= BlockExitIndent) {
1543 if (Column < BlockIndent) {
1544 if (Current != End && *Current ==
'#') {
1548 setError(
"A text line is less indented than the block scalar", Current);
1554 bool Scanner::scanBlockScalar(
bool IsLiteral) {
1556 assert(*Current ==
'|' || *Current ==
'>');
1559 char ChompingIndicator;
1560 unsigned BlockIndent;
1561 bool IsDone =
false;
1562 if (!scanBlockScalarHeader(ChompingIndicator, BlockIndent, IsDone))
1567 auto Start = Current;
1568 unsigned BlockExitIndent = Indent < 0 ? 0 : (
unsigned)Indent;
1569 unsigned LineBreaks = 0;
1570 if (BlockIndent == 0) {
1571 if (!findBlockScalarIndent(BlockIndent, BlockExitIndent, LineBreaks,
1579 if (!scanBlockScalarIndent(BlockIndent, BlockExitIndent, IsDone))
1585 auto LineStart = Current;
1586 advanceWhile(&Scanner::skip_nb_char);
1587 if (LineStart != Current) {
1588 Str.
append(LineBreaks,
'\n');
1597 if (!consumeLineBreakIfPresent())
1602 if (Current == End && !LineBreaks)
1609 IsSimpleKeyAllowed =
true;
1619 bool Scanner::scanTag() {
1621 unsigned ColStart = Column;
1623 if (Current == End || isBlankOrBreak(Current));
1624 else if (*Current ==
'<') {
1631 Current = skip_while(&Scanner::skip_ns_char, Current);
1640 saveSimpleKeyCandidate(--TokenQueue.
end(), ColStart,
false);
1642 IsSimpleKeyAllowed =
false;
1647 bool Scanner::fetchMoreTokens() {
1648 if (IsStartOfStream)
1649 return scanStreamStart();
1654 return scanStreamEnd();
1656 removeStaleSimpleKeyCandidates();
1658 unrollIndent(Column);
1660 if (Column == 0 && *Current ==
'%')
1661 return scanDirective();
1663 if (Column == 0 && Current + 4 <= End
1665 && *(Current + 1) ==
'-'
1666 && *(Current + 2) ==
'-'
1667 && (Current + 3 == End || isBlankOrBreak(Current + 3)))
1668 return scanDocumentIndicator(
true);
1670 if (Column == 0 && Current + 4 <= End
1672 && *(Current + 1) ==
'.'
1673 && *(Current + 2) ==
'.'
1674 && (Current + 3 == End || isBlankOrBreak(Current + 3)))
1675 return scanDocumentIndicator(
false);
1677 if (*Current ==
'[')
1678 return scanFlowCollectionStart(
true);
1680 if (*Current ==
'{')
1681 return scanFlowCollectionStart(
false);
1683 if (*Current ==
']')
1684 return scanFlowCollectionEnd(
true);
1686 if (*Current ==
'}')
1687 return scanFlowCollectionEnd(
false);
1689 if (*Current ==
',')
1690 return scanFlowEntry();
1692 if (*Current ==
'-' && isBlankOrBreak(Current + 1))
1693 return scanBlockEntry();
1695 if (*Current ==
'?' && (FlowLevel || isBlankOrBreak(Current + 1)))
1698 if (*Current ==
':' && (FlowLevel || isBlankOrBreak(Current + 1)))
1701 if (*Current ==
'*')
1702 return scanAliasOrAnchor(
true);
1704 if (*Current ==
'&')
1705 return scanAliasOrAnchor(
false);
1707 if (*Current ==
'!')
1710 if (*Current ==
'|' && !FlowLevel)
1711 return scanBlockScalar(
true);
1713 if (*Current ==
'>' && !FlowLevel)
1714 return scanBlockScalar(
false);
1716 if (*Current ==
'\'')
1717 return scanFlowScalar(
false);
1719 if (*Current ==
'"')
1720 return scanFlowScalar(
true);
1724 if (!(isBlankOrBreak(Current)
1725 || FirstChar.find_first_of(
"-?:,[]{}#&*!|>'\"%@`") !=
StringRef::npos)
1726 || (*Current ==
'-' && !isBlankOrBreak(Current + 1))
1727 || (!FlowLevel && (*Current ==
'?' || *Current ==
':')
1728 && isBlankOrBreak(Current + 1))
1729 || (!FlowLevel && *Current ==
':'
1730 && Current + 2 < End
1731 && *(Current + 1) ==
':'
1732 && !isBlankOrBreak(Current + 2)))
1733 return scanPlainScalar();
1735 setError(
"Unrecognized character while tokenizing.");
1740 std::error_code *EC)
1741 : scanner(new
Scanner(Input, SM, ShowColors, EC)), CurrentDoc() {}
1744 std::error_code *EC)
1745 : scanner(new
Scanner(InputBuffer, SM, ShowColors, EC)), CurrentDoc() {}
1765 CurrentDoc.reset(
new Document(*
this));
1780 : Doc(D),
TypeID(Type), Anchor(A),
Tag(T) {
1787 if (!Raw.
empty() && Raw !=
"!") {
1790 Ret =
Doc->getTagMap().find(
"!")->second;
1794 Ret =
Doc->getTagMap().find(
"!!")->second;
1799 std::map<StringRef, StringRef>::const_iterator It =
1800 Doc->getTagMap().find(TagHandle);
1801 if (It !=
Doc->getTagMap().end())
1806 T.
Range = TagHandle;
1816 return "tag:yaml.org,2002:null";
1818 case NK_BlockScalar:
1820 return "tag:yaml.org,2002:str";
1822 return "tag:yaml.org,2002:map";
1824 return "tag:yaml.org,2002:seq";
1831 return Doc->peekNext();
1835 return Doc->getNext();
1843 return Doc->NodeAllocator;
1847 Doc->setError(Msg, Tok);
1850 bool Node::failed()
const {
1851 return Doc->failed();
1858 if (
Value[0] ==
'"') {
1864 return unescapeDoubleQuoted(UnquotedValue, i, Storage);
1865 return UnquotedValue;
1866 }
else if (
Value[0] ==
'\'') {
1876 Storage.
insert(Storage.
end(), Valid.begin(), Valid.end());
1878 UnquotedValue = UnquotedValue.
substr(i + 2);
1883 return UnquotedValue;
1886 return Value.rtrim(
' ');
1899 Storage.
insert(Storage.
end(), Valid.begin(), Valid.end());
1901 UnquotedValue = UnquotedValue.
substr(i);
1903 assert(!UnquotedValue.
empty() &&
"Can't be empty!");
1906 switch (UnquotedValue[0]) {
1910 if ( UnquotedValue.
size() > 1
1911 && (UnquotedValue[1] ==
'\r' || UnquotedValue[1] ==
'\n'))
1912 UnquotedValue = UnquotedValue.
substr(1);
1913 UnquotedValue = UnquotedValue.
substr(1);
1916 if (UnquotedValue.
size() == 1)
1919 UnquotedValue = UnquotedValue.
substr(1);
1920 switch (UnquotedValue[0]) {
1924 setError(
"Unrecognized escape code!", T);
1930 if ( UnquotedValue.
size() > 1
1931 && (UnquotedValue[1] ==
'\r' || UnquotedValue[1] ==
'\n'))
1932 UnquotedValue = UnquotedValue.
substr(1);
1989 if (UnquotedValue.
size() < 3)
1992 unsigned int UnicodeScalarValue;
1995 UnicodeScalarValue = 0xFFFD;
1997 UnquotedValue = UnquotedValue.
substr(2);
2001 if (UnquotedValue.
size() < 5)
2004 unsigned int UnicodeScalarValue;
2007 UnicodeScalarValue = 0xFFFD;
2009 UnquotedValue = UnquotedValue.
substr(4);
2013 if (UnquotedValue.
size() < 9)
2016 unsigned int UnicodeScalarValue;
2019 UnicodeScalarValue = 0xFFFD;
2021 UnquotedValue = UnquotedValue.
substr(8);
2025 UnquotedValue = UnquotedValue.
substr(1);
2038 if ( t.
Kind == Token::TK_BlockEnd
2039 || t.
Kind == Token::TK_Value
2040 || t.
Kind == Token::TK_Error) {
2041 return Key =
new (getAllocator())
NullNode(Doc);
2043 if (t.
Kind == Token::TK_Key)
2049 if (t.
Kind == Token::TK_BlockEnd || t.
Kind == Token::TK_Value) {
2050 return Key =
new (getAllocator())
NullNode(Doc);
2054 return Key = parseBlockNode();
2067 if ( t.
Kind == Token::TK_BlockEnd
2068 || t.
Kind == Token::TK_FlowMappingEnd
2069 || t.
Kind == Token::TK_Key
2070 || t.
Kind == Token::TK_FlowEntry
2071 || t.
Kind == Token::TK_Error) {
2075 if (t.
Kind != Token::TK_Value) {
2076 setError(
"Unexpected token in Key Value.", t);
2084 if (t.
Kind == Token::TK_BlockEnd || t.
Kind == Token::TK_Key) {
2089 return Value = parseBlockNode();
2092 void MappingNode::increment() {
2095 CurrentEntry =
nullptr;
2099 CurrentEntry->skip();
2100 if (
Type == MT_Inline) {
2102 CurrentEntry =
nullptr;
2106 Token T = peekNext();
2107 if (T.
Kind == Token::TK_Key || T.
Kind == Token::TK_Scalar) {
2110 }
else if (
Type == MT_Block) {
2112 case Token::TK_BlockEnd:
2115 CurrentEntry =
nullptr;
2118 setError(
"Unexpected token. Expected Key or Block End", T);
2119 case Token::TK_Error:
2121 CurrentEntry =
nullptr;
2125 case Token::TK_FlowEntry:
2129 case Token::TK_FlowMappingEnd:
2131 case Token::TK_Error:
2134 CurrentEntry =
nullptr;
2137 setError(
"Unexpected token. Expected Key, Flow Entry, or Flow "
2141 CurrentEntry =
nullptr;
2146 void SequenceNode::increment() {
2149 CurrentEntry =
nullptr;
2153 CurrentEntry->skip();
2154 Token T = peekNext();
2155 if (SeqType == ST_Block) {
2157 case Token::TK_BlockEntry:
2159 CurrentEntry = parseBlockNode();
2160 if (!CurrentEntry) {
2162 CurrentEntry =
nullptr;
2165 case Token::TK_BlockEnd:
2168 CurrentEntry =
nullptr;
2171 setError(
"Unexpected token. Expected Block Entry or Block End."
2173 case Token::TK_Error:
2175 CurrentEntry =
nullptr;
2177 }
else if (SeqType == ST_Indentless) {
2179 case Token::TK_BlockEntry:
2181 CurrentEntry = parseBlockNode();
2182 if (!CurrentEntry) {
2184 CurrentEntry =
nullptr;
2188 case Token::TK_Error:
2190 CurrentEntry =
nullptr;
2192 }
else if (SeqType == ST_Flow) {
2194 case Token::TK_FlowEntry:
2197 WasPreviousTokenFlowEntry =
true;
2199 case Token::TK_FlowSequenceEnd:
2201 case Token::TK_Error:
2204 CurrentEntry =
nullptr;
2206 case Token::TK_StreamEnd:
2207 case Token::TK_DocumentEnd:
2208 case Token::TK_DocumentStart:
2209 setError(
"Could not find closing ]!", T);
2212 CurrentEntry =
nullptr;
2215 if (!WasPreviousTokenFlowEntry) {
2216 setError(
"Expected , between entries!", T);
2218 CurrentEntry =
nullptr;
2222 CurrentEntry = parseBlockNode();
2223 if (!CurrentEntry) {
2226 WasPreviousTokenFlowEntry =
false;
2232 Document::Document(
Stream &S) : stream(S), Root(nullptr) {
2235 TagMap[
"!!"] =
"tag:yaml.org,2002:";
2237 if (parseDirectives())
2239 Token &T = peekNext();
2245 if (stream.scanner->failed())
2250 Token &T = peekNext();
2260 Token &Document::peekNext() {
2261 return stream.scanner->peekNext();
2264 Token Document::getNext() {
2265 return stream.scanner->getNext();
2268 void Document::setError(
const Twine &Message,
Token &Location)
const {
2269 stream.scanner->setError(Message, Location.
Range.
begin());
2272 bool Document::failed()
const {
2273 return stream.scanner->failed();
2277 Token T = peekNext();
2288 setError(
"Already encountered an anchor for this node!", T);
2291 AnchorInfo = getNext();
2293 goto parse_property;
2296 setError(
"Already encountered a tag for this node!", T);
2299 TagInfo = getNext();
2301 goto parse_property;
2311 return new (NodeAllocator)
SequenceNode( stream.CurrentDoc
2317 return new (NodeAllocator)
2324 return new (NodeAllocator)
2331 return new (NodeAllocator)
2338 return new (NodeAllocator)
2345 return new (NodeAllocator)
2354 return new (NodeAllocator)
2360 return new (NodeAllocator)
2371 return new (NodeAllocator)
NullNode(stream.CurrentDoc);
2379 bool Document::parseDirectives() {
2380 bool isDirective =
false;
2382 Token T = peekNext();
2384 parseTAGDirective();
2387 parseYAMLDirective();
2395 void Document::parseYAMLDirective() {
2399 void Document::parseTAGDirective() {
2407 TagMap[TagHandle] = TagPrefix;
2410 bool Document::expectToken(
int TK) {
2411 Token T = getNext();
2413 setError(
"Unexpected token", T);
std::enable_if< std::numeric_limits< T >::is_signed, bool >::type getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Represents a range in source code.
void push_back(const T &Elt)
std::unique_ptr< Document > & Doc
std::string getVerbatimTag() const
Get the verbatim tag for a given Node.
bool operator==(const BinaryRef &LHS, const BinaryRef &RHS)
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
llvm::BumpPtrList< Token > TokenQueueT
A linked-list with a custom, local allocator.
Node(unsigned int Type, std::unique_ptr< Document > &, StringRef Anchor, StringRef Tag)
Not a valid Unicode encoding.
Scanner(StringRef Input, SourceMgr &SM, bool ShowColors=true, std::error_code *EC=nullptr)
bool scanTokens(StringRef Input)
Scans all tokens in input without outputting anything.
static std::unique_ptr< MemoryBuffer > getMemBuffer(StringRef InputData, StringRef BufferName="", bool RequiresNullTerminator=true)
Open the specified memory range as a MemoryBuffer.
std::string Value
The value of a block scalar node.
StringRef getRawTag() const
Get the tag as it was written in the document.
Represents a YAML sequence created from either a block sequence or a flow sequence.
static LLVM_ATTRIBUTE_NOINLINE bool wasEscaped(StringRef::iterator First, StringRef::iterator Position)
const char * getBufferStart() const
void resetAlloc()
Reset the underlying allocator.
FunctionType * getType(LLVMContext &Context, ID id, ArrayRef< Type * > Tys=None)
Return the function type for an intrinsic.
std::pair< uint32_t, unsigned > UTF8Decoded
The Unicode scalar value of a UTF-8 minimal well-formed code unit subsequence and the subsequence's l...
document_iterator begin()
Represents an alias to a Node with an anchor.
void skip(CollectionType &C)
void reserve(size_type N)
void setError(const Twine &Message, Token &Location) const
static bool is_ns_hex_digit(const char C)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
bool failed()
Returns true if an error occurred while parsing.
static GCRegistry::Add< StatepointGC > D("statepoint-example","an example strategy for statepoint")
std::error_code make_error_code(BitcodeError E)
TypeID
Definitions of all of the base types for the Type system.
Tagged union holding either a T or an Error.
static EncodingInfo getUnicodeEncoding(StringRef Input)
getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode encoding form of Input...
LLVM_NODISCARD bool empty() const
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
Node * getRoot()
Parse and return the root level node.
unsigned AddNewSourceBuffer(std::unique_ptr< MemoryBuffer > F, SMLoc IncludeLoc)
Add a new source buffer to this source manager.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE size_t size() const
size - Get the string size.
void append(in_iter S, in_iter E)
Append from an iterator pair.
bool dumpTokens(StringRef Input, raw_ostream &)
Dump all the tokens in this stream to OS.
initializer< Ty > init(const Ty &Val)
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE size_t find(char C, size_t From=0) const
Search for the first character C in the string.
The instances of the Type class are immutable: once they are created, they are never changed...
void printError(Node *N, const Twine &Msg)
Allocate memory in an ever growing pool, as if by bump-pointer.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
const char * getBufferEnd() const
LLVM_NODISCARD size_t find_last_of(char C, size_t From=npos) const
Find the last character in the string that is C, or npos if not found.
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
std::string escape(StringRef Input)
Escape Input for a double quoted scalar.
static unsigned getChompedLineBreaks(char ChompingIndicator, unsigned LineBreaks, StringRef Str)
Get the number of line breaks after chomping.
static const unsigned End
StringRef Range
A string of length 0 or more whose begin() points to the logical location of the token in the input...
void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message, ArrayRef< SMRange > Ranges=None)
Token getNext()
Parse the next token and pop it from the queue.
This owns the files read by a parser, handles include stacks, and handles diagnostic wrangling...
LLVM_NODISCARD std::string str() const
str - Get the contents as an std::string.
iterator erase(const_iterator CI)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
A scalar node is an opaque datum that can be presented as a series of zero or more Unicode scalar val...
static Cursor skipComment(Cursor C)
Skip a line comment and return the updated cursor.
void setError(const Twine &Message, StringRef::iterator Position)
bool skip()
Finish parsing the current document and return true if there are more.
enum llvm::yaml::Token::TokenKind Kind
Token & peekNext()
Parse the next token and return it without popping it.
This class represents a YAML stream potentially containing multiple documents.
#define LLVM_ATTRIBUTE_NOINLINE
LLVM_ATTRIBUTE_NOINLINE - On compilers where we have a directive to do so, mark a method "not for inl...
LLVM_NODISCARD T pop_back_val()
static GCRegistry::Add< ShadowStackGC > C("shadow-stack","Very portable GC for uncooperative code generators")
iterator insert(iterator I, T &&V)
static void encodeUTF8(uint32_t UnicodeScalarValue, SmallVectorImpl< char > &Result)
encodeUTF8 - Encode UnicodeScalarValue in UTF-8 and append it to result.
StringRef str() const
Explicit conversion to StringRef.
void setError(const Twine &Message)
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool empty() const
empty - Check if the string is empty.
iterator insert(iterator I, T &&Elt)
static UTF8Decoded decodeUTF8(StringRef Range)
LLVM_NODISCARD StringRef copy(Allocator &A) const
static SMLoc getFromPointer(const char *Ptr)
SMRange getSourceRange() const
Stream(StringRef Input, SourceMgr &, bool ShowColors=true, std::error_code *EC=nullptr)
This keeps a reference to the string referenced by Input.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
LLVM_NODISCARD StringRef ltrim(char Char) const
Return string with consecutive Char characters starting from the left removed.
Token - A single YAML token.
A block scalar node is an opaque datum that can be presented as a series of zero or more Unicode scal...
Represents a YAML map created from either a block map or a flow map.
IteratorImpl< T, typename list_type::iterator > iterator
static std::string utohexstr(uint64_t X, bool LowerCase=false)
Scans YAML tokens from a MemoryBuffer.
std::pair< UnicodeEncodingForm, unsigned > EncodingInfo
EncodingInfo - Holds the encoding type and length of the byte order mark if it exists.
LLVM_NODISCARD size_t find_first_of(char C, size_t From=0) const
Find the first character in the string that is C, or npos if not found.
Iterator abstraction for Documents over a Stream.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
This class implements an extremely fast bulk output stream that can only output to a stream...
StringRef - Represent a constant reference to a string, i.e.
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
void PrintMessage(raw_ostream &OS, SMLoc Loc, DiagKind Kind, const Twine &Msg, ArrayRef< SMRange > Ranges=None, ArrayRef< SMFixIt > FixIts=None, bool ShowColors=true) const
Emit a message about the specified location with the specified string.
Represents a location in source code.
An inline mapping node is used for "[key: value]".
static GCRegistry::Add< ErlangGC > A("erlang","erlang-compatible garbage collector")
Node * parseBlockNode()
Root for parsing a node. Returns a single node.
Abstract base class for all Nodes.
static bool is_ns_word_char(const char C)
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.