35#include <system_error>
64 switch (uint8_t(Input[0])) {
66 if (Input.
size() >= 4) {
68 && uint8_t(Input[2]) == 0xFE
69 && uint8_t(Input[3]) == 0xFF)
71 if (Input[1] == 0 && Input[2] == 0 && Input[3] != 0)
75 if (Input.
size() >= 2 && Input[1] != 0)
79 if ( Input.
size() >= 4
80 && uint8_t(Input[1]) == 0xFE
85 if (Input.
size() >= 2 && uint8_t(Input[1]) == 0xFE)
89 if (Input.
size() >= 2 && uint8_t(Input[1]) == 0xFF)
93 if ( Input.
size() >= 3
94 && uint8_t(Input[1]) == 0xBB
95 && uint8_t(Input[2]) == 0xBF)
101 if (Input.
size() >= 4 && Input[1] == 0 && Input[2] == 0 && Input[3] == 0)
104 if (Input.
size() >= 2 && Input[1] == 0)
111void Node::anchor() {}
112void NullNode::anchor() {}
113void ScalarNode::anchor() {}
114void BlockScalarNode::anchor() {}
115void KeyValueNode::anchor() {}
116void MappingNode::anchor() {}
117void SequenceNode::anchor() {}
118void AliasNode::anchor() {}
181 unsigned FlowLevel = 0;
182 bool IsRequired =
false;
184 bool operator ==(
const SimpleKey &
Other) {
185 return Tok ==
Other.Tok;
201 if (Position <
End && (*Position & 0x80) == 0) {
202 return std::make_pair(*Position, 1);
206 if (Position + 1 <
End && ((*Position & 0xE0) == 0xC0) &&
207 ((*(Position + 1) & 0xC0) == 0x80)) {
208 uint32_t codepoint = ((*Position & 0x1F) << 6) |
209 (*(Position + 1) & 0x3F);
210 if (codepoint >= 0x80)
211 return std::make_pair(codepoint, 2);
215 if (Position + 2 <
End && ((*Position & 0xF0) == 0xE0) &&
216 ((*(Position + 1) & 0xC0) == 0x80) &&
217 ((*(Position + 2) & 0xC0) == 0x80)) {
218 uint32_t codepoint = ((*Position & 0x0F) << 12) |
219 ((*(Position + 1) & 0x3F) << 6) |
220 (*(Position + 2) & 0x3F);
223 if (codepoint >= 0x800 &&
224 (codepoint < 0xD800 || codepoint > 0xDFFF))
225 return std::make_pair(codepoint, 3);
229 if (Position + 3 <
End && ((*Position & 0xF8) == 0xF0) &&
230 ((*(Position + 1) & 0xC0) == 0x80) &&
231 ((*(Position + 2) & 0xC0) == 0x80) &&
232 ((*(Position + 3) & 0xC0) == 0x80)) {
233 uint32_t codepoint = ((*Position & 0x07) << 18) |
234 ((*(Position + 1) & 0x3F) << 12) |
235 ((*(Position + 2) & 0x3F) << 6) |
236 (*(Position + 3) & 0x3F);
237 if (codepoint >= 0x10000 && codepoint <= 0x10FFFF)
238 return std::make_pair(codepoint, 4);
240 return std::make_pair(0, 0);
250 std::error_code *EC =
nullptr);
252 std::error_code *EC =
nullptr);
262 SM.
PrintMessage(Loc, Kind, Message, Ranges, std::nullopt,
300 return ::decodeUTF8(
StringRef(Position,
End - Position));
376 void advanceWhile(SkipWhileFunc Func);
381 void scan_ns_uri_char();
405 bool consumeLineBreakIfPresent();
416 void removeStaleSimpleKeyCandidates();
419 void removeSimpleKeyCandidatesOnFlowLevel(
unsigned Level);
423 bool unrollIndent(
int ToColumn);
427 bool rollIndent(
int ToColumn
436 void scanToNextToken();
439 bool scanStreamStart();
442 bool scanStreamEnd();
445 bool scanDirective();
448 bool scanDocumentIndicator(
bool IsStart);
451 bool scanFlowCollectionStart(
bool IsSequence);
454 bool scanFlowCollectionEnd(
bool IsSequence);
457 bool scanFlowEntry();
460 bool scanBlockEntry();
469 bool scanFlowScalar(
bool IsDoubleQuoted);
472 bool scanPlainScalar();
475 bool scanAliasOrAnchor(
bool IsAlias);
478 bool scanBlockScalar(
bool IsLiteral);
486 bool scanBlockScalarIndicators(
char &StyleIndicator,
char &ChompingIndicator,
487 unsigned &IndentIndicator,
bool &IsDone);
490 char scanBlockStyleIndicator();
493 char scanBlockChompingIndicator();
496 unsigned scanBlockIndentationIndicator();
501 bool scanBlockScalarHeader(
char &ChompingIndicator,
unsigned &IndentIndicator,
507 bool findBlockScalarIndent(
unsigned &BlockIndent,
unsigned BlockExitIndent,
508 unsigned &LineBreaks,
bool &IsDone);
513 bool scanBlockScalarIndent(
unsigned BlockIndent,
unsigned BlockExitIndent,
520 bool fetchMoreTokens();
547 bool IsStartOfStream;
550 bool IsSimpleKeyAllowed;
554 bool IsAdjacentValueAllowedInFlow;
582 if (UnicodeScalarValue <= 0x7F) {
583 Result.push_back(UnicodeScalarValue & 0x7F);
584 }
else if (UnicodeScalarValue <= 0x7FF) {
585 uint8_t FirstByte = 0xC0 | ((UnicodeScalarValue & 0x7C0) >> 6);
586 uint8_t SecondByte = 0x80 | (UnicodeScalarValue & 0x3F);
587 Result.push_back(FirstByte);
588 Result.push_back(SecondByte);
589 }
else if (UnicodeScalarValue <= 0xFFFF) {
590 uint8_t FirstByte = 0xE0 | ((UnicodeScalarValue & 0xF000) >> 12);
591 uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
592 uint8_t ThirdByte = 0x80 | (UnicodeScalarValue & 0x3F);
593 Result.push_back(FirstByte);
594 Result.push_back(SecondByte);
595 Result.push_back(ThirdByte);
596 }
else if (UnicodeScalarValue <= 0x10FFFF) {
597 uint8_t FirstByte = 0xF0 | ((UnicodeScalarValue & 0x1F0000) >> 18);
598 uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0x3F000) >> 12);
599 uint8_t ThirdByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
600 uint8_t FourthByte = 0x80 | (UnicodeScalarValue & 0x3F);
601 Result.push_back(FirstByte);
602 Result.push_back(SecondByte);
603 Result.push_back(ThirdByte);
604 Result.push_back(FourthByte);
615 OS <<
"Stream-Start: ";
618 OS <<
"Stream-End: ";
621 OS <<
"Version-Directive: ";
624 OS <<
"Tag-Directive: ";
627 OS <<
"Document-Start: ";
630 OS <<
"Document-End: ";
633 OS <<
"Block-Entry: ";
639 OS <<
"Block-Sequence-Start: ";
642 OS <<
"Block-Mapping-Start: ";
645 OS <<
"Flow-Entry: ";
648 OS <<
"Flow-Sequence-Start: ";
651 OS <<
"Flow-Sequence-End: ";
654 OS <<
"Flow-Mapping-Start: ";
657 OS <<
"Flow-Mapping-End: ";
669 OS <<
"Block Scalar: ";
683 OS <<
T.Range <<
"\n";
706 std::string EscapedInput;
709 EscapedInput +=
"\\\\";
711 EscapedInput +=
"\\\"";
713 EscapedInput +=
"\\0";
715 EscapedInput +=
"\\a";
717 EscapedInput +=
"\\b";
719 EscapedInput +=
"\\t";
721 EscapedInput +=
"\\n";
723 EscapedInput +=
"\\v";
725 EscapedInput +=
"\\f";
727 EscapedInput +=
"\\r";
729 EscapedInput +=
"\\e";
730 else if ((
unsigned char)*i < 0x20) {
731 std::string HexStr = utohexstr(*i);
732 EscapedInput +=
"\\x" + std::string(2 - HexStr.size(),
'0') + HexStr;
733 }
else if (*i & 0x80) {
736 if (UnicodeScalarValue.second == 0) {
744 if (UnicodeScalarValue.first == 0x85)
745 EscapedInput +=
"\\N";
746 else if (UnicodeScalarValue.first == 0xA0)
747 EscapedInput +=
"\\_";
748 else if (UnicodeScalarValue.first == 0x2028)
749 EscapedInput +=
"\\L";
750 else if (UnicodeScalarValue.first == 0x2029)
751 EscapedInput +=
"\\P";
752 else if (!EscapePrintable &&
754 EscapedInput +=
StringRef(i, UnicodeScalarValue.second);
756 std::string HexStr = utohexstr(UnicodeScalarValue.first);
757 if (HexStr.size() <= 2)
758 EscapedInput +=
"\\x" + std::string(2 - HexStr.size(),
'0') + HexStr;
759 else if (HexStr.size() <= 4)
760 EscapedInput +=
"\\u" + std::string(4 - HexStr.size(),
'0') + HexStr;
761 else if (HexStr.size() <= 8)
762 EscapedInput +=
"\\U" + std::string(8 - HexStr.size(),
'0') + HexStr;
764 i += UnicodeScalarValue.second - 1;
766 EscapedInput.push_back(*i);
859 : SM(sm), ShowColors(ShowColors), EC(EC) {
865 : SM(SM_), ShowColors(ShowColors), EC(EC) {
870 InputBuffer = Buffer;
877 IsStartOfStream =
true;
878 IsSimpleKeyAllowed =
true;
879 IsAdjacentValueAllowedInFlow =
false;
881 std::unique_ptr<MemoryBuffer> InputBufferOwner =
889 bool NeedMore =
false;
891 if (TokenQueue.
empty() || NeedMore) {
892 if (!fetchMoreTokens()) {
896 return TokenQueue.
front();
900 "fetchMoreTokens lied about getting tokens!");
902 removeStaleSimpleKeyCandidates();
904 SK.Tok = TokenQueue.
begin();
910 return TokenQueue.
front();
916 if (!TokenQueue.
empty())
921 if (TokenQueue.
empty())
931 if ( *Position == 0x09
932 || (*Position >= 0x20 && *Position <= 0x7E))
936 if (uint8_t(*Position) & 0x80) {
939 && u8d.first != 0xFEFF
940 && ( u8d.first == 0x85
941 || ( u8d.first >= 0xA0
942 && u8d.first <= 0xD7FF)
943 || ( u8d.first >= 0xE000
944 && u8d.first <= 0xFFFD)
945 || ( u8d.first >= 0x10000
946 && u8d.first <= 0x10FFFF)))
947 return Position + u8d.second;
955 if (*Position == 0x0D) {
956 if (Position + 1 != End && *(Position + 1) == 0x0A)
961 if (*Position == 0x0A)
969 if (*Position ==
' ')
977 if (*Position ==
' ' || *Position ==
'\t')
985 if (*Position ==
' ' || *Position ==
'\t')
987 return skip_nb_char(Position);
1001void Scanner::advanceWhile(SkipWhileFunc Func) {
1002 auto Final = skip_while(Func, Current);
1003 Column += Final - Current;
1011void Scanner::scan_ns_uri_char() {
1015 if (( *Current ==
'%'
1016 && Current + 2 < End
1020 ||
StringRef(Current, 1).find_first_of(
"#;/?:@&=+$,_.!~*'()[]")
1031 setError(
"Cannot consume non-ascii characters", Current);
1036 if (uint8_t(*Current) >= 0x80) {
1037 setError(
"Cannot consume non-ascii characters", Current);
1040 if (uint8_t(*Current) ==
Expected) {
1048void Scanner::skip(
uint32_t Distance) {
1049 Current += Distance;
1051 assert(Current <= End &&
"Skipped past the end");
1055 if (Position == End)
1057 return *Position ==
' ' || *Position ==
'\t' || *Position ==
'\r' ||
1062 if (Position == End || isBlankOrBreak(Position))
1070bool Scanner::isLineEmpty(
StringRef Line) {
1071 for (
const auto *Position =
Line.begin(); Position !=
Line.end(); ++Position)
1072 if (!isBlankOrBreak(Position))
1077bool Scanner::consumeLineBreakIfPresent() {
1078 auto Next = skip_b_break(Current);
1079 if (Next == Current)
1089 ,
bool IsRequired) {
1090 if (IsSimpleKeyAllowed) {
1094 SK.Column = AtColumn;
1095 SK.IsRequired = IsRequired;
1096 SK.FlowLevel = FlowLevel;
1101void Scanner::removeStaleSimpleKeyCandidates() {
1103 i != SimpleKeys.
end();) {
1104 if (i->Line != Line || i->Column + 1024 < Column) {
1106 setError(
"Could not find expected : for simple key"
1107 , i->Tok->Range.begin());
1108 i = SimpleKeys.
erase(i);
1114void Scanner::removeSimpleKeyCandidatesOnFlowLevel(
unsigned Level) {
1115 if (!SimpleKeys.
empty() && (SimpleKeys.
end() - 1)->FlowLevel == Level)
1119bool Scanner::unrollIndent(
int ToColumn) {
1125 while (Indent > ToColumn) {
1135bool Scanner::rollIndent(
int ToColumn
1140 if (Indent < ToColumn) {
1147 TokenQueue.
insert(InsertPoint,
T);
1152void Scanner::skipComment() {
1153 if (Current == End || *Current !=
'#')
1166void Scanner::scanToNextToken() {
1168 while (Current != End && (*Current ==
' ' || *Current ==
'\t')) {
1183 IsSimpleKeyAllowed =
true;
1187bool Scanner::scanStreamStart() {
1188 IsStartOfStream =
false;
1196 Current += EI.second;
1200bool Scanner::scanStreamEnd() {
1209 IsSimpleKeyAllowed =
false;
1210 IsAdjacentValueAllowedInFlow =
false;
1219bool Scanner::scanDirective() {
1223 IsSimpleKeyAllowed =
false;
1224 IsAdjacentValueAllowedInFlow =
false;
1229 Current = skip_while(&Scanner::skip_ns_char, Current);
1231 Current = skip_while(&Scanner::skip_s_white, Current);
1234 if (
Name ==
"YAML") {
1235 Current = skip_while(&Scanner::skip_ns_char, Current);
1240 }
else if(
Name ==
"TAG") {
1241 Current = skip_while(&Scanner::skip_ns_char, Current);
1242 Current = skip_while(&Scanner::skip_s_white, Current);
1243 Current = skip_while(&Scanner::skip_ns_char, Current);
1252bool Scanner::scanDocumentIndicator(
bool IsStart) {
1255 IsSimpleKeyAllowed =
false;
1256 IsAdjacentValueAllowedInFlow =
false;
1266bool Scanner::scanFlowCollectionStart(
bool IsSequence) {
1275 saveSimpleKeyCandidate(--TokenQueue.
end(), Column - 1,
false);
1278 IsSimpleKeyAllowed =
true;
1280 IsAdjacentValueAllowedInFlow =
false;
1285bool Scanner::scanFlowCollectionEnd(
bool IsSequence) {
1286 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1287 IsSimpleKeyAllowed =
false;
1288 IsAdjacentValueAllowedInFlow =
true;
1300bool Scanner::scanFlowEntry() {
1301 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1302 IsSimpleKeyAllowed =
true;
1303 IsAdjacentValueAllowedInFlow =
false;
1312bool Scanner::scanBlockEntry() {
1314 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1315 IsSimpleKeyAllowed =
true;
1316 IsAdjacentValueAllowedInFlow =
false;
1325bool Scanner::scanKey() {
1329 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1330 IsSimpleKeyAllowed = !FlowLevel;
1331 IsAdjacentValueAllowedInFlow =
false;
1341bool Scanner::scanValue() {
1344 if (!SimpleKeys.
empty()) {
1348 T.Range = SK.Tok->Range;
1350 for (i = TokenQueue.
begin(), e = TokenQueue.
end(); i != e; ++i) {
1363 IsSimpleKeyAllowed =
false;
1367 IsSimpleKeyAllowed = !FlowLevel;
1369 IsAdjacentValueAllowedInFlow =
false;
1392 while (
I >=
First && *
I ==
'\\') --
I;
1395 return (Position - 1 -
I) % 2 == 1;
1398bool Scanner::scanFlowScalar(
bool IsDoubleQuoted) {
1400 unsigned ColStart = Column;
1401 if (IsDoubleQuoted) {
1404 while (Current != End && *Current !=
'"')
1408 }
while ( Current != End
1409 && *(Current - 1) ==
'\\'
1413 while (Current != End) {
1415 if (Current + 1 < End && *Current ==
'\'' && *(Current + 1) ==
'\'') {
1418 }
else if (*Current ==
'\'')
1422 i = skip_b_break(Current);
1437 if (Current == End) {
1438 setError(
"Expected quote at end of scalar", Current);
1448 saveSimpleKeyCandidate(--TokenQueue.
end(), ColStart,
false);
1450 IsSimpleKeyAllowed =
false;
1451 IsAdjacentValueAllowedInFlow =
true;
1456bool Scanner::scanPlainScalar() {
1458 unsigned ColStart = Column;
1459 unsigned LeadingBlanks = 0;
1460 assert(Indent >= -1 &&
"Indent must be >= -1 !");
1461 unsigned indent =
static_cast<unsigned>(Indent + 1);
1462 while (Current != End) {
1463 if (*Current ==
'#')
1466 while (Current != End &&
1467 ((*Current !=
':' && isPlainSafeNonBlank(Current)) ||
1468 (*Current ==
':' && isPlainSafeNonBlank(Current + 1)))) {
1477 if (!isBlankOrBreak(Current))
1482 while (isBlankOrBreak(Tmp)) {
1485 if (LeadingBlanks && (Column < indent) && *Tmp ==
'\t') {
1486 setError(
"Found invalid tab character in indentation", Tmp);
1492 i = skip_b_break(Tmp);
1501 if (!FlowLevel && Column < indent)
1506 if (Start == Current) {
1507 setError(
"Got empty plain scalar", Start);
1516 saveSimpleKeyCandidate(--TokenQueue.
end(), ColStart,
false);
1518 IsSimpleKeyAllowed =
false;
1519 IsAdjacentValueAllowedInFlow =
false;
1524bool Scanner::scanAliasOrAnchor(
bool IsAlias) {
1526 unsigned ColStart = Column;
1528 while (Current != End) {
1529 if ( *Current ==
'[' || *Current ==
']'
1530 || *Current ==
'{' || *Current ==
'}'
1541 if (Start + 1 == Current) {
1542 setError(
"Got empty alias or anchor", Start);
1552 saveSimpleKeyCandidate(--TokenQueue.
end(), ColStart,
false);
1554 IsSimpleKeyAllowed =
false;
1555 IsAdjacentValueAllowedInFlow =
false;
1560bool Scanner::scanBlockScalarIndicators(
char &StyleIndicator,
1561 char &ChompingIndicator,
1562 unsigned &IndentIndicator,
1564 StyleIndicator = scanBlockStyleIndicator();
1565 if (!scanBlockScalarHeader(ChompingIndicator, IndentIndicator, IsDone))
1570char Scanner::scanBlockStyleIndicator() {
1571 char Indicator =
' ';
1572 if (Current != End && (*Current ==
'>' || *Current ==
'|')) {
1573 Indicator = *Current;
1579char Scanner::scanBlockChompingIndicator() {
1580 char Indicator =
' ';
1581 if (Current != End && (*Current ==
'+' || *Current ==
'-')) {
1582 Indicator = *Current;
1594 if (ChompingIndicator ==
'-')
1596 if (ChompingIndicator ==
'+')
1599 return Str.empty() ? 0 : 1;
1602unsigned Scanner::scanBlockIndentationIndicator() {
1603 unsigned Indent = 0;
1604 if (Current != End && (*Current >=
'1' && *Current <=
'9')) {
1611bool Scanner::scanBlockScalarHeader(
char &ChompingIndicator,
1612 unsigned &IndentIndicator,
bool &IsDone) {
1613 auto Start = Current;
1615 ChompingIndicator = scanBlockChompingIndicator();
1616 IndentIndicator = scanBlockIndentationIndicator();
1618 if (ChompingIndicator ==
' ')
1619 ChompingIndicator = scanBlockChompingIndicator();
1620 Current = skip_while(&Scanner::skip_s_white, Current);
1623 if (Current == End) {
1632 if (!consumeLineBreakIfPresent()) {
1633 setError(
"Expected a line break after block scalar header", Current);
1639bool Scanner::findBlockScalarIndent(
unsigned &BlockIndent,
1640 unsigned BlockExitIndent,
1641 unsigned &LineBreaks,
bool &IsDone) {
1642 unsigned MaxAllSpaceLineCharacters = 0;
1646 advanceWhile(&Scanner::skip_s_space);
1647 if (skip_nb_char(Current) != Current) {
1649 if (Column <= BlockExitIndent) {
1654 BlockIndent = Column;
1655 if (MaxAllSpaceLineCharacters > BlockIndent) {
1657 "Leading all-spaces line must be smaller than the block indent",
1658 LongestAllSpaceLine);
1663 if (skip_b_break(Current) != Current &&
1664 Column > MaxAllSpaceLineCharacters) {
1667 MaxAllSpaceLineCharacters = Column;
1668 LongestAllSpaceLine = Current;
1672 if (Current == End) {
1677 if (!consumeLineBreakIfPresent()) {
1686bool Scanner::scanBlockScalarIndent(
unsigned BlockIndent,
1687 unsigned BlockExitIndent,
bool &IsDone) {
1689 while (Column < BlockIndent) {
1690 auto I = skip_s_space(Current);
1697 if (skip_nb_char(Current) == Current)
1700 if (Column <= BlockExitIndent) {
1705 if (Column < BlockIndent) {
1706 if (Current != End && *Current ==
'#') {
1710 setError(
"A text line is less indented than the block scalar", Current);
1716bool Scanner::scanBlockScalar(
bool IsLiteral) {
1717 assert(*Current ==
'|' || *Current ==
'>');
1718 char StyleIndicator;
1719 char ChompingIndicator;
1720 unsigned BlockIndent;
1721 bool IsDone =
false;
1722 if (!scanBlockScalarIndicators(StyleIndicator, ChompingIndicator, BlockIndent,
1727 bool IsFolded = StyleIndicator ==
'>';
1729 const auto *Start = Current;
1730 unsigned BlockExitIndent = Indent < 0 ? 0 : (
unsigned)Indent;
1731 unsigned LineBreaks = 0;
1732 if (BlockIndent == 0) {
1733 if (!findBlockScalarIndent(BlockIndent, BlockExitIndent, LineBreaks,
1741 if (!scanBlockScalarIndent(BlockIndent, BlockExitIndent, IsDone))
1747 auto LineStart = Current;
1748 advanceWhile(&Scanner::skip_nb_char);
1749 if (LineStart != Current) {
1750 if (LineBreaks && IsFolded && !Scanner::isLineEmpty(Str)) {
1754 if (LineBreaks == 1) {
1755 Str.append(LineBreaks,
1756 isLineEmpty(
StringRef(LineStart, Current - LineStart))
1766 Str.append(LineBreaks,
'\n');
1767 Str.append(
StringRef(LineStart, Current - LineStart));
1775 if (!consumeLineBreakIfPresent())
1780 if (Current == End && !LineBreaks)
1787 IsSimpleKeyAllowed =
true;
1788 IsAdjacentValueAllowedInFlow =
false;
1793 T.Value = std::string(Str);
1798bool Scanner::scanTag() {
1800 unsigned ColStart = Column;
1802 if (Current == End || isBlankOrBreak(Current));
1803 else if (*Current ==
'<') {
1810 Current = skip_while(&Scanner::skip_ns_char, Current);
1819 saveSimpleKeyCandidate(--TokenQueue.
end(), ColStart,
false);
1821 IsSimpleKeyAllowed =
false;
1822 IsAdjacentValueAllowedInFlow =
false;
1827bool Scanner::fetchMoreTokens() {
1828 if (IsStartOfStream)
1829 return scanStreamStart();
1834 return scanStreamEnd();
1836 removeStaleSimpleKeyCandidates();
1838 unrollIndent(Column);
1840 if (Column == 0 && *Current ==
'%')
1841 return scanDirective();
1843 if (Column == 0 && Current + 4 <= End
1845 && *(Current + 1) ==
'-'
1846 && *(Current + 2) ==
'-'
1847 && (Current + 3 == End || isBlankOrBreak(Current + 3)))
1848 return scanDocumentIndicator(
true);
1850 if (Column == 0 && Current + 4 <= End
1852 && *(Current + 1) ==
'.'
1853 && *(Current + 2) ==
'.'
1854 && (Current + 3 == End || isBlankOrBreak(Current + 3)))
1855 return scanDocumentIndicator(
false);
1857 if (*Current ==
'[')
1858 return scanFlowCollectionStart(
true);
1860 if (*Current ==
'{')
1861 return scanFlowCollectionStart(
false);
1863 if (*Current ==
']')
1864 return scanFlowCollectionEnd(
true);
1866 if (*Current ==
'}')
1867 return scanFlowCollectionEnd(
false);
1869 if (*Current ==
',')
1870 return scanFlowEntry();
1872 if (*Current ==
'-' && (isBlankOrBreak(Current + 1) || Current + 1 == End))
1873 return scanBlockEntry();
1875 if (*Current ==
'?' && (Current + 1 == End || isBlankOrBreak(Current + 1)))
1878 if (*Current ==
':' &&
1879 (!isPlainSafeNonBlank(Current + 1) || IsAdjacentValueAllowedInFlow))
1882 if (*Current ==
'*')
1883 return scanAliasOrAnchor(
true);
1885 if (*Current ==
'&')
1886 return scanAliasOrAnchor(
false);
1888 if (*Current ==
'!')
1891 if (*Current ==
'|' && !FlowLevel)
1892 return scanBlockScalar(
true);
1894 if (*Current ==
'>' && !FlowLevel)
1895 return scanBlockScalar(
false);
1897 if (*Current ==
'\'')
1898 return scanFlowScalar(
false);
1900 if (*Current ==
'"')
1901 return scanFlowScalar(
true);
1905 if ((!isBlankOrBreak(Current) &&
1906 FirstChar.find_first_of(
"-?:,[]{}#&*!|>'\"%@`") ==
StringRef::npos) ||
1908 isPlainSafeNonBlank(Current + 1)))
1909 return scanPlainScalar();
1911 setError(
"Unrecognized character while tokenizing.", Current);
1916 std::error_code *EC)
1917 : scanner(new
Scanner(Input, SM, ShowColors, EC)) {}
1920 std::error_code *EC)
1921 : scanner(new
Scanner(InputBuffer, SM, ShowColors, EC)) {}
1933 scanner->printError(
Range.Start, Kind, Msg,
Range);
1943 CurrentDoc.reset(
new Document(*
this));
1960 SourceRange =
SMRange(Start, Start);
1965 if (!Raw.
empty() && Raw !=
"!") {
1968 Ret = std::string(
Doc->getTagMap().find(
"!")->second);
1972 Ret = std::string(
Doc->getTagMap().find(
"!!")->second);
1977 std::map<StringRef, StringRef>::const_iterator It =
1978 Doc->getTagMap().find(TagHandle);
1979 if (It !=
Doc->getTagMap().end())
1980 Ret = std::string(It->second);
1984 T.Range = TagHandle;
1994 return "tag:yaml.org,2002:null";
1998 return "tag:yaml.org,2002:str";
2000 return "tag:yaml.org,2002:map";
2002 return "tag:yaml.org,2002:seq";
2009 return Doc->peekNext();
2013 return Doc->getNext();
2017 return Doc->parseBlockNode();
2021 return Doc->NodeAllocator;
2025 Doc->setError(Msg, Tok);
2029 return Doc->failed();
2033 if (
Value[0] ==
'"')
2034 return getDoubleQuotedValue(
Value, Storage);
2035 if (
Value[0] ==
'\'')
2036 return getSingleQuotedValue(
Value, Storage);
2037 return getPlainValue(
Value, Storage);
2061 return UnquotedValue;
2065 char LastNewLineAddedAs =
'\0';
2067 if (UnquotedValue[
I] !=
'\r' && UnquotedValue[
I] !=
'\n') {
2069 UnquotedValue = UnescapeCallback(UnquotedValue.
drop_front(
I), Storage);
2070 LastNewLineAddedAs =
'\0';
2077 LastNewLineAddedAs =
' ';
2083 switch (LastNewLineAddedAs) {
2086 Storage.
back() =
'\n';
2087 LastNewLineAddedAs =
'\n';
2095 LastNewLineAddedAs =
' ';
2100 if (UnquotedValue.
substr(
I, 2) ==
"\r\n")
2109ScalarNode::getDoubleQuotedValue(
StringRef RawValue,
2112 RawValue.
back() ==
'"');
2115 auto UnescapeFunc = [
this](
StringRef UnquotedValue,
2118 if (UnquotedValue.
size() == 1) {
2120 T.Range = UnquotedValue;
2121 setError(
"Unrecognized escape code",
T);
2126 switch (UnquotedValue[0]) {
2130 setError(
"Unrecognized escape code",
T);
2136 if (UnquotedValue.
size() >= 2 && UnquotedValue[1] ==
'\n')
2194 if (UnquotedValue.
size() < 3)
2197 unsigned int UnicodeScalarValue;
2200 UnicodeScalarValue = 0xFFFD;
2205 if (UnquotedValue.
size() < 5)
2208 unsigned int UnicodeScalarValue;
2211 UnicodeScalarValue = 0xFFFD;
2216 if (UnquotedValue.
size() < 9)
2219 unsigned int UnicodeScalarValue;
2222 UnicodeScalarValue = 0xFFFD;
2236 RawValue.
back() ==
'\'');
2239 auto UnescapeFunc = [](
StringRef UnquotedValue,
2254 RawValue = RawValue.
rtrim(
"\r\n \t");
2309 setError(
"Unexpected token in Key Value.", t);
2325void MappingNode::increment() {
2328 CurrentEntry =
nullptr;
2332 CurrentEntry->
skip();
2335 CurrentEntry =
nullptr;
2348 CurrentEntry =
nullptr;
2351 setError(
"Unexpected token. Expected Key or Block End",
T);
2355 CurrentEntry =
nullptr;
2369 CurrentEntry =
nullptr;
2372 setError(
"Unexpected token. Expected Key, Flow Entry, or Flow "
2376 CurrentEntry =
nullptr;
2384 CurrentEntry =
nullptr;
2388 CurrentEntry->
skip();
2395 if (!CurrentEntry) {
2397 CurrentEntry =
nullptr;
2403 CurrentEntry =
nullptr;
2406 setError(
"Unexpected token. Expected Block Entry or Block End."
2411 CurrentEntry =
nullptr;
2418 if (!CurrentEntry) {
2420 CurrentEntry =
nullptr;
2426 CurrentEntry =
nullptr;
2428 }
else if (SeqType ==
ST_Flow) {
2433 WasPreviousTokenFlowEntry =
true;
2441 CurrentEntry =
nullptr;
2446 setError(
"Could not find closing ]!",
T);
2449 CurrentEntry =
nullptr;
2452 if (!WasPreviousTokenFlowEntry) {
2453 setError(
"Expected , between entries!",
T);
2455 CurrentEntry =
nullptr;
2460 if (!CurrentEntry) {
2463 WasPreviousTokenFlowEntry =
false;
2472 TagMap[
"!!"] =
"tag:yaml.org,2002:";
2474 if (parseDirectives())
2482 if (stream.scanner->failed())
2497Token &Document::peekNext() {
2498 return stream.scanner->peekNext();
2501Token Document::getNext() {
2502 return stream.scanner->getNext();
2505void Document::setError(
const Twine &Message,
Token &Location)
const {
2506 stream.scanner->setError(Message, Location.Range.begin());
2509bool Document::failed()
const {
2510 return stream.scanner->failed();
2522 return new (NodeAllocator)
AliasNode(stream.CurrentDoc,
T.Range.substr(1));
2525 setError(
"Already encountered an anchor for this node!",
T);
2528 AnchorInfo = getNext();
2530 goto parse_property;
2533 setError(
"Already encountered a tag for this node!",
T);
2536 TagInfo = getNext();
2538 goto parse_property;
2548 return new (NodeAllocator)
SequenceNode( stream.CurrentDoc
2554 return new (NodeAllocator)
2561 return new (NodeAllocator)
2568 return new (NodeAllocator)
2575 return new (NodeAllocator)
2582 return new (NodeAllocator)
2589 StringRef NullTerminatedStr(
T.Value.c_str(),
T.Value.length() + 1);
2591 return new (NodeAllocator)
2593 TagInfo.
Range, StrCopy,
T.Range);
2597 return new (NodeAllocator)
2608 return new (NodeAllocator)
NullNode(stream.CurrentDoc);
2612 if (Root && (isa<MappingNode>(Root) || isa<SequenceNode>(Root)))
2613 return new (NodeAllocator)
NullNode(stream.CurrentDoc);
2615 setError(
"Unexpected token",
T);
2625bool Document::parseDirectives() {
2626 bool isDirective =
false;
2630 parseTAGDirective();
2633 parseYAMLDirective();
2641void Document::parseYAMLDirective() {
2645void Document::parseTAGDirective() {
2649 T =
T.substr(
T.find_first_of(
" \t")).ltrim(
" \t");
2650 std::size_t HandleEnd =
T.find_first_of(
" \t");
2651 StringRef TagHandle =
T.substr(0, HandleEnd);
2652 StringRef TagPrefix =
T.substr(HandleEnd).ltrim(
" \t");
2653 TagMap[TagHandle] = TagPrefix;
2656bool Document::expectToken(
int TK) {
2659 setError(
"Unexpected token",
T);
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_ATTRIBUTE_NOINLINE
LLVM_ATTRIBUTE_NOINLINE - On compilers where we have a directive to do so, mark a method "not for inl...
std::optional< std::vector< StOtherPiece > > Other
static Cursor skipComment(Cursor C)
Skip a line comment and return the updated cursor.
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallString class.
This file defines the SmallVector class.
static EncodingInfo getUnicodeEncoding(StringRef Input)
getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode encoding form of Input.
static bool is_ns_hex_digit(const char C)
static bool is_ns_word_char(const char C)
static unsigned getChompedLineBreaks(char ChompingIndicator, unsigned LineBreaks, StringRef Str)
Get the number of line breaks after chomping.
std::pair< uint32_t, unsigned > UTF8Decoded
The Unicode scalar value of a UTF-8 minimal well-formed code unit subsequence and the subsequence's l...
@ UEF_UTF32_LE
UTF-32 Little Endian.
@ UEF_UTF16_BE
UTF-16 Big Endian.
@ UEF_UTF16_LE
UTF-16 Little Endian.
@ UEF_UTF32_BE
UTF-32 Big Endian.
@ UEF_UTF8
UTF-8 or ascii.
@ UEF_Unknown
Not a valid Unicode encoding.
static LLVM_ATTRIBUTE_NOINLINE bool wasEscaped(StringRef::iterator First, StringRef::iterator Position)
static StringRef parseScalarValue(StringRef UnquotedValue, SmallVectorImpl< char > &Storage, StringRef LookupChars, std::function< StringRef(StringRef, SmallVectorImpl< char > &)> UnescapeCallback)
parseScalarValue - A common parsing routine for all flow scalar styles.
std::pair< UnicodeEncodingForm, unsigned > EncodingInfo
EncodingInfo - Holds the encoding type and length of the byte order mark if it exists.
static UTF8Decoded decodeUTF8(StringRef Range)
static void encodeUTF8(uint32_t UnicodeScalarValue, SmallVectorImpl< char > &Result)
encodeUTF8 - Encode UnicodeScalarValue in UTF-8 and append it to result.
A linked-list with a custom, local allocator.
iterator insert(iterator I, T &&V)
void resetAlloc()
Reset the underlying allocator.
IteratorImpl< T, typename list_type::iterator > iterator
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Allocate memory in an ever growing pool, as if by bump-pointer.
Tagged union holding either a T or a Error.
const char * getBufferStart() const
const char * getBufferEnd() const
static std::unique_ptr< MemoryBuffer > getMemBuffer(StringRef InputData, StringRef BufferName="", bool RequiresNullTerminator=true)
Open the specified memory range as a MemoryBuffer.
Represents a location in source code.
static SMLoc getFromPointer(const char *Ptr)
Represents a range in source code.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void reserve(size_type N)
iterator erase(const_iterator CI)
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This owns the files read by a parser, handles include stacks, and handles diagnostic wrangling.
void PrintMessage(raw_ostream &OS, SMLoc Loc, DiagKind Kind, const Twine &Msg, ArrayRef< SMRange > Ranges={}, ArrayRef< SMFixIt > FixIts={}, bool ShowColors=true) const
Emit a message about the specified location with the specified string.
unsigned AddNewSourceBuffer(std::unique_ptr< MemoryBuffer > F, SMLoc IncludeLoc)
Add a new source buffer to this source manager.
StringRef - Represent a constant reference to a string, i.e.
size_t find_last_not_of(char C, size_t From=npos) const
Find the last character in the string that is not C, or npos if not found.
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
constexpr bool empty() const
empty - Check if the string is empty.
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
char back() const
back - Get the last character in the string.
constexpr size_t size() const
size - Get the string size.
char front() const
front - Get the first character in the string.
size_t find_last_of(char C, size_t From=npos) const
Find the last character in the string that is C, or npos if not found.
StringRef ltrim(char Char) const
Return string with consecutive Char characters starting from the left removed.
size_t find_first_of(char C, size_t From=0) const
Find the first character in the string that is C, or npos if not found.
StringRef rtrim(char Char) const
Return string with consecutive Char characters starting from the right removed.
StringRef take_front(size_t N=1) const
Return a StringRef equal to 'this' but with only the first N elements remaining.
StringRef copy(Allocator &A) const
static constexpr size_t npos
StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
TypeID
Definitions of all of the base types for the Type system.
LLVM Value Representation.
This class implements an extremely fast bulk output stream that can only output to a stream.
Represents an alias to a Node with an anchor.
A block scalar node is an opaque datum that can be presented as a series of zero or more Unicode scal...
A YAML Stream is a sequence of Documents.
Node * parseBlockNode()
Root for parsing a node. Returns a single node.
bool skip()
Finish parsing the current document and return true if there are more.
Node * getRoot()
Parse and return the root level node.
Document(Stream &ParentStream)
Node * getValue()
Parse and return the value.
Node * getKey()
Parse and return the key.
Represents a YAML map created from either a block map or a flow map.
@ MT_Inline
An inline mapping node is used for "[key: value]".
Abstract base class for all Nodes.
StringRef getRawTag() const
Get the tag as it was written in the document.
unsigned int getType() const
std::string getVerbatimTag() const
Get the verbatim tag for a given Node.
std::unique_ptr< Document > & Doc
Node(unsigned int Type, std::unique_ptr< Document > &, StringRef Anchor, StringRef Tag)
BumpPtrAllocator & getAllocator()
void setError(const Twine &Message, Token &Location) const
A scalar node is an opaque datum that can be presented as a series of zero or more Unicode scalar val...
StringRef getValue(SmallVectorImpl< char > &Storage) const
Gets the value of this node as a StringRef.
Scans YAML tokens from a MemoryBuffer.
Scanner(StringRef Input, SourceMgr &SM, bool ShowColors=true, std::error_code *EC=nullptr)
void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message, ArrayRef< SMRange > Ranges=std::nullopt)
void setError(const Twine &Message, StringRef::iterator Position)
Token getNext()
Parse the next token and pop it from the queue.
bool failed()
Returns true if an error occurred while parsing.
Token & peekNext()
Parse the next token and return it without popping it.
Represents a YAML sequence created from either a block sequence or a flow sequence.
This class represents a YAML stream potentially containing multiple documents.
document_iterator begin()
Stream(StringRef Input, SourceMgr &, bool ShowColors=true, std::error_code *EC=nullptr)
This keeps a reference to the string referenced by Input.
void printError(Node *N, const Twine &Msg, SourceMgr::DiagKind Kind=SourceMgr::DK_Error)
Iterator abstraction for Documents over a Stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
bool isPrintable(int UCS)
Determines if a character is likely to be displayed correctly on the terminal.
bool dumpTokens(StringRef Input, raw_ostream &)
Dump all the tokens in this stream to OS.
std::optional< bool > parseBool(StringRef S)
Parse S as a bool according to https://yaml.org/type/bool.html.
bool scanTokens(StringRef Input)
Scans all tokens in input without outputting anything.
void skip(CollectionType &C)
std::string escape(StringRef Input, bool EscapePrintable=true)
Escape Input for a double quoted scalar; if EscapePrintable is true, all UTF8 sequences will be escap...
This is an optimization pass for GlobalISel generic memory operations.
std::error_code make_error_code(BitcodeError E)
testing::Matcher< const detail::ErrorHolder & > Failed()
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Token - A single YAML token.
enum llvm::yaml::Token::TokenKind Kind
std::string Value
The value of a block scalar node.
StringRef Range
A string of length 0 or more whose begin() points to the logical location of the token in the input.