35#include <system_error>
66 if (Input.
size() >= 4) {
71 if (Input[1] == 0 && Input[2] == 0 && Input[3] != 0)
75 if (Input.
size() >= 2 && Input[1] != 0)
79 if ( Input.
size() >= 4
93 if ( Input.
size() >= 3
101 if (Input.
size() >= 4 && Input[1] == 0 && Input[2] == 0 && Input[3] == 0)
104 if (Input.
size() >= 2 && Input[1] == 0)
// Empty out-of-line definitions of anchor() for each YAML node class.
// NOTE(review): presumably these exist to pin each class's vtable to this
// translation unit (the usual LLVM "anchor" convention) -- confirm against
// the class declarations in the header.
111void Node::anchor() {}
112void NullNode::anchor() {}
113void ScalarNode::anchor() {}
114void BlockScalarNode::anchor() {}
115void KeyValueNode::anchor() {}
116void MappingNode::anchor() {}
117void SequenceNode::anchor() {}
118void AliasNode::anchor() {}
181 unsigned FlowLevel = 0;
182 bool IsRequired =
false;
184 bool operator ==(
const SimpleKey &
Other) {
185 return Tok ==
Other.Tok;
201 if (Position <
End && (*Position & 0x80) == 0) {
202 return std::make_pair(*Position, 1);
206 if (Position + 1 <
End && ((*Position & 0xE0) == 0xC0) &&
207 ((*(Position + 1) & 0xC0) == 0x80)) {
208 uint32_t codepoint = ((*Position & 0x1F) << 6) |
209 (*(Position + 1) & 0x3F);
210 if (codepoint >= 0x80)
211 return std::make_pair(codepoint, 2);
215 if (Position + 2 <
End && ((*Position & 0xF0) == 0xE0) &&
216 ((*(Position + 1) & 0xC0) == 0x80) &&
217 ((*(Position + 2) & 0xC0) == 0x80)) {
218 uint32_t codepoint = ((*Position & 0x0F) << 12) |
219 ((*(Position + 1) & 0x3F) << 6) |
220 (*(Position + 2) & 0x3F);
223 if (codepoint >= 0x800 &&
224 (codepoint < 0xD800 || codepoint > 0xDFFF))
225 return std::make_pair(codepoint, 3);
229 if (Position + 3 <
End && ((*Position & 0xF8) == 0xF0) &&
230 ((*(Position + 1) & 0xC0) == 0x80) &&
231 ((*(Position + 2) & 0xC0) == 0x80) &&
232 ((*(Position + 3) & 0xC0) == 0x80)) {
233 uint32_t codepoint = ((*Position & 0x07) << 18) |
234 ((*(Position + 1) & 0x3F) << 12) |
235 ((*(Position + 2) & 0x3F) << 6) |
236 (*(Position + 3) & 0x3F);
237 if (codepoint >= 0x10000 && codepoint <= 0x10FFFF)
238 return std::make_pair(codepoint, 4);
240 return std::make_pair(0, 0);
250 std::error_code *EC =
nullptr);
252 std::error_code *EC =
nullptr);
262 SM.
PrintMessage(Loc, Kind, Message, Ranges, {}, ShowColors);
299 return ::decodeUTF8(
StringRef(Position,
End - Position));
375 void advanceWhile(SkipWhileFunc Func);
380 void scan_ns_uri_char();
404 bool consumeLineBreakIfPresent();
415 void removeStaleSimpleKeyCandidates();
418 void removeSimpleKeyCandidatesOnFlowLevel(
unsigned Level);
422 bool unrollIndent(
int ToColumn);
426 bool rollIndent(
int ToColumn
435 void scanToNextToken();
438 bool scanStreamStart();
441 bool scanStreamEnd();
444 bool scanDirective();
447 bool scanDocumentIndicator(
bool IsStart);
450 bool scanFlowCollectionStart(
bool IsSequence);
453 bool scanFlowCollectionEnd(
bool IsSequence);
456 bool scanFlowEntry();
459 bool scanBlockEntry();
468 bool scanFlowScalar(
bool IsDoubleQuoted);
471 bool scanPlainScalar();
474 bool scanAliasOrAnchor(
bool IsAlias);
477 bool scanBlockScalar(
bool IsLiteral);
485 bool scanBlockScalarIndicators(
char &StyleIndicator,
char &ChompingIndicator,
486 unsigned &IndentIndicator,
bool &IsDone);
489 char scanBlockStyleIndicator();
492 char scanBlockChompingIndicator();
495 unsigned scanBlockIndentationIndicator();
500 bool scanBlockScalarHeader(
char &ChompingIndicator,
unsigned &IndentIndicator,
506 bool findBlockScalarIndent(
unsigned &BlockIndent,
unsigned BlockExitIndent,
507 unsigned &LineBreaks,
bool &IsDone);
512 bool scanBlockScalarIndent(
unsigned BlockIndent,
unsigned BlockExitIndent,
519 bool fetchMoreTokens();
546 bool IsStartOfStream;
549 bool IsSimpleKeyAllowed;
553 bool IsAdjacentValueAllowedInFlow;
581 if (UnicodeScalarValue <= 0x7F) {
582 Result.push_back(UnicodeScalarValue & 0x7F);
583 }
else if (UnicodeScalarValue <= 0x7FF) {
584 uint8_t FirstByte = 0xC0 | ((UnicodeScalarValue & 0x7C0) >> 6);
585 uint8_t SecondByte = 0x80 | (UnicodeScalarValue & 0x3F);
586 Result.push_back(FirstByte);
587 Result.push_back(SecondByte);
588 }
else if (UnicodeScalarValue <= 0xFFFF) {
589 uint8_t FirstByte = 0xE0 | ((UnicodeScalarValue & 0xF000) >> 12);
590 uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
591 uint8_t ThirdByte = 0x80 | (UnicodeScalarValue & 0x3F);
592 Result.push_back(FirstByte);
593 Result.push_back(SecondByte);
594 Result.push_back(ThirdByte);
595 }
else if (UnicodeScalarValue <= 0x10FFFF) {
596 uint8_t FirstByte = 0xF0 | ((UnicodeScalarValue & 0x1F0000) >> 18);
597 uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0x3F000) >> 12);
598 uint8_t ThirdByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
599 uint8_t FourthByte = 0x80 | (UnicodeScalarValue & 0x3F);
600 Result.push_back(FirstByte);
601 Result.push_back(SecondByte);
602 Result.push_back(ThirdByte);
603 Result.push_back(FourthByte);
614 OS <<
"Stream-Start: ";
617 OS <<
"Stream-End: ";
620 OS <<
"Version-Directive: ";
623 OS <<
"Tag-Directive: ";
626 OS <<
"Document-Start: ";
629 OS <<
"Document-End: ";
632 OS <<
"Block-Entry: ";
638 OS <<
"Block-Sequence-Start: ";
641 OS <<
"Block-Mapping-Start: ";
644 OS <<
"Flow-Entry: ";
647 OS <<
"Flow-Sequence-Start: ";
650 OS <<
"Flow-Sequence-End: ";
653 OS <<
"Flow-Mapping-Start: ";
656 OS <<
"Flow-Mapping-End: ";
668 OS <<
"Block Scalar: ";
682 OS <<
T.Range <<
"\n";
705 std::string EscapedInput;
708 EscapedInput +=
"\\\\";
710 EscapedInput +=
"\\\"";
712 EscapedInput +=
"\\0";
714 EscapedInput +=
"\\a";
716 EscapedInput +=
"\\b";
718 EscapedInput +=
"\\t";
720 EscapedInput +=
"\\n";
722 EscapedInput +=
"\\v";
724 EscapedInput +=
"\\f";
726 EscapedInput +=
"\\r";
728 EscapedInput +=
"\\e";
729 else if ((
unsigned char)*i < 0x20) {
730 std::string HexStr = utohexstr(*i);
731 EscapedInput +=
"\\x" + std::string(2 - HexStr.size(),
'0') + HexStr;
732 }
else if (*i & 0x80) {
735 if (UnicodeScalarValue.second == 0) {
743 if (UnicodeScalarValue.first == 0x85)
744 EscapedInput +=
"\\N";
745 else if (UnicodeScalarValue.first == 0xA0)
746 EscapedInput +=
"\\_";
747 else if (UnicodeScalarValue.first == 0x2028)
748 EscapedInput +=
"\\L";
749 else if (UnicodeScalarValue.first == 0x2029)
750 EscapedInput +=
"\\P";
751 else if (!EscapePrintable &&
753 EscapedInput +=
StringRef(i, UnicodeScalarValue.second);
755 std::string HexStr = utohexstr(UnicodeScalarValue.first);
756 if (HexStr.size() <= 2)
757 EscapedInput +=
"\\x" + std::string(2 - HexStr.size(),
'0') + HexStr;
758 else if (HexStr.size() <= 4)
759 EscapedInput +=
"\\u" + std::string(4 - HexStr.size(),
'0') + HexStr;
760 else if (HexStr.size() <= 8)
761 EscapedInput +=
"\\U" + std::string(8 - HexStr.size(),
'0') + HexStr;
763 i += UnicodeScalarValue.second - 1;
765 EscapedInput.push_back(*i);
858 : SM(sm), ShowColors(ShowColors), EC(EC) {
864 : SM(SM_), ShowColors(ShowColors), EC(EC) {
869 InputBuffer = Buffer;
876 IsStartOfStream =
true;
877 IsSimpleKeyAllowed =
true;
878 IsAdjacentValueAllowedInFlow =
false;
880 std::unique_ptr<MemoryBuffer> InputBufferOwner =
888 bool NeedMore =
false;
890 if (TokenQueue.
empty() || NeedMore) {
891 if (!fetchMoreTokens()) {
895 return TokenQueue.
front();
899 "fetchMoreTokens lied about getting tokens!");
901 removeStaleSimpleKeyCandidates();
903 SK.Tok = TokenQueue.
begin();
909 return TokenQueue.
front();
915 if (!TokenQueue.
empty())
920 if (TokenQueue.
empty())
930 if ( *Position == 0x09
931 || (*Position >= 0x20 && *Position <= 0x7E))
935 if (
uint8_t(*Position) & 0x80) {
938 && u8d.first != 0xFEFF
939 && ( u8d.first == 0x85
940 || ( u8d.first >= 0xA0
941 && u8d.first <= 0xD7FF)
942 || ( u8d.first >= 0xE000
943 && u8d.first <= 0xFFFD)
944 || ( u8d.first >= 0x10000
945 && u8d.first <= 0x10FFFF)))
946 return Position + u8d.second;
954 if (*Position == 0x0D) {
955 if (Position + 1 != End && *(Position + 1) == 0x0A)
960 if (*Position == 0x0A)
968 if (*Position ==
' ')
976 if (*Position ==
' ' || *Position ==
'\t')
984 if (*Position ==
' ' || *Position ==
'\t')
986 return skip_nb_char(Position);
1000void Scanner::advanceWhile(SkipWhileFunc Func) {
1001 auto Final = skip_while(Func, Current);
1002 Column += Final - Current;
1010void Scanner::scan_ns_uri_char() {
1014 if (( *Current ==
'%'
1015 && Current + 2 < End
1019 ||
StringRef(Current, 1).find_first_of(
"#;/?:@&=+$,_.!~*'()[]")
1030 setError(
"Cannot consume non-ascii characters", Current);
1035 if (
uint8_t(*Current) >= 0x80) {
1036 setError(
"Cannot consume non-ascii characters", Current);
1047void Scanner::skip(
uint32_t Distance) {
1048 Current += Distance;
1050 assert(Current <= End &&
"Skipped past the end");
1054 if (Position == End)
1056 return *Position ==
' ' || *Position ==
'\t' || *Position ==
'\r' ||
1061 if (Position == End || isBlankOrBreak(Position))
1069bool Scanner::isLineEmpty(
StringRef Line) {
1070 for (
const auto *Position =
Line.begin(); Position !=
Line.end(); ++Position)
1071 if (!isBlankOrBreak(Position))
1076bool Scanner::consumeLineBreakIfPresent() {
1077 auto Next = skip_b_break(Current);
1078 if (Next == Current)
1088 ,
bool IsRequired) {
1089 if (IsSimpleKeyAllowed) {
1093 SK.Column = AtColumn;
1094 SK.IsRequired = IsRequired;
1095 SK.FlowLevel = FlowLevel;
1100void Scanner::removeStaleSimpleKeyCandidates() {
1102 i != SimpleKeys.
end();) {
1103 if (i->Line != Line || i->Column + 1024 < Column) {
1105 setError(
"Could not find expected : for simple key"
1106 , i->Tok->Range.begin());
1107 i = SimpleKeys.
erase(i);
1113void Scanner::removeSimpleKeyCandidatesOnFlowLevel(
unsigned Level) {
1114 if (!SimpleKeys.
empty() && (SimpleKeys.
end() - 1)->FlowLevel == Level)
1118bool Scanner::unrollIndent(
int ToColumn) {
1124 while (Indent > ToColumn) {
1134bool Scanner::rollIndent(
int ToColumn
1139 if (Indent < ToColumn) {
1146 TokenQueue.
insert(InsertPoint,
T);
1151void Scanner::skipComment() {
1152 if (Current == End || *Current !=
'#')
1165void Scanner::scanToNextToken() {
1167 while (Current != End && (*Current ==
' ' || *Current ==
'\t')) {
1182 IsSimpleKeyAllowed =
true;
1186bool Scanner::scanStreamStart() {
1187 IsStartOfStream =
false;
1195 Current += EI.second;
1199bool Scanner::scanStreamEnd() {
1208 IsSimpleKeyAllowed =
false;
1209 IsAdjacentValueAllowedInFlow =
false;
1218bool Scanner::scanDirective() {
1222 IsSimpleKeyAllowed =
false;
1223 IsAdjacentValueAllowedInFlow =
false;
1228 Current = skip_while(&Scanner::skip_ns_char, Current);
1230 Current = skip_while(&Scanner::skip_s_white, Current);
1233 if (
Name ==
"YAML") {
1234 Current = skip_while(&Scanner::skip_ns_char, Current);
1239 }
else if(
Name ==
"TAG") {
1240 Current = skip_while(&Scanner::skip_ns_char, Current);
1241 Current = skip_while(&Scanner::skip_s_white, Current);
1242 Current = skip_while(&Scanner::skip_ns_char, Current);
1251bool Scanner::scanDocumentIndicator(
bool IsStart) {
1254 IsSimpleKeyAllowed =
false;
1255 IsAdjacentValueAllowedInFlow =
false;
1265bool Scanner::scanFlowCollectionStart(
bool IsSequence) {
1274 saveSimpleKeyCandidate(--TokenQueue.
end(), Column - 1,
false);
1277 IsSimpleKeyAllowed =
true;
1279 IsAdjacentValueAllowedInFlow =
false;
1284bool Scanner::scanFlowCollectionEnd(
bool IsSequence) {
1285 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1286 IsSimpleKeyAllowed =
false;
1287 IsAdjacentValueAllowedInFlow =
true;
1299bool Scanner::scanFlowEntry() {
1300 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1301 IsSimpleKeyAllowed =
true;
1302 IsAdjacentValueAllowedInFlow =
false;
1311bool Scanner::scanBlockEntry() {
1313 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1314 IsSimpleKeyAllowed =
true;
1315 IsAdjacentValueAllowedInFlow =
false;
1324bool Scanner::scanKey() {
1328 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1329 IsSimpleKeyAllowed = !FlowLevel;
1330 IsAdjacentValueAllowedInFlow =
false;
1340bool Scanner::scanValue() {
1343 if (!SimpleKeys.
empty()) {
1347 T.Range = SK.Tok->Range;
1349 for (i = TokenQueue.
begin(), e = TokenQueue.
end(); i != e; ++i) {
1362 IsSimpleKeyAllowed =
false;
1366 IsSimpleKeyAllowed = !FlowLevel;
1368 IsAdjacentValueAllowedInFlow =
false;
1391 while (
I >=
First && *
I ==
'\\') --
I;
1394 return (Position - 1 -
I) % 2 == 1;
1397bool Scanner::scanFlowScalar(
bool IsDoubleQuoted) {
1399 unsigned ColStart = Column;
1400 if (IsDoubleQuoted) {
1403 while (Current != End && *Current !=
'"')
1407 }
while ( Current != End
1408 && *(Current - 1) ==
'\\'
1412 while (Current != End) {
1414 if (Current + 1 < End && *Current ==
'\'' && *(Current + 1) ==
'\'') {
1417 }
else if (*Current ==
'\'')
1421 i = skip_b_break(Current);
1436 if (Current == End) {
1437 setError(
"Expected quote at end of scalar", Current);
1447 saveSimpleKeyCandidate(--TokenQueue.
end(), ColStart,
false);
1449 IsSimpleKeyAllowed =
false;
1450 IsAdjacentValueAllowedInFlow =
true;
1455bool Scanner::scanPlainScalar() {
1457 unsigned ColStart = Column;
1458 unsigned LeadingBlanks = 0;
1459 assert(Indent >= -1 &&
"Indent must be >= -1 !");
1460 unsigned indent =
static_cast<unsigned>(Indent + 1);
1461 while (Current != End) {
1462 if (*Current ==
'#')
1465 while (Current != End &&
1466 ((*Current !=
':' && isPlainSafeNonBlank(Current)) ||
1467 (*Current ==
':' && isPlainSafeNonBlank(Current + 1)))) {
1476 if (!isBlankOrBreak(Current))
1481 while (isBlankOrBreak(Tmp)) {
1484 if (LeadingBlanks && (Column <
indent) && *Tmp ==
'\t') {
1485 setError(
"Found invalid tab character in indentation", Tmp);
1491 i = skip_b_break(Tmp);
1500 if (!FlowLevel && Column <
indent)
1505 if (Start == Current) {
1506 setError(
"Got empty plain scalar", Start);
1515 saveSimpleKeyCandidate(--TokenQueue.
end(), ColStart,
false);
1517 IsSimpleKeyAllowed =
false;
1518 IsAdjacentValueAllowedInFlow =
false;
1523bool Scanner::scanAliasOrAnchor(
bool IsAlias) {
1525 unsigned ColStart = Column;
1527 while (Current != End) {
1528 if ( *Current ==
'[' || *Current ==
']'
1529 || *Current ==
'{' || *Current ==
'}'
1540 if (Start + 1 == Current) {
1541 setError(
"Got empty alias or anchor", Start);
1551 saveSimpleKeyCandidate(--TokenQueue.
end(), ColStart,
false);
1553 IsSimpleKeyAllowed =
false;
1554 IsAdjacentValueAllowedInFlow =
false;
1559bool Scanner::scanBlockScalarIndicators(
char &StyleIndicator,
1560 char &ChompingIndicator,
1561 unsigned &IndentIndicator,
1563 StyleIndicator = scanBlockStyleIndicator();
1564 if (!scanBlockScalarHeader(ChompingIndicator, IndentIndicator, IsDone))
1569char Scanner::scanBlockStyleIndicator() {
1570 char Indicator =
' ';
1571 if (Current != End && (*Current ==
'>' || *Current ==
'|')) {
1572 Indicator = *Current;
1578char Scanner::scanBlockChompingIndicator() {
1579 char Indicator =
' ';
1580 if (Current != End && (*Current ==
'+' || *Current ==
'-')) {
1581 Indicator = *Current;
1593 if (ChompingIndicator ==
'-')
1595 if (ChompingIndicator ==
'+')
1598 return Str.empty() ? 0 : 1;
1601unsigned Scanner::scanBlockIndentationIndicator() {
1602 unsigned Indent = 0;
1603 if (Current != End && (*Current >=
'1' && *Current <=
'9')) {
1610bool Scanner::scanBlockScalarHeader(
char &ChompingIndicator,
1611 unsigned &IndentIndicator,
bool &IsDone) {
1612 auto Start = Current;
1614 ChompingIndicator = scanBlockChompingIndicator();
1615 IndentIndicator = scanBlockIndentationIndicator();
1617 if (ChompingIndicator ==
' ')
1618 ChompingIndicator = scanBlockChompingIndicator();
1619 Current = skip_while(&Scanner::skip_s_white, Current);
1622 if (Current == End) {
1631 if (!consumeLineBreakIfPresent()) {
1632 setError(
"Expected a line break after block scalar header", Current);
1638bool Scanner::findBlockScalarIndent(
unsigned &BlockIndent,
1639 unsigned BlockExitIndent,
1640 unsigned &LineBreaks,
bool &IsDone) {
1641 unsigned MaxAllSpaceLineCharacters = 0;
1645 advanceWhile(&Scanner::skip_s_space);
1646 if (skip_nb_char(Current) != Current) {
1648 if (Column <= BlockExitIndent) {
1653 BlockIndent = Column;
1654 if (MaxAllSpaceLineCharacters > BlockIndent) {
1656 "Leading all-spaces line must be smaller than the block indent",
1657 LongestAllSpaceLine);
1662 if (skip_b_break(Current) != Current &&
1663 Column > MaxAllSpaceLineCharacters) {
1666 MaxAllSpaceLineCharacters = Column;
1667 LongestAllSpaceLine = Current;
1671 if (Current == End) {
1676 if (!consumeLineBreakIfPresent()) {
1685bool Scanner::scanBlockScalarIndent(
unsigned BlockIndent,
1686 unsigned BlockExitIndent,
bool &IsDone) {
1688 while (Column < BlockIndent) {
1689 auto I = skip_s_space(Current);
1696 if (skip_nb_char(Current) == Current)
1699 if (Column <= BlockExitIndent) {
1704 if (Column < BlockIndent) {
1705 if (Current != End && *Current ==
'#') {
1709 setError(
"A text line is less indented than the block scalar", Current);
1715bool Scanner::scanBlockScalar(
bool IsLiteral) {
1716 assert(*Current ==
'|' || *Current ==
'>');
1717 char StyleIndicator;
1718 char ChompingIndicator;
1719 unsigned BlockIndent;
1720 bool IsDone =
false;
1721 if (!scanBlockScalarIndicators(StyleIndicator, ChompingIndicator, BlockIndent,
1726 bool IsFolded = StyleIndicator ==
'>';
1728 const auto *Start = Current;
1729 unsigned BlockExitIndent = Indent < 0 ? 0 : (
unsigned)Indent;
1730 unsigned LineBreaks = 0;
1731 if (BlockIndent == 0) {
1732 if (!findBlockScalarIndent(BlockIndent, BlockExitIndent, LineBreaks,
1740 if (!scanBlockScalarIndent(BlockIndent, BlockExitIndent, IsDone))
1746 auto LineStart = Current;
1747 advanceWhile(&Scanner::skip_nb_char);
1748 if (LineStart != Current) {
1749 if (LineBreaks && IsFolded && !Scanner::isLineEmpty(Str)) {
1753 if (LineBreaks == 1) {
1754 Str.append(LineBreaks,
1755 isLineEmpty(
StringRef(LineStart, Current - LineStart))
1765 Str.append(LineBreaks,
'\n');
1766 Str.append(
StringRef(LineStart, Current - LineStart));
1774 if (!consumeLineBreakIfPresent())
1779 if (Current == End && !LineBreaks)
1786 IsSimpleKeyAllowed =
true;
1787 IsAdjacentValueAllowedInFlow =
false;
1792 T.Value = std::string(Str);
1797bool Scanner::scanTag() {
1799 unsigned ColStart = Column;
1801 if (Current == End || isBlankOrBreak(Current));
1802 else if (*Current ==
'<') {
1809 Current = skip_while(&Scanner::skip_ns_char, Current);
1818 saveSimpleKeyCandidate(--TokenQueue.
end(), ColStart,
false);
1820 IsSimpleKeyAllowed =
false;
1821 IsAdjacentValueAllowedInFlow =
false;
1826bool Scanner::fetchMoreTokens() {
1827 if (IsStartOfStream)
1828 return scanStreamStart();
1833 return scanStreamEnd();
1835 removeStaleSimpleKeyCandidates();
1837 unrollIndent(Column);
1839 if (Column == 0 && *Current ==
'%')
1840 return scanDirective();
1842 if (Column == 0 && Current + 4 <= End
1844 && *(Current + 1) ==
'-'
1845 && *(Current + 2) ==
'-'
1846 && (Current + 3 == End || isBlankOrBreak(Current + 3)))
1847 return scanDocumentIndicator(
true);
1849 if (Column == 0 && Current + 4 <= End
1851 && *(Current + 1) ==
'.'
1852 && *(Current + 2) ==
'.'
1853 && (Current + 3 == End || isBlankOrBreak(Current + 3)))
1854 return scanDocumentIndicator(
false);
1856 if (*Current ==
'[')
1857 return scanFlowCollectionStart(
true);
1859 if (*Current ==
'{')
1860 return scanFlowCollectionStart(
false);
1862 if (*Current ==
']')
1863 return scanFlowCollectionEnd(
true);
1865 if (*Current ==
'}')
1866 return scanFlowCollectionEnd(
false);
1868 if (*Current ==
',')
1869 return scanFlowEntry();
1871 if (*Current ==
'-' && (isBlankOrBreak(Current + 1) || Current + 1 == End))
1872 return scanBlockEntry();
1874 if (*Current ==
'?' && (Current + 1 == End || isBlankOrBreak(Current + 1)))
1877 if (*Current ==
':' &&
1878 (!isPlainSafeNonBlank(Current + 1) || IsAdjacentValueAllowedInFlow))
1881 if (*Current ==
'*')
1882 return scanAliasOrAnchor(
true);
1884 if (*Current ==
'&')
1885 return scanAliasOrAnchor(
false);
1887 if (*Current ==
'!')
1890 if (*Current ==
'|' && !FlowLevel)
1891 return scanBlockScalar(
true);
1893 if (*Current ==
'>' && !FlowLevel)
1894 return scanBlockScalar(
false);
1896 if (*Current ==
'\'')
1897 return scanFlowScalar(
false);
1899 if (*Current ==
'"')
1900 return scanFlowScalar(
true);
1904 if ((!isBlankOrBreak(Current) &&
1905 FirstChar.find_first_of(
"-?:,[]{}#&*!|>'\"%@`") ==
StringRef::npos) ||
1907 isPlainSafeNonBlank(Current + 1)))
1908 return scanPlainScalar();
1910 setError(
"Unrecognized character while tokenizing.", Current);
1915 std::error_code *EC)
1916 : scanner(new
Scanner(Input, SM, ShowColors, EC)) {}
1919 std::error_code *EC)
1920 : scanner(new
Scanner(InputBuffer, SM, ShowColors, EC)) {}
1932 scanner->printError(
Range.Start, Kind, Msg,
Range);
1942 CurrentDoc.reset(
new Document(*
this));
1959 SourceRange =
SMRange(Start, Start);
1964 if (!Raw.
empty() && Raw !=
"!") {
1967 Ret = std::string(
Doc->getTagMap().find(
"!")->second);
1971 Ret = std::string(
Doc->getTagMap().find(
"!!")->second);
1976 std::map<StringRef, StringRef>::const_iterator It =
1977 Doc->getTagMap().find(TagHandle);
1978 if (It !=
Doc->getTagMap().end())
1979 Ret = std::string(It->second);
1983 T.Range = TagHandle;
1993 return "tag:yaml.org,2002:null";
1997 return "tag:yaml.org,2002:str";
1999 return "tag:yaml.org,2002:map";
2001 return "tag:yaml.org,2002:seq";
2008 return Doc->peekNext();
2012 return Doc->getNext();
2016 return Doc->parseBlockNode();
2020 return Doc->NodeAllocator;
2024 Doc->setError(Msg, Tok);
2028 return Doc->failed();
2032 if (
Value[0] ==
'"')
2033 return getDoubleQuotedValue(
Value, Storage);
2034 if (
Value[0] ==
'\'')
2035 return getSingleQuotedValue(
Value, Storage);
2036 return getPlainValue(
Value, Storage);
2060 return UnquotedValue;
2064 char LastNewLineAddedAs =
'\0';
2066 if (UnquotedValue[
I] !=
'\r' && UnquotedValue[
I] !=
'\n') {
2068 UnquotedValue = UnescapeCallback(UnquotedValue.
drop_front(
I), Storage);
2069 LastNewLineAddedAs =
'\0';
2076 LastNewLineAddedAs =
' ';
2082 switch (LastNewLineAddedAs) {
2085 Storage.
back() =
'\n';
2086 LastNewLineAddedAs =
'\n';
2094 LastNewLineAddedAs =
' ';
2099 if (UnquotedValue.
substr(
I, 2) ==
"\r\n")
2108ScalarNode::getDoubleQuotedValue(
StringRef RawValue,
2111 RawValue.
back() ==
'"');
2114 auto UnescapeFunc = [
this](
StringRef UnquotedValue,
2117 if (UnquotedValue.
size() == 1) {
2119 T.Range = UnquotedValue;
2120 setError(
"Unrecognized escape code",
T);
2125 switch (UnquotedValue[0]) {
2129 setError(
"Unrecognized escape code",
T);
2135 if (UnquotedValue.
size() >= 2 && UnquotedValue[1] ==
'\n')
2193 if (UnquotedValue.
size() < 3)
2196 unsigned int UnicodeScalarValue;
2199 UnicodeScalarValue = 0xFFFD;
2204 if (UnquotedValue.
size() < 5)
2207 unsigned int UnicodeScalarValue;
2210 UnicodeScalarValue = 0xFFFD;
2215 if (UnquotedValue.
size() < 9)
2218 unsigned int UnicodeScalarValue;
2221 UnicodeScalarValue = 0xFFFD;
2235 RawValue.
back() ==
'\'');
2238 auto UnescapeFunc = [](
StringRef UnquotedValue,
2253 RawValue = RawValue.
rtrim(
"\r\n \t");
2308 setError(
"Unexpected token in Key Value.", t);
2324void MappingNode::increment() {
2327 CurrentEntry =
nullptr;
2331 CurrentEntry->
skip();
2334 CurrentEntry =
nullptr;
2347 CurrentEntry =
nullptr;
2350 setError(
"Unexpected token. Expected Key or Block End",
T);
2354 CurrentEntry =
nullptr;
2368 CurrentEntry =
nullptr;
2371 setError(
"Unexpected token. Expected Key, Flow Entry, or Flow "
2375 CurrentEntry =
nullptr;
2383 CurrentEntry =
nullptr;
2387 CurrentEntry->
skip();
2394 if (!CurrentEntry) {
2396 CurrentEntry =
nullptr;
2402 CurrentEntry =
nullptr;
2405 setError(
"Unexpected token. Expected Block Entry or Block End."
2410 CurrentEntry =
nullptr;
2417 if (!CurrentEntry) {
2419 CurrentEntry =
nullptr;
2425 CurrentEntry =
nullptr;
2427 }
else if (SeqType ==
ST_Flow) {
2432 WasPreviousTokenFlowEntry =
true;
2440 CurrentEntry =
nullptr;
2445 setError(
"Could not find closing ]!",
T);
2448 CurrentEntry =
nullptr;
2451 if (!WasPreviousTokenFlowEntry) {
2452 setError(
"Expected , between entries!",
T);
2454 CurrentEntry =
nullptr;
2459 if (!CurrentEntry) {
2462 WasPreviousTokenFlowEntry =
false;
2471 TagMap[
"!!"] =
"tag:yaml.org,2002:";
2473 if (parseDirectives())
2481 if (stream.scanner->failed())
2496Token &Document::peekNext() {
2497 return stream.scanner->peekNext();
2500Token Document::getNext() {
2501 return stream.scanner->getNext();
2504void Document::setError(
const Twine &Message,
Token &Location)
const {
2505 stream.scanner->setError(Message, Location.Range.begin());
2508bool Document::failed()
const {
2509 return stream.scanner->failed();
2521 return new (NodeAllocator)
AliasNode(stream.CurrentDoc,
T.Range.substr(1));
2524 setError(
"Already encountered an anchor for this node!",
T);
2527 AnchorInfo = getNext();
2529 goto parse_property;
2532 setError(
"Already encountered a tag for this node!",
T);
2535 TagInfo = getNext();
2537 goto parse_property;
2547 return new (NodeAllocator)
SequenceNode( stream.CurrentDoc
2553 return new (NodeAllocator)
2560 return new (NodeAllocator)
2567 return new (NodeAllocator)
2574 return new (NodeAllocator)
2581 return new (NodeAllocator)
2588 StringRef NullTerminatedStr(
T.Value.c_str(),
T.Value.length() + 1);
2590 return new (NodeAllocator)
2592 TagInfo.
Range, StrCopy,
T.Range);
2596 return new (NodeAllocator)
2607 return new (NodeAllocator)
NullNode(stream.CurrentDoc);
2611 if (Root && (isa<MappingNode>(Root) || isa<SequenceNode>(Root)))
2612 return new (NodeAllocator)
NullNode(stream.CurrentDoc);
2614 setError(
"Unexpected token",
T);
2624bool Document::parseDirectives() {
2625 bool isDirective =
false;
2629 parseTAGDirective();
2632 parseYAMLDirective();
2640void Document::parseYAMLDirective() {
2644void Document::parseTAGDirective() {
2648 T =
T.substr(
T.find_first_of(
" \t")).ltrim(
" \t");
2649 std::size_t HandleEnd =
T.find_first_of(
" \t");
2650 StringRef TagHandle =
T.substr(0, HandleEnd);
2651 StringRef TagPrefix =
T.substr(HandleEnd).ltrim(
" \t");
2652 TagMap[TagHandle] = TagPrefix;
2655bool Document::expectToken(
int TK) {
2658 setError(
"Unexpected token",
T);
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_ATTRIBUTE_NOINLINE
LLVM_ATTRIBUTE_NOINLINE - On compilers where we have a directive to do so, mark a method "not for inl...
std::optional< std::vector< StOtherPiece > > Other
static Cursor skipComment(Cursor C)
Skip a line comment and return the updated cursor.
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallString class.
This file defines the SmallVector class.
static EncodingInfo getUnicodeEncoding(StringRef Input)
getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode encoding form of Input.
static bool is_ns_hex_digit(const char C)
static bool is_ns_word_char(const char C)
static unsigned getChompedLineBreaks(char ChompingIndicator, unsigned LineBreaks, StringRef Str)
Get the number of line breaks after chomping.
std::pair< uint32_t, unsigned > UTF8Decoded
The Unicode scalar value of a UTF-8 minimal well-formed code unit subsequence and the subsequence's l...
@ UEF_UTF32_LE
UTF-32 Little Endian.
@ UEF_UTF16_BE
UTF-16 Big Endian.
@ UEF_UTF16_LE
UTF-16 Little Endian.
@ UEF_UTF32_BE
UTF-32 Big Endian.
@ UEF_UTF8
UTF-8 or ascii.
@ UEF_Unknown
Not a valid Unicode encoding.
static LLVM_ATTRIBUTE_NOINLINE bool wasEscaped(StringRef::iterator First, StringRef::iterator Position)
static StringRef parseScalarValue(StringRef UnquotedValue, SmallVectorImpl< char > &Storage, StringRef LookupChars, std::function< StringRef(StringRef, SmallVectorImpl< char > &)> UnescapeCallback)
parseScalarValue - A common parsing routine for all flow scalar styles.
std::pair< UnicodeEncodingForm, unsigned > EncodingInfo
EncodingInfo - Holds the encoding type and length of the byte order mark if it exists.
static UTF8Decoded decodeUTF8(StringRef Range)
static void encodeUTF8(uint32_t UnicodeScalarValue, SmallVectorImpl< char > &Result)
encodeUTF8 - Encode UnicodeScalarValue in UTF-8 and append it to result.
A linked-list with a custom, local allocator.
iterator insert(iterator I, T &&V)
void resetAlloc()
Reset the underlying allocator.
IteratorImpl< T, typename list_type::iterator > iterator
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Allocate memory in an ever growing pool, as if by bump-pointer.
Tagged union holding either a T or a Error.
const char * getBufferStart() const
const char * getBufferEnd() const
static std::unique_ptr< MemoryBuffer > getMemBuffer(StringRef InputData, StringRef BufferName="", bool RequiresNullTerminator=true)
Open the specified memory range as a MemoryBuffer.
Represents a location in source code.
static SMLoc getFromPointer(const char *Ptr)
Represents a range in source code.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void reserve(size_type N)
iterator erase(const_iterator CI)
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This owns the files read by a parser, handles include stacks, and handles diagnostic wrangling.
void PrintMessage(raw_ostream &OS, SMLoc Loc, DiagKind Kind, const Twine &Msg, ArrayRef< SMRange > Ranges={}, ArrayRef< SMFixIt > FixIts={}, bool ShowColors=true) const
Emit a message about the specified location with the specified string.
unsigned AddNewSourceBuffer(std::unique_ptr< MemoryBuffer > F, SMLoc IncludeLoc)
Add a new source buffer to this source manager.
StringRef - Represent a constant reference to a string, i.e.
size_t find_last_not_of(char C, size_t From=npos) const
Find the last character in the string that is not C, or npos if not found.
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
constexpr bool empty() const
empty - Check if the string is empty.
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
char back() const
back - Get the last character in the string.
constexpr size_t size() const
size - Get the string size.
char front() const
front - Get the first character in the string.
size_t find_last_of(char C, size_t From=npos) const
Find the last character in the string that is C, or npos if not found.
StringRef ltrim(char Char) const
Return string with consecutive Char characters starting from the left removed.
size_t find_first_of(char C, size_t From=0) const
Find the first character in the string that is C, or npos if not found.
StringRef rtrim(char Char) const
Return string with consecutive Char characters starting from the right removed.
StringRef take_front(size_t N=1) const
Return a StringRef equal to 'this' but with only the first N elements remaining.
StringRef copy(Allocator &A) const
static constexpr size_t npos
StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
TypeID
Definitions of all of the base types for the Type system.
LLVM Value Representation.
This class implements an extremely fast bulk output stream that can only output to a stream.
Represents an alias to a Node with an anchor.
A block scalar node is an opaque datum that can be presented as a series of zero or more Unicode scal...
A YAML Stream is a sequence of Documents.
Node * parseBlockNode()
Root for parsing a node. Returns a single node.
bool skip()
Finish parsing the current document and return true if there are more.
Node * getRoot()
Parse and return the root level node.
Document(Stream &ParentStream)
Node * getValue()
Parse and return the value.
Node * getKey()
Parse and return the key.
Represents a YAML map created from either a block map or a flow map.
@ MT_Inline
An inline mapping node is used for "[key: value]".
Abstract base class for all Nodes.
StringRef getRawTag() const
Get the tag as it was written in the document.
unsigned int getType() const
std::string getVerbatimTag() const
Get the verbatim tag for a given Node.
std::unique_ptr< Document > & Doc
Node(unsigned int Type, std::unique_ptr< Document > &, StringRef Anchor, StringRef Tag)
BumpPtrAllocator & getAllocator()
void setError(const Twine &Message, Token &Location) const
A scalar node is an opaque datum that can be presented as a series of zero or more Unicode scalar val...
StringRef getValue(SmallVectorImpl< char > &Storage) const
Gets the value of this node as a StringRef.
Scans YAML tokens from a MemoryBuffer.
Scanner(StringRef Input, SourceMgr &SM, bool ShowColors=true, std::error_code *EC=nullptr)
void setError(const Twine &Message, StringRef::iterator Position)
Token getNext()
Parse the next token and pop it from the queue.
bool failed()
Returns true if an error occurred while parsing.
void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message, ArrayRef< SMRange > Ranges={})
Token & peekNext()
Parse the next token and return it without popping it.
Represents a YAML sequence created from either a block sequence or a flow sequence.
This class represents a YAML stream potentially containing multiple documents.
document_iterator begin()
Stream(StringRef Input, SourceMgr &, bool ShowColors=true, std::error_code *EC=nullptr)
This keeps a reference to the string referenced by Input.
void printError(Node *N, const Twine &Msg, SourceMgr::DiagKind Kind=SourceMgr::DK_Error)
Iterator abstraction for Documents over a Stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
bool isPrintable(int UCS)
Determines if a character is likely to be displayed correctly on the terminal.
bool dumpTokens(StringRef Input, raw_ostream &)
Dump all the tokens in this stream to OS.
std::optional< bool > parseBool(StringRef S)
Parse S as a bool according to https://yaml.org/type/bool.html.
bool scanTokens(StringRef Input)
Scans all tokens in input without outputting anything.
void skip(CollectionType &C)
std::string escape(StringRef Input, bool EscapePrintable=true)
Escape Input for a double quoted scalar; if EscapePrintable is true, all UTF8 sequences will be escap...
This is an optimization pass for GlobalISel generic memory operations.
std::error_code make_error_code(BitcodeError E)
testing::Matcher< const detail::ErrorHolder & > Failed()
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Token - A single YAML token.
enum llvm::yaml::Token::TokenKind Kind
std::string Value
The value of a block scalar node.
StringRef Range
A string of length 0 or more whose begin() points to the logical location of the token in the input.