LLVM 22.0.0git
YAMLParser.cpp
Go to the documentation of this file.
1//===- YAMLParser.cpp - Simple YAML parser --------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a YAML parser.
10//
11//===----------------------------------------------------------------------===//
12
15#include "llvm/ADT/ArrayRef.h"
16#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/StringRef.h"
21#include "llvm/ADT/Twine.h"
25#include "llvm/Support/SMLoc.h"
29#include <cassert>
30#include <cstddef>
31#include <cstdint>
32#include <map>
33#include <memory>
34#include <string>
35#include <system_error>
36#include <utility>
37
38using namespace llvm;
39using namespace yaml;
40
42 UEF_UTF32_LE, ///< UTF-32 Little Endian
43 UEF_UTF32_BE, ///< UTF-32 Big Endian
44 UEF_UTF16_LE, ///< UTF-16 Little Endian
45 UEF_UTF16_BE, ///< UTF-16 Big Endian
46 UEF_UTF8, ///< UTF-8 or ascii.
47 UEF_Unknown ///< Not a valid Unicode encoding.
48};
49
50/// EncodingInfo - Holds the encoding type and length of the byte order mark if
51/// it exists. Length is in {0, 2, 3, 4}.
52using EncodingInfo = std::pair<UnicodeEncodingForm, unsigned>;
53
54/// getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode
55/// encoding form of \a Input.
56///
57/// @param Input A string of length 0 or more.
58/// @returns An EncodingInfo indicating the Unicode encoding form of the input
59/// and how long the byte order mark is if one exists.
61 if (Input.empty())
62 return std::make_pair(UEF_Unknown, 0);
63
64 switch (uint8_t(Input[0])) {
65 case 0x00:
66 if (Input.size() >= 4) {
67 if ( Input[1] == 0
68 && uint8_t(Input[2]) == 0xFE
69 && uint8_t(Input[3]) == 0xFF)
70 return std::make_pair(UEF_UTF32_BE, 4);
71 if (Input[1] == 0 && Input[2] == 0 && Input[3] != 0)
72 return std::make_pair(UEF_UTF32_BE, 0);
73 }
74
75 if (Input.size() >= 2 && Input[1] != 0)
76 return std::make_pair(UEF_UTF16_BE, 0);
77 return std::make_pair(UEF_Unknown, 0);
78 case 0xFF:
79 if ( Input.size() >= 4
80 && uint8_t(Input[1]) == 0xFE
81 && Input[2] == 0
82 && Input[3] == 0)
83 return std::make_pair(UEF_UTF32_LE, 4);
84
85 if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFE)
86 return std::make_pair(UEF_UTF16_LE, 2);
87 return std::make_pair(UEF_Unknown, 0);
88 case 0xFE:
89 if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFF)
90 return std::make_pair(UEF_UTF16_BE, 2);
91 return std::make_pair(UEF_Unknown, 0);
92 case 0xEF:
93 if ( Input.size() >= 3
94 && uint8_t(Input[1]) == 0xBB
95 && uint8_t(Input[2]) == 0xBF)
96 return std::make_pair(UEF_UTF8, 3);
97 return std::make_pair(UEF_Unknown, 0);
98 }
99
100 // It could still be utf-32 or utf-16.
101 if (Input.size() >= 4 && Input[1] == 0 && Input[2] == 0 && Input[3] == 0)
102 return std::make_pair(UEF_UTF32_LE, 0);
103
104 if (Input.size() >= 2 && Input[1] == 0)
105 return std::make_pair(UEF_UTF16_LE, 0);
106
107 return std::make_pair(UEF_UTF8, 0);
108}
109
110/// Pin the vtables to this file.
111void Node::anchor() {}
112void NullNode::anchor() {}
113void ScalarNode::anchor() {}
114void BlockScalarNode::anchor() {}
115void KeyValueNode::anchor() {}
116void MappingNode::anchor() {}
117void SequenceNode::anchor() {}
118void AliasNode::anchor() {}
119
120namespace llvm {
121namespace yaml {
122
123/// Token - A single YAML token.
160
161} // end namespace yaml
162} // end namespace llvm
163
165
166namespace {
167
168/// This struct is used to track simple keys.
169///
170/// Simple keys are handled by creating an entry in SimpleKeys for each Token
171/// which could legally be the start of a simple key. When peekNext is called,
172/// if the Token to be returned is referenced by a SimpleKey, we continue
173/// tokenizing until that potential simple key has either been found to not be
174/// a simple key (we moved on to the next line or went further than 1024 chars).
175/// Or when we run into a Value, and then insert a Key token (and possibly
176/// others) before the SimpleKey's Tok.
177struct SimpleKey {
179 unsigned Column = 0;
180 unsigned Line = 0;
181 unsigned FlowLevel = 0;
182 bool IsRequired = false;
183
184 bool operator ==(const SimpleKey &Other) {
185 return Tok == Other.Tok;
186 }
187};
188
189} // end anonymous namespace
190
191/// The Unicode scalar value of a UTF-8 minimal well-formed code unit
192/// subsequence and the subsequence's length in code units (uint8_t).
193/// A length of 0 represents an error.
194using UTF8Decoded = std::pair<uint32_t, unsigned>;
195
197 StringRef::iterator Position= Range.begin();
198 StringRef::iterator End = Range.end();
199 // 1 byte: [0x00, 0x7f]
200 // Bit pattern: 0xxxxxxx
201 if (Position < End && (*Position & 0x80) == 0) {
202 return std::make_pair(*Position, 1);
203 }
204 // 2 bytes: [0x80, 0x7ff]
205 // Bit pattern: 110xxxxx 10xxxxxx
206 if (Position + 1 < End && ((*Position & 0xE0) == 0xC0) &&
207 ((*(Position + 1) & 0xC0) == 0x80)) {
208 uint32_t codepoint = ((*Position & 0x1F) << 6) |
209 (*(Position + 1) & 0x3F);
210 if (codepoint >= 0x80)
211 return std::make_pair(codepoint, 2);
212 }
213 // 3 bytes: [0x800, 0xffff]
214 // Bit pattern: 1110xxxx 10xxxxxx 10xxxxxx
215 if (Position + 2 < End && ((*Position & 0xF0) == 0xE0) &&
216 ((*(Position + 1) & 0xC0) == 0x80) &&
217 ((*(Position + 2) & 0xC0) == 0x80)) {
218 uint32_t codepoint = ((*Position & 0x0F) << 12) |
219 ((*(Position + 1) & 0x3F) << 6) |
220 (*(Position + 2) & 0x3F);
221 // Codepoints between 0xD800 and 0xDFFF are invalid, as
222 // they are high / low surrogate halves used by UTF-16.
223 if (codepoint >= 0x800 &&
224 (codepoint < 0xD800 || codepoint > 0xDFFF))
225 return std::make_pair(codepoint, 3);
226 }
227 // 4 bytes: [0x10000, 0x10FFFF]
228 // Bit pattern: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
229 if (Position + 3 < End && ((*Position & 0xF8) == 0xF0) &&
230 ((*(Position + 1) & 0xC0) == 0x80) &&
231 ((*(Position + 2) & 0xC0) == 0x80) &&
232 ((*(Position + 3) & 0xC0) == 0x80)) {
233 uint32_t codepoint = ((*Position & 0x07) << 18) |
234 ((*(Position + 1) & 0x3F) << 12) |
235 ((*(Position + 2) & 0x3F) << 6) |
236 (*(Position + 3) & 0x3F);
237 if (codepoint >= 0x10000 && codepoint <= 0x10FFFF)
238 return std::make_pair(codepoint, 4);
239 }
240 return std::make_pair(0, 0);
241}
242
243namespace llvm {
244namespace yaml {
245
246/// Scans YAML tokens from a MemoryBuffer.
247class Scanner {
248public:
249 Scanner(StringRef Input, SourceMgr &SM, bool ShowColors = true,
250 std::error_code *EC = nullptr);
251 Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors = true,
252 std::error_code *EC = nullptr);
253
254 /// Parse the next token and return it without popping it.
255 Token &peekNext();
256
257 /// Parse the next token and pop it from the queue.
258 Token getNext();
259
260 void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message,
261 ArrayRef<SMRange> Ranges = {}) {
262 SM.PrintMessage(Loc, Kind, Message, Ranges, /* FixIts= */ {}, ShowColors);
263 }
264
265 void setError(const Twine &Message, StringRef::iterator Position) {
266 if (Position >= End)
267 Position = End - 1;
268
269 // propagate the error if possible
270 if (EC)
271 *EC = make_error_code(std::errc::invalid_argument);
272
273 // Don't print out more errors after the first one we encounter. The rest
274 // are just the result of the first, and have no meaning.
275 if (!Failed)
277 Failed = true;
278 }
279
280 /// Returns true if an error occurred while parsing.
281 bool failed() {
282 return Failed;
283 }
284
285private:
286 void init(MemoryBufferRef Buffer);
287
288 StringRef currentInput() {
289 return StringRef(Current, End - Current);
290 }
291
292 /// Decode a UTF-8 minimal well-formed code unit subsequence starting
293 /// at \a Position.
294 ///
295 /// If the UTF-8 code units starting at Position do not form a well-formed
296 /// code unit subsequence, then the Unicode scalar value is 0, and the length
297 /// is 0.
299 return ::decodeUTF8(StringRef(Position, End - Position));
300 }
301
302 // The following functions are based on the grammar rules in the YAML spec. The
303 // style of the function names is meant to closely match how they are written
304 // in the spec. The number within the [] is the number of the grammar rule in
305 // the spec.
306 //
307 // See 4.2 [Production Naming Conventions] for the meaning of the prefixes.
308 //
309 // c-
310 // A production starting and ending with a special character.
311 // b-
312 // A production matching a single line break.
313 // nb-
314 // A production starting and ending with a non-break character.
315 // s-
316 // A production starting and ending with a white space character.
317 // ns-
318 // A production starting and ending with a non-space character.
319 // l-
320 // A production matching complete line(s).
321
322 /// Skip a single nb-char[27] starting at Position.
323 ///
324 /// A nb-char is 0x9 | [0x20-0x7E] | 0x85 | [0xA0-0xD7FF] | [0xE000-0xFEFE]
325 /// | [0xFF00-0xFFFD] | [0x10000-0x10FFFF]
326 ///
327 /// @returns The code unit after the nb-char, or Position if it's not an
328 /// nb-char.
329 StringRef::iterator skip_nb_char(StringRef::iterator Position);
330
331 /// Skip a single b-break[28] starting at Position.
332 ///
333 /// A b-break is 0xD 0xA | 0xD | 0xA
334 ///
335 /// @returns The code unit after the b-break, or Position if it's not a
336 /// b-break.
337 StringRef::iterator skip_b_break(StringRef::iterator Position);
338
339 /// Skip a single s-space[31] starting at Position.
340 ///
341 /// An s-space is 0x20
342 ///
343 /// @returns The code unit after the s-space, or Position if it's not a
344 /// s-space.
345 StringRef::iterator skip_s_space(StringRef::iterator Position);
346
347 /// Skip a single s-white[33] starting at Position.
348 ///
349 /// A s-white is 0x20 | 0x9
350 ///
351 /// @returns The code unit after the s-white, or Position if it's not a
352 /// s-white.
353 StringRef::iterator skip_s_white(StringRef::iterator Position);
354
355 /// Skip a single ns-char[34] starting at Position.
356 ///
357 /// A ns-char is nb-char - s-white
358 ///
359 /// @returns The code unit after the ns-char, or Position if it's not a
360 /// ns-char.
361 StringRef::iterator skip_ns_char(StringRef::iterator Position);
362
363 using SkipWhileFunc = StringRef::iterator (Scanner::*)(StringRef::iterator);
364
365 /// Skip minimal well-formed code unit subsequences until Func
366 /// returns its input.
367 ///
368 /// @returns The code unit after the last minimal well-formed code unit
369 /// subsequence that Func accepted.
370 StringRef::iterator skip_while( SkipWhileFunc Func
371 , StringRef::iterator Position);
372
373 /// Skip minimal well-formed code unit subsequences until Func returns its
374 /// input.
375 void advanceWhile(SkipWhileFunc Func);
376
377 /// Scan ns-uri-char[39]s starting at Cur.
378 ///
379 /// This updates Cur and Column while scanning.
380 void scan_ns_uri_char();
381
382 /// Consume a minimal well-formed code unit subsequence starting at
383 /// \a Cur. Return false if it is not the same Unicode scalar value as
384 /// \a Expected. This updates \a Column.
386
387 /// Skip \a Distance UTF-8 code units. Updates \a Cur and \a Column.
388 void skip(uint32_t Distance);
389
390 /// Return true if the minimal well-formed code unit subsequence at
391 /// Pos is whitespace or a new line
392 bool isBlankOrBreak(StringRef::iterator Position);
393
394 /// Return true if the minimal well-formed code unit subsequence at
395 /// Pos is considered a "safe" character for plain scalars.
396 bool isPlainSafeNonBlank(StringRef::iterator Position);
397
398 /// Return true if \a Line contains only blanks and line breaks (or is empty), false otherwise.
399 bool isLineEmpty(StringRef Line);
400
401 /// Consume a single b-break[28] if it's present at the current position.
402 ///
403 /// Return false if the code unit at the current position isn't a line break.
404 bool consumeLineBreakIfPresent();
405
406 /// If IsSimpleKeyAllowed, create and push_back a new SimpleKey.
407 void saveSimpleKeyCandidate( TokenQueueT::iterator Tok
408 , unsigned AtColumn
409 , bool IsRequired);
410
411 /// Remove simple keys that can no longer be valid simple keys.
412 ///
413 /// Invalid simple keys are not on the current line or are further than 1024
414 /// columns back.
415 void removeStaleSimpleKeyCandidates();
416
417 /// Remove all simple keys on FlowLevel \a Level.
418 void removeSimpleKeyCandidatesOnFlowLevel(unsigned Level);
419
420 /// Unroll indentation in \a Indents back to \a Col. Creates BlockEnd
421 /// tokens if needed.
422 bool unrollIndent(int ToColumn);
423
424 /// Increase indent to \a Col. Creates \a Kind token at \a InsertPoint
425 /// if needed.
426 bool rollIndent( int ToColumn
427 , Token::TokenKind Kind
428 , TokenQueueT::iterator InsertPoint);
429
430 /// Skip a single-line comment when the comment starts at the current
431 /// position of the scanner.
432 void skipComment();
433
434 /// Skip whitespace and comments until the start of the next token.
435 void scanToNextToken();
436
437 /// Must be the first token generated.
438 bool scanStreamStart();
439
440 /// Generate tokens needed to close out the stream.
441 bool scanStreamEnd();
442
443 /// Scan a %BLAH directive.
444 bool scanDirective();
445
446 /// Scan a ... or ---.
447 bool scanDocumentIndicator(bool IsStart);
448
449 /// Scan a [ or { and generate the proper flow collection start token.
450 bool scanFlowCollectionStart(bool IsSequence);
451
452 /// Scan a ] or } and generate the proper flow collection end token.
453 bool scanFlowCollectionEnd(bool IsSequence);
454
455 /// Scan the , that separates entries in a flow collection.
456 bool scanFlowEntry();
457
458 /// Scan the - that starts block sequence entries.
459 bool scanBlockEntry();
460
461 /// Scan an explicit ? indicating a key.
462 bool scanKey();
463
464 /// Scan an explicit : indicating a value.
465 bool scanValue();
466
467 /// Scan a quoted scalar.
468 bool scanFlowScalar(bool IsDoubleQuoted);
469
470 /// Scan an unquoted scalar.
471 bool scanPlainScalar();
472
473 /// Scan an Alias or Anchor starting with * or &.
474 bool scanAliasOrAnchor(bool IsAlias);
475
476 /// Scan a block scalar starting with | or >.
477 bool scanBlockScalar(bool IsLiteral);
478
479 /// Scan a block scalar style indicator and header.
480 ///
481 /// Note: This is distinct from scanBlockScalarHeader to mirror the fact that
482 /// YAML does not consider the style indicator to be a part of the header.
483 ///
484 /// Return false if an error occurred.
485 bool scanBlockScalarIndicators(char &StyleIndicator, char &ChompingIndicator,
486 unsigned &IndentIndicator, bool &IsDone);
487
488 /// Scan a style indicator in a block scalar header.
489 char scanBlockStyleIndicator();
490
491 /// Scan a chomping indicator in a block scalar header.
492 char scanBlockChompingIndicator();
493
494 /// Scan an indentation indicator in a block scalar header.
495 unsigned scanBlockIndentationIndicator();
496
497 /// Scan a block scalar header.
498 ///
499 /// Return false if an error occurred.
500 bool scanBlockScalarHeader(char &ChompingIndicator, unsigned &IndentIndicator,
501 bool &IsDone);
502
503 /// Look for the indentation level of a block scalar.
504 ///
505 /// Return false if an error occurred.
506 bool findBlockScalarIndent(unsigned &BlockIndent, unsigned BlockExitIndent,
507 unsigned &LineBreaks, bool &IsDone);
508
509 /// Scan the indentation of a text line in a block scalar.
510 ///
511 /// Return false if an error occurred.
512 bool scanBlockScalarIndent(unsigned BlockIndent, unsigned BlockExitIndent,
513 bool &IsDone);
514
515 /// Scan a tag of the form !stuff.
516 bool scanTag();
517
518 /// Dispatch to the next scanning function based on \a *Cur.
519 bool fetchMoreTokens();
520
521 /// The SourceMgr used for diagnostics and buffer management.
522 SourceMgr &SM;
523
524 /// The original input.
525 MemoryBufferRef InputBuffer;
526
527 /// The current position of the scanner.
528 StringRef::iterator Current;
529
530 /// The end of the input (one past the last character).
532
533 /// Current YAML indentation level in spaces.
534 int Indent;
535
536 /// Current column number in Unicode code points.
537 unsigned Column;
538
539 /// Current line number.
540 unsigned Line;
541
542 /// How deep we are in flow style containers. 0 Means at block level.
543 unsigned FlowLevel;
544
545 /// Are we at the start of the stream?
546 bool IsStartOfStream;
547
548 /// Can the next token be the start of a simple key?
549 bool IsSimpleKeyAllowed;
550
551 /// Can the next token be a value indicator even if it does not have a
552 /// trailing space?
553 bool IsAdjacentValueAllowedInFlow;
554
555 /// True if an error has occurred.
556 bool Failed;
557
558 /// Should colors be used when printing out the diagnostic messages?
559 bool ShowColors;
560
561 /// Queue of tokens. This is required to queue up tokens while looking
562 /// for the end of a simple key. And for cases where a single character
563 /// can produce multiple tokens (e.g. BlockEnd).
564 TokenQueueT TokenQueue;
565
566 /// Indentation levels.
567 SmallVector<int, 4> Indents;
568
569 /// Potential simple keys.
570 SmallVector<SimpleKey, 4> SimpleKeys;
571
572 std::error_code *EC;
573};
574
575} // end namespace yaml
576} // end namespace llvm
577
578/// encodeUTF8 - Encode \a UnicodeScalarValue in UTF-8 and append it to result.
579static void encodeUTF8( uint32_t UnicodeScalarValue
580 , SmallVectorImpl<char> &Result) {
581 if (UnicodeScalarValue <= 0x7F) {
582 Result.push_back(UnicodeScalarValue & 0x7F);
583 } else if (UnicodeScalarValue <= 0x7FF) {
584 uint8_t FirstByte = 0xC0 | ((UnicodeScalarValue & 0x7C0) >> 6);
585 uint8_t SecondByte = 0x80 | (UnicodeScalarValue & 0x3F);
586 Result.push_back(FirstByte);
587 Result.push_back(SecondByte);
588 } else if (UnicodeScalarValue <= 0xFFFF) {
589 uint8_t FirstByte = 0xE0 | ((UnicodeScalarValue & 0xF000) >> 12);
590 uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
591 uint8_t ThirdByte = 0x80 | (UnicodeScalarValue & 0x3F);
592 Result.push_back(FirstByte);
593 Result.push_back(SecondByte);
594 Result.push_back(ThirdByte);
595 } else if (UnicodeScalarValue <= 0x10FFFF) {
596 uint8_t FirstByte = 0xF0 | ((UnicodeScalarValue & 0x1F0000) >> 18);
597 uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0x3F000) >> 12);
598 uint8_t ThirdByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
599 uint8_t FourthByte = 0x80 | (UnicodeScalarValue & 0x3F);
600 Result.push_back(FirstByte);
601 Result.push_back(SecondByte);
602 Result.push_back(ThirdByte);
603 Result.push_back(FourthByte);
604 }
605}
606
608 SourceMgr SM;
609 Scanner scanner(Input, SM);
610 while (true) {
611 Token T = scanner.getNext();
612 switch (T.Kind) {
614 OS << "Stream-Start: ";
615 break;
617 OS << "Stream-End: ";
618 break;
620 OS << "Version-Directive: ";
621 break;
623 OS << "Tag-Directive: ";
624 break;
626 OS << "Document-Start: ";
627 break;
629 OS << "Document-End: ";
630 break;
632 OS << "Block-Entry: ";
633 break;
635 OS << "Block-End: ";
636 break;
638 OS << "Block-Sequence-Start: ";
639 break;
641 OS << "Block-Mapping-Start: ";
642 break;
644 OS << "Flow-Entry: ";
645 break;
647 OS << "Flow-Sequence-Start: ";
648 break;
650 OS << "Flow-Sequence-End: ";
651 break;
653 OS << "Flow-Mapping-Start: ";
654 break;
656 OS << "Flow-Mapping-End: ";
657 break;
658 case Token::TK_Key:
659 OS << "Key: ";
660 break;
661 case Token::TK_Value:
662 OS << "Value: ";
663 break;
664 case Token::TK_Scalar:
665 OS << "Scalar: ";
666 break;
668 OS << "Block Scalar: ";
669 break;
670 case Token::TK_Alias:
671 OS << "Alias: ";
672 break;
673 case Token::TK_Anchor:
674 OS << "Anchor: ";
675 break;
676 case Token::TK_Tag:
677 OS << "Tag: ";
678 break;
679 case Token::TK_Error:
680 break;
681 }
682 OS << T.Range << "\n";
683 if (T.Kind == Token::TK_StreamEnd)
684 break;
685 else if (T.Kind == Token::TK_Error)
686 return false;
687 }
688 return true;
689}
690
692 SourceMgr SM;
693 Scanner scanner(Input, SM);
694 while (true) {
695 Token T = scanner.getNext();
696 if (T.Kind == Token::TK_StreamEnd)
697 break;
698 else if (T.Kind == Token::TK_Error)
699 return false;
700 }
701 return true;
702}
703
704std::string yaml::escape(StringRef Input, bool EscapePrintable) {
705 std::string EscapedInput;
706 for (StringRef::iterator i = Input.begin(), e = Input.end(); i != e; ++i) {
707 if (*i == '\\')
708 EscapedInput += "\\\\";
709 else if (*i == '"')
710 EscapedInput += "\\\"";
711 else if (*i == 0)
712 EscapedInput += "\\0";
713 else if (*i == 0x07)
714 EscapedInput += "\\a";
715 else if (*i == 0x08)
716 EscapedInput += "\\b";
717 else if (*i == 0x09)
718 EscapedInput += "\\t";
719 else if (*i == 0x0A)
720 EscapedInput += "\\n";
721 else if (*i == 0x0B)
722 EscapedInput += "\\v";
723 else if (*i == 0x0C)
724 EscapedInput += "\\f";
725 else if (*i == 0x0D)
726 EscapedInput += "\\r";
727 else if (*i == 0x1B)
728 EscapedInput += "\\e";
729 else if ((unsigned char)*i < 0x20) { // Control characters not handled above.
730 std::string HexStr = utohexstr(*i);
731 EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr;
732 } else if (*i & 0x80) { // UTF-8 multiple code unit subsequence.
733 UTF8Decoded UnicodeScalarValue
734 = decodeUTF8(StringRef(i, Input.end() - i));
735 if (UnicodeScalarValue.second == 0) {
736 // Found invalid char.
737 SmallString<4> Val;
738 encodeUTF8(0xFFFD, Val);
739 llvm::append_range(EscapedInput, Val);
740 // FIXME: Error reporting.
741 return EscapedInput;
742 }
743 if (UnicodeScalarValue.first == 0x85)
744 EscapedInput += "\\N";
745 else if (UnicodeScalarValue.first == 0xA0)
746 EscapedInput += "\\_";
747 else if (UnicodeScalarValue.first == 0x2028)
748 EscapedInput += "\\L";
749 else if (UnicodeScalarValue.first == 0x2029)
750 EscapedInput += "\\P";
751 else if (!EscapePrintable &&
752 sys::unicode::isPrintable(UnicodeScalarValue.first))
753 EscapedInput += StringRef(i, UnicodeScalarValue.second);
754 else {
755 std::string HexStr = utohexstr(UnicodeScalarValue.first);
756 if (HexStr.size() <= 2)
757 EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr;
758 else if (HexStr.size() <= 4)
759 EscapedInput += "\\u" + std::string(4 - HexStr.size(), '0') + HexStr;
760 else if (HexStr.size() <= 8)
761 EscapedInput += "\\U" + std::string(8 - HexStr.size(), '0') + HexStr;
762 }
763 i += UnicodeScalarValue.second - 1;
764 } else {
765 EscapedInput.push_back(*i);
766 }
767 }
768 return EscapedInput;
769}
770
771std::optional<bool> yaml::parseBool(StringRef S) {
772 switch (S.size()) {
773 case 1:
774 switch (S.front()) {
775 case 'y':
776 case 'Y':
777 return true;
778 case 'n':
779 case 'N':
780 return false;
781 default:
782 return std::nullopt;
783 }
784 case 2:
785 switch (S.front()) {
786 case 'O':
787 if (S[1] == 'N') // ON
788 return true;
789 [[fallthrough]];
790 case 'o':
791 if (S[1] == 'n') //[Oo]n
792 return true;
793 return std::nullopt;
794 case 'N':
795 if (S[1] == 'O') // NO
796 return false;
797 [[fallthrough]];
798 case 'n':
799 if (S[1] == 'o') //[Nn]o
800 return false;
801 return std::nullopt;
802 default:
803 return std::nullopt;
804 }
805 case 3:
806 switch (S.front()) {
807 case 'O':
808 if (S.drop_front() == "FF") // OFF
809 return false;
810 [[fallthrough]];
811 case 'o':
812 if (S.drop_front() == "ff") //[Oo]ff
813 return false;
814 return std::nullopt;
815 case 'Y':
816 if (S.drop_front() == "ES") // YES
817 return true;
818 [[fallthrough]];
819 case 'y':
820 if (S.drop_front() == "es") //[Yy]es
821 return true;
822 return std::nullopt;
823 default:
824 return std::nullopt;
825 }
826 case 4:
827 switch (S.front()) {
828 case 'T':
829 if (S.drop_front() == "RUE") // TRUE
830 return true;
831 [[fallthrough]];
832 case 't':
833 if (S.drop_front() == "rue") //[Tt]rue
834 return true;
835 return std::nullopt;
836 default:
837 return std::nullopt;
838 }
839 case 5:
840 switch (S.front()) {
841 case 'F':
842 if (S.drop_front() == "ALSE") // FALSE
843 return false;
844 [[fallthrough]];
845 case 'f':
846 if (S.drop_front() == "alse") //[Ff]alse
847 return false;
848 return std::nullopt;
849 default:
850 return std::nullopt;
851 }
852 default:
853 return std::nullopt;
854 }
855}
856
858 std::error_code *EC)
859 : SM(sm), ShowColors(ShowColors), EC(EC) {
860 init(MemoryBufferRef(Input, "YAML"));
861}
862
863Scanner::Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors,
864 std::error_code *EC)
865 : SM(SM_), ShowColors(ShowColors), EC(EC) {
866 init(Buffer);
867}
868
869void Scanner::init(MemoryBufferRef Buffer) {
870 InputBuffer = Buffer;
871 Current = InputBuffer.getBufferStart();
872 End = InputBuffer.getBufferEnd();
873 Indent = -1;
874 Column = 0;
875 Line = 0;
876 FlowLevel = 0;
877 IsStartOfStream = true;
878 IsSimpleKeyAllowed = true;
879 IsAdjacentValueAllowedInFlow = false;
880 Failed = false;
881 std::unique_ptr<MemoryBuffer> InputBufferOwner =
882 MemoryBuffer::getMemBuffer(Buffer, /*RequiresNullTerminator=*/false);
883 SM.AddNewSourceBuffer(std::move(InputBufferOwner), SMLoc());
884}
885
887 // If the current token is a possible simple key, keep parsing until we
888 // can confirm.
889 bool NeedMore = false;
890 while (true) {
891 if (TokenQueue.empty() || NeedMore) {
892 if (!fetchMoreTokens()) {
893 TokenQueue.clear();
894 SimpleKeys.clear();
895 TokenQueue.push_back(Token());
896 return TokenQueue.front();
897 }
898 }
899 assert(!TokenQueue.empty() &&
900 "fetchMoreTokens lied about getting tokens!");
901
902 removeStaleSimpleKeyCandidates();
903 SimpleKey SK;
904 SK.Tok = TokenQueue.begin();
905 if (!is_contained(SimpleKeys, SK))
906 break;
907 else
908 NeedMore = true;
909 }
910 return TokenQueue.front();
911}
912
914 Token Ret = peekNext();
915 // TokenQueue can be empty if there was an error getting the next token.
916 if (!TokenQueue.empty())
917 TokenQueue.pop_front();
918
919 // There cannot be any referenced Token's if the TokenQueue is empty. So do a
920 // quick deallocation of them all.
921 if (TokenQueue.empty())
922 TokenQueue.resetAlloc();
923
924 return Ret;
925}
926
927StringRef::iterator Scanner::skip_nb_char(StringRef::iterator Position) {
928 if (Position == End)
929 return Position;
930 // Check 7 bit c-printable - b-char.
931 if ( *Position == 0x09
932 || (*Position >= 0x20 && *Position <= 0x7E))
933 return Position + 1;
934
935 // Check for valid UTF-8.
936 if (uint8_t(*Position) & 0x80) {
937 UTF8Decoded u8d = decodeUTF8(Position);
938 if ( u8d.second != 0
939 && u8d.first != 0xFEFF
940 && ( u8d.first == 0x85
941 || ( u8d.first >= 0xA0
942 && u8d.first <= 0xD7FF)
943 || ( u8d.first >= 0xE000
944 && u8d.first <= 0xFFFD)
945 || ( u8d.first >= 0x10000
946 && u8d.first <= 0x10FFFF)))
947 return Position + u8d.second;
948 }
949 return Position;
950}
951
952StringRef::iterator Scanner::skip_b_break(StringRef::iterator Position) {
953 if (Position == End)
954 return Position;
955 if (*Position == 0x0D) {
956 if (Position + 1 != End && *(Position + 1) == 0x0A)
957 return Position + 2;
958 return Position + 1;
959 }
960
961 if (*Position == 0x0A)
962 return Position + 1;
963 return Position;
964}
965
966StringRef::iterator Scanner::skip_s_space(StringRef::iterator Position) {
967 if (Position == End)
968 return Position;
969 if (*Position == ' ')
970 return Position + 1;
971 return Position;
972}
973
974StringRef::iterator Scanner::skip_s_white(StringRef::iterator Position) {
975 if (Position == End)
976 return Position;
977 if (*Position == ' ' || *Position == '\t')
978 return Position + 1;
979 return Position;
980}
981
982StringRef::iterator Scanner::skip_ns_char(StringRef::iterator Position) {
983 if (Position == End)
984 return Position;
985 if (*Position == ' ' || *Position == '\t')
986 return Position;
987 return skip_nb_char(Position);
988}
989
990StringRef::iterator Scanner::skip_while( SkipWhileFunc Func
991 , StringRef::iterator Position) {
992 while (true) {
993 StringRef::iterator i = (this->*Func)(Position);
994 if (i == Position)
995 break;
996 Position = i;
997 }
998 return Position;
999}
1000
1001void Scanner::advanceWhile(SkipWhileFunc Func) {
1002 auto Final = skip_while(Func, Current);
1003 Column += Final - Current;
1004 Current = Final;
1005}
1006
1007static bool is_ns_hex_digit(const char C) { return isAlnum(C); }
1008
1009static bool is_ns_word_char(const char C) { return C == '-' || isAlpha(C); }
1010
1011void Scanner::scan_ns_uri_char() {
1012 while (true) {
1013 if (Current == End)
1014 break;
1015 if ((*Current == '%' && Current + 2 < End &&
1016 is_ns_hex_digit(*(Current + 1)) && is_ns_hex_digit(*(Current + 2))) ||
1017 is_ns_word_char(*Current) ||
1018 StringRef(Current, 1).find_first_of("#;/?:@&=+$,_.!~*'()[]") !=
1020 ++Current;
1021 ++Column;
1022 } else {
1023 break;
1024 }
1025 }
1026}
1027
1028bool Scanner::consume(uint32_t Expected) {
1029 if (Expected >= 0x80) {
1030 setError("Cannot consume non-ascii characters", Current);
1031 return false;
1032 }
1033 if (Current == End)
1034 return false;
1035 if (uint8_t(*Current) >= 0x80) {
1036 setError("Cannot consume non-ascii characters", Current);
1037 return false;
1038 }
1039 if (uint8_t(*Current) == Expected) {
1040 ++Current;
1041 ++Column;
1042 return true;
1043 }
1044 return false;
1045}
1046
1047void Scanner::skip(uint32_t Distance) {
1048 Current += Distance;
1049 Column += Distance;
1050 assert(Current <= End && "Skipped past the end");
1051}
1052
1053bool Scanner::isBlankOrBreak(StringRef::iterator Position) {
1054 if (Position == End)
1055 return false;
1056 return *Position == ' ' || *Position == '\t' || *Position == '\r' ||
1057 *Position == '\n';
1058}
1059
1060bool Scanner::isPlainSafeNonBlank(StringRef::iterator Position) {
1061 if (Position == End || isBlankOrBreak(Position))
1062 return false;
1063 if (FlowLevel &&
1064 StringRef(Position, 1).find_first_of(",[]{}") != StringRef::npos)
1065 return false;
1066 return true;
1067}
1068
1069bool Scanner::isLineEmpty(StringRef Line) {
1070 for (const auto *Position = Line.begin(); Position != Line.end(); ++Position)
1071 if (!isBlankOrBreak(Position))
1072 return false;
1073 return true;
1074}
1075
1076bool Scanner::consumeLineBreakIfPresent() {
1077 auto Next = skip_b_break(Current);
1078 if (Next == Current)
1079 return false;
1080 Column = 0;
1081 ++Line;
1082 Current = Next;
1083 return true;
1084}
1085
1086void Scanner::saveSimpleKeyCandidate( TokenQueueT::iterator Tok
1087 , unsigned AtColumn
1088 , bool IsRequired) {
1089 if (IsSimpleKeyAllowed) {
1090 SimpleKey SK;
1091 SK.Tok = Tok;
1092 SK.Line = Line;
1093 SK.Column = AtColumn;
1094 SK.IsRequired = IsRequired;
1095 SK.FlowLevel = FlowLevel;
1096 SimpleKeys.push_back(SK);
1097 }
1098}
1099
1100void Scanner::removeStaleSimpleKeyCandidates() {
1101 for (SmallVectorImpl<SimpleKey>::iterator i = SimpleKeys.begin();
1102 i != SimpleKeys.end();) {
1103 if (i->Line != Line || i->Column + 1024 < Column) {
1104 if (i->IsRequired)
1105 setError( "Could not find expected : for simple key"
1106 , i->Tok->Range.begin());
1107 i = SimpleKeys.erase(i);
1108 } else {
1109 ++i;
1110 }
1111 }
1112}
1113
1114void Scanner::removeSimpleKeyCandidatesOnFlowLevel(unsigned Level) {
1115 if (!SimpleKeys.empty() && (SimpleKeys.end() - 1)->FlowLevel == Level)
1116 SimpleKeys.pop_back();
1117}
1118
1119bool Scanner::unrollIndent(int ToColumn) {
1120 Token T;
1121 // Indentation is ignored in flow.
1122 if (FlowLevel != 0)
1123 return true;
1124
1125 while (Indent > ToColumn) {
1126 T.Kind = Token::TK_BlockEnd;
1127 T.Range = StringRef(Current, 1);
1128 TokenQueue.push_back(T);
1129 Indent = Indents.pop_back_val();
1130 }
1131
1132 return true;
1133}
1134
1135bool Scanner::rollIndent( int ToColumn
1136 , Token::TokenKind Kind
1137 , TokenQueueT::iterator InsertPoint) {
1138 if (FlowLevel)
1139 return true;
1140 if (Indent < ToColumn) {
1141 Indents.push_back(Indent);
1142 Indent = ToColumn;
1143
1144 Token T;
1145 T.Kind = Kind;
1146 T.Range = StringRef(Current, 0);
1147 TokenQueue.insert(InsertPoint, T);
1148 }
1149 return true;
1150}
1151
1152void Scanner::skipComment() {
1153 if (Current == End || *Current != '#')
1154 return;
1155 while (true) {
1156 // This may skip more than one byte, thus Column is only incremented
1157 // for code points.
1158 StringRef::iterator I = skip_nb_char(Current);
1159 if (I == Current)
1160 break;
1161 Current = I;
1162 ++Column;
1163 }
1164}
1165
1166void Scanner::scanToNextToken() {
1167 while (true) {
1168 while (Current != End && (*Current == ' ' || *Current == '\t')) {
1169 skip(1);
1170 }
1171
1172 skipComment();
1173
1174 // Skip EOL.
1175 StringRef::iterator i = skip_b_break(Current);
1176 if (i == Current)
1177 break;
1178 Current = i;
1179 ++Line;
1180 Column = 0;
1181 // New lines may start a simple key.
1182 if (!FlowLevel)
1183 IsSimpleKeyAllowed = true;
1184 }
1185}
1186
1187bool Scanner::scanStreamStart() {
1188 IsStartOfStream = false;
1189
1190 EncodingInfo EI = getUnicodeEncoding(currentInput());
1191
1192 Token T;
1193 T.Kind = Token::TK_StreamStart;
1194 T.Range = StringRef(Current, EI.second);
1195 TokenQueue.push_back(T);
1196 Current += EI.second;
1197 return true;
1198}
1199
1200bool Scanner::scanStreamEnd() {
1201 // Force an ending new line if one isn't present.
1202 if (Column != 0) {
1203 Column = 0;
1204 ++Line;
1205 }
1206
1207 unrollIndent(-1);
1208 SimpleKeys.clear();
1209 IsSimpleKeyAllowed = false;
1210 IsAdjacentValueAllowedInFlow = false;
1211
1212 Token T;
1213 T.Kind = Token::TK_StreamEnd;
1214 T.Range = StringRef(Current, 0);
1215 TokenQueue.push_back(T);
1216 return true;
1217}
1218
1219bool Scanner::scanDirective() {
1220 // Reset the indentation level.
1221 unrollIndent(-1);
1222 SimpleKeys.clear();
1223 IsSimpleKeyAllowed = false;
1224 IsAdjacentValueAllowedInFlow = false;
1225
1226 StringRef::iterator Start = Current;
1227 consume('%');
1228 StringRef::iterator NameStart = Current;
1229 Current = skip_while(&Scanner::skip_ns_char, Current);
1230 StringRef Name(NameStart, Current - NameStart);
1231 Current = skip_while(&Scanner::skip_s_white, Current);
1232
1233 Token T;
1234 if (Name == "YAML") {
1235 Current = skip_while(&Scanner::skip_ns_char, Current);
1237 T.Range = StringRef(Start, Current - Start);
1238 TokenQueue.push_back(T);
1239 return true;
1240 } else if(Name == "TAG") {
1241 Current = skip_while(&Scanner::skip_ns_char, Current);
1242 Current = skip_while(&Scanner::skip_s_white, Current);
1243 Current = skip_while(&Scanner::skip_ns_char, Current);
1245 T.Range = StringRef(Start, Current - Start);
1246 TokenQueue.push_back(T);
1247 return true;
1248 }
1249 return false;
1250}
1251
1252bool Scanner::scanDocumentIndicator(bool IsStart) {
1253 unrollIndent(-1);
1254 SimpleKeys.clear();
1255 IsSimpleKeyAllowed = false;
1256 IsAdjacentValueAllowedInFlow = false;
1257
1258 Token T;
1260 T.Range = StringRef(Current, 3);
1261 skip(3);
1262 TokenQueue.push_back(T);
1263 return true;
1264}
1265
1266bool Scanner::scanFlowCollectionStart(bool IsSequence) {
1267 Token T;
1268 T.Kind = IsSequence ? Token::TK_FlowSequenceStart
1270 T.Range = StringRef(Current, 1);
1271 skip(1);
1272 TokenQueue.push_back(T);
1273
1274 // [ and { may begin a simple key.
1275 saveSimpleKeyCandidate(--TokenQueue.end(), Column - 1, false);
1276
1277 // And may also be followed by a simple key.
1278 IsSimpleKeyAllowed = true;
1279 // Adjacent values are allowed in flows only after JSON-style keys.
1280 IsAdjacentValueAllowedInFlow = false;
1281 ++FlowLevel;
1282 return true;
1283}
1284
1285bool Scanner::scanFlowCollectionEnd(bool IsSequence) {
1286 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1287 IsSimpleKeyAllowed = false;
1288 IsAdjacentValueAllowedInFlow = true;
1289 Token T;
1290 T.Kind = IsSequence ? Token::TK_FlowSequenceEnd
1292 T.Range = StringRef(Current, 1);
1293 skip(1);
1294 TokenQueue.push_back(T);
1295 if (FlowLevel)
1296 --FlowLevel;
1297 return true;
1298}
1299
1300bool Scanner::scanFlowEntry() {
1301 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1302 IsSimpleKeyAllowed = true;
1303 IsAdjacentValueAllowedInFlow = false;
1304 Token T;
1305 T.Kind = Token::TK_FlowEntry;
1306 T.Range = StringRef(Current, 1);
1307 skip(1);
1308 TokenQueue.push_back(T);
1309 return true;
1310}
1311
1312bool Scanner::scanBlockEntry() {
1313 rollIndent(Column, Token::TK_BlockSequenceStart, TokenQueue.end());
1314 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1315 IsSimpleKeyAllowed = true;
1316 IsAdjacentValueAllowedInFlow = false;
1317 Token T;
1318 T.Kind = Token::TK_BlockEntry;
1319 T.Range = StringRef(Current, 1);
1320 skip(1);
1321 TokenQueue.push_back(T);
1322 return true;
1323}
1324
1325bool Scanner::scanKey() {
1326 if (!FlowLevel)
1327 rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end());
1328
1329 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1330 IsSimpleKeyAllowed = !FlowLevel;
1331 IsAdjacentValueAllowedInFlow = false;
1332
1333 Token T;
1334 T.Kind = Token::TK_Key;
1335 T.Range = StringRef(Current, 1);
1336 skip(1);
1337 TokenQueue.push_back(T);
1338 return true;
1339}
1340
1341bool Scanner::scanValue() {
1342 // If the previous token could have been a simple key, insert the key token
1343 // into the token queue.
1344 if (!SimpleKeys.empty()) {
1345 SimpleKey SK = SimpleKeys.pop_back_val();
1346 Token T;
1347 T.Kind = Token::TK_Key;
1348 T.Range = SK.Tok->Range;
1350 for (i = TokenQueue.begin(), e = TokenQueue.end(); i != e; ++i) {
1351 if (i == SK.Tok)
1352 break;
1353 }
1354 if (i == e) {
1355 Failed = true;
1356 return false;
1357 }
1358 i = TokenQueue.insert(i, T);
1359
1360 // We may also need to add a Block-Mapping-Start token.
1361 rollIndent(SK.Column, Token::TK_BlockMappingStart, i);
1362
1363 IsSimpleKeyAllowed = false;
1364 } else {
1365 if (!FlowLevel)
1366 rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end());
1367 IsSimpleKeyAllowed = !FlowLevel;
1368 }
1369 IsAdjacentValueAllowedInFlow = false;
1370
1371 Token T;
1372 T.Kind = Token::TK_Value;
1373 T.Range = StringRef(Current, 1);
1374 skip(1);
1375 TokenQueue.push_back(T);
1376 return true;
1377}
1378
1379// Forbidding inlining improves performance by roughly 20%.
1380// FIXME: Remove once llvm optimizes this to the faster version without hints.
1381LLVM_ATTRIBUTE_NOINLINE static bool
1383
1384// Returns whether a character at 'Position' was escaped with a leading '\'.
1385// 'First' specifies the position of the first character in the string.
1387 StringRef::iterator Position) {
1388 assert(Position - 1 >= First);
1389 StringRef::iterator I = Position - 1;
1390 // We calculate the number of consecutive '\'s before the current position
1391 // by iterating backwards through our string.
1392 while (I >= First && *I == '\\') --I;
1393 // (Position - 1 - I) now contains the number of '\'s before the current
1394 // position. If it is odd, the character at 'Position' was escaped.
1395 return (Position - 1 - I) % 2 == 1;
1396}
1397
1398bool Scanner::scanFlowScalar(bool IsDoubleQuoted) {
1399 StringRef::iterator Start = Current;
1400 unsigned ColStart = Column;
1401 if (IsDoubleQuoted) {
1402 do {
1403 ++Current;
1404 while (Current != End && *Current != '"')
1405 ++Current;
1406 // Repeat until the previous character was not a '\' or was an escaped
1407 // backslash.
1408 } while ( Current != End
1409 && *(Current - 1) == '\\'
1410 && wasEscaped(Start + 1, Current));
1411 } else {
1412 skip(1);
1413 while (Current != End) {
1414 // Skip a ' followed by another '.
1415 if (Current + 1 < End && *Current == '\'' && *(Current + 1) == '\'') {
1416 skip(2);
1417 continue;
1418 } else if (*Current == '\'')
1419 break;
1420 StringRef::iterator i = skip_nb_char(Current);
1421 if (i == Current) {
1422 i = skip_b_break(Current);
1423 if (i == Current)
1424 break;
1425 Current = i;
1426 Column = 0;
1427 ++Line;
1428 } else {
1429 if (i == End)
1430 break;
1431 Current = i;
1432 ++Column;
1433 }
1434 }
1435 }
1436
1437 if (Current == End) {
1438 setError("Expected quote at end of scalar", Current);
1439 return false;
1440 }
1441
1442 skip(1); // Skip ending quote.
1443 Token T;
1444 T.Kind = Token::TK_Scalar;
1445 T.Range = StringRef(Start, Current - Start);
1446 TokenQueue.push_back(T);
1447
1448 saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
1449
1450 IsSimpleKeyAllowed = false;
1451 IsAdjacentValueAllowedInFlow = true;
1452
1453 return true;
1454}
1455
1456bool Scanner::scanPlainScalar() {
1457 StringRef::iterator Start = Current;
1458 unsigned ColStart = Column;
1459 unsigned LeadingBlanks = 0;
1460 assert(Indent >= -1 && "Indent must be >= -1 !");
1461 unsigned indent = static_cast<unsigned>(Indent + 1);
1462 while (Current != End) {
1463 if (*Current == '#')
1464 break;
1465
1466 while (Current != End &&
1467 ((*Current != ':' && isPlainSafeNonBlank(Current)) ||
1468 (*Current == ':' && isPlainSafeNonBlank(Current + 1)))) {
1469 StringRef::iterator i = skip_nb_char(Current);
1470 if (i == Current)
1471 break;
1472 Current = i;
1473 ++Column;
1474 }
1475
1476 // Are we at the end?
1477 if (!isBlankOrBreak(Current))
1478 break;
1479
1480 // Eat blanks.
1481 StringRef::iterator Tmp = Current;
1482 while (isBlankOrBreak(Tmp)) {
1483 StringRef::iterator i = skip_s_white(Tmp);
1484 if (i != Tmp) {
1485 if (LeadingBlanks && (Column < indent) && *Tmp == '\t') {
1486 setError("Found invalid tab character in indentation", Tmp);
1487 return false;
1488 }
1489 Tmp = i;
1490 ++Column;
1491 } else {
1492 i = skip_b_break(Tmp);
1493 if (!LeadingBlanks)
1494 LeadingBlanks = 1;
1495 Tmp = i;
1496 Column = 0;
1497 ++Line;
1498 }
1499 }
1500
1501 if (!FlowLevel && Column < indent)
1502 break;
1503
1504 Current = Tmp;
1505 }
1506 if (Start == Current) {
1507 setError("Got empty plain scalar", Start);
1508 return false;
1509 }
1510 Token T;
1511 T.Kind = Token::TK_Scalar;
1512 T.Range = StringRef(Start, Current - Start);
1513 TokenQueue.push_back(T);
1514
1515 // Plain scalars can be simple keys.
1516 saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
1517
1518 IsSimpleKeyAllowed = false;
1519 IsAdjacentValueAllowedInFlow = false;
1520
1521 return true;
1522}
1523
1524bool Scanner::scanAliasOrAnchor(bool IsAlias) {
1525 StringRef::iterator Start = Current;
1526 unsigned ColStart = Column;
1527 skip(1);
1528 while (Current != End) {
1529 if ( *Current == '[' || *Current == ']'
1530 || *Current == '{' || *Current == '}'
1531 || *Current == ','
1532 || *Current == ':')
1533 break;
1534 StringRef::iterator i = skip_ns_char(Current);
1535 if (i == Current)
1536 break;
1537 Current = i;
1538 ++Column;
1539 }
1540
1541 if (Start + 1 == Current) {
1542 setError("Got empty alias or anchor", Start);
1543 return false;
1544 }
1545
1546 Token T;
1548 T.Range = StringRef(Start, Current - Start);
1549 TokenQueue.push_back(T);
1550
1551 // Alias and anchors can be simple keys.
1552 saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
1553
1554 IsSimpleKeyAllowed = false;
1555 IsAdjacentValueAllowedInFlow = false;
1556
1557 return true;
1558}
1559
1560bool Scanner::scanBlockScalarIndicators(char &StyleIndicator,
1561 char &ChompingIndicator,
1562 unsigned &IndentIndicator,
1563 bool &IsDone) {
1564 StyleIndicator = scanBlockStyleIndicator();
1565 if (!scanBlockScalarHeader(ChompingIndicator, IndentIndicator, IsDone))
1566 return false;
1567 return true;
1568}
1569
1570char Scanner::scanBlockStyleIndicator() {
1571 char Indicator = ' ';
1572 if (Current != End && (*Current == '>' || *Current == '|')) {
1573 Indicator = *Current;
1574 skip(1);
1575 }
1576 return Indicator;
1577}
1578
1579char Scanner::scanBlockChompingIndicator() {
1580 char Indicator = ' ';
1581 if (Current != End && (*Current == '+' || *Current == '-')) {
1582 Indicator = *Current;
1583 skip(1);
1584 }
1585 return Indicator;
1586}
1587
1588/// Get the number of line breaks after chomping.
1589///
1590/// Return the number of trailing line breaks to emit, depending on
1591/// \p ChompingIndicator.
1592static unsigned getChompedLineBreaks(char ChompingIndicator,
1593 unsigned LineBreaks, StringRef Str) {
1594 if (ChompingIndicator == '-') // Strip all line breaks.
1595 return 0;
1596 if (ChompingIndicator == '+') // Keep all line breaks.
1597 return LineBreaks;
1598 // Clip trailing lines.
1599 return Str.empty() ? 0 : 1;
1600}
1601
1602unsigned Scanner::scanBlockIndentationIndicator() {
1603 unsigned Indent = 0;
1604 if (Current != End && (*Current >= '1' && *Current <= '9')) {
1605 Indent = unsigned(*Current - '0');
1606 skip(1);
1607 }
1608 return Indent;
1609}
1610
1611bool Scanner::scanBlockScalarHeader(char &ChompingIndicator,
1612 unsigned &IndentIndicator, bool &IsDone) {
1613 auto Start = Current;
1614
1615 ChompingIndicator = scanBlockChompingIndicator();
1616 IndentIndicator = scanBlockIndentationIndicator();
1617 // Check for the chomping indicator once again.
1618 if (ChompingIndicator == ' ')
1619 ChompingIndicator = scanBlockChompingIndicator();
1620 Current = skip_while(&Scanner::skip_s_white, Current);
1621 skipComment();
1622
1623 if (Current == End) { // EOF, we have an empty scalar.
1624 Token T;
1625 T.Kind = Token::TK_BlockScalar;
1626 T.Range = StringRef(Start, Current - Start);
1627 TokenQueue.push_back(T);
1628 IsDone = true;
1629 return true;
1630 }
1631
1632 if (!consumeLineBreakIfPresent()) {
1633 setError("Expected a line break after block scalar header", Current);
1634 return false;
1635 }
1636 return true;
1637}
1638
1639bool Scanner::findBlockScalarIndent(unsigned &BlockIndent,
1640 unsigned BlockExitIndent,
1641 unsigned &LineBreaks, bool &IsDone) {
1642 unsigned MaxAllSpaceLineCharacters = 0;
1643 StringRef::iterator LongestAllSpaceLine;
1644
1645 while (true) {
1646 advanceWhile(&Scanner::skip_s_space);
1647 if (skip_nb_char(Current) != Current) {
1648 // This line isn't empty, so try and find the indentation.
1649 if (Column <= BlockExitIndent) { // End of the block literal.
1650 IsDone = true;
1651 return true;
1652 }
1653 // We found the block's indentation.
1654 BlockIndent = Column;
1655 if (MaxAllSpaceLineCharacters > BlockIndent) {
1656 setError(
1657 "Leading all-spaces line must be smaller than the block indent",
1658 LongestAllSpaceLine);
1659 return false;
1660 }
1661 return true;
1662 }
1663 if (skip_b_break(Current) != Current &&
1664 Column > MaxAllSpaceLineCharacters) {
1665 // Record the longest all-space line in case it's longer than the
1666 // discovered block indent.
1667 MaxAllSpaceLineCharacters = Column;
1668 LongestAllSpaceLine = Current;
1669 }
1670
1671 // Check for EOF.
1672 if (Current == End) {
1673 IsDone = true;
1674 return true;
1675 }
1676
1677 if (!consumeLineBreakIfPresent()) {
1678 IsDone = true;
1679 return true;
1680 }
1681 ++LineBreaks;
1682 }
1683 return true;
1684}
1685
1686bool Scanner::scanBlockScalarIndent(unsigned BlockIndent,
1687 unsigned BlockExitIndent, bool &IsDone) {
1688 // Skip the indentation.
1689 while (Column < BlockIndent) {
1690 auto I = skip_s_space(Current);
1691 if (I == Current)
1692 break;
1693 Current = I;
1694 ++Column;
1695 }
1696
1697 if (skip_nb_char(Current) == Current)
1698 return true;
1699
1700 if (Column <= BlockExitIndent) { // End of the block literal.
1701 IsDone = true;
1702 return true;
1703 }
1704
1705 if (Column < BlockIndent) {
1706 if (Current != End && *Current == '#') { // Trailing comment.
1707 IsDone = true;
1708 return true;
1709 }
1710 setError("A text line is less indented than the block scalar", Current);
1711 return false;
1712 }
1713 return true; // A normal text line.
1714}
1715
1716bool Scanner::scanBlockScalar(bool IsLiteral) {
1717 assert(*Current == '|' || *Current == '>');
1718 char StyleIndicator;
1719 char ChompingIndicator;
1720 unsigned BlockIndent;
1721 bool IsDone = false;
1722 if (!scanBlockScalarIndicators(StyleIndicator, ChompingIndicator, BlockIndent,
1723 IsDone))
1724 return false;
1725 if (IsDone)
1726 return true;
1727 bool IsFolded = StyleIndicator == '>';
1728
1729 const auto *Start = Current;
1730 unsigned BlockExitIndent = Indent < 0 ? 0 : (unsigned)Indent;
1731 unsigned LineBreaks = 0;
1732 if (BlockIndent == 0) {
1733 if (!findBlockScalarIndent(BlockIndent, BlockExitIndent, LineBreaks,
1734 IsDone))
1735 return false;
1736 }
1737
1738 // Scan the block's scalars body.
1739 SmallString<256> Str;
1740 while (!IsDone) {
1741 if (!scanBlockScalarIndent(BlockIndent, BlockExitIndent, IsDone))
1742 return false;
1743 if (IsDone)
1744 break;
1745
1746 // Parse the current line.
1747 auto LineStart = Current;
1748 advanceWhile(&Scanner::skip_nb_char);
1749 if (LineStart != Current) {
1750 if (LineBreaks && IsFolded && !Scanner::isLineEmpty(Str)) {
1751 // The folded style "folds" any single line break between content into a
1752 // single space, except when that content is "empty" (only contains
1753 // whitespace) in which case the line break is left as-is.
1754 if (LineBreaks == 1) {
1755 Str.append(LineBreaks,
1756 isLineEmpty(StringRef(LineStart, Current - LineStart))
1757 ? '\n'
1758 : ' ');
1759 }
1760 // If we saw a single line break, we are completely replacing it and so
1761 // want `LineBreaks == 0`. Otherwise this decrement accounts for the
1762 // fact that the first line break is "trimmed", only being used to
1763 // signal a sequence of line breaks which should not be folded.
1764 LineBreaks--;
1765 }
1766 Str.append(LineBreaks, '\n');
1767 Str.append(StringRef(LineStart, Current - LineStart));
1768 LineBreaks = 0;
1769 }
1770
1771 // Check for EOF.
1772 if (Current == End)
1773 break;
1774
1775 if (!consumeLineBreakIfPresent())
1776 break;
1777 ++LineBreaks;
1778 }
1779
1780 if (Current == End && !LineBreaks)
1781 // Ensure that there is at least one line break before the end of file.
1782 LineBreaks = 1;
1783 Str.append(getChompedLineBreaks(ChompingIndicator, LineBreaks, Str), '\n');
1784
1785 // New lines may start a simple key.
1786 if (!FlowLevel)
1787 IsSimpleKeyAllowed = true;
1788 IsAdjacentValueAllowedInFlow = false;
1789
1790 Token T;
1791 T.Kind = Token::TK_BlockScalar;
1792 T.Range = StringRef(Start, Current - Start);
1793 T.Value = std::string(Str);
1794 TokenQueue.push_back(T);
1795 return true;
1796}
1797
1798bool Scanner::scanTag() {
1799 StringRef::iterator Start = Current;
1800 unsigned ColStart = Column;
1801 skip(1); // Eat !.
1802 if (Current == End || isBlankOrBreak(Current)); // An empty tag.
1803 else if (*Current == '<') {
1804 skip(1);
1805 scan_ns_uri_char();
1806 if (!consume('>'))
1807 return false;
1808 } else {
1809 // FIXME: Actually parse the c-ns-shorthand-tag rule.
1810 Current = skip_while(&Scanner::skip_ns_char, Current);
1811 }
1812
1813 Token T;
1814 T.Kind = Token::TK_Tag;
1815 T.Range = StringRef(Start, Current - Start);
1816 TokenQueue.push_back(T);
1817
1818 // Tags can be simple keys.
1819 saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
1820
1821 IsSimpleKeyAllowed = false;
1822 IsAdjacentValueAllowedInFlow = false;
1823
1824 return true;
1825}
1826
1827bool Scanner::fetchMoreTokens() {
1828 if (IsStartOfStream)
1829 return scanStreamStart();
1830
1831 scanToNextToken();
1832
1833 if (Current == End)
1834 return scanStreamEnd();
1835
1836 removeStaleSimpleKeyCandidates();
1837
1838 unrollIndent(Column);
1839
1840 if (Column == 0 && *Current == '%')
1841 return scanDirective();
1842
1843 if (Column == 0 && Current + 4 <= End
1844 && *Current == '-'
1845 && *(Current + 1) == '-'
1846 && *(Current + 2) == '-'
1847 && (Current + 3 == End || isBlankOrBreak(Current + 3)))
1848 return scanDocumentIndicator(true);
1849
1850 if (Column == 0 && Current + 4 <= End
1851 && *Current == '.'
1852 && *(Current + 1) == '.'
1853 && *(Current + 2) == '.'
1854 && (Current + 3 == End || isBlankOrBreak(Current + 3)))
1855 return scanDocumentIndicator(false);
1856
1857 if (*Current == '[')
1858 return scanFlowCollectionStart(true);
1859
1860 if (*Current == '{')
1861 return scanFlowCollectionStart(false);
1862
1863 if (*Current == ']')
1864 return scanFlowCollectionEnd(true);
1865
1866 if (*Current == '}')
1867 return scanFlowCollectionEnd(false);
1868
1869 if (*Current == ',')
1870 return scanFlowEntry();
1871
1872 if (*Current == '-' && (isBlankOrBreak(Current + 1) || Current + 1 == End))
1873 return scanBlockEntry();
1874
1875 if (*Current == '?' && (Current + 1 == End || isBlankOrBreak(Current + 1)))
1876 return scanKey();
1877
1878 if (*Current == ':' &&
1879 (!isPlainSafeNonBlank(Current + 1) || IsAdjacentValueAllowedInFlow))
1880 return scanValue();
1881
1882 if (*Current == '*')
1883 return scanAliasOrAnchor(true);
1884
1885 if (*Current == '&')
1886 return scanAliasOrAnchor(false);
1887
1888 if (*Current == '!')
1889 return scanTag();
1890
1891 if (*Current == '|' && !FlowLevel)
1892 return scanBlockScalar(true);
1893
1894 if (*Current == '>' && !FlowLevel)
1895 return scanBlockScalar(false);
1896
1897 if (*Current == '\'')
1898 return scanFlowScalar(false);
1899
1900 if (*Current == '"')
1901 return scanFlowScalar(true);
1902
1903 // Get a plain scalar.
1904 StringRef FirstChar(Current, 1);
1905 if ((!isBlankOrBreak(Current) &&
1906 FirstChar.find_first_of("-?:,[]{}#&*!|>'\"%@`") == StringRef::npos) ||
1907 (FirstChar.find_first_of("?:-") != StringRef::npos &&
1908 isPlainSafeNonBlank(Current + 1)))
1909 return scanPlainScalar();
1910
1911 setError("Unrecognized character while tokenizing.", Current);
1912 return false;
1913}
1914
1916 std::error_code *EC)
1917 : scanner(new Scanner(Input, SM, ShowColors, EC)) {}
1918
1919Stream::Stream(MemoryBufferRef InputBuffer, SourceMgr &SM, bool ShowColors,
1920 std::error_code *EC)
1921 : scanner(new Scanner(InputBuffer, SM, ShowColors, EC)) {}
1922
1923Stream::~Stream() = default;
1924
1925bool Stream::failed() { return scanner->failed(); }
1926
1928 printError(N ? N->getSourceRange() : SMRange(), Msg, Kind);
1929}
1930
1931void Stream::printError(const SMRange &Range, const Twine &Msg,
1932 SourceMgr::DiagKind Kind) {
1933 scanner->printError(Range.Start, Kind, Msg, Range);
1934}
1935
1937 if (CurrentDoc)
1938 report_fatal_error("Can only iterate over the stream once");
1939
1940 // Skip Stream-Start.
1941 scanner->getNext();
1942
1943 CurrentDoc.reset(new Document(*this));
1944 return document_iterator(CurrentDoc);
1945}
1946
1950
1952 for (Document &Doc : *this)
1953 Doc.skip();
1954}
1955
1956Node::Node(unsigned int Type, std::unique_ptr<Document> &D, StringRef A,
1957 StringRef T)
1958 : Doc(D), TypeID(Type), Anchor(A), Tag(T) {
1959 SMLoc Start = SMLoc::getFromPointer(peekNext().Range.begin());
1960 SourceRange = SMRange(Start, Start);
1961}
1962
1963std::string Node::getVerbatimTag() const {
1964 StringRef Raw = getRawTag();
1965 if (!Raw.empty() && Raw != "!") {
1966 std::string Ret;
1967 if (Raw.find_last_of('!') == 0) {
1968 Ret = std::string(Doc->getTagMap().find("!")->second);
1969 Ret += Raw.substr(1);
1970 return Ret;
1971 } else if (Raw.starts_with("!!")) {
1972 Ret = std::string(Doc->getTagMap().find("!!")->second);
1973 Ret += Raw.substr(2);
1974 return Ret;
1975 } else {
1976 StringRef TagHandle = Raw.substr(0, Raw.find_last_of('!') + 1);
1977 std::map<StringRef, StringRef>::const_iterator It =
1978 Doc->getTagMap().find(TagHandle);
1979 if (It != Doc->getTagMap().end())
1980 Ret = std::string(It->second);
1981 else {
1982 Token T;
1983 T.Kind = Token::TK_Tag;
1984 T.Range = TagHandle;
1985 setError(Twine("Unknown tag handle ") + TagHandle, T);
1986 }
1987 Ret += Raw.substr(Raw.find_last_of('!') + 1);
1988 return Ret;
1989 }
1990 }
1991
1992 switch (getType()) {
1993 case NK_Null:
1994 return "tag:yaml.org,2002:null";
1995 case NK_Scalar:
1996 case NK_BlockScalar:
1997 // TODO: Tag resolution.
1998 return "tag:yaml.org,2002:str";
1999 case NK_Mapping:
2000 return "tag:yaml.org,2002:map";
2001 case NK_Sequence:
2002 return "tag:yaml.org,2002:seq";
2003 }
2004
2005 return "";
2006}
2007
2009 return Doc->peekNext();
2010}
2011
2013 return Doc->getNext();
2014}
2015
2017 return Doc->parseBlockNode();
2018}
2019
2021 return Doc->NodeAllocator;
2022}
2023
2024void Node::setError(const Twine &Msg, Token &Tok) const {
2025 Doc->setError(Msg, Tok);
2026}
2027
2028bool Node::failed() const {
2029 return Doc->failed();
2030}
2031
2033 if (Value[0] == '"')
2034 return getDoubleQuotedValue(Value, Storage);
2035 if (Value[0] == '\'')
2036 return getSingleQuotedValue(Value, Storage);
2037 return getPlainValue(Value, Storage);
2038}
2039
2040/// parseScalarValue - A common parsing routine for all flow scalar styles.
2041/// It handles line break characters by itself, adds regular content characters
2042/// to the result, and forwards escaped sequences to the provided routine for
2043/// the style-specific processing.
2044///
2045/// \param UnquotedValue - An input value without quotation marks.
2046/// \param Storage - A storage for the result if the input value is multiline or
2047/// contains escaped characters.
2048/// \param LookupChars - A set of special characters to search in the input
2049/// string. Should include line break characters and the escape character
2050/// specific for the processing scalar style, if any.
2051/// \param UnescapeCallback - This is called when the escape character is found
2052/// in the input.
2053/// \returns - The unfolded and unescaped value.
2054static StringRef
2056 StringRef LookupChars,
2057 std::function<StringRef(StringRef, SmallVectorImpl<char> &)>
2058 UnescapeCallback) {
2059 size_t I = UnquotedValue.find_first_of(LookupChars);
2060 if (I == StringRef::npos)
2061 return UnquotedValue;
2062
2063 Storage.clear();
2064 Storage.reserve(UnquotedValue.size());
2065 char LastNewLineAddedAs = '\0';
2066 for (; I != StringRef::npos; I = UnquotedValue.find_first_of(LookupChars)) {
2067 if (UnquotedValue[I] != '\r' && UnquotedValue[I] != '\n') {
2068 llvm::append_range(Storage, UnquotedValue.take_front(I));
2069 UnquotedValue = UnescapeCallback(UnquotedValue.drop_front(I), Storage);
2070 LastNewLineAddedAs = '\0';
2071 continue;
2072 }
2073 if (size_t LastNonSWhite = UnquotedValue.find_last_not_of(" \t", I);
2074 LastNonSWhite != StringRef::npos) {
2075 llvm::append_range(Storage, UnquotedValue.take_front(LastNonSWhite + 1));
2076 Storage.push_back(' ');
2077 LastNewLineAddedAs = ' ';
2078 } else {
2079 // Note: we can't just check if the last character in Storage is ' ',
2080 // '\n', or something else; that would give a wrong result for double
2081 // quoted values containing an escaped space character before a new-line
2082 // character.
2083 switch (LastNewLineAddedAs) {
2084 case ' ':
2085 assert(!Storage.empty() && Storage.back() == ' ');
2086 Storage.back() = '\n';
2087 LastNewLineAddedAs = '\n';
2088 break;
2089 case '\n':
2090 assert(!Storage.empty() && Storage.back() == '\n');
2091 Storage.push_back('\n');
2092 break;
2093 default:
2094 Storage.push_back(' ');
2095 LastNewLineAddedAs = ' ';
2096 break;
2097 }
2098 }
2099 // Handle Windows-style EOL
2100 if (UnquotedValue.substr(I, 2) == "\r\n")
2101 I++;
2102 UnquotedValue = UnquotedValue.drop_front(I + 1).ltrim(" \t");
2103 }
2104 llvm::append_range(Storage, UnquotedValue);
2105 return StringRef(Storage.begin(), Storage.size());
2106}
2107
2108StringRef
2109ScalarNode::getDoubleQuotedValue(StringRef RawValue,
2110 SmallVectorImpl<char> &Storage) const {
2111 assert(RawValue.size() >= 2 && RawValue.front() == '"' &&
2112 RawValue.back() == '"');
2113 StringRef UnquotedValue = RawValue.substr(1, RawValue.size() - 2);
2114
2115 auto UnescapeFunc = [this](StringRef UnquotedValue,
2116 SmallVectorImpl<char> &Storage) {
2117 assert(UnquotedValue.take_front(1) == "\\");
2118 if (UnquotedValue.size() == 1) {
2119 Token T;
2120 T.Range = UnquotedValue;
2121 setError("Unrecognized escape code", T);
2122 Storage.clear();
2123 return StringRef();
2124 }
2125 UnquotedValue = UnquotedValue.drop_front(1);
2126 switch (UnquotedValue[0]) {
2127 default: {
2128 Token T;
2129 T.Range = UnquotedValue.take_front(1);
2130 setError("Unrecognized escape code", T);
2131 Storage.clear();
2132 return StringRef();
2133 }
2134 case '\r':
2135 // Shrink the Windows-style EOL.
2136 if (UnquotedValue.size() >= 2 && UnquotedValue[1] == '\n')
2137 UnquotedValue = UnquotedValue.drop_front(1);
2138 [[fallthrough]];
2139 case '\n':
2140 return UnquotedValue.drop_front(1).ltrim(" \t");
2141 case '0':
2142 Storage.push_back(0x00);
2143 break;
2144 case 'a':
2145 Storage.push_back(0x07);
2146 break;
2147 case 'b':
2148 Storage.push_back(0x08);
2149 break;
2150 case 't':
2151 case 0x09:
2152 Storage.push_back(0x09);
2153 break;
2154 case 'n':
2155 Storage.push_back(0x0A);
2156 break;
2157 case 'v':
2158 Storage.push_back(0x0B);
2159 break;
2160 case 'f':
2161 Storage.push_back(0x0C);
2162 break;
2163 case 'r':
2164 Storage.push_back(0x0D);
2165 break;
2166 case 'e':
2167 Storage.push_back(0x1B);
2168 break;
2169 case ' ':
2170 Storage.push_back(0x20);
2171 break;
2172 case '"':
2173 Storage.push_back(0x22);
2174 break;
2175 case '/':
2176 Storage.push_back(0x2F);
2177 break;
2178 case '\\':
2179 Storage.push_back(0x5C);
2180 break;
2181 case 'N':
2182 encodeUTF8(0x85, Storage);
2183 break;
2184 case '_':
2185 encodeUTF8(0xA0, Storage);
2186 break;
2187 case 'L':
2188 encodeUTF8(0x2028, Storage);
2189 break;
2190 case 'P':
2191 encodeUTF8(0x2029, Storage);
2192 break;
2193 case 'x': {
2194 if (UnquotedValue.size() < 3)
2195 // TODO: Report error.
2196 break;
2197 unsigned int UnicodeScalarValue;
2198 if (UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue))
2199 // TODO: Report error.
2200 UnicodeScalarValue = 0xFFFD;
2201 encodeUTF8(UnicodeScalarValue, Storage);
2202 return UnquotedValue.drop_front(3);
2203 }
2204 case 'u': {
2205 if (UnquotedValue.size() < 5)
2206 // TODO: Report error.
2207 break;
2208 unsigned int UnicodeScalarValue;
2209 if (UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue))
2210 // TODO: Report error.
2211 UnicodeScalarValue = 0xFFFD;
2212 encodeUTF8(UnicodeScalarValue, Storage);
2213 return UnquotedValue.drop_front(5);
2214 }
2215 case 'U': {
2216 if (UnquotedValue.size() < 9)
2217 // TODO: Report error.
2218 break;
2219 unsigned int UnicodeScalarValue;
2220 if (UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue))
2221 // TODO: Report error.
2222 UnicodeScalarValue = 0xFFFD;
2223 encodeUTF8(UnicodeScalarValue, Storage);
2224 return UnquotedValue.drop_front(9);
2225 }
2226 }
2227 return UnquotedValue.drop_front(1);
2228 };
2229
2230 return parseScalarValue(UnquotedValue, Storage, "\\\r\n", UnescapeFunc);
2231}
2232
2233StringRef ScalarNode::getSingleQuotedValue(StringRef RawValue,
2234 SmallVectorImpl<char> &Storage) {
2235 assert(RawValue.size() >= 2 && RawValue.front() == '\'' &&
2236 RawValue.back() == '\'');
2237 StringRef UnquotedValue = RawValue.substr(1, RawValue.size() - 2);
2238
2239 auto UnescapeFunc = [](StringRef UnquotedValue,
2240 SmallVectorImpl<char> &Storage) {
2241 assert(UnquotedValue.take_front(2) == "''");
2242 Storage.push_back('\'');
2243 return UnquotedValue.drop_front(2);
2244 };
2245
2246 return parseScalarValue(UnquotedValue, Storage, "'\r\n", UnescapeFunc);
2247}
2248
2249StringRef ScalarNode::getPlainValue(StringRef RawValue,
2250 SmallVectorImpl<char> &Storage) {
2251 // Trim trailing whitespace ('b-char' and 's-white').
2252 // NOTE: Alternatively we could change the scanner to not include whitespace
2253 // here in the first place.
2254 RawValue = RawValue.rtrim("\r\n \t");
2255 return parseScalarValue(RawValue, Storage, "\r\n", nullptr);
2256}
2257
2259 if (Key)
2260 return Key;
2261 // Handle implicit null keys.
2262 {
2263 Token &t = peekNext();
2264 if ( t.Kind == Token::TK_BlockEnd
2265 || t.Kind == Token::TK_Value
2266 || t.Kind == Token::TK_Error) {
2267 return Key = new (getAllocator()) NullNode(Doc);
2268 }
2269 if (t.Kind == Token::TK_Key)
2270 getNext(); // skip TK_Key.
2271 }
2272
2273 // Handle explicit null keys.
2274 Token &t = peekNext();
2275 if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Value) {
2276 return Key = new (getAllocator()) NullNode(Doc);
2277 }
2278
2279 // We've got a normal key.
2280 return Key = parseBlockNode();
2281}
2282
2284 if (Value)
2285 return Value;
2286
2287 if (Node* Key = getKey())
2288 Key->skip();
2289 else {
2290 setError("Null key in Key Value.", peekNext());
2291 return Value = new (getAllocator()) NullNode(Doc);
2292 }
2293
2294 if (failed())
2295 return Value = new (getAllocator()) NullNode(Doc);
2296
2297 // Handle implicit null values.
2298 {
2299 Token &t = peekNext();
2300 if ( t.Kind == Token::TK_BlockEnd
2302 || t.Kind == Token::TK_Key
2304 || t.Kind == Token::TK_Error) {
2305 return Value = new (getAllocator()) NullNode(Doc);
2306 }
2307
2308 if (t.Kind != Token::TK_Value) {
2309 setError("Unexpected token in Key Value.", t);
2310 return Value = new (getAllocator()) NullNode(Doc);
2311 }
2312 getNext(); // skip TK_Value.
2313 }
2314
2315 // Handle explicit null values.
2316 Token &t = peekNext();
2317 if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Key) {
2318 return Value = new (getAllocator()) NullNode(Doc);
2319 }
2320
2321 // We got a normal value.
2322 return Value = parseBlockNode();
2323}
2324
2325void MappingNode::increment() {
2326 if (failed()) {
2327 IsAtEnd = true;
2328 CurrentEntry = nullptr;
2329 return;
2330 }
2331 if (CurrentEntry) {
2332 CurrentEntry->skip();
2333 if (Type == MT_Inline) {
2334 IsAtEnd = true;
2335 CurrentEntry = nullptr;
2336 return;
2337 }
2338 }
2339 Token T = peekNext();
2340 if (T.Kind == Token::TK_Key || T.Kind == Token::TK_Scalar) {
2341 // KeyValueNode eats the TK_Key. That way it can detect null keys.
2342 CurrentEntry = new (getAllocator()) KeyValueNode(Doc);
2343 } else if (Type == MT_Block) {
2344 switch (T.Kind) {
2345 case Token::TK_BlockEnd:
2346 getNext();
2347 IsAtEnd = true;
2348 CurrentEntry = nullptr;
2349 break;
2350 default:
2351 setError("Unexpected token. Expected Key or Block End", T);
2352 [[fallthrough]];
2353 case Token::TK_Error:
2354 IsAtEnd = true;
2355 CurrentEntry = nullptr;
2356 }
2357 } else {
2358 switch (T.Kind) {
2360 // Eat the flow entry and recurse.
2361 getNext();
2362 return increment();
2364 getNext();
2365 [[fallthrough]];
2366 case Token::TK_Error:
2367 // Set this to end iterator.
2368 IsAtEnd = true;
2369 CurrentEntry = nullptr;
2370 break;
2371 default:
2372 setError( "Unexpected token. Expected Key, Flow Entry, or Flow "
2373 "Mapping End."
2374 , T);
2375 IsAtEnd = true;
2376 CurrentEntry = nullptr;
2377 }
2378 }
2379}
2380
2382 if (failed()) {
2383 IsAtEnd = true;
2384 CurrentEntry = nullptr;
2385 return;
2386 }
2387 if (CurrentEntry)
2388 CurrentEntry->skip();
2389 Token T = peekNext();
2390 if (SeqType == ST_Block) {
2391 switch (T.Kind) {
2393 getNext();
2394 CurrentEntry = parseBlockNode();
2395 if (!CurrentEntry) { // An error occurred.
2396 IsAtEnd = true;
2397 CurrentEntry = nullptr;
2398 }
2399 break;
2400 case Token::TK_BlockEnd:
2401 getNext();
2402 IsAtEnd = true;
2403 CurrentEntry = nullptr;
2404 break;
2405 default:
2406 setError( "Unexpected token. Expected Block Entry or Block End."
2407 , T);
2408 [[fallthrough]];
2409 case Token::TK_Error:
2410 IsAtEnd = true;
2411 CurrentEntry = nullptr;
2412 }
2413 } else if (SeqType == ST_Indentless) {
2414 switch (T.Kind) {
2416 getNext();
2417 CurrentEntry = parseBlockNode();
2418 if (!CurrentEntry) { // An error occurred.
2419 IsAtEnd = true;
2420 CurrentEntry = nullptr;
2421 }
2422 break;
2423 default:
2424 case Token::TK_Error:
2425 IsAtEnd = true;
2426 CurrentEntry = nullptr;
2427 }
2428 } else if (SeqType == ST_Flow) {
2429 switch (T.Kind) {
2431 // Eat the flow entry and recurse.
2432 getNext();
2433 WasPreviousTokenFlowEntry = true;
2434 return increment();
2436 getNext();
2437 [[fallthrough]];
2438 case Token::TK_Error:
2439 // Set this to end iterator.
2440 IsAtEnd = true;
2441 CurrentEntry = nullptr;
2442 break;
2446 setError("Could not find closing ]!", T);
2447 // Set this to end iterator.
2448 IsAtEnd = true;
2449 CurrentEntry = nullptr;
2450 break;
2451 default:
2452 if (!WasPreviousTokenFlowEntry) {
2453 setError("Expected , between entries!", T);
2454 IsAtEnd = true;
2455 CurrentEntry = nullptr;
2456 break;
2457 }
2458 // Otherwise it must be a flow entry.
2459 CurrentEntry = parseBlockNode();
2460 if (!CurrentEntry) {
2461 IsAtEnd = true;
2462 }
2463 WasPreviousTokenFlowEntry = false;
2464 break;
2465 }
2466 }
2467}
2468
2469Document::Document(Stream &S) : stream(S), Root(nullptr) {
2470 // Tag maps starts with two default mappings.
2471 TagMap["!"] = "!";
2472 TagMap["!!"] = "tag:yaml.org,2002:";
2473
2474 if (parseDirectives())
2475 expectToken(Token::TK_DocumentStart);
2476 Token &T = peekNext();
2477 if (T.Kind == Token::TK_DocumentStart)
2478 getNext();
2479}
2480
2482 if (stream.scanner->failed())
2483 return false;
2484 if (!Root && !getRoot())
2485 return false;
2486 Root->skip();
2487 Token &T = peekNext();
2488 if (T.Kind == Token::TK_StreamEnd)
2489 return false;
2490 if (T.Kind == Token::TK_DocumentEnd) {
2491 getNext();
2492 return skip();
2493 }
2494 return true;
2495}
2496
2497Token &Document::peekNext() {
2498 return stream.scanner->peekNext();
2499}
2500
2501Token Document::getNext() {
2502 return stream.scanner->getNext();
2503}
2504
2505void Document::setError(const Twine &Message, Token &Location) const {
2506 stream.scanner->setError(Message, Location.Range.begin());
2507}
2508
2509bool Document::failed() const {
2510 return stream.scanner->failed();
2511}
2512
2514 Token T = peekNext();
2515 // Handle properties.
2516 Token AnchorInfo;
2517 Token TagInfo;
2518parse_property:
2519 switch (T.Kind) {
2520 case Token::TK_Alias:
2521 getNext();
2522 return new (NodeAllocator) AliasNode(stream.CurrentDoc, T.Range.substr(1));
2523 case Token::TK_Anchor:
2524 if (AnchorInfo.Kind == Token::TK_Anchor) {
2525 setError("Already encountered an anchor for this node!", T);
2526 return nullptr;
2527 }
2528 AnchorInfo = getNext(); // Consume TK_Anchor.
2529 T = peekNext();
2530 goto parse_property;
2531 case Token::TK_Tag:
2532 if (TagInfo.Kind == Token::TK_Tag) {
2533 setError("Already encountered a tag for this node!", T);
2534 return nullptr;
2535 }
2536 TagInfo = getNext(); // Consume TK_Tag.
2537 T = peekNext();
2538 goto parse_property;
2539 default:
2540 break;
2541 }
2542
2543 switch (T.Kind) {
2545 // We got an unindented BlockEntry sequence. This is not terminated with
2546 // a BlockEnd.
2547 // Don't eat the TK_BlockEntry, SequenceNode needs it.
2548 return new (NodeAllocator) SequenceNode( stream.CurrentDoc
2549 , AnchorInfo.Range.substr(1)
2550 , TagInfo.Range
2553 getNext();
2554 return new (NodeAllocator)
2555 SequenceNode( stream.CurrentDoc
2556 , AnchorInfo.Range.substr(1)
2557 , TagInfo.Range
2560 getNext();
2561 return new (NodeAllocator)
2562 MappingNode( stream.CurrentDoc
2563 , AnchorInfo.Range.substr(1)
2564 , TagInfo.Range
2567 getNext();
2568 return new (NodeAllocator)
2569 SequenceNode( stream.CurrentDoc
2570 , AnchorInfo.Range.substr(1)
2571 , TagInfo.Range
2574 getNext();
2575 return new (NodeAllocator)
2576 MappingNode( stream.CurrentDoc
2577 , AnchorInfo.Range.substr(1)
2578 , TagInfo.Range
2580 case Token::TK_Scalar:
2581 getNext();
2582 return new (NodeAllocator)
2583 ScalarNode( stream.CurrentDoc
2584 , AnchorInfo.Range.substr(1)
2585 , TagInfo.Range
2586 , T.Range);
2587 case Token::TK_BlockScalar: {
2588 getNext();
2589 StringRef NullTerminatedStr(T.Value.c_str(), T.Value.length() + 1);
2590 StringRef StrCopy = NullTerminatedStr.copy(NodeAllocator).drop_back();
2591 return new (NodeAllocator)
2592 BlockScalarNode(stream.CurrentDoc, AnchorInfo.Range.substr(1),
2593 TagInfo.Range, StrCopy, T.Range);
2594 }
2595 case Token::TK_Key:
2596 // Don't eat the TK_Key, KeyValueNode expects it.
2597 return new (NodeAllocator)
2598 MappingNode( stream.CurrentDoc
2599 , AnchorInfo.Range.substr(1)
2600 , TagInfo.Range
2605 default:
2606 // TODO: Properly handle tags. "[!!str ]" should resolve to !!str "", not
2607 // !!null null.
2608 return new (NodeAllocator) NullNode(stream.CurrentDoc);
2611 case Token::TK_FlowEntry: {
2612 if (Root && (isa<MappingNode>(Root) || isa<SequenceNode>(Root)))
2613 return new (NodeAllocator) NullNode(stream.CurrentDoc);
2614
2615 setError("Unexpected token", T);
2616 return nullptr;
2617 }
2618 case Token::TK_Error:
2619 return nullptr;
2620 }
2621 llvm_unreachable("Control flow shouldn't reach here.");
2622 return nullptr;
2623}
2624
2625bool Document::parseDirectives() {
2626 bool isDirective = false;
2627 while (true) {
2628 Token T = peekNext();
2629 if (T.Kind == Token::TK_TagDirective) {
2630 parseTAGDirective();
2631 isDirective = true;
2632 } else if (T.Kind == Token::TK_VersionDirective) {
2633 parseYAMLDirective();
2634 isDirective = true;
2635 } else {
2636 break;
2637 }
2638 }
2639 return isDirective;
2640}
2641
2642void Document::parseYAMLDirective() {
2643 getNext(); // Eat %YAML <version>
2644}
2645
2646void Document::parseTAGDirective() {
2647 Token Tag = getNext(); // %TAG <handle> <prefix>
2648 StringRef T = Tag.Range;
2649 // Strip %TAG
2650 T = T.substr(T.find_first_of(" \t")).ltrim(" \t");
2651 std::size_t HandleEnd = T.find_first_of(" \t");
2652 StringRef TagHandle = T.substr(0, HandleEnd);
2653 StringRef TagPrefix = T.substr(HandleEnd).ltrim(" \t");
2654 TagMap[TagHandle] = TagPrefix;
2655}
2656
2657bool Document::expectToken(int TK) {
2658 Token T = getNext();
2659 if (T.Kind != TK) {
2660 setError("Unexpected token", T);
2661 return false;
2662 }
2663 return true;
2664}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_ATTRIBUTE_NOINLINE
LLVM_ATTRIBUTE_NOINLINE - On compilers where we have a directive to do so, mark a method "not for inl...
Definition Compiler.h:346
#define I(x, y, z)
Definition MD5.cpp:58
static Cursor skipComment(Cursor C)
Skip a line comment and return the updated cursor.
Definition MILexer.cpp:94
#define T
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
static bool encodeUTF8(size_t CodePoint, char *Output)
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallString class.
This file defines the SmallVector class.
This file contains some functions that are useful when dealing with strings.
static EncodingInfo getUnicodeEncoding(StringRef Input)
getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode encoding form of Input.
static bool is_ns_hex_digit(const char C)
static bool is_ns_word_char(const char C)
static unsigned getChompedLineBreaks(char ChompingIndicator, unsigned LineBreaks, StringRef Str)
Get the number of line breaks after chomping.
std::pair< uint32_t, unsigned > UTF8Decoded
The Unicode scalar value of a UTF-8 minimal well-formed code unit subsequence and the subsequence's l...
UnicodeEncodingForm
@ UEF_UTF32_LE
UTF-32 Little Endian.
@ UEF_UTF16_BE
UTF-16 Big Endian.
@ UEF_UTF16_LE
UTF-16 Little Endian.
@ UEF_UTF32_BE
UTF-32 Big Endian.
@ UEF_UTF8
UTF-8 or ascii.
@ UEF_Unknown
Not a valid Unicode encoding.
static LLVM_ATTRIBUTE_NOINLINE bool wasEscaped(StringRef::iterator First, StringRef::iterator Position)
static StringRef parseScalarValue(StringRef UnquotedValue, SmallVectorImpl< char > &Storage, StringRef LookupChars, std::function< StringRef(StringRef, SmallVectorImpl< char > &)> UnescapeCallback)
parseScalarValue - A common parsing routine for all flow scalar styles.
std::pair< UnicodeEncodingForm, unsigned > EncodingInfo
EncodingInfo - Holds the encoding type and length of the byte order mark if it exists.
BumpPtrList< Token > TokenQueueT
static UTF8Decoded decodeUTF8(StringRef Range)
IteratorImpl< T, typename list_type::iterator > iterator
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
Tagged union holding either a T or a Error.
Definition Error.h:485
const char * getBufferStart() const
const char * getBufferEnd() const
static std::unique_ptr< MemoryBuffer > getMemBuffer(StringRef InputData, StringRef BufferName="", bool RequiresNullTerminator=true)
Open the specified memory range as a MemoryBuffer.
Represents a location in source code.
Definition SMLoc.h:23
static SMLoc getFromPointer(const char *Ptr)
Definition SMLoc.h:36
Represents a range in source code.
Definition SMLoc.h:48
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void reserve(size_type N)
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This owns the files read by a parser, handles include stacks, and handles diagnostic wrangling.
Definition SourceMgr.h:32
LLVM_ABI void PrintMessage(raw_ostream &OS, SMLoc Loc, DiagKind Kind, const Twine &Msg, ArrayRef< SMRange > Ranges={}, ArrayRef< SMFixIt > FixIts={}, bool ShowColors=true) const
Emit a message about the specified location with the specified string.
unsigned AddNewSourceBuffer(std::unique_ptr< MemoryBuffer > F, SMLoc IncludeLoc)
Add a new source buffer to this source manager.
Definition SourceMgr.h:145
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
LLVM_ABI size_t find_last_not_of(char C, size_t From=npos) const
Find the last character in the string that is not C, or npos if not found.
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition StringRef.h:480
const char * iterator
Definition StringRef.h:59
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:581
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:269
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:151
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:619
char back() const
back - Get the last character in the string.
Definition StringRef.h:163
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:154
char front() const
front - Get the first character in the string.
Definition StringRef.h:157
size_t find_last_of(char C, size_t From=npos) const
Find the last character in the string that is C, or npos if not found.
Definition StringRef.h:409
StringRef ltrim(char Char) const
Return string with consecutive Char characters starting from the left removed.
Definition StringRef.h:800
size_t find_first_of(char C, size_t From=0) const
Find the first character in the string that is C, or npos if not found.
Definition StringRef.h:384
StringRef rtrim(char Char) const
Return string with consecutive Char characters starting from the right removed.
Definition StringRef.h:812
StringRef take_front(size_t N=1) const
Return a StringRef equal to 'this' but with only the first N elements remaining.
Definition StringRef.h:590
StringRef copy(Allocator &A) const
Definition StringRef.h:170
static constexpr size_t npos
Definition StringRef.h:57
StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Definition StringRef.h:626
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
Represents an alias to a Node with an anchor.
Definition YAMLParser.h:521
A block scalar node is an opaque datum that can be presented as a series of zero or more Unicode scal...
Definition YAMLParser.h:262
LLVM_ABI Node * parseBlockNode()
Root for parsing a node. Returns a single node.
LLVM_ABI bool skip()
Finish parsing the current document and return true if there are more.
Node * getRoot()
Parse and return the root level node.
Definition YAMLParser.h:550
friend class Node
Definition YAMLParser.h:559
LLVM_ABI Document(Stream &ParentStream)
The Input class is used to parse a yaml document into in-memory structs and vectors.
Node * getValue()
Parse and return the value.
void skip() override
Definition YAMLParser.h:313
Node * getKey()
Parse and return the key.
Represents a YAML map created from either a block map or a flow map.
Definition YAMLParser.h:421
@ MT_Inline
An inline mapping node is used for "[key: value]".
Definition YAMLParser.h:428
Abstract base class for all Nodes.
Definition YAMLParser.h:121
StringRef getRawTag() const
Get the tag as it was written in the document.
Definition YAMLParser.h:161
unsigned int getType() const
Definition YAMLParser.h:180
std::string getVerbatimTag() const
Get the verbatim tag for a given Node.
bool failed() const
std::unique_ptr< Document > & Doc
Definition YAMLParser.h:183
Node(unsigned int Type, std::unique_ptr< Document > &, StringRef Anchor, StringRef Tag)
virtual void skip()
Definition YAMLParser.h:178
BumpPtrAllocator & getAllocator()
Node * parseBlockNode()
void setError(const Twine &Message, Token &Location) const
A null value.
Definition YAMLParser.h:199
A scalar node is an opaque datum that can be presented as a series of zero or more Unicode scalar val...
Definition YAMLParser.h:214
StringRef getValue(SmallVectorImpl< char > &Storage) const
Gets the value of this node as a StringRef.
Scans YAML tokens from a MemoryBuffer.
Scanner(StringRef Input, SourceMgr &SM, bool ShowColors=true, std::error_code *EC=nullptr)
void setError(const Twine &Message, StringRef::iterator Position)
Token getNext()
Parse the next token and pop it from the queue.
bool failed()
Returns true if an error occurred while parsing.
void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message, ArrayRef< SMRange > Ranges={})
Token & peekNext()
Parse the next token and return it without popping it.
Represents a YAML sequence created from either a block sequence or a flow sequence.
Definition YAMLParser.h:469
This class represents a YAML stream potentially containing multiple documents.
Definition YAMLParser.h:88
LLVM_ABI document_iterator end()
LLVM_ABI document_iterator begin()
LLVM_ABI ~Stream()
LLVM_ABI Stream(StringRef Input, SourceMgr &, bool ShowColors=true, std::error_code *EC=nullptr)
This keeps a reference to the string referenced by Input.
LLVM_ABI bool failed()
friend class Document
Definition YAMLParser.h:114
LLVM_ABI void printError(Node *N, const Twine &Msg, SourceMgr::DiagKind Kind=SourceMgr::DK_Error)
LLVM_ABI void skip()
Iterator abstraction for Documents over a Stream.
Definition YAMLParser.h:595
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
constexpr double e
Definition MathExtras.h:47
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
LLVM_ABI bool isPrintable(int UCS)
Determines if a character is likely to be displayed correctly on the terminal.
Definition Unicode.cpp:27
LLVM_ABI bool dumpTokens(StringRef Input, raw_ostream &)
Dump all the tokens in this stream to OS.
LLVM_ABI std::optional< bool > parseBool(StringRef S)
Parse S as a bool according to https://yaml.org/type/bool.html.
LLVM_ABI bool scanTokens(StringRef Input)
Scans all tokens in input without outputting anything.
void skip(CollectionType &C)
Definition YAMLParser.h:405
LLVM_ABI std::string escape(StringRef Input, bool EscapePrintable=true)
Escape Input for a double quoted scalar; if EscapePrintable is true, all UTF8 sequences will be escap...
This is an optimization pass for GlobalISel generic memory operations.
std::error_code make_error_code(BitcodeError E)
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition Error.h:198
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2116
std::string utohexstr(uint64_t X, bool LowerCase=false, unsigned Width=0)
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
bool isAlpha(char C)
Checks if character C is a valid letter as classified by "C" locale.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
bool isAlnum(char C)
Checks whether character C is either a decimal digit or an uppercase or lowercase letter as classifie...
BumpPtrAllocatorImpl BumpPtrAllocator
The standard BumpPtrAllocator which just uses the default template parameters.
Definition Allocator.h:383
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
@ Other
Any other memory.
Definition ModRef.h:68
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:71
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1877
AllocatorList< T, BumpPtrAllocator > BumpPtrList
#define N
Token - A single YAML token.
enum llvm::yaml::Token::TokenKind Kind
std::string Value
The value of a block scalar node.
StringRef Range
A string of length 0 or more whose begin() points to the logical location of the token in the input.