LCOV - llvm-toolchain.info - lib/Support/YAMLParser.cpp

LCOV - code coverage report

Current view:	top level - lib/Support - YAMLParser.cpp (source / functions)		Hit	Total	Coverage
Test:	llvm-toolchain.info	Lines:	1044	1229	84.9 %
Date:	2018-10-20 13:21:21	Functions:	83	97	85.6 %
Legend:	Lines: hit not hit

          Line data    Source code

       1             : //===- YAMLParser.cpp - Simple YAML parser --------------------------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : //  This file implements a YAML parser.
      11             : //
      12             : //===----------------------------------------------------------------------===//
      13             : 
      14             : #include "llvm/Support/YAMLParser.h"
      15             : #include "llvm/ADT/AllocatorList.h"
      16             : #include "llvm/ADT/ArrayRef.h"
      17             : #include "llvm/ADT/None.h"
      18             : #include "llvm/ADT/STLExtras.h"
      19             : #include "llvm/ADT/SmallString.h"
      20             : #include "llvm/ADT/SmallVector.h"
      21             : #include "llvm/ADT/StringExtras.h"
      22             : #include "llvm/ADT/StringRef.h"
      23             : #include "llvm/ADT/Twine.h"
      24             : #include "llvm/Support/Compiler.h"
      25             : #include "llvm/Support/ErrorHandling.h"
      26             : #include "llvm/Support/MemoryBuffer.h"
      27             : #include "llvm/Support/SMLoc.h"
      28             : #include "llvm/Support/SourceMgr.h"
      29             : #include "llvm/Support/Unicode.h"
      30             : #include "llvm/Support/raw_ostream.h"
      31             : #include <algorithm>
      32             : #include <cassert>
      33             : #include <cstddef>
      34             : #include <cstdint>
      35             : #include <map>
      36             : #include <memory>
      37             : #include <string>
      38             : #include <system_error>
      39             : #include <utility>
      40             : 
      41             : using namespace llvm;
      42             : using namespace yaml;
      43             : 
      44             : enum UnicodeEncodingForm {
      45             :   UEF_UTF32_LE, ///< UTF-32 Little Endian
      46             :   UEF_UTF32_BE, ///< UTF-32 Big Endian
      47             :   UEF_UTF16_LE, ///< UTF-16 Little Endian
      48             :   UEF_UTF16_BE, ///< UTF-16 Big Endian
      49             :   UEF_UTF8,     ///< UTF-8 or ascii.
      50             :   UEF_Unknown   ///< Not a valid Unicode encoding.
      51             : };
      52             : 
      53             : /// EncodingInfo - Holds the encoding type and length of the byte order mark if
      54             : ///                it exists. Length is in {0, 2, 3, 4}.
      55             : using EncodingInfo = std::pair<UnicodeEncodingForm, unsigned>;
      56             : 
      57             : /// getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode
      58             : ///                      encoding form of \a Input.
      59             : ///
      60             : /// @param Input A string of length 0 or more.
      61             : /// @returns An EncodingInfo indicating the Unicode encoding form of the input
      62             : ///          and how long the byte order mark is if one exists.
      63        4482 : static EncodingInfo getUnicodeEncoding(StringRef Input) {
      64        4482 :   if (Input.empty())
      65         456 :     return std::make_pair(UEF_Unknown, 0);
      66             : 
      67        4026 :   switch (uint8_t(Input[0])) {
      68             :   case 0x00:
      69           0 :     if (Input.size() >= 4) {
      70             :       if (  Input[1] == 0
      71           0 :          && uint8_t(Input[2]) == 0xFE
      72           0 :          && uint8_t(Input[3]) == 0xFF)
      73           0 :         return std::make_pair(UEF_UTF32_BE, 4);
      74           0 :       if (Input[1] == 0 && Input[2] == 0 && Input[3] != 0)
      75           0 :         return std::make_pair(UEF_UTF32_BE, 0);
      76             :     }
      77             : 
      78           0 :     if (Input.size() >= 2 && Input[1] != 0)
      79           0 :       return std::make_pair(UEF_UTF16_BE, 0);
      80           0 :     return std::make_pair(UEF_Unknown, 0);
      81             :   case 0xFF:
      82             :     if (  Input.size() >= 4
      83           0 :        && uint8_t(Input[1]) == 0xFE
      84           0 :        && Input[2] == 0
      85           0 :        && Input[3] == 0)
      86           0 :       return std::make_pair(UEF_UTF32_LE, 4);
      87             : 
      88           0 :     if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFE)
      89           0 :       return std::make_pair(UEF_UTF16_LE, 2);
      90           0 :     return std::make_pair(UEF_Unknown, 0);
      91             :   case 0xFE:
      92           0 :     if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFF)
      93           0 :       return std::make_pair(UEF_UTF16_BE, 2);
      94           0 :     return std::make_pair(UEF_Unknown, 0);
      95             :   case 0xEF:
      96             :     if (  Input.size() >= 3
      97           3 :        && uint8_t(Input[1]) == 0xBB
      98           6 :        && uint8_t(Input[2]) == 0xBF)
      99           3 :       return std::make_pair(UEF_UTF8, 3);
     100           0 :     return std::make_pair(UEF_Unknown, 0);
     101             :   }
     102             : 
     103             :   // It could still be utf-32 or utf-16.
     104        4023 :   if (Input.size() >= 4 && Input[1] == 0 && Input[2] == 0 && Input[3] == 0)
     105           0 :     return std::make_pair(UEF_UTF32_LE, 0);
     106             : 
     107        4023 :   if (Input.size() >= 2 && Input[1] == 0)
     108           0 :     return std::make_pair(UEF_UTF16_LE, 0);
     109             : 
     110        4023 :   return std::make_pair(UEF_UTF8, 0);
     111             : }
     112             : 
     113             : /// Pin the vtables to this file.
     114           0 : void Node::anchor() {}
     115           0 : void NullNode::anchor() {}
     116           0 : void ScalarNode::anchor() {}
     117           0 : void BlockScalarNode::anchor() {}
     118           0 : void KeyValueNode::anchor() {}
     119           0 : void MappingNode::anchor() {}
     120           0 : void SequenceNode::anchor() {}
     121           0 : void AliasNode::anchor() {}
     122             : 
     123             : namespace llvm {
     124             : namespace yaml {
     125             : 
     126             : /// Token - A single YAML token.
     127     5316910 : struct Token {
     128             :   enum TokenKind {
     129             :     TK_Error, // Uninitialized token.
     130             :     TK_StreamStart,
     131             :     TK_StreamEnd,
     132             :     TK_VersionDirective,
     133             :     TK_TagDirective,
     134             :     TK_DocumentStart,
     135             :     TK_DocumentEnd,
     136             :     TK_BlockEntry,
     137             :     TK_BlockEnd,
     138             :     TK_BlockSequenceStart,
     139             :     TK_BlockMappingStart,
     140             :     TK_FlowEntry,
     141             :     TK_FlowSequenceStart,
     142             :     TK_FlowSequenceEnd,
     143             :     TK_FlowMappingStart,
     144             :     TK_FlowMappingEnd,
     145             :     TK_Key,
     146             :     TK_Value,
     147             :     TK_Scalar,
     148             :     TK_BlockScalar,
     149             :     TK_Alias,
     150             :     TK_Anchor,
     151             :     TK_Tag
     152             :   } Kind = TK_Error;
     153             : 
     154             :   /// A string of length 0 or more whose begin() points to the logical location
     155             :   /// of the token in the input.
     156             :   StringRef Range;
     157             : 
     158             :   /// The value of a block scalar node.
     159             :   std::string Value;
     160             : 
     161     1394409 :   Token() = default;
     162             : };
     163             : 
     164             : } // end namespace yaml
     165             : } // end namespace llvm
     166             : 
     167             : using TokenQueueT = BumpPtrList<Token>;
     168             : 
     169             : namespace {
     170             : 
     171             : /// This struct is used to track simple keys.
     172             : ///
     173             : /// Simple keys are handled by creating an entry in SimpleKeys for each Token
     174             : /// which could legally be the start of a simple key. When peekNext is called,
     175             : /// if the Token to be returned is referenced by a SimpleKey, we continue
     176             : /// tokenizing until that potential simple key has either been found not to be
     177             : /// a simple key (we moved on to the next line or went further than 1024 chars),
     178             : /// or we run into a Value, in which case we insert a Key token (and possibly
     179             : /// others) before the SimpleKey's Tok.
     180             : struct SimpleKey {
     181             :   TokenQueueT::iterator Tok;
     182             :   unsigned Column;
     183             :   unsigned Line;
     184             :   unsigned FlowLevel;
     185             :   bool IsRequired;
     186             : 
     187             :   bool operator ==(const SimpleKey &Other) {
     188             :     return Tok == Other.Tok;
     189             :   }
     190             : };
     191             : 
     192             : } // end anonymous namespace
     193             : 
     194             : /// The Unicode scalar value of a UTF-8 minimal well-formed code unit
     195             : ///        subsequence and the subsequence's length in code units (uint8_t).
     196             : ///        A length of 0 represents an error.
     197             : using UTF8Decoded = std::pair<uint32_t, unsigned>;
     198             : 
     199          84 : static UTF8Decoded decodeUTF8(StringRef Range) {
     200             :   StringRef::iterator Position= Range.begin();
     201          84 :   StringRef::iterator End = Range.end();
     202             :   // 1 byte: [0x00, 0x7f]
     203             :   // Bit pattern: 0xxxxxxx
     204          84 :   if ((*Position & 0x80) == 0) {
     205           0 :      return std::make_pair(*Position, 1);
     206             :   }
     207             :   // 2 bytes: [0x80, 0x7ff]
     208             :   // Bit pattern: 110xxxxx 10xxxxxx
     209          84 :   if (Position + 1 != End &&
     210          84 :       ((*Position & 0xE0) == 0xC0) &&
     211          57 :       ((*(Position + 1) & 0xC0) == 0x80)) {
     212         114 :     uint32_t codepoint = ((*Position & 0x1F) << 6) |
     213          57 :                           (*(Position + 1) & 0x3F);
     214          57 :     if (codepoint >= 0x80)
     215          57 :       return std::make_pair(codepoint, 2);
     216             :   }
     217             :   // 3 bytes: [0x800, 0xffff]
     218             :   // Bit pattern: 1110xxxx 10xxxxxx 10xxxxxx
     219          27 :   if (Position + 2 != End &&
     220          27 :       ((*Position & 0xF0) == 0xE0) &&
     221          27 :       ((*(Position + 1) & 0xC0) == 0x80) &&
     222          27 :       ((*(Position + 2) & 0xC0) == 0x80)) {
     223          54 :     uint32_t codepoint = ((*Position & 0x0F) << 12) |
     224          54 :                          ((*(Position + 1) & 0x3F) << 6) |
     225          27 :                           (*(Position + 2) & 0x3F);
     226             :     // Codepoints between 0xD800 and 0xDFFF are invalid, as
     227             :     // they are high / low surrogate halves used by UTF-16.
     228          27 :     if (codepoint >= 0x800 &&
     229          27 :         (codepoint < 0xD800 || codepoint > 0xDFFF))
     230          27 :       return std::make_pair(codepoint, 3);
     231             :   }
     232             :   // 4 bytes: [0x10000, 0x10FFFF]
     233             :   // Bit pattern: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
     234           0 :   if (Position + 3 != End &&
     235           0 :       ((*Position & 0xF8) == 0xF0) &&
     236           0 :       ((*(Position + 1) & 0xC0) == 0x80) &&
     237           0 :       ((*(Position + 2) & 0xC0) == 0x80) &&
     238           0 :       ((*(Position + 3) & 0xC0) == 0x80)) {
     239           0 :     uint32_t codepoint = ((*Position & 0x07) << 18) |
     240           0 :                          ((*(Position + 1) & 0x3F) << 12) |
     241           0 :                          ((*(Position + 2) & 0x3F) << 6) |
     242           0 :                           (*(Position + 3) & 0x3F);
     243           0 :     if (codepoint >= 0x10000 && codepoint <= 0x10FFFF)
     244           0 :       return std::make_pair(codepoint, 4);
     245             :   }
     246           0 :   return std::make_pair(0, 0);
     247             : }
     248             : 
     249             : namespace llvm {
     250             : namespace yaml {
     251             : 
     252             : /// Scans YAML tokens from a MemoryBuffer.
     253             : class Scanner {
     254             : public:
     255             :   Scanner(StringRef Input, SourceMgr &SM, bool ShowColors = true,
     256             :           std::error_code *EC = nullptr);
     257             :   Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors = true,
     258             :           std::error_code *EC = nullptr);
     259             : 
     260             :   /// Parse the next token and return it without popping it.
     261             :   Token &peekNext();
     262             : 
     263             :   /// Parse the next token and pop it from the queue.
     264             :   Token getNext();
     265             : 
     266           0 :   void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message,
     267             :                   ArrayRef<SMRange> Ranges = None) {
     268         181 :     SM.PrintMessage(Loc, Kind, Message, Ranges, /* FixIts= */ None, ShowColors);
     269           0 :   }
     270             : 
     271          45 :   void setError(const Twine &Message, StringRef::iterator Position) {
     272          45 :     if (Current >= End)
     273          31 :       Current = End - 1;
     274             : 
     275             :     // propagate the error if possible
     276          45 :     if (EC)
     277           3 :       *EC = make_error_code(std::errc::invalid_argument);
     278             : 
     279             :     // Don't print out more errors after the first one we encounter. The rest
     280             :     // are just the result of the first, and have no meaning.
     281          45 :     if (!Failed)
     282          45 :       printError(SMLoc::getFromPointer(Current), SourceMgr::DK_Error, Message);
     283          45 :     Failed = true;
     284          45 :   }
     285             : 
     286             :   void setError(const Twine &Message) {
     287           1 :     setError(Message, Current);
     288             :   }
     289             : 
     290             :   /// Returns true if an error occurred while parsing.
     291           0 :   bool failed() {
     292           0 :     return Failed;
     293             :   }
     294             : 
     295             : private:
     296             :   void init(MemoryBufferRef Buffer);
     297             : 
     298           0 :   StringRef currentInput() {
     299        4482 :     return StringRef(Current, End - Current);
     300             :   }
     301             : 
     302             :   /// Decode a UTF-8 minimal well-formed code unit subsequence starting
     303             :   ///        at \a Position.
     304             :   ///
     305             :   /// If the UTF-8 code units starting at Position do not form a well-formed
     306             :   /// code unit subsequence, then the Unicode scalar value is 0, and the length
     307             :   /// is 0.
     308           0 :   UTF8Decoded decodeUTF8(StringRef::iterator Position) {
     309          64 :     return ::decodeUTF8(StringRef(Position, End - Position));
     310             :   }
     311             : 
     312             :   // The following functions are based on the grammar rules in the YAML spec.
     313             :   // The style of the function names is meant to closely match how they are
     314             :   // written in the spec. The number within the [] is the number of the grammar
     315             :   // rule in the spec.
     316             :   //
     317             :   // See 4.2 [Production Naming Conventions] for the meaning of the prefixes.
     318             :   //
     319             :   // c-
     320             :   //   A production starting and ending with a special character.
     321             :   // b-
     322             :   //   A production matching a single line break.
     323             :   // nb-
     324             :   //   A production starting and ending with a non-break character.
     325             :   // s-
     326             :   //   A production starting and ending with a white space character.
     327             :   // ns-
     328             :   //   A production starting and ending with a non-space character.
     329             :   // l-
     330             :   //   A production matching complete line(s).
     331             : 
     332             :   /// Skip a single nb-char[27] starting at Position.
     333             :   ///
     334             :   /// A nb-char is 0x9 | [0x20-0x7E] | 0x85 | [0xA0-0xD7FF] | [0xE000-0xFEFE]
     335             :   ///                  | [0xFF00-0xFFFD] | [0x10000-0x10FFFF]
     336             :   ///
     337             :   /// @returns The code unit after the nb-char, or Position if it's not an
     338             :   ///          nb-char.
     339             :   StringRef::iterator skip_nb_char(StringRef::iterator Position);
     340             : 
     341             :   /// Skip a single b-break[28] starting at Position.
     342             :   ///
     343             :   /// A b-break is 0xD 0xA | 0xD | 0xA
     344             :   ///
     345             :   /// @returns The code unit after the b-break, or Position if it's not a
     346             :   ///          b-break.
     347             :   StringRef::iterator skip_b_break(StringRef::iterator Position);
     348             : 
     349             :   /// Skip a single s-space[31] starting at Position.
     350             :   ///
     351             :   /// An s-space is 0x20
     352             :   ///
     353             :   /// @returns The code unit after the s-space, or Position if it's not a
     354             :   ///          s-space.
     355             :   StringRef::iterator skip_s_space(StringRef::iterator Position);
     356             : 
     357             :   /// Skip a single s-white[33] starting at Position.
     358             :   ///
     359             :   /// A s-white is 0x20 | 0x9
     360             :   ///
     361             :   /// @returns The code unit after the s-white, or Position if it's not a
     362             :   ///          s-white.
     363             :   StringRef::iterator skip_s_white(StringRef::iterator Position);
     364             : 
     365             :   /// Skip a single ns-char[34] starting at Position.
     366             :   ///
     367             :   /// A ns-char is nb-char - s-white
     368             :   ///
     369             :   /// @returns The code unit after the ns-char, or Position if it's not a
     370             :   ///          ns-char.
     371             :   StringRef::iterator skip_ns_char(StringRef::iterator Position);
     372             : 
     373             :   using SkipWhileFunc = StringRef::iterator (Scanner::*)(StringRef::iterator);
     374             : 
     375             :   /// Skip minimal well-formed code unit subsequences until Func
     376             :   ///        returns its input.
     377             :   ///
     378             :   /// @returns The code unit after the last minimal well-formed code unit
     379             :   ///          subsequence that Func accepted.
     380             :   StringRef::iterator skip_while( SkipWhileFunc Func
     381             :                                 , StringRef::iterator Position);
     382             : 
     383             :   /// Skip minimal well-formed code unit subsequences until Func returns its
     384             :   /// input.
     385             :   void advanceWhile(SkipWhileFunc Func);
     386             : 
     387             :   /// Scan ns-uri-char[39]s starting at Current.
     388             :   ///
     389             :   /// This updates Current and Column while scanning.
     390             :   void scan_ns_uri_char();
     391             : 
     392             :   /// Consume a minimal well-formed code unit subsequence starting at
     393             :   ///        \a Current. Return false if it is not the same Unicode scalar
     394             :   ///        value as \a Expected. This updates \a Column.
     395             :   bool consume(uint32_t Expected);
     396             : 
     397             :   /// Skip \a Distance UTF-8 code units. Updates \a Current and \a Column.
     398             :   void skip(uint32_t Distance);
     399             : 
     400             :   /// Return true if the minimal well-formed code unit subsequence at
     401             :   ///        \a Position is whitespace or a new line.
     402             :   bool isBlankOrBreak(StringRef::iterator Position);
     403             : 
     404             :   /// Consume a single b-break[28] if it's present at the current position.
     405             :   ///
     406             :   /// Return false if the code unit at the current position isn't a line break.
     407             :   bool consumeLineBreakIfPresent();
     408             : 
     409             :   /// If IsSimpleKeyAllowed, create and push_back a new SimpleKey.
     410             :   void saveSimpleKeyCandidate( TokenQueueT::iterator Tok
     411             :                              , unsigned AtColumn
     412             :                              , bool IsRequired);
     413             : 
     414             :   /// Remove simple keys that can no longer be valid simple keys.
     415             :   ///
     416             :   /// Invalid simple keys are not on the current line or are further than 1024
     417             :   /// columns back.
     418             :   void removeStaleSimpleKeyCandidates();
     419             : 
     420             :   /// Remove all simple keys on FlowLevel \a Level.
     421             :   void removeSimpleKeyCandidatesOnFlowLevel(unsigned Level);
     422             : 
     423             :   /// Unroll indentation in \a Indents back to \a ToColumn. Creates BlockEnd
     424             :   ///        tokens if needed.
     425             :   bool unrollIndent(int ToColumn);
     426             : 
     427             :   /// Increase indent to \a ToColumn. Creates \a Kind token at \a InsertPoint
     428             :   ///        if needed.
     429             :   bool rollIndent( int ToColumn
     430             :                  , Token::TokenKind Kind
     431             :                  , TokenQueueT::iterator InsertPoint);
     432             : 
     433             :   /// Skip a single-line comment when the comment starts at the current
     434             :   /// position of the scanner.
     435             :   void skipComment();
     436             : 
     437             :   /// Skip whitespace and comments until the start of the next token.
     438             :   void scanToNextToken();
     439             : 
     440             :   /// Must be the first token generated.
     441             :   bool scanStreamStart();
     442             : 
     443             :   /// Generate tokens needed to close out the stream.
     444             :   bool scanStreamEnd();
     445             : 
     446             :   /// Scan a %BLAH directive.
     447             :   bool scanDirective();
     448             : 
     449             :   /// Scan a ... or ---.
     450             :   bool scanDocumentIndicator(bool IsStart);
     451             : 
     452             :   /// Scan a [ or { and generate the proper flow collection start token.
     453             :   bool scanFlowCollectionStart(bool IsSequence);
     454             : 
     455             :   /// Scan a ] or } and generate the proper flow collection end token.
     456             :   bool scanFlowCollectionEnd(bool IsSequence);
     457             : 
     458             :   /// Scan the , that separates entries in a flow collection.
     459             :   bool scanFlowEntry();
     460             : 
     461             :   /// Scan the - that starts block sequence entries.
     462             :   bool scanBlockEntry();
     463             : 
     464             :   /// Scan an explicit ? indicating a key.
     465             :   bool scanKey();
     466             : 
     467             :   /// Scan an explicit : indicating a value.
     468             :   bool scanValue();
     469             : 
     470             :   /// Scan a quoted scalar.
     471             :   bool scanFlowScalar(bool IsDoubleQuoted);
     472             : 
     473             :   /// Scan an unquoted scalar.
     474             :   bool scanPlainScalar();
     475             : 
     476             :   /// Scan an Alias or Anchor starting with * or &.
     477             :   bool scanAliasOrAnchor(bool IsAlias);
     478             : 
     479             :   /// Scan a block scalar starting with | or >.
     480             :   bool scanBlockScalar(bool IsLiteral);
     481             : 
     482             :   /// Scan a chomping indicator in a block scalar header.
     483             :   char scanBlockChompingIndicator();
     484             : 
     485             :   /// Scan an indentation indicator in a block scalar header.
     486             :   unsigned scanBlockIndentationIndicator();
     487             : 
     488             :   /// Scan a block scalar header.
     489             :   ///
     490             :   /// Return false if an error occurred.
     491             :   bool scanBlockScalarHeader(char &ChompingIndicator, unsigned &IndentIndicator,
     492             :                              bool &IsDone);
     493             : 
     494             :   /// Look for the indentation level of a block scalar.
     495             :   ///
     496             :   /// Return false if an error occurred.
     497             :   bool findBlockScalarIndent(unsigned &BlockIndent, unsigned BlockExitIndent,
     498             :                              unsigned &LineBreaks, bool &IsDone);
     499             : 
     500             :   /// Scan the indentation of a text line in a block scalar.
     501             :   ///
     502             :   /// Return false if an error occurred.
     503             :   bool scanBlockScalarIndent(unsigned BlockIndent, unsigned BlockExitIndent,
     504             :                              bool &IsDone);
     505             : 
     506             :   /// Scan a tag of the form !stuff.
     507             :   bool scanTag();
     508             : 
     509             :   /// Dispatch to the next scanning function based on \a *Current.
     510             :   bool fetchMoreTokens();
     511             : 
     512             :   /// The SourceMgr used for diagnostics and buffer management.
     513             :   SourceMgr &SM;
     514             : 
     515             :   /// The original input.
     516             :   MemoryBufferRef InputBuffer;
     517             : 
     518             :   /// The current position of the scanner.
     519             :   StringRef::iterator Current;
     520             : 
     521             :   /// The end of the input (one past the last character).
     522             :   StringRef::iterator End;
     523             : 
     524             :   /// Current YAML indentation level in spaces.
     525             :   int Indent;
     526             : 
     527             :   /// Current column number in Unicode code points.
     528             :   unsigned Column;
     529             : 
     530             :   /// Current line number.
     531             :   unsigned Line;
     532             : 
     533             :   /// How deep we are in flow style containers. 0 means at block level.
     534             :   unsigned FlowLevel;
     535             : 
     536             :   /// Are we at the start of the stream?
     537             :   bool IsStartOfStream;
     538             : 
     539             :   /// Can the next token be the start of a simple key?
     540             :   bool IsSimpleKeyAllowed;
     541             : 
     542             :   /// True if an error has occurred.
     543             :   bool Failed;
     544             : 
     545             :   /// Should colors be used when printing out the diagnostic messages?
     546             :   bool ShowColors;
     547             : 
     548             :   /// Queue of tokens. This is required to queue up tokens while looking
     549             :   ///        for the end of a simple key. And for cases where a single character
     550             :   ///        can produce multiple tokens (e.g. BlockEnd).
     551             :   TokenQueueT TokenQueue;
     552             : 
     553             :   /// Indentation levels.
     554             :   SmallVector<int, 4> Indents;
     555             : 
     556             :   /// Potential simple keys.
     557             :   SmallVector<SimpleKey, 4> SimpleKeys;
     558             : 
     559             :   std::error_code *EC;
     560             : };
     561             : 
     562             : } // end namespace yaml
     563             : } // end namespace llvm
     564             : 
     565             : /// encodeUTF8 - Encode \a UnicodeScalarValue in UTF-8 and append it to result.
     566          10 : static void encodeUTF8( uint32_t UnicodeScalarValue
     567             :                       , SmallVectorImpl<char> &Result) {
     568          10 :   if (UnicodeScalarValue <= 0x7F) {
     569           5 :     Result.push_back(UnicodeScalarValue & 0x7F);
     570           5 :   } else if (UnicodeScalarValue <= 0x7FF) {
     571           2 :     uint8_t FirstByte = 0xC0 | ((UnicodeScalarValue & 0x7C0) >> 6);
     572           2 :     uint8_t SecondByte = 0x80 | (UnicodeScalarValue & 0x3F);
     573           2 :     Result.push_back(FirstByte);
     574           2 :     Result.push_back(SecondByte);
     575           3 :   } else if (UnicodeScalarValue <= 0xFFFF) {
     576           3 :     uint8_t FirstByte = 0xE0 | ((UnicodeScalarValue & 0xF000) >> 12);
     577           3 :     uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
     578           3 :     uint8_t ThirdByte = 0x80 | (UnicodeScalarValue & 0x3F);
     579           3 :     Result.push_back(FirstByte);
     580           3 :     Result.push_back(SecondByte);
     581           3 :     Result.push_back(ThirdByte);
     582           0 :   } else if (UnicodeScalarValue <= 0x10FFFF) {
     583           0 :     uint8_t FirstByte = 0xF0 | ((UnicodeScalarValue & 0x1F0000) >> 18);
     584           0 :     uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0x3F000) >> 12);
     585           0 :     uint8_t ThirdByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
     586           0 :     uint8_t FourthByte = 0x80 | (UnicodeScalarValue & 0x3F);
     587           0 :     Result.push_back(FirstByte);
     588           0 :     Result.push_back(SecondByte);
     589           0 :     Result.push_back(ThirdByte);
     590           0 :     Result.push_back(FourthByte);
     591             :   }
     592          10 : }
     593             : 
     594           0 : bool yaml::dumpTokens(StringRef Input, raw_ostream &OS) {
     595             :   SourceMgr SM;
     596           0 :   Scanner scanner(Input, SM);
     597             :   while (true) {
     598           0 :     Token T = scanner.getNext();
     599           0 :     switch (T.Kind) {
     600           0 :     case Token::TK_StreamStart:
     601           0 :       OS << "Stream-Start: ";
     602           0 :       break;
     603           0 :     case Token::TK_StreamEnd:
     604           0 :       OS << "Stream-End: ";
     605           0 :       break;
     606           0 :     case Token::TK_VersionDirective:
     607           0 :       OS << "Version-Directive: ";
     608           0 :       break;
     609           0 :     case Token::TK_TagDirective:
     610           0 :       OS << "Tag-Directive: ";
     611           0 :       break;
     612           0 :     case Token::TK_DocumentStart:
     613           0 :       OS << "Document-Start: ";
     614           0 :       break;
     615           0 :     case Token::TK_DocumentEnd:
     616           0 :       OS << "Document-End: ";
     617           0 :       break;
     618           0 :     case Token::TK_BlockEntry:
     619           0 :       OS << "Block-Entry: ";
     620           0 :       break;
     621           0 :     case Token::TK_BlockEnd:
     622           0 :       OS << "Block-End: ";
     623           0 :       break;
     624           0 :     case Token::TK_BlockSequenceStart:
     625           0 :       OS << "Block-Sequence-Start: ";
     626           0 :       break;
     627           0 :     case Token::TK_BlockMappingStart:
     628           0 :       OS << "Block-Mapping-Start: ";
     629           0 :       break;
     630           0 :     case Token::TK_FlowEntry:
     631           0 :       OS << "Flow-Entry: ";
     632           0 :       break;
     633           0 :     case Token::TK_FlowSequenceStart:
     634           0 :       OS << "Flow-Sequence-Start: ";
     635           0 :       break;
     636           0 :     case Token::TK_FlowSequenceEnd:
     637           0 :       OS << "Flow-Sequence-End: ";
     638           0 :       break;
     639           0 :     case Token::TK_FlowMappingStart:
     640           0 :       OS << "Flow-Mapping-Start: ";
     641           0 :       break;
     642           0 :     case Token::TK_FlowMappingEnd:
     643           0 :       OS << "Flow-Mapping-End: ";
     644           0 :       break;
     645           0 :     case Token::TK_Key:
     646           0 :       OS << "Key: ";
     647           0 :       break;
     648           0 :     case Token::TK_Value:
     649           0 :       OS << "Value: ";
     650           0 :       break;
     651           0 :     case Token::TK_Scalar:
     652           0 :       OS << "Scalar: ";
     653           0 :       break;
     654           0 :     case Token::TK_BlockScalar:
     655           0 :       OS << "Block Scalar: ";
     656           0 :       break;
     657           0 :     case Token::TK_Alias:
     658           0 :       OS << "Alias: ";
     659           0 :       break;
     660           0 :     case Token::TK_Anchor:
     661           0 :       OS << "Anchor: ";
     662           0 :       break;
     663           0 :     case Token::TK_Tag:
     664           0 :       OS << "Tag: ";
     665           0 :       break;
     666             :     case Token::TK_Error:
     667             :       break;
     668             :     }
     669           0 :     OS << T.Range << "\n";
     670           0 :     if (T.Kind == Token::TK_StreamEnd)
     671             :       break;
     672           0 :     else if (T.Kind == Token::TK_Error)
     673             :       return false;
     674             :   }
     675           0 :   return true;
     676             : }
     677             : 
     678           0 : bool yaml::scanTokens(StringRef Input) {
     679             :   SourceMgr SM;
     680           0 :   Scanner scanner(Input, SM);
     681             :   while (true) {
     682           0 :     Token T = scanner.getNext();
     683           0 :     if (T.Kind == Token::TK_StreamEnd)
     684             :       break;
     685           0 :     else if (T.Kind == Token::TK_Error)
     686             :       return false;
     687             :   }
     688           0 :   return true;
     689             : }
     690             : 
     691        1708 : std::string yaml::escape(StringRef Input, bool EscapePrintable) {
     692             :   std::string EscapedInput;
     693       51924 :   for (StringRef::iterator i = Input.begin(), e = Input.end(); i != e; ++i) {
     694       50216 :     if (*i == '\\')
     695             :       EscapedInput += "\\\\";
     696       50209 :     else if (*i == '"')
     697             :       EscapedInput += "\\\"";
     698       50173 :     else if (*i == 0)
     699             :       EscapedInput += "\\0";
     700       50172 :     else if (*i == 0x07)
     701             :       EscapedInput += "\\a";
     702       50171 :     else if (*i == 0x08)
     703             :       EscapedInput += "\\b";
     704       50169 :     else if (*i == 0x09)
     705             :       EscapedInput += "\\t";
     706       50139 :     else if (*i == 0x0A)
     707             :       EscapedInput += "\\n";
     708       49930 :     else if (*i == 0x0B)
     709             :       EscapedInput += "\\v";
     710       49929 :     else if (*i == 0x0C)
     711             :       EscapedInput += "\\f";
     712       49928 :     else if (*i == 0x0D)
     713             :       EscapedInput += "\\r";
     714       49926 :     else if (*i == 0x1B)
     715             :       EscapedInput += "\\e";
     716       49925 :     else if ((unsigned char)*i < 0x20) { // Control characters not handled above.
     717           6 :       std::string HexStr = utohexstr(*i);
     718          12 :       EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr;
     719       49919 :     } else if (*i & 0x80) { // UTF-8 multiple code unit subsequence.
     720             :       UTF8Decoded UnicodeScalarValue
     721         104 :         = decodeUTF8(StringRef(i, Input.end() - i));
     722          52 :       if (UnicodeScalarValue.second == 0) {
     723             :         // Found invalid char.
     724             :         SmallString<4> Val;
     725           0 :         encodeUTF8(0xFFFD, Val);
     726             :         EscapedInput.insert(EscapedInput.end(), Val.begin(), Val.end());
     727             :         // FIXME: Error reporting.
     728             :         return EscapedInput;
     729             :       }
     730          52 :       if (UnicodeScalarValue.first == 0x85)
     731             :         EscapedInput += "\\N";
     732          33 :       else if (UnicodeScalarValue.first == 0xA0)
     733             :         EscapedInput += "\\_";
     734          32 :       else if (UnicodeScalarValue.first == 0x2028)
     735             :         EscapedInput += "\\L";
     736          25 :       else if (UnicodeScalarValue.first == 0x2029)
     737             :         EscapedInput += "\\P";
     738          38 :       else if (!EscapePrintable &&
     739          17 :                sys::unicode::isPrintable(UnicodeScalarValue.first))
     740          16 :         EscapedInput += StringRef(i, UnicodeScalarValue.second);
     741             :       else {
     742           5 :         std::string HexStr = utohexstr(UnicodeScalarValue.first);
     743           5 :         if (HexStr.size() <= 2)
     744           0 :           EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr;
     745           5 :         else if (HexStr.size() <= 4)
     746          10 :           EscapedInput += "\\u" + std::string(4 - HexStr.size(), '0') + HexStr;
     747           0 :         else if (HexStr.size() <= 8)
     748           0 :           EscapedInput += "\\U" + std::string(8 - HexStr.size(), '0') + HexStr;
     749             :       }
     750          52 :       i += UnicodeScalarValue.second - 1;
     751             :     } else
     752       49867 :       EscapedInput.push_back(*i);
     753             :   }
     754             :   return EscapedInput;
     755             : }
     756             : 
     757        4373 : Scanner::Scanner(StringRef Input, SourceMgr &sm, bool ShowColors,
     758        4373 :                  std::error_code *EC)
     759       13119 :     : SM(sm), ShowColors(ShowColors), EC(EC) {
     760        4373 :   init(MemoryBufferRef(Input, "YAML"));
     761        4373 : }
     762             : 
     763         109 : Scanner::Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors,
     764         109 :                  std::error_code *EC)
     765         218 :     : SM(SM_), ShowColors(ShowColors), EC(EC) {
     766         109 :   init(Buffer);
     767         109 : }
     768             : 
     769        4482 : void Scanner::init(MemoryBufferRef Buffer) {
     770        4482 :   InputBuffer = Buffer;
     771        4482 :   Current = InputBuffer.getBufferStart();
     772        4482 :   End = InputBuffer.getBufferEnd();
     773        4482 :   Indent = -1;
     774        4482 :   Column = 0;
     775        4482 :   Line = 0;
     776        4482 :   FlowLevel = 0;
     777        4482 :   IsStartOfStream = true;
     778        4482 :   IsSimpleKeyAllowed = true;
     779        4482 :   Failed = false;
     780             :   std::unique_ptr<MemoryBuffer> InputBufferOwner =
     781        4482 :       MemoryBuffer::getMemBuffer(Buffer);
     782       13446 :   SM.AddNewSourceBuffer(std::move(InputBufferOwner), SMLoc());
     783        4482 : }
     784             : 
     785     5206778 : Token &Scanner::peekNext() {
     786             :   // If the current token is a possible simple key, keep parsing until we
     787             :   // can confirm.
     788             :   bool NeedMore = false;
     789             :   while (true) {
     790     5719640 :     if (TokenQueue.empty() || NeedMore) {
     791     1244458 :       if (!fetchMoreTokens()) {
     792             :         TokenQueue.clear();
     793          48 :         TokenQueue.push_back(Token());
     794          24 :         return TokenQueue.front();
     795             :       }
     796             :     }
     797             :     assert(!TokenQueue.empty() &&
     798             :             "fetchMoreTokens lied about getting tokens!");
     799             : 
     800     5719616 :     removeStaleSimpleKeyCandidates();
     801             :     SimpleKey SK;
     802     5719616 :     SK.Tok = TokenQueue.begin();
     803     5719616 :     if (!is_contained(SimpleKeys, SK))
     804             :       break;
     805             :     else
     806             :       NeedMore = true;
     807      512862 :   }
     808     5206754 :   return TokenQueue.front();
     809             : }
     810             : 
     811     1618831 : Token Scanner::getNext() {
     812     1618831 :   Token Ret = peekNext();
     813             :   // TokenQueue can be empty if there was an error getting the next token.
     814     1618831 :   if (!TokenQueue.empty())
     815             :     TokenQueue.pop_front();
     816             : 
     817             :   // There cannot be any referenced Token's if the TokenQueue is empty. So do a
     818             :   // quick deallocation of them all.
     819     1618831 :   if (TokenQueue.empty())
     820             :     TokenQueue.resetAlloc();
     821             : 
     822     1618831 :   return Ret;
     823             : }
     824             : 
     825     8985952 : StringRef::iterator Scanner::skip_nb_char(StringRef::iterator Position) {
     826     8985952 :   if (Position == End)
     827             :     return Position;
     828             :   // Check 7 bit c-printable - b-char.
     829    17971154 :   if (   *Position == 0x09
     830     8985577 :       || (*Position >= 0x20 && *Position <= 0x7E))
     831     8819176 :     return Position + 1;
     832             : 
     833             :   // Check for valid UTF-8.
     834      166401 :   if (uint8_t(*Position) & 0x80) {
     835             :     UTF8Decoded u8d = decodeUTF8(Position);
     836          32 :     if (   u8d.second != 0
     837          32 :         && u8d.first != 0xFEFF
     838          31 :         && ( u8d.first == 0x85
     839          31 :           || ( u8d.first >= 0xA0
     840             :             && u8d.first <= 0xD7FF)
     841           0 :           || ( u8d.first >= 0xE000
     842           0 :             && u8d.first <= 0xFFFD)
     843           0 :           || ( u8d.first >= 0x10000
     844           0 :             && u8d.first <= 0x10FFFF)))
     845          31 :       return Position + u8d.second;
     846             :   }
     847             :   return Position;
     848             : }
     849             : 
     850     1863364 : StringRef::iterator Scanner::skip_b_break(StringRef::iterator Position) {
     851     1863364 :   if (Position == End)
     852             :     return Position;
     853     1859874 :   if (*Position == 0x0D) {
     854         717 :     if (Position + 1 != End && *(Position + 1) == 0x0A)
     855         698 :       return Position + 2;
     856             :     return Position + 1;
     857             :   }
     858             : 
     859     1859157 :   if (*Position == 0x0A)
     860      622666 :     return Position + 1;
     861             :   return Position;
     862             : }
     863             : 
     864      228184 : StringRef::iterator Scanner::skip_s_space(StringRef::iterator Position) {
     865      228184 :   if (Position == End)
     866             :     return Position;
     867      228125 :   if (*Position == ' ')
     868      204758 :     return Position + 1;
     869             :   return Position;
     870             : }
     871             : 
     872      552260 : StringRef::iterator Scanner::skip_s_white(StringRef::iterator Position) {
     873      552260 :   if (Position == End)
     874             :     return Position;
     875      552259 :   if (*Position == ' ' || *Position == '\t')
     876      451092 :     return Position + 1;
     877             :   return Position;
     878             : }
     879             : 
     880       13151 : StringRef::iterator Scanner::skip_ns_char(StringRef::iterator Position) {
     881       13151 :   if (Position == End)
     882             :     return Position;
     883       13151 :   if (*Position == ' ' || *Position == '\t')
     884             :     return Position;
     885       13047 :   return skip_nb_char(Position);
     886             : }
     887             : 
     888      128056 : StringRef::iterator Scanner::skip_while( SkipWhileFunc Func
     889             :                                        , StringRef::iterator Position) {
     890             :   while (true) {
     891     4183176 :     StringRef::iterator i = (this->*Func)(Position);
     892     4183176 :     if (i == Position)
     893             :       break;
     894             :     Position = i;
     895             :   }
     896      128056 :   return Position;
     897             : }
     898             : 
     899      120315 : void Scanner::advanceWhile(SkipWhileFunc Func) {
     900      120315 :   auto Final = skip_while(Func, Current);
     901      120315 :   Column += Final - Current;
     902      120315 :   Current = Final;
     903      120315 : }
     904             : 
     905             : static bool is_ns_hex_digit(const char C) {
     906           0 :   return    (C >= '0' && C <= '9')
     907           0 :          || (C >= 'a' && C <= 'z')
     908           0 :          || (C >= 'A' && C <= 'Z');
     909             : }
     910             : 
     911             : static bool is_ns_word_char(const char C) {
     912          36 :   return    C == '-'
     913          36 :          || (C >= 'a' && C <= 'z')
     914          36 :          || (C >= 'A' && C <= 'Z');
     915             : }
     916             : 
     917           3 : void Scanner::scan_ns_uri_char() {
     918             :   while (true) {
     919          36 :     if (Current == End)
     920             :       break;
     921          36 :     if ((   *Current == '%'
     922           0 :           && Current + 2 < End
     923           0 :           && is_ns_hex_digit(*(Current + 1))
     924           0 :           && is_ns_hex_digit(*(Current + 2)))
     925             :         || is_ns_word_char(*Current)
     926          46 :         || StringRef(Current, 1).find_first_of("#;/?:@&=+$,_.!~*'()[]")
     927             :           != StringRef::npos) {
     928          33 :       ++Current;
     929          33 :       ++Column;
     930             :     } else
     931             :       break;
     932             :   }
     933           3 : }
     934             : 
     935          22 : bool Scanner::consume(uint32_t Expected) {
     936          22 :   if (Expected >= 0x80)
     937           0 :     report_fatal_error("Not dealing with this yet");
     938          22 :   if (Current == End)
     939             :     return false;
     940          22 :   if (uint8_t(*Current) >= 0x80)
     941           0 :     report_fatal_error("Not dealing with this yet");
     942          22 :   if (uint8_t(*Current) == Expected) {
     943          20 :     ++Current;
     944          20 :     ++Column;
     945          20 :     return true;
     946             :   }
     947             :   return false;
     948             : }
     949             : 
     950     2946477 : void Scanner::skip(uint32_t Distance) {
     951     2946477 :   Current += Distance;
     952     2946477 :   Column += Distance;
     953             :   assert(Current <= End && "Skipped past the end");
     954     2946477 : }
     955             : 
     956     4562415 : bool Scanner::isBlankOrBreak(StringRef::iterator Position) {
     957     4562415 :   if (Position == End)
     958             :     return false;
     959     4561069 :   return *Position == ' ' || *Position == '\t' || *Position == '\r' ||
     960             :          *Position == '\n';
     961             : }
     962             : 
     963      120264 : bool Scanner::consumeLineBreakIfPresent() {
     964      120264 :   auto Next = skip_b_break(Current);
     965      120264 :   if (Next == Current)
     966             :     return false;
     967      120261 :   Column = 0;
     968      120261 :   ++Line;
     969      120261 :   Current = Next;
     970      120261 :   return true;
     971             : }
     972             : 
     973      653179 : void Scanner::saveSimpleKeyCandidate( TokenQueueT::iterator Tok
     974             :                                     , unsigned AtColumn
     975             :                                     , bool IsRequired) {
     976      653179 :   if (IsSimpleKeyAllowed) {
     977             :     SimpleKey SK;
     978      375471 :     SK.Tok = Tok;
     979      375471 :     SK.Line = Line;
     980      375471 :     SK.Column = AtColumn;
     981      375471 :     SK.IsRequired = IsRequired;
     982      375471 :     SK.FlowLevel = FlowLevel;
     983      375471 :     SimpleKeys.push_back(SK);
     984             :   }
     985      653179 : }
     986             : 
     987     6956104 : void Scanner::removeStaleSimpleKeyCandidates() {
     988             :   for (SmallVectorImpl<SimpleKey>::iterator i = SimpleKeys.begin();
     989     8400772 :                                             i != SimpleKeys.end();) {
     990     1444668 :     if (i->Line != Line || i->Column + 1024 < Column) {
     991       60107 :       if (i->IsRequired)
     992           0 :         setError( "Could not find expected : for simple key"
     993             :                 , i->Tok->Range.begin());
     994       60107 :       i = SimpleKeys.erase(i);
     995             :     } else
     996     1384561 :       ++i;
     997             :   }
     998     6956104 : }
     999             : 
    1000      264382 : void Scanner::removeSimpleKeyCandidatesOnFlowLevel(unsigned Level) {
    1001      264382 :   if (!SimpleKeys.empty() && (SimpleKeys.end() - 1)->FlowLevel == Level)
    1002             :     SimpleKeys.pop_back();
    1003      264382 : }
    1004             : 
    1005     1254329 : bool Scanner::unrollIndent(int ToColumn) {
    1006             :   Token T;
    1007             :   // Indentation is ignored in flow.
    1008     1254329 :   if (FlowLevel != 0)
    1009             :     return true;
    1010             : 
    1011      492803 :   while (Indent > ToColumn) {
    1012       39982 :     T.Kind = Token::TK_BlockEnd;
    1013       39982 :     T.Range = StringRef(Current, 1);
    1014       39982 :     TokenQueue.push_back(T);
    1015       39982 :     Indent = Indents.pop_back_val();
    1016             :   }
    1017             : 
    1018             :   return true;
    1019             : }
    1020             : 
    1021      333666 : bool Scanner::rollIndent( int ToColumn
    1022             :                         , Token::TokenKind Kind
    1023             :                         , TokenQueueT::iterator InsertPoint) {
    1024      333666 :   if (FlowLevel)
    1025             :     return true;
    1026      162289 :   if (Indent < ToColumn) {
    1027       40013 :     Indents.push_back(Indent);
    1028       40013 :     Indent = ToColumn;
    1029             : 
    1030             :     Token T;
    1031       40013 :     T.Kind = Kind;
    1032       40013 :     T.Range = StringRef(Current, 0);
    1033       40013 :     TokenQueue.insert(InsertPoint, T);
    1034             :   }
    1035             :   return true;
    1036             : }
    1037             : 
    1038     1652753 : void Scanner::skipComment() {
    1039     1652753 :   if (*Current != '#')
    1040             :     return;
    1041             :   while (true) {
    1042             :     // This may skip more than one byte, thus Column is only incremented
    1043             :     // for code points.
    1044     1781835 :     StringRef::iterator I = skip_nb_char(Current);
    1045     1781835 :     if (I == Current)
    1046             :       break;
    1047     1745684 :     Current = I;
    1048     1745684 :     ++Column;
    1049     1745684 :   }
    1050             : }
    1051             : 
    1052     1239976 : void Scanner::scanToNextToken() {
    1053             :   while (true) {
    1054     5674115 :     while (*Current == ' ' || *Current == '\t') {
    1055     2013458 :       skip(1);
    1056             :     }
    1057             : 
    1058     1647199 :     skipComment();
    1059             : 
    1060             :     // Skip EOL.
    1061     1647199 :     StringRef::iterator i = skip_b_break(Current);
    1062     1647199 :     if (i == Current)
    1063             :       break;
    1064      407223 :     Current = i;
    1065      407223 :     ++Line;
    1066      407223 :     Column = 0;
    1067             :     // New lines may start a simple key.
    1068      407223 :     if (!FlowLevel)
    1069      198304 :       IsSimpleKeyAllowed = true;
    1070             :   }
    1071     1239976 : }
    1072             : 
    1073        4482 : bool Scanner::scanStreamStart() {
    1074        4482 :   IsStartOfStream = false;
    1075             : 
    1076        4482 :   EncodingInfo EI = getUnicodeEncoding(currentInput());
    1077             : 
    1078             :   Token T;
    1079        4482 :   T.Kind = Token::TK_StreamStart;
    1080        4482 :   T.Range = StringRef(Current, EI.second);
    1081        4482 :   TokenQueue.push_back(T);
    1082        4482 :   Current += EI.second;
    1083        4482 :   return true;
    1084             : }
    1085             : 
    1086        3488 : bool Scanner::scanStreamEnd() {
    1087             :   // Force an ending new line if one isn't present.
    1088        3488 :   if (Column != 0) {
    1089         523 :     Column = 0;
    1090         523 :     ++Line;
    1091             :   }
    1092             : 
    1093        3488 :   unrollIndent(-1);
    1094             :   SimpleKeys.clear();
    1095        3488 :   IsSimpleKeyAllowed = false;
    1096             : 
    1097             :   Token T;
    1098        3488 :   T.Kind = Token::TK_StreamEnd;
    1099        3488 :   T.Range = StringRef(Current, 0);
    1100        3488 :   TokenQueue.push_back(T);
    1101        3488 :   return true;
    1102             : }
    1103             : 
    1104          19 : bool Scanner::scanDirective() {
    1105             :   // Reset the indentation level.
    1106          19 :   unrollIndent(-1);
    1107             :   SimpleKeys.clear();
    1108          19 :   IsSimpleKeyAllowed = false;
    1109             : 
    1110          19 :   StringRef::iterator Start = Current;
    1111          19 :   consume('%');
    1112          19 :   StringRef::iterator NameStart = Current;
    1113          19 :   Current = skip_while(&Scanner::skip_ns_char, Current);
    1114          19 :   StringRef Name(NameStart, Current - NameStart);
    1115          19 :   Current = skip_while(&Scanner::skip_s_white, Current);
    1116             : 
    1117             :   Token T;
    1118             :   if (Name == "YAML") {
    1119           5 :     Current = skip_while(&Scanner::skip_ns_char, Current);
    1120           5 :     T.Kind = Token::TK_VersionDirective;
    1121           5 :     T.Range = StringRef(Start, Current - Start);
    1122           5 :     TokenQueue.push_back(T);
    1123           5 :     return true;
    1124             :   } else if(Name == "TAG") {
    1125          13 :     Current = skip_while(&Scanner::skip_ns_char, Current);
    1126          13 :     Current = skip_while(&Scanner::skip_s_white, Current);
    1127          13 :     Current = skip_while(&Scanner::skip_ns_char, Current);
    1128          13 :     T.Kind = Token::TK_TagDirective;
    1129          13 :     T.Range = StringRef(Start, Current - Start);
    1130          13 :     TokenQueue.push_back(T);
    1131          13 :     return true;
    1132             :   }
    1133             :   return false;
    1134             : }
    1135             : 
    1136       14334 : bool Scanner::scanDocumentIndicator(bool IsStart) {
    1137       14334 :   unrollIndent(-1);
    1138             :   SimpleKeys.clear();
    1139       14334 :   IsSimpleKeyAllowed = false;
    1140             : 
    1141             :   Token T;
    1142       14334 :   T.Kind = IsStart ? Token::TK_DocumentStart : Token::TK_DocumentEnd;
    1143       14334 :   T.Range = StringRef(Current, 3);
    1144       14334 :   skip(3);
    1145       14334 :   TokenQueue.push_back(T);
    1146       14334 :   return true;
    1147             : }
    1148             : 
    1149       64896 : bool Scanner::scanFlowCollectionStart(bool IsSequence) {
    1150             :   Token T;
    1151       64896 :   T.Kind = IsSequence ? Token::TK_FlowSequenceStart
    1152             :                       : Token::TK_FlowMappingStart;
    1153       64896 :   T.Range = StringRef(Current, 1);
    1154       64896 :   skip(1);
    1155       64896 :   TokenQueue.push_back(T);
    1156             : 
    1157             :   // [ and { may begin a simple key.
    1158      129792 :   saveSimpleKeyCandidate(--TokenQueue.end(), Column - 1, false);
    1159             : 
    1160             :   // And may also be followed by a simple key.
    1161       64896 :   IsSimpleKeyAllowed = true;
    1162       64896 :   ++FlowLevel;
    1163       64896 :   return true;
    1164             : }
    1165             : 
    1166       64818 : bool Scanner::scanFlowCollectionEnd(bool IsSequence) {
    1167       64818 :   removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
    1168       64818 :   IsSimpleKeyAllowed = false;
    1169             :   Token T;
    1170       64818 :   T.Kind = IsSequence ? Token::TK_FlowSequenceEnd
    1171             :                       : Token::TK_FlowMappingEnd;
    1172       64818 :   T.Range = StringRef(Current, 1);
    1173       64818 :   skip(1);
    1174       64818 :   TokenQueue.push_back(T);
    1175       64818 :   if (FlowLevel)
    1176       64818 :     --FlowLevel;
    1177       64818 :   return true;
    1178             : }
    1179             : 
    1180      164872 : bool Scanner::scanFlowEntry() {
    1181      164872 :   removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
    1182      164872 :   IsSimpleKeyAllowed = true;
    1183             :   Token T;
    1184      164872 :   T.Kind = Token::TK_FlowEntry;
    1185      164872 :   T.Range = StringRef(Current, 1);
    1186      164872 :   skip(1);
    1187      164872 :   TokenQueue.push_back(T);
    1188      164872 :   return true;
    1189             : }
    1190             : 
    1191       34661 : bool Scanner::scanBlockEntry() {
    1192       34661 :   rollIndent(Column, Token::TK_BlockSequenceStart, TokenQueue.end());
    1193       34661 :   removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
    1194       34661 :   IsSimpleKeyAllowed = true;
    1195             :   Token T;
    1196       34661 :   T.Kind = Token::TK_BlockEntry;
    1197       34661 :   T.Range = StringRef(Current, 1);
    1198       34661 :   skip(1);
    1199       34661 :   TokenQueue.push_back(T);
    1200       34661 :   return true;
    1201             : }
    1202             : 
    1203          31 : bool Scanner::scanKey() {
    1204          31 :   if (!FlowLevel)
    1205          17 :     rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end());
    1206             : 
    1207          31 :   removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
    1208          31 :   IsSimpleKeyAllowed = !FlowLevel;
    1209             : 
    1210             :   Token T;
    1211          31 :   T.Kind = Token::TK_Key;
    1212          31 :   T.Range = StringRef(Current, 1);
    1213          31 :   skip(1);
    1214          31 :   TokenQueue.push_back(T);
    1215          31 :   return true;
    1216             : }
    1217             : 
                       // Scans a ':' value indicator and queues a one-character TK_Value token.
                       // If a simple-key candidate is pending, a TK_Key token is first inserted
                       // retroactively in front of the candidate's token. Always returns true.
    1218      299003 : bool Scanner::scanValue() {
    1219             :   // If the previous token could have been a simple key, insert the key token
    1220             :   // into the token queue.
    1221      299003 :   if (!SimpleKeys.empty()) {
    1222      298980 :     SimpleKey SK = SimpleKeys.pop_back_val();
    1223             :     Token T;
    1224      298980 :     T.Kind = Token::TK_Key;
    1225      298980 :     T.Range = SK.Tok->Range;
                           // Linear search of the queue for the position of the candidate's
                           // token; the TK_Key token is inserted immediately before it.
    1226             :     TokenQueueT::iterator i, e;
    1227      582933 :     for (i = TokenQueue.begin(), e = TokenQueue.end(); i != e; ++i) {
    1228      582933 :       if (i == SK.Tok)
    1229             :         break;
    1230             :     }
    1231             :     assert(i != e && "SimpleKey not in token queue!");
    1232      298980 :     i = TokenQueue.insert(i, T);
    1233             : 
    1234             :     // We may also need to add a Block-Mapping-Start token.
                           // The key's own column (not the current one) decides the indentation,
                           // and `i` is passed so any start token lands before the new TK_Key.
    1235      298980 :     rollIndent(SK.Column, Token::TK_BlockMappingStart, i);
    1236             : 
    1237      298980 :     IsSimpleKeyAllowed = false;
    1238             :   } else {
                           // No pending simple key: the ':' stands on its own. Mirror scanKey():
                           // roll the indent only in block context.
    1239          23 :     if (!FlowLevel)
    1240           8 :       rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end());
    1241          23 :     IsSimpleKeyAllowed = !FlowLevel;
    1242             :   }
    1243             : 
    1244             :   Token T;
    1245      299003 :   T.Kind = Token::TK_Value;
    1246      299003 :   T.Range = StringRef(Current, 1);
    1247      299003 :   skip(1);
    1248      299003 :   TokenQueue.push_back(T);
    1249      299003 :   return true;
    1250             : }
    1251             : 
    1252             : // Forbidding inlining improves performance by roughly 20%.
    1253             : // FIXME: Remove once llvm optimizes this to the faster version without hints.
                       // The separate declaration exists only to carry the noinline attribute.
    1254             : LLVM_ATTRIBUTE_NOINLINE static bool
    1255             : wasEscaped(StringRef::iterator First, StringRef::iterator Position);
    1256             : 
    1257             : // Returns whether a character at 'Position' was escaped with a leading '\'.
    1258             : // 'First' specifies the position of the first character in the string.
                       // Precondition (asserted): 'Position' is strictly after 'First'.
    1259      569266 : static bool wasEscaped(StringRef::iterator First,
    1260             :                        StringRef::iterator Position) {
    1261             :   assert(Position - 1 >= First);
    1262      569266 :   StringRef::iterator I = Position - 1;
    1263             :   // We calculate the number of consecutive '\'s before the current position
    1264             :   // by iterating backwards through our string.
                         // NOTE(review): if every character back to 'First' is a backslash, I is
                         // decremented to First - 1 before the loop condition stops the walk.
    1265     1139508 :   while (I >= First && *I == '\\') --I;
    1266             :   // (Position - 1 - I) now contains the number of '\'s before the current
    1267             :   // position. If it is odd, the character at 'Position' was escaped.
    1268      569266 :   return (Position - 1 - I) % 2 == 1;
    1269             : }
    1270             : 
                       // Scans a quoted flow scalar starting at the opening quote at Current.
                       // \param IsDoubleQuoted true for a "..." scalar, false for a '...' scalar.
                       // \returns true after queueing a TK_Scalar token whose Range includes
                       // both quotes; false (after setError) if the input ends before the
                       // closing quote. The token is also saved as a simple-key candidate.
    1271      265021 : bool Scanner::scanFlowScalar(bool IsDoubleQuoted) {
    1272      265021 :   StringRef::iterator Start = Current;
    1273      265021 :   unsigned ColStart = Column;
    1274      265021 :   if (IsDoubleQuoted) {
                           // Find the closing '"' by raw pointer scanning. Each pass stops at the
                           // next '"'; the loop repeats while that quote is backslash-escaped.
                           // NOTE(review): this path advances Current directly, so Column and
                           // Line are not updated for the scalar's contents here.
    1275             :     do {
    1276      816617 :       ++Current;
    1277    58196958 :       while (Current != End && *Current != '"')
    1278    57380341 :         ++Current;
    1279             :       // Repeat until the previous character was not a '\' or was an escaped
    1280             :       // backslash.
    1281             :     } while (   Current != End
    1282      816607 :              && *(Current - 1) == '\\'
    1283     1385883 :              && wasEscaped(Start + 1, Current));
    1284             :   } else {
                           // Single-quoted: step over the opening quote, then advance one
                           // character or line break at a time until an unpaired ' is found.
    1285       17666 :     skip(1);
    1286             :     while (true) {
    1287             :       // Skip a ' followed by another '.
    1288      422966 :       if (Current + 1 < End && *Current == '\'' && *(Current + 1) == '\'') {
    1289           9 :         skip(2);
    1290           9 :         continue;
                             // NOTE(review): *Current is read here without a Current != End
                             // check (reachable after skip(2) lands on End); presumably the
                             // buffer is readable at End -- confirm.
    1291      422957 :       } else if (*Current == '\'')
    1292             :         break;
    1293      405292 :       StringRef::iterator i = skip_nb_char(Current);
    1294      405292 :       if (i == Current) {
                               // Not a non-break character: try a line break instead. If that
                               // fails too, stop; otherwise start a new line.
    1295          15 :         i = skip_b_break(Current);
    1296          15 :         if (i == Current)
    1297             :           break;
    1298          14 :         Current = i;
    1299          14 :         Column = 0;
    1300          14 :         ++Line;
    1301             :       } else {
                               // Stop without advancing if the character just examined is the
                               // last one in the input.
    1302      405277 :         if (i == End)
    1303             :           break;
    1304      405277 :         Current = i;
    1305      405277 :         ++Column;
    1306             :       }
    1307             :     }
    1308             :   }
    1309             : 
    1310      265021 :   if (Current == End) {
    1311          11 :     setError("Expected quote at end of scalar", Current);
    1312          11 :     return false;
    1313             :   }
    1314             : 
    1315      265010 :   skip(1); // Skip ending quote.
    1316             :   Token T;
    1317      265010 :   T.Kind = Token::TK_Scalar;
    1318      265010 :   T.Range = StringRef(Start, Current - Start);
    1319      265010 :   TokenQueue.push_back(T);
    1320             : 
                         // Quoted scalars can be simple keys; register the token just queued.
    1321      265010 :   saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
    1322             : 
    1323      265010 :   IsSimpleKeyAllowed = false;
    1324             : 
    1325             :   return true;
    1326             : }
    1327             : 
                       // Scans an unquoted (plain) scalar starting at Current.
                       // \returns true after queueing a TK_Scalar token covering [Start, Current)
                       // and saving it as a simple-key candidate; false (after setError) on an
                       // unexpected ':' in flow context, a tab in indentation, or an empty
                       // scalar.
    1328      321139 : bool Scanner::scanPlainScalar() {
    1329      321139 :   StringRef::iterator Start = Current;
    1330      321139 :   unsigned ColStart = Column;
    1331             :   unsigned LeadingBlanks = 0;
    1332             :   assert(Indent >= -1 && "Indent must be >= -1 !");
                         // Minimum column a continuation line must reach in block context.
    1333      321139 :   unsigned indent = static_cast<unsigned>(Indent + 1);
                         // Each iteration scans one run of non-blank characters and then the
                         // whitespace/line breaks following it.
    1334             :   while (true) {
                           // A '#' at the start of a run begins a comment and ends the scalar.
    1335      336793 :     if (*Current == '#')
    1336             :       break;
    1337             : 
    1338     2848666 :     while (!isBlankOrBreak(Current)) {
                             // In flow context a ':' must be followed by a blank, a break or
                             // ','; anything else is an error.
    1339      632655 :       if (  FlowLevel && *Current == ':'
    1340     2785325 :           && !(isBlankOrBreak(Current + 1) || *(Current + 1) == ',')) {
    1341           2 :         setError("Found unexpected ':' while scanning a plain scalar", Current);
    1342           2 :         return false;
    1343             :       }
    1344             : 
    1345             :       // Check for the end of the plain scalar.
                             // Either ':' followed by a blank/break, or (flow context only) any
                             // of the flow indicator characters.
    1346      173790 :       if (  (*Current == ':' && isBlankOrBreak(Current + 1))
    1347     2739255 :           || (  FlowLevel
    1348     1346759 :           && (StringRef(Current, 1).find_first_of(",:?[]{}")
    1349             :               != StringRef::npos)))
    1350             :         break;
    1351             : 
    1352     2512428 :       StringRef::iterator i = skip_nb_char(Current);
    1353     2512428 :       if (i == Current)
    1354             :         break;
    1355     2512185 :       Current = i;
    1356     2512185 :       ++Column;
    1357             :     }
    1358             : 
    1359             :     // Are we at the end?
    1360      336479 :     if (!isBlankOrBreak(Current))
    1361             :       break;
    1362             : 
    1363             :     // Eat blanks.
                           // Look ahead with Tmp; Current is only moved past the whitespace
                           // below if the scalar actually continues.
    1364      109478 :     StringRef::iterator Tmp = Current;
    1365      656050 :     while (isBlankOrBreak(Tmp)) {
    1366      546572 :       StringRef::iterator i = skip_s_white(Tmp);
    1367      546572 :       if (i != Tmp) {
                               // After a line break, a tab before the required indent column
                               // is rejected.
    1368      450990 :         if (LeadingBlanks && (Column < indent) && *Tmp == '\t') {
    1369           0 :           setError("Found invalid tab character in indentation", Tmp);
    1370           0 :           return false;
    1371             :         }
    1372             :         Tmp = i;
    1373      450990 :         ++Column;
    1374             :       } else {
    1375       95582 :         i = skip_b_break(Tmp);
    1376       95582 :         if (!LeadingBlanks)
    1377             :           LeadingBlanks = 1;
    1378             :         Tmp = i;
    1379       95582 :         Column = 0;
    1380       95582 :         ++Line;
    1381             :       }
    1382             :     }
    1383             : 
                           // In block context the scalar ends when the next content is indented
                           // less than `indent`.
                           // NOTE(review): Column and Line have already been advanced for the
                           // whitespace that is left unconsumed on this exit; verify that the
                           // following scanToNextToken() does not count it a second time.
    1384      109478 :     if (!FlowLevel && Column < indent)
    1385             :       break;
    1386             : 
    1387       15654 :     Current = Tmp;
    1388       15654 :   }
    1389      321137 :   if (Start == Current) {
    1390           1 :     setError("Got empty plain scalar", Start);
    1391           1 :     return false;
    1392             :   }
    1393             :   Token T;
    1394      321136 :   T.Kind = Token::TK_Scalar;
    1395      321136 :   T.Range = StringRef(Start, Current - Start);
    1396      321136 :   TokenQueue.push_back(T);
    1397             : 
    1398             :   // Plain scalars can be simple keys.
    1399      321136 :   saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
    1400             : 
    1401      321136 :   IsSimpleKeyAllowed = false;
    1402             : 
    1403             :   return true;
    1404             : }
    1405             : 
                       // Scans an alias or an anchor starting at its indicator character
                       // (fetchMoreTokens() dispatches '*' with IsAlias == true and '&' with
                       // IsAlias == false).
                       // \param IsAlias selects TK_Alias (true) or TK_Anchor (false).
                       // \returns true after queueing the token, whose Range includes the
                       // indicator, and saving it as a simple-key candidate.
    1406          29 : bool Scanner::scanAliasOrAnchor(bool IsAlias) {
    1407          29 :   StringRef::iterator Start = Current;
    1408          29 :   unsigned ColStart = Column;
    1409          29 :   skip(1);
                         // Consume name characters until a flow indicator, ':' or a character
                         // that skip_ns_char() does not accept.
    1410             :   while(true) {
    1411         168 :     if (   *Current == '[' || *Current == ']'
    1412         168 :         || *Current == '{' || *Current == '}'
    1413         168 :         || *Current == ','
    1414         164 :         || *Current == ':')
    1415             :       break;
    1416         162 :     StringRef::iterator i = skip_ns_char(Current);
    1417         162 :     if (i == Current)
    1418             :       break;
    1419         139 :     Current = i;
    1420         139 :     ++Column;
    1421         139 :   }
    1422             : 
                         // NOTE(review): assuming skip(1) above advanced Current past the
                         // indicator, Start == Current cannot hold here (the branch has zero
                         // recorded hits), so a bare '*' or '&' is not reported as empty. The
                         // intended test was probably Start + 1 == Current -- confirm.
    1423          29 :   if (Start == Current) {
    1424           0 :     setError("Got empty alias or anchor", Start);
    1425           0 :     return false;
    1426             :   }
    1427             : 
    1428             :   Token T;
    1429          29 :   T.Kind = IsAlias ? Token::TK_Alias : Token::TK_Anchor;
    1430          29 :   T.Range = StringRef(Start, Current - Start);
    1431          29 :   TokenQueue.push_back(T);
    1432             : 
    1433             :   // Alias and anchors can be simple keys.
    1434          29 :   saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
    1435             : 
    1436          29 :   IsSimpleKeyAllowed = false;
    1437             : 
    1438             :   return true;
    1439             : }
    1440             : 
                       // Consumes an optional block scalar chomping indicator at Current.
                       // \returns '+' or '-' if one was present (and skipped), otherwise ' '.
    1441       11092 : char Scanner::scanBlockChompingIndicator() {
    1442             :   char Indicator = ' ';
    1443       11092 :   if (Current != End && (*Current == '+' || *Current == '-')) {
    1444             :     Indicator = *Current;
    1445          17 :     skip(1);
    1446             :   }
    1447       11092 :   return Indicator;
    1448             : }
    1449             : 
    1450             : /// Get the number of line breaks after chomping.
    1451             : ///
    1452             : /// Return the number of trailing line breaks to emit, depending on
    1453             : /// \p ChompingIndicator.
                       ///
                       /// \param ChompingIndicator '-' (strip), '+' (keep); any other value clips.
                       /// \param LineBreaks number of trailing line breaks that were scanned.
                       /// \param Str the scalar text collected so far; when clipping, an empty
                       ///        \p Str yields 0 line breaks instead of 1.
    1454             : static unsigned getChompedLineBreaks(char ChompingIndicator,
    1455             :                                      unsigned LineBreaks, StringRef Str) {
    1456        5546 :   if (ChompingIndicator == '-') // Strip all line breaks.
    1457             :     return 0;
    1458        5535 :   if (ChompingIndicator == '+') // Keep all line breaks.
    1459             :     return LineBreaks;
    1460             :   // Clip trailing lines.
    1461        5529 :   return Str.empty() ? 0 : 1;
    1462             : }
    1463             : 
                       // Consumes an optional block scalar indentation indicator at Current.
                       // \returns the digit's value (1-9) if one was present (and skipped),
                       // otherwise 0. Note that '0' is not accepted as an indicator.
    1464        5554 : unsigned Scanner::scanBlockIndentationIndicator() {
    1465             :   unsigned Indent = 0;
    1466        5554 :   if (Current != End && (*Current >= '1' && *Current <= '9')) {
    1467           6 :     Indent = unsigned(*Current - '0');
    1468           6 :     skip(1);
    1469             :   }
    1470        5554 :   return Indent;
    1471             : }
    1472             : 
                       // Scans the remainder of a block scalar header line (the text after the
                       // '|' or '>' that scanBlockScalar() already skipped).
                       // \param ChompingIndicator [out] '+', '-' or ' ' if none was given.
                       // \param IndentIndicator [out] explicit indentation 1-9, or 0 if none.
                       // \param IsDone [out] set to true when the input ends right after the
                       //        header; a TK_BlockScalar token has then already been queued.
                       // \returns false (after setError) if the header is not followed by a
                       // line break, true otherwise.
    1473        5554 : bool Scanner::scanBlockScalarHeader(char &ChompingIndicator,
    1474             :                                     unsigned &IndentIndicator, bool &IsDone) {
    1475        5554 :   auto Start = Current;
    1476             : 
                         // The two indicators may appear in either order, so the chomping
                         // indicator is tried before and (if absent) after the indentation one.
    1477        5554 :   ChompingIndicator = scanBlockChompingIndicator();
    1478        5554 :   IndentIndicator = scanBlockIndentationIndicator();
    1479             :   // Check for the chomping indicator once again.
    1480        5554 :   if (ChompingIndicator == ' ')
    1481        5538 :     ChompingIndicator = scanBlockChompingIndicator();
                         // Skip trailing white space and an optional comment on the header line.
    1482        5554 :   Current = skip_while(&Scanner::skip_s_white, Current);
    1483        5554 :   skipComment();
    1484             : 
    1485        5554 :   if (Current == End) { // EOF, we have an empty scalar.
                           // The token's Range covers only the header text; T.Value is not set.
    1486             :     Token T;
    1487           2 :     T.Kind = Token::TK_BlockScalar;
    1488           2 :     T.Range = StringRef(Start, Current - Start);
    1489           2 :     TokenQueue.push_back(T);
    1490           2 :     IsDone = true;
    1491             :     return true;
    1492             :   }
    1493             : 
    1494        5552 :   if (!consumeLineBreakIfPresent()) {
    1495           6 :     setError("Expected a line break after block scalar header", Current);
    1496           3 :     return false;
    1497             :   }
    1498             :   return true;
    1499             : }
    1500             : 
                       // Determines a block scalar's indentation from its first non-empty line.
                       // scanBlockScalar() calls this when the header gave no explicit
                       // indentation indicator.
                       // \param BlockIndent [out] column of the first non-empty line.
                       // \param BlockExitIndent a first non-empty line at or left of this column
                       //        ends the scalar.
                       // \param LineBreaks [in,out] incremented for each leading empty line.
                       // \param IsDone [out] set to true if the scalar ended (less-indented
                       //        line, EOF, or no line break) before any content line was found.
                       // \returns false (after setError) if a leading all-spaces line is longer
                       // than the discovered indentation, true otherwise.
    1501        5543 : bool Scanner::findBlockScalarIndent(unsigned &BlockIndent,
    1502             :                                     unsigned BlockExitIndent,
    1503             :                                     unsigned &LineBreaks, bool &IsDone) {
    1504             :   unsigned MaxAllSpaceLineCharacters = 0;
                         // Only read when MaxAllSpaceLineCharacters is non-zero, i.e. after it
                         // has been assigned below.
    1505             :   StringRef::iterator LongestAllSpaceLine;
    1506             : 
    1507             :   while (true) {
    1508        5846 :     advanceWhile(&Scanner::skip_s_space);
    1509        5846 :     if (skip_nb_char(Current) != Current) {
    1510             :       // This line isn't empty, so try and find the indentation.
    1511        5542 :       if (Column <= BlockExitIndent) { // End of the block literal.
    1512           9 :         IsDone = true;
    1513           9 :         return true;
    1514             :       }
    1515             :       // We found the block's indentation.
    1516        5533 :       BlockIndent = Column;
    1517        5533 :       if (MaxAllSpaceLineCharacters > BlockIndent) {
    1518           1 :         setError(
    1519             :             "Leading all-spaces line must be smaller than the block indent",
    1520             :             LongestAllSpaceLine);
    1521           1 :         return false;
    1522             :       }
    1523             :       return true;
    1524             :     }
                           // The line held only spaces. If it ends in a line break and is the
                           // widest such line so far, remember it for the check above.
    1525         304 :     if (skip_b_break(Current) != Current &&
    1526         303 :         Column > MaxAllSpaceLineCharacters) {
    1527             :       // Record the longest all-space line in case it's longer than the
    1528             :       // discovered block indent.
    1529             :       MaxAllSpaceLineCharacters = Column;
    1530             :       LongestAllSpaceLine = Current;
    1531             :     }
    1532             : 
    1533             :     // Check for EOF.
    1534         304 :     if (Current == End) {
    1535           1 :       IsDone = true;
    1536           1 :       return true;
    1537             :     }
    1538             : 
    1539         303 :     if (!consumeLineBreakIfPresent()) {
    1540           0 :       IsDone = true;
    1541           0 :       return true;
    1542             :     }
    1543         303 :     ++LineBreaks;
    1544             :   }
                         // Not reached: the loop above only exits through its return statements.
    1545             :   return true;
    1546             : }
    1547             : 
                       // Consumes the indentation of the next block scalar line and classifies
                       // the line.
                       // \param BlockIndent the scalar's indentation column.
                       // \param BlockExitIndent a text line at or left of this column ends the
                       //        scalar.
                       // \param IsDone [out] set to true when the scalar ends at this line.
                       // \returns false (after setError) for a text line indented less than
                       // BlockIndent that is not a comment; true otherwise.
    1548      119947 : bool Scanner::scanBlockScalarIndent(unsigned BlockIndent,
    1549             :                                     unsigned BlockExitIndent, bool &IsDone) {
    1550             :   // Skip the indentation.
    1551      313609 :   while (Column < BlockIndent) {
    1552      211242 :     auto I = skip_s_space(Current);
    1553      211242 :     if (I == Current)
    1554             :       break;
    1555      193662 :     Current = I;
    1556      193662 :     ++Column;
    1557             :   }
    1558             : 
                         // No non-break character here: the line has no text after its
                         // indentation, so there is nothing to classify. IsDone is left as is.
    1559      119947 :   if (skip_nb_char(Current) == Current)
    1560             :     return true;
    1561             : 
    1562      106458 :   if (Column <= BlockExitIndent) { // End of the block literal.
    1563        5474 :     IsDone = true;
    1564        5474 :     return true;
    1565             :   }
    1566             : 
                         // Indented more than the exit column but less than the block: allowed
                         // only for a comment, which also terminates the scalar.
    1567      100984 :   if (Column < BlockIndent) {
    1568           4 :     if (Current != End && *Current == '#') { // Trailing comment.
    1569           2 :       IsDone = true;
    1570           2 :       return true;
    1571             :     }
    1572           2 :     setError("A text line is less indented than the block scalar", Current);
    1573           2 :     return false;
    1574             :   }
    1575             :   return true; // A normal text line.
    1576             : }
    1577             : 
                       // Scans a block scalar starting at its '|' or '>' indicator and queues a
                       // TK_BlockScalar token. The token's Range is the raw body text after the
                       // header line and its Value is the processed text.
                       // \param IsLiteral true for '|', false for '>' (per the calls in
                       //        fetchMoreTokens()). NOTE(review): the parameter is not
                       //        referenced in this body, so both styles are scanned identically.
                       // \returns false if the header or a body line reported an error.
    1578        5554 : bool Scanner::scanBlockScalar(bool IsLiteral) {
    1579             :   // Eat '|' or '>'
    1580             :   assert(*Current == '|' || *Current == '>');
    1581        5554 :   skip(1);
    1582             : 
    1583             :   char ChompingIndicator;
    1584             :   unsigned BlockIndent;
    1585        5554 :   bool IsDone = false;
    1586        5554 :   if (!scanBlockScalarHeader(ChompingIndicator, BlockIndent, IsDone))
    1587             :     return false;
                         // The header hit EOF and already queued an empty block scalar token.
    1588        5551 :   if (IsDone)
    1589             :     return true;
    1590             : 
    1591        5549 :   auto Start = Current;
    1592        5549 :   unsigned BlockExitIndent = Indent < 0 ? 0 : (unsigned)Indent;
    1593        5549 :   unsigned LineBreaks = 0;
                         // 0 means the header carried no explicit indentation indicator, so the
                         // indentation is detected from the first non-empty line.
    1594        5549 :   if (BlockIndent == 0) {
    1595        5543 :     if (!findBlockScalarIndent(BlockIndent, BlockExitIndent, LineBreaks,
    1596             :                                IsDone))
    1597             :       return false;
    1598             :   }
    1599             : 
    1600             :   // Scan the block's scalars body.
    1601             :   SmallString<256> Str;
    1602      119957 :   while (!IsDone) {
    1603      119947 :     if (!scanBlockScalarIndent(BlockIndent, BlockExitIndent, IsDone))
    1604             :       return false;
    1605      119945 :     if (IsDone)
    1606             :       break;
    1607             : 
    1608             :     // Parse the current line.
    1609      114469 :     auto LineStart = Current;
    1610      114469 :     advanceWhile(&Scanner::skip_nb_char);
                           // Pending line breaks are emitted lazily, only once more text
                           // follows, so trailing breaks stay in LineBreaks for chomping.
    1611      114469 :     if (LineStart != Current) {
    1612      100980 :       Str.append(LineBreaks, '\n');
    1613      100980 :       Str.append(StringRef(LineStart, Current - LineStart));
    1614      100980 :       LineBreaks = 0;
    1615             :     }
    1616             : 
    1617             :     // Check for EOF.
    1618      114469 :     if (Current == End)
    1619             :       break;
    1620             : 
    1621      114409 :     if (!consumeLineBreakIfPresent())
    1622             :       break;
    1623      114409 :     ++LineBreaks;
    1624             :   }
    1625             : 
    1626        5546 :   if (Current == End && !LineBreaks)
    1627             :     // Ensure that there is at least one line break before the end of file.
    1628           3 :     LineBreaks = 1;
    1629       11081 :   Str.append(getChompedLineBreaks(ChompingIndicator, LineBreaks, Str), '\n');
    1630             : 
    1631             :   // New lines may start a simple key.
    1632        5546 :   if (!FlowLevel)
    1633        5546 :     IsSimpleKeyAllowed = true;
    1634             : 
    1635             :   Token T;
    1636        5546 :   T.Kind = Token::TK_BlockScalar;
    1637        5546 :   T.Range = StringRef(Start, Current - Start);
    1638        5546 :   T.Value = Str.str().str();
    1639        5546 :   TokenQueue.push_back(T);
    1640             :   return true;
    1641             : }
    1642             : 
                       // Scans a tag starting at its '!' and queues a TK_Tag token whose Range
                       // includes the '!'. The token is also saved as a simple-key candidate.
                       // \returns false if a verbatim tag ("!<...") is not closed by '>',
                       // true otherwise.
    1643        2110 : bool Scanner::scanTag() {
    1644        2110 :   StringRef::iterator Start = Current;
    1645        2110 :   unsigned ColStart = Column;
    1646        2110 :   skip(1); // Eat !.
                         // The first branch is a deliberate empty statement: a lone '!' needs
                         // no further scanning.
    1647        2110 :   if (Current == End || isBlankOrBreak(Current)); // An empty tag.
    1648        2108 :   else if (*Current == '<') {
                           // Verbatim form: '<', URI characters, then a mandatory '>'.
    1649           3 :     skip(1);
    1650           3 :     scan_ns_uri_char();
    1651           3 :     if (!consume('>'))
    1652             :       return false;
    1653             :   } else {
    1654             :     // FIXME: Actually parse the c-ns-shorthand-tag rule.
                           // NOTE(review): Current is assigned directly here; whether Column is
                           // kept in sync depends on skip_while() -- confirm.
    1655        2105 :     Current = skip_while(&Scanner::skip_ns_char, Current);
    1656             :   }
    1657             : 
    1658             :   Token T;
    1659        2108 :   T.Kind = Token::TK_Tag;
    1660        2108 :   T.Range = StringRef(Start, Current - Start);
    1661        2108 :   TokenQueue.push_back(T);
    1662             : 
    1663             :   // Tags can be simple keys.
    1664        2108 :   saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
    1665             : 
    1666        2108 :   IsSimpleKeyAllowed = false;
    1667             : 
    1668             :   return true;
    1669             : }
    1670             : 
                       // Scans the next token(s) into the token queue by dispatching on the
                       // character at Current to the matching scan* routine.
                       // \returns the result of the selected scan routine, or false (after
                       // setError) if no token can start with the current character.
    1671     1244458 : bool Scanner::fetchMoreTokens() {
    1672     1244458 :   if (IsStartOfStream)
    1673        4482 :     return scanStreamStart();
    1674             : 
    1675     1239976 :   scanToNextToken();
    1676             : 
    1677     1239976 :   if (Current == End)
    1678        3488 :     return scanStreamEnd();
    1679             : 
    1680     1236488 :   removeStaleSimpleKeyCandidates();
    1681             : 
    1682     1236488 :   unrollIndent(Column);
    1683             : 
                         // Directives and document markers are only recognised in column 0.
    1684     1236488 :   if (Column == 0 && *Current == '%')
    1685          19 :     return scanDirective();
    1686             : 
                         // "---" document start marker.
                         // NOTE(review): with Current + 4 <= End already required, the
                         // Current + 3 == End alternative below can never be true, so a marker
                         // that is the very last three characters of the input is not matched
                         // here. The same applies to the "..." test that follows.
    1687      133031 :   if (Column == 0 && Current + 4 <= End
    1688      132892 :       && *Current == '-'
    1689        8325 :       && *(Current + 1) == '-'
    1690        7310 :       && *(Current + 2) == '-'
    1691     1243779 :       && (Current + 3 == End || isBlankOrBreak(Current + 3)))
    1692        7309 :     return scanDocumentIndicator(true);
    1693             : 
                         // "..." document end marker.
    1694      125722 :   if (Column == 0 && Current + 4 <= End
    1695      125583 :       && *Current == '.'
    1696        7027 :       && *(Current + 1) == '.'
    1697        7027 :       && *(Current + 2) == '.'
    1698     1236187 :       && (Current + 3 == End || isBlankOrBreak(Current + 3)))
    1699        7025 :     return scanDocumentIndicator(false);
    1700             : 
                         // Flow collection delimiters; the bool selects sequence ('[' / ']',
                         // true) versus mapping ('{' / '}', false).
    1701     1222135 :   if (*Current == '[')
    1702        5462 :     return scanFlowCollectionStart(true);
    1703             : 
    1704     1216673 :   if (*Current == '{')
    1705       59434 :     return scanFlowCollectionStart(false);
    1706             : 
    1707     1157239 :   if (*Current == ']')
    1708        5431 :     return scanFlowCollectionEnd(true);
    1709             : 
    1710     1151808 :   if (*Current == '}')
    1711       59387 :     return scanFlowCollectionEnd(false);
    1712             : 
    1713     1092421 :   if (*Current == ',')
    1714      164872 :     return scanFlowEntry();
    1715             : 
                         // '-', '?' and ':' are indicators only when followed by a blank or
                         // break; '?' and ':' are additionally always indicators inside a flow
                         // collection.
    1716      927549 :   if (*Current == '-' && isBlankOrBreak(Current + 1))
    1717       34661 :     return scanBlockEntry();
    1718             : 
    1719      892888 :   if (*Current == '?' && (FlowLevel || isBlankOrBreak(Current + 1)))
    1720          31 :     return scanKey();
    1721             : 
    1722      892857 :   if (*Current == ':' && (FlowLevel || isBlankOrBreak(Current + 1)))
    1723      299003 :     return scanValue();
    1724             : 
    1725      593854 :   if (*Current == '*')
    1726          16 :     return scanAliasOrAnchor(true);
    1727             : 
    1728      593838 :   if (*Current == '&')
    1729          13 :     return scanAliasOrAnchor(false);
    1730             : 
    1731      593825 :   if (*Current == '!')
    1732        2110 :     return scanTag();
    1733             : 
                         // Block scalars are only recognised outside flow collections.
    1734      591715 :   if (*Current == '|' && !FlowLevel)
    1735        5534 :     return scanBlockScalar(true);
    1736             : 
    1737      586181 :   if (*Current == '>' && !FlowLevel)
    1738          20 :     return scanBlockScalar(false);
    1739             : 
    1740      586161 :   if (*Current == '\'')
    1741       17666 :     return scanFlowScalar(false);
    1742             : 
    1743      568495 :   if (*Current == '"')
    1744      247355 :     return scanFlowScalar(true);
    1745             : 
    1746             :   // Get a plain scalar.
                         // A plain scalar may start with any character that is neither blank,
                         // break nor an indicator, plus the special cases listed below for '-',
                         // '?' and ':'.
    1747             :   StringRef FirstChar(Current, 1);
    1748      642280 :   if (!(isBlankOrBreak(Current)
    1749      321141 :         || FirstChar.find_first_of("-?:,[]{}#&*!|>'\"%@`") != StringRef::npos)
    1750         709 :       || (*Current == '-' && !isBlankOrBreak(Current + 1))
    1751           2 :       || (!FlowLevel && (*Current == '?' || *Current == ':')
    1752           1 :           && isBlankOrBreak(Current + 1))
    1753      321142 :       || (!FlowLevel && *Current == ':'
    1754           1 :                       && Current + 2 < End
    1755           1 :                       && *(Current + 1) == ':'
    1756           1 :                       && !isBlankOrBreak(Current + 2)))
    1757      321139 :     return scanPlainScalar();
    1758             : 
    1759           1 :   setError("Unrecognized character while tokenizing.");
    1760           1 :   return false;
    1761             : }
    1762             : 
                       // Constructs a stream over the text in \p Input by creating a Scanner
                       // with the same arguments. No document exists until begin() is called.
    1763        4373 : Stream::Stream(StringRef Input, SourceMgr &SM, bool ShowColors,
    1764        4373 :                std::error_code *EC)
    1765        4373 :     : scanner(new Scanner(Input, SM, ShowColors, EC)), CurrentDoc() {}
    1766             : 
                       // Same as the StringRef constructor, but the input is supplied as a
                       // MemoryBufferRef that is forwarded to the Scanner.
    1767         109 : Stream::Stream(MemoryBufferRef InputBuffer, SourceMgr &SM, bool ShowColors,
    1768         109 :                std::error_code *EC)
    1769         109 :     : scanner(new Scanner(InputBuffer, SM, ShowColors, EC)), CurrentDoc() {}
    1770             : 
                       // Defaulted in this file rather than in the header; presumably so the
                       // owned Scanner and Document types are complete where the destructor is
                       // instantiated -- confirm against the header.
    1771             : Stream::~Stream() = default;
    1772             : 
                       // Reports the scanner's failure state.
    1773        1920 : bool Stream::failed() { return scanner->failed(); }
    1774             : 
                       // Prints \p Msg as an error (SourceMgr::DK_Error) through the scanner,
                       // located at the start of \p N's source range and highlighting that
                       // range. \p N is dereferenced unconditionally and must not be null.
    1775         136 : void Stream::printError(Node *N, const Twine &Msg) {
    1776         272 :   scanner->printError( N->getSourceRange().Start
    1777             :                      , SourceMgr::DK_Error
    1778             :                      , Msg
    1779         136 :                      , N->getSourceRange());
    1780         136 : }
    1781             : 
                       // Starts iteration over the stream's documents and returns an iterator
                       // to the first one. May be called only once per Stream: a second call
                       // (CurrentDoc already set) is a fatal error.
    1782        4482 : document_iterator Stream::begin() {
    1783        4482 :   if (CurrentDoc)
    1784           0 :     report_fatal_error("Can only iterate over the stream once");
    1785             : 
    1786             :   // Skip Stream-Start.
    1787        4482 :   scanner->getNext();
    1788             : 
    1789        4482 :   CurrentDoc.reset(new Document(*this));
    1790        4482 :   return document_iterator(CurrentDoc);
    1791             : }
    1792             : 
                       // Returns the past-the-end iterator, which is a default-constructed
                       // document_iterator.
    1793       18037 : document_iterator Stream::end() {
    1794       18037 :   return document_iterator();
    1795             : }
    1796             : 
                       // Iterates over every document in the stream and calls skip() on each.
                       // Because this calls begin(), it counts as the stream's single permitted
                       // iteration.
    1797          53 : void Stream::skip() {
    1798         107 :   for (document_iterator i = begin(), e = end(); i != e; ++i)
    1799         108 :     i->skip();
    1800          53 : }
    1801             : 
                       // Constructs a node of kind \p Type belonging to document \p D, with
                       // anchor \p A and raw tag \p T. The source range is initialised to an
                       // empty range at the start of the next token that has not been consumed.
    1802      998782 : Node::Node(unsigned int Type, std::unique_ptr<Document> &D, StringRef A,
    1803      998782 :            StringRef T)
    1804      998782 :     : Doc(D), TypeID(Type), Anchor(A), Tag(T) {
    1805      998782 :   SMLoc Start = SMLoc::getFromPointer(peekNext().Range.begin());
    1806      998782 :   SourceRange = SMRange(Start, Start);
    1807      998782 : }
    1808             : 
                       // Returns the node's tag with its handle expanded through the
                       // document's tag map. If the raw tag is empty or just "!", a default
                       // "tag:yaml.org,2002:*" tag is chosen from the node kind instead.
    1809        5192 : std::string Node::getVerbatimTag() const {
    1810        5192 :   StringRef Raw = getRawTag();
    1811        5192 :   if (!Raw.empty() && Raw != "!") {
    1812             :     std::string Ret;
                           // Case 1: the only '!' is the leading one ("!suffix"), so the handle
                           // is "!".
                           // NOTE(review): find("!") here and find("!!") below are dereferenced
                           // without an end() check; this assumes the tag map always contains
                           // both entries -- confirm where the map is populated.
    1813        3989 :     if (Raw.find_last_of('!') == 0) {
    1814       11856 :       Ret = Doc->getTagMap().find("!")->second;
    1815        3952 :       Ret += Raw.substr(1);
    1816             :       return Ret;
                           // Case 2: "!!suffix", so the handle is "!!".
    1817             :     } else if (Raw.startswith("!!")) {
    1818          93 :       Ret = Doc->getTagMap().find("!!")->second;
    1819             :       Ret += Raw.substr(2);
    1820             :       return Ret;
    1821             :     } else {
                             // Case 3: a named handle "!name!suffix". The handle is everything
                             // up to and including the last '!'.
    1822           6 :       StringRef TagHandle = Raw.substr(0, Raw.find_last_of('!') + 1);
    1823             :       std::map<StringRef, StringRef>::const_iterator It =
    1824           6 :           Doc->getTagMap().find(TagHandle);
    1825          12 :       if (It != Doc->getTagMap().end())
    1826           8 :         Ret = It->second;
    1827             :       else {
                               // Unknown handle: report an error, but still fall through and
                               // return the bare suffix without a prefix.
    1828             :         Token T;
    1829           2 :         T.Kind = Token::TK_Tag;
    1830           2 :         T.Range = TagHandle;
    1831           2 :         setError(Twine("Unknown tag handle ") + TagHandle, T);
    1832             :       }
    1833          12 :       Ret += Raw.substr(Raw.find_last_of('!') + 1);
    1834             :       return Ret;
    1835             :     }
    1836             :   }
    1837             : 
                         // No usable explicit tag: pick the default tag for the node kind.
    1838        1203 :   switch (getType()) {
    1839             :   case NK_Null:
    1840          42 :     return "tag:yaml.org,2002:null";
    1841             :   case NK_Scalar:
    1842             :   case NK_BlockScalar:
    1843             :     // TODO: Tag resolution.
    1844         861 :     return "tag:yaml.org,2002:str";
    1845             :   case NK_Mapping:
    1846         206 :     return "tag:yaml.org,2002:map";
    1847             :   case NK_Sequence:
    1848          94 :     return "tag:yaml.org,2002:seq";
    1849             :   }
    1850             : 
                         // Reached only for a node kind not listed in the switch above.
    1851           0 :   return "";
    1852             : }
    1853             : 
                       // Forwards to the owning document's peekNext().
    1854     2855634 : Token &Node::peekNext() {
    1855     5711268 :   return Doc->peekNext();
    1856             : }
    1857             : 
                       // Forwards to the owning document's getNext() and returns the token by
                       // value.
    1858      902173 : Token Node::getNext() {
    1859     1804346 :   return Doc->getNext();
    1860             : }
    1861             : 
                       // Forwards to the owning document's parseBlockNode().
    1862      687663 : Node *Node::parseBlockNode() {
    1863     1375326 :   return Doc->parseBlockNode();
    1864             : }
    1865             : 
    1866      301592 : BumpPtrAllocator &Node::getAllocator() {
    1867      603184 :   return Doc->NodeAllocator;
    1868             : }
    1869             : 
    1870          22 : void Node::setError(const Twine &Msg, Token &Tok) const {
    1871          44 :   Doc->setError(Msg, Tok);
    1872          22 : }
    1873             : 
    1874      959901 : bool Node::failed() const {
    1875     1919802 :   return Doc->failed();
    1876             : }
    1877             : 
    1878      493230 : StringRef ScalarNode::getValue(SmallVectorImpl<char> &Storage) const {
    1879             :   // TODO: Handle newlines properly. We need to remove leading whitespace.
    1880      986460 :   if (Value[0] == '"') { // Double quoted.
    1881             :     // Pull off the leading and trailing "s.
    1882      330554 :     StringRef UnquotedValue = Value.substr(1, Value.size() - 2);
    1883             :     // Search for characters that would require unescaping the value.
    1884      165277 :     StringRef::size_type i = UnquotedValue.find_first_of("\\\r\n");
    1885      165277 :     if (i != StringRef::npos)
    1886          41 :       return unescapeDoubleQuoted(UnquotedValue, i, Storage);
    1887      165236 :     return UnquotedValue;
    1888      327953 :   } else if (Value[0] == '\'') { // Single quoted.
    1889             :     // Pull off the leading and trailing 's.
    1890       34536 :     StringRef UnquotedValue = Value.substr(1, Value.size() - 2);
    1891       17260 :     StringRef::size_type i = UnquotedValue.find('\'');
    1892           8 :     if (i != StringRef::npos) {
    1893             :       // We're going to need Storage.
    1894             :       Storage.clear();
    1895             :       Storage.reserve(UnquotedValue.size());
    1896          17 :       for (; i != StringRef::npos; i = UnquotedValue.find('\'')) {
    1897           9 :         StringRef Valid(UnquotedValue.begin(), i);
    1898           9 :         Storage.insert(Storage.end(), Valid.begin(), Valid.end());
    1899           9 :         Storage.push_back('\'');
    1900          18 :         UnquotedValue = UnquotedValue.substr(i + 2);
    1901             :       }
    1902          16 :       Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end());
    1903          16 :       return StringRef(Storage.begin(), Storage.size());
    1904             :     }
    1905       17260 :     return UnquotedValue;
    1906             :   }
    1907             :   // Plain or block.
    1908      310685 :   return Value.rtrim(' ');
    1909             : }
    1910             : 
    1911          41 : StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue
    1912             :                                           , StringRef::size_type i
    1913             :                                           , SmallVectorImpl<char> &Storage)
    1914             :                                           const {
    1915             :   // Use Storage to build proper value.
    1916             :   Storage.clear();
    1917             :   Storage.reserve(UnquotedValue.size());
    1918         162 :   for (; i != StringRef::npos; i = UnquotedValue.find_first_of("\\\r\n")) {
    1919             :     // Insert all previous chars into Storage.
    1920         122 :     StringRef Valid(UnquotedValue.begin(), i);
    1921         122 :     Storage.insert(Storage.end(), Valid.begin(), Valid.end());
    1922             :     // Chop off inserted chars.
    1923         122 :     UnquotedValue = UnquotedValue.substr(i);
    1924             : 
    1925             :     assert(!UnquotedValue.empty() && "Can't be empty!");
    1926             : 
    1927             :     // Parse escape or line break.
    1928         122 :     switch (UnquotedValue[0]) {
    1929          23 :     case '\r':
    1930             :     case '\n':
    1931          23 :       Storage.push_back('\n');
    1932             :       if (   UnquotedValue.size() > 1
    1933          23 :           && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n'))
    1934           3 :         UnquotedValue = UnquotedValue.substr(1);
    1935          23 :       UnquotedValue = UnquotedValue.substr(1);
    1936          23 :       break;
    1937             :     default:
    1938          99 :       if (UnquotedValue.size() == 1)
    1939             :         // TODO: Report error.
    1940             :         break;
    1941          99 :       UnquotedValue = UnquotedValue.substr(1);
    1942             :       switch (UnquotedValue[0]) {
    1943             :       default: {
    1944             :           Token T;
    1945           1 :           T.Range = StringRef(UnquotedValue.begin(), 1);
    1946           2 :           setError("Unrecognized escape code!", T);
    1947           1 :           return "";
    1948             :         }
    1949             :       case '\r':
    1950             :       case '\n':
    1951             :         // Remove the new line.
    1952             :         if (   UnquotedValue.size() > 1
    1953           5 :             && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n'))
    1954           0 :           UnquotedValue = UnquotedValue.substr(1);
    1955             :         // If this was just a single byte newline, it will get skipped
    1956             :         // below.
    1957             :         break;
    1958           1 :       case '0':
    1959           1 :         Storage.push_back(0x00);
    1960           1 :         break;
    1961           1 :       case 'a':
    1962           1 :         Storage.push_back(0x07);
    1963           1 :         break;
    1964           2 :       case 'b':
    1965           2 :         Storage.push_back(0x08);
    1966           2 :         break;
    1967           3 :       case 't':
    1968             :       case 0x09:
    1969           3 :         Storage.push_back(0x09);
    1970           3 :         break;
    1971           5 :       case 'n':
    1972           5 :         Storage.push_back(0x0A);
    1973           5 :         break;
    1974           1 :       case 'v':
    1975           1 :         Storage.push_back(0x0B);
    1976           1 :         break;
    1977           1 :       case 'f':
    1978           1 :         Storage.push_back(0x0C);
    1979           1 :         break;
    1980           2 :       case 'r':
    1981           2 :         Storage.push_back(0x0D);
    1982           2 :         break;
    1983           1 :       case 'e':
    1984           1 :         Storage.push_back(0x1B);
    1985           1 :         break;
    1986           3 :       case ' ':
    1987           3 :         Storage.push_back(0x20);
    1988           3 :         break;
    1989          52 :       case '"':
    1990          52 :         Storage.push_back(0x22);
    1991          52 :         break;
    1992           0 :       case '/':
    1993           0 :         Storage.push_back(0x2F);
    1994           0 :         break;
    1995          11 :       case '\\':
    1996          11 :         Storage.push_back(0x5C);
    1997          11 :         break;
    1998           1 :       case 'N':
    1999           1 :         encodeUTF8(0x85, Storage);
    2000           1 :         break;
    2001           1 :       case '_':
    2002           1 :         encodeUTF8(0xA0, Storage);
    2003           1 :         break;
    2004           1 :       case 'L':
    2005           1 :         encodeUTF8(0x2028, Storage);
    2006           1 :         break;
    2007           1 :       case 'P':
    2008           1 :         encodeUTF8(0x2029, Storage);
    2009           1 :         break;
    2010             :       case 'x': {
    2011           3 :           if (UnquotedValue.size() < 3)
    2012             :             // TODO: Report error.
    2013             :             break;
    2014             :           unsigned int UnicodeScalarValue;
    2015           6 :           if (UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue))
    2016             :             // TODO: Report error.
    2017             :             UnicodeScalarValue = 0xFFFD;
    2018           3 :           encodeUTF8(UnicodeScalarValue, Storage);
    2019           3 :           UnquotedValue = UnquotedValue.substr(2);
    2020           3 :           break;
    2021             :         }
    2022             :       case 'u': {
    2023           2 :           if (UnquotedValue.size() < 5)
    2024             :             // TODO: Report error.
    2025             :             break;
    2026             :           unsigned int UnicodeScalarValue;
    2027           4 :           if (UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue))
    2028             :             // TODO: Report error.
    2029             :             UnicodeScalarValue = 0xFFFD;
    2030           2 :           encodeUTF8(UnicodeScalarValue, Storage);
    2031           2 :           UnquotedValue = UnquotedValue.substr(4);
    2032           2 :           break;
    2033             :         }
    2034             :       case 'U': {
    2035           1 :           if (UnquotedValue.size() < 9)
    2036             :             // TODO: Report error.
    2037             :             break;
    2038             :           unsigned int UnicodeScalarValue;
    2039           2 :           if (UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue))
    2040             :             // TODO: Report error.
    2041             :             UnicodeScalarValue = 0xFFFD;
    2042           1 :           encodeUTF8(UnicodeScalarValue, Storage);
    2043           1 :           UnquotedValue = UnquotedValue.substr(8);
    2044           1 :           break;
    2045             :         }
    2046             :       }
    2047          98 :       UnquotedValue = UnquotedValue.substr(1);
    2048             :     }
    2049             :   }
    2050          80 :   Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end());
    2051          80 :   return StringRef(Storage.begin(), Storage.size());
    2052             : }
    2053             : 
    2054     1010695 : Node *KeyValueNode::getKey() {
    2055     1010695 :   if (Key)
    2056             :     return Key;
    2057             :   // Handle implicit null keys.
    2058             :   {
    2059      299006 :     Token &t = peekNext();
    2060      598012 :     if (   t.Kind == Token::TK_BlockEnd
    2061      299006 :         || t.Kind == Token::TK_Value
    2062      299006 :         || t.Kind == Token::TK_Error) {
    2063           0 :       return Key = new (getAllocator()) NullNode(Doc);
    2064             :     }
    2065      299006 :     if (t.Kind == Token::TK_Key)
    2066      597978 :       getNext(); // skip TK_Key.
    2067             :   }
    2068             : 
    2069             :   // Handle explicit null keys.
    2070      299006 :   Token &t = peekNext();
    2071      299006 :   if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Value) {
    2072           6 :     return Key = new (getAllocator()) NullNode(Doc);
    2073             :   }
    2074             : 
    2075             :   // We've got a normal key.
    2076      299003 :   return Key = parseBlockNode();
    2077             : }
    2078             : 
    2079      710458 : Node *KeyValueNode::getValue() {
    2080      710458 :   if (Value)
    2081             :     return Value;
    2082      298992 :   getKey()->skip();
    2083      298992 :   if (failed())
    2084           2 :     return Value = new (getAllocator()) NullNode(Doc);
    2085             : 
    2086             :   // Handle implicit null values.
    2087             :   {
    2088      298991 :     Token &t = peekNext();
    2089      597982 :     if (   t.Kind == Token::TK_BlockEnd
    2090      298991 :         || t.Kind == Token::TK_FlowMappingEnd
    2091      298981 :         || t.Kind == Token::TK_Key
    2092      298976 :         || t.Kind == Token::TK_FlowEntry
    2093      298971 :         || t.Kind == Token::TK_Error) {
    2094          40 :       return Value = new (getAllocator()) NullNode(Doc);
    2095             :     }
    2096             : 
    2097      298971 :     if (t.Kind != Token::TK_Value) {
    2098           3 :       setError("Unexpected token in Key Value.", t);
    2099           6 :       return Value = new (getAllocator()) NullNode(Doc);
    2100             :     }
    2101      298968 :     getNext(); // skip TK_Value.
    2102             :   }
    2103             : 
    2104             :   // Handle explicit null values.
    2105      298968 :   Token &t = peekNext();
    2106      298968 :   if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Key) {
    2107        5118 :     return Value = new (getAllocator()) NullNode(Doc);
    2108             :   }
    2109             : 
    2110             :   // We got a normal value.
    2111      296409 :   return Value = parseBlockNode();
    2112             : }
    2113             : 
    2114      501991 : void MappingNode::increment() {
    2115      501991 :   if (failed()) {
    2116          10 :     IsAtEnd = true;
    2117          10 :     CurrentEntry = nullptr;
    2118      112057 :     return;
    2119             :   }
    2120      501981 :   if (CurrentEntry) {
    2121      410954 :     CurrentEntry->skip();
    2122      410954 :     if (Type == MT_Inline) {
    2123          12 :       IsAtEnd = true;
    2124          12 :       CurrentEntry = nullptr;
    2125          12 :       return;
    2126             :     }
    2127             :   }
    2128      501969 :   Token T = peekNext();
    2129      501969 :   if (T.Kind == Token::TK_Key || T.Kind == Token::TK_Scalar) {
    2130             :     // KeyValueNode eats the TK_Key. That way it can detect null keys.
    2131      598012 :     CurrentEntry = new (getAllocator()) KeyValueNode(Doc);
    2132      202963 :   } else if (Type == MT_Block) {
    2133       31564 :     switch (T.Kind) {
    2134       31564 :     case Token::TK_BlockEnd:
    2135       31564 :       getNext();
    2136       31564 :       IsAtEnd = true;
    2137       31564 :       CurrentEntry = nullptr;
    2138       31564 :       break;
    2139           0 :     default:
    2140           0 :       setError("Unexpected token. Expected Key or Block End", T);
    2141             :       LLVM_FALLTHROUGH;
    2142           0 :     case Token::TK_Error:
    2143           0 :       IsAtEnd = true;
    2144           0 :       CurrentEntry = nullptr;
    2145             :     }
    2146             :   } else {
    2147      171399 :     switch (T.Kind) {
    2148      112035 :     case Token::TK_FlowEntry:
    2149             :       // Eat the flow entry and recurse.
    2150      112035 :       getNext();
    2151      112035 :       return increment();
    2152       59357 :     case Token::TK_FlowMappingEnd:
    2153      118714 :       getNext();
    2154             :       LLVM_FALLTHROUGH;
    2155       59357 :     case Token::TK_Error:
    2156             :       // Set this to end iterator.
    2157       59357 :       IsAtEnd = true;
    2158       59357 :       CurrentEntry = nullptr;
    2159       59357 :       break;
    2160           7 :     default:
    2161           7 :       setError( "Unexpected token. Expected Key, Flow Entry, or Flow "
    2162             :                 "Mapping End."
    2163             :               , T);
    2164           7 :       IsAtEnd = true;
    2165           7 :       CurrentEntry = nullptr;
    2166             :     }
    2167             :   }
    2168             : }
    2169             : 
    2170      158918 : void SequenceNode::increment() {
    2171      158918 :   if (failed()) {
    2172           6 :     IsAtEnd = true;
    2173           6 :     CurrentEntry = nullptr;
    2174       52820 :     return;
    2175             :   }
    2176      158912 :   if (CurrentEntry)
    2177      145006 :     CurrentEntry->skip();
    2178      158912 :   Token T = peekNext();
    2179      158912 :   if (SeqType == ST_Block) {
    2180       43002 :     switch (T.Kind) {
    2181       34590 :     case Token::TK_BlockEntry:
    2182       34590 :       getNext();
    2183       34590 :       CurrentEntry = parseBlockNode();
    2184       34590 :       if (!CurrentEntry) { // An error occurred.
    2185           1 :         IsAtEnd = true;
    2186           1 :         CurrentEntry = nullptr;
    2187             :       }
    2188             :       break;
    2189        8412 :     case Token::TK_BlockEnd:
    2190        8412 :       getNext();
    2191        8412 :       IsAtEnd = true;
    2192        8412 :       CurrentEntry = nullptr;
    2193        8412 :       break;
    2194           0 :     default:
    2195           0 :       setError( "Unexpected token. Expected Block Entry or Block End."
    2196             :               , T);
    2197             :       LLVM_FALLTHROUGH;
    2198           0 :     case Token::TK_Error:
    2199           0 :       IsAtEnd = true;
    2200           0 :       CurrentEntry = nullptr;
    2201             :     }
    2202      115910 :   } else if (SeqType == ST_Indentless) {
    2203         118 :     switch (T.Kind) {
    2204          68 :     case Token::TK_BlockEntry:
    2205          68 :       getNext();
    2206          68 :       CurrentEntry = parseBlockNode();
    2207          68 :       if (!CurrentEntry) { // An error occurred.
    2208           0 :         IsAtEnd = true;
    2209           0 :         CurrentEntry = nullptr;
    2210             :       }
    2211             :       break;
    2212          50 :     default:
    2213             :     case Token::TK_Error:
    2214          50 :       IsAtEnd = true;
    2215          50 :       CurrentEntry = nullptr;
    2216             :     }
    2217      115792 :   } else if (SeqType == ST_Flow) {
    2218      115792 :     switch (T.Kind) {
    2219       52814 :     case Token::TK_FlowEntry:
    2220             :       // Eat the flow entry and recurse.
    2221       52814 :       getNext();
    2222       52814 :       WasPreviousTokenFlowEntry = true;
    2223       52814 :       return increment();
    2224        5376 :     case Token::TK_FlowSequenceEnd:
    2225       10752 :       getNext();
    2226             :       LLVM_FALLTHROUGH;
    2227        5376 :     case Token::TK_Error:
    2228             :       // Set this to end iterator.
    2229        5376 :       IsAtEnd = true;
    2230        5376 :       CurrentEntry = nullptr;
    2231        5376 :       break;
    2232           7 :     case Token::TK_StreamEnd:
    2233             :     case Token::TK_DocumentEnd:
    2234             :     case Token::TK_DocumentStart:
    2235           7 :       setError("Could not find closing ]!", T);
    2236             :       // Set this to end iterator.
    2237           7 :       IsAtEnd = true;
    2238           7 :       CurrentEntry = nullptr;
    2239           7 :       break;
    2240       57595 :     default:
    2241       57595 :       if (!WasPreviousTokenFlowEntry) {
    2242           2 :         setError("Expected , between entries!", T);
    2243           2 :         IsAtEnd = true;
    2244           2 :         CurrentEntry = nullptr;
    2245           2 :         break;
    2246             :       }
    2247             :       // Otherwise it must be a flow entry.
    2248       57593 :       CurrentEntry = parseBlockNode();
    2249       57593 :       if (!CurrentEntry) {
    2250           0 :         IsAtEnd = true;
    2251             :       }
    2252       57593 :       WasPreviousTokenFlowEntry = false;
    2253       57593 :       break;
    2254             :     }
    2255             :   }
    2256             : }
    2257             : 
    2258       19102 : Document::Document(Stream &S) : stream(S), Root(nullptr) {
    2259             :   // Tag maps starts with two default mappings.
    2260        9551 :   TagMap["!"] = "!";
    2261        9551 :   TagMap["!!"] = "tag:yaml.org,2002:";
    2262             : 
    2263        9551 :   if (parseDirectives())
    2264          13 :     expectToken(Token::TK_DocumentStart);
    2265        9551 :   Token &T = peekNext();
    2266        9551 :   if (T.Kind == Token::TK_DocumentStart)
    2267       14518 :     getNext();
    2268        9551 : }
    2269             : 
    2270       13914 : bool Document::skip()  {
    2271       27828 :   if (stream.scanner->failed())
    2272             :     return false;
    2273       13868 :   if (!Root)
    2274             :     getRoot();
    2275       13868 :   Root->skip();
    2276       13868 :   Token &T = peekNext();
    2277       13868 :   if (T.Kind == Token::TK_StreamEnd)
    2278             :     return false;
    2279       11296 :   if (T.Kind == Token::TK_DocumentEnd) {
    2280        6215 :     getNext();
    2281        6215 :     return skip();
    2282             :   }
    2283             :   return true;
    2284             : }
    2285             : 
    2286     3587947 : Token &Document::peekNext() {
    2287     7175894 :   return stream.scanner->peekNext();
    2288             : }
    2289             : 
    2290     1614349 : Token Document::getNext() {
    2291     3228698 :   return stream.scanner->getNext();
    2292             : }
    2293             : 
    2294          24 : void Document::setError(const Twine &Message, Token &Location) const {
    2295          48 :   stream.scanner->setError(Message, Location.Range.begin());
    2296          24 : }
    2297             : 
    2298      959901 : bool Document::failed() const {
    2299     1919802 :   return stream.scanner->failed();
    2300             : }
    2301             : 
    2302      697204 : Node *Document::parseBlockNode() {
    2303      697204 :   Token T = peekNext();
    2304             :   // Handle properties.
    2305             :   Token AnchorInfo;
    2306             :   Token TagInfo;
    2307             : parse_property:
    2308      699325 :   switch (T.Kind) {
    2309          16 :   case Token::TK_Alias:
    2310          32 :     getNext();
    2311          16 :     return new (NodeAllocator) AliasNode(stream.CurrentDoc, T.Range.substr(1));
    2312          13 :   case Token::TK_Anchor:
    2313          13 :     if (AnchorInfo.Kind == Token::TK_Anchor) {
    2314           0 :       setError("Already encountered an anchor for this node!", T);
    2315           0 :       return nullptr;
    2316             :     }
    2317          13 :     AnchorInfo = getNext(); // Consume TK_Anchor.
    2318          13 :     T = peekNext();
    2319          13 :     goto parse_property;
    2320        2108 :   case Token::TK_Tag:
    2321        2108 :     if (TagInfo.Kind == Token::TK_Tag) {
    2322           0 :       setError("Already encountered a tag for this node!", T);
    2323           0 :       return nullptr;
    2324             :     }
    2325        2108 :     TagInfo = getNext(); // Consume TK_Tag.
    2326        2108 :     T = peekNext();
    2327        2108 :     goto parse_property;
    2328             :   default:
    2329             :     break;
    2330             :   }
    2331             : 
    2332      697188 :   switch (T.Kind) {
    2333          50 :   case Token::TK_BlockEntry:
    2334             :     // We got an unindented BlockEntry sequence. This is not terminated with
    2335             :     // a BlockEnd.
    2336             :     // Don't eat the TK_BlockEntry, SequenceNode needs it.
    2337          50 :     return new (NodeAllocator) SequenceNode( stream.CurrentDoc
    2338          50 :                                            , AnchorInfo.Range.substr(1)
    2339          50 :                                            , TagInfo.Range
    2340          50 :                                            , SequenceNode::ST_Indentless);
    2341        8419 :   case Token::TK_BlockSequenceStart:
    2342       16838 :     getNext();
    2343        8419 :     return new (NodeAllocator)
    2344        8419 :       SequenceNode( stream.CurrentDoc
    2345        8419 :                   , AnchorInfo.Range.substr(1)
    2346        8419 :                   , TagInfo.Range
    2347        8419 :                   , SequenceNode::ST_Block);
    2348       31594 :   case Token::TK_BlockMappingStart:
    2349       63188 :     getNext();
    2350       31594 :     return new (NodeAllocator)
    2351       31594 :       MappingNode( stream.CurrentDoc
    2352       31594 :                  , AnchorInfo.Range.substr(1)
    2353       31594 :                  , TagInfo.Range
    2354       31594 :                  , MappingNode::MT_Block);
    2355        5447 :   case Token::TK_FlowSequenceStart:
    2356       10894 :     getNext();
    2357        5447 :     return new (NodeAllocator)
    2358        5447 :       SequenceNode( stream.CurrentDoc
    2359        5447 :                   , AnchorInfo.Range.substr(1)
    2360        5447 :                   , TagInfo.Range
    2361        5447 :                   , SequenceNode::ST_Flow);
    2362       59430 :   case Token::TK_FlowMappingStart:
    2363      118860 :     getNext();
    2364       59430 :     return new (NodeAllocator)
    2365       59430 :       MappingNode( stream.CurrentDoc
    2366       59430 :                  , AnchorInfo.Range.substr(1)
    2367       59430 :                  , TagInfo.Range
    2368       59430 :                  , MappingNode::MT_Flow);
    2369      586096 :   case Token::TK_Scalar:
    2370     1172192 :     getNext();
    2371      586096 :     return new (NodeAllocator)
    2372      586096 :       ScalarNode( stream.CurrentDoc
    2373      586096 :                 , AnchorInfo.Range.substr(1)
    2374      586096 :                 , TagInfo.Range
    2375     1172192 :                 , T.Range);
    2376        5548 :   case Token::TK_BlockScalar: {
    2377        5548 :     getNext();
    2378        5548 :     StringRef NullTerminatedStr(T.Value.c_str(), T.Value.length() + 1);
    2379       11096 :     StringRef StrCopy = NullTerminatedStr.copy(NodeAllocator).drop_back();
    2380             :     return new (NodeAllocator)
    2381        5548 :         BlockScalarNode(stream.CurrentDoc, AnchorInfo.Range.substr(1),
    2382       11096 :                         TagInfo.Range, StrCopy, T.Range);
    2383             :   }
    2384          13 :   case Token::TK_Key:
    2385             :     // Don't eat the TK_Key, KeyValueNode expects it.
    2386          13 :     return new (NodeAllocator)
    2387          13 :       MappingNode( stream.CurrentDoc
    2388          13 :                  , AnchorInfo.Range.substr(1)
    2389          13 :                  , TagInfo.Range
    2390          13 :                  , MappingNode::MT_Inline);
    2391         577 :   case Token::TK_DocumentStart:
    2392             :   case Token::TK_DocumentEnd:
    2393             :   case Token::TK_StreamEnd:
    2394             :   default:
    2395             :     // TODO: Properly handle tags. "[!!str ]" should resolve to !!str "", not
    2396             :     //       !!null null.
    2397         577 :     return new (NodeAllocator) NullNode(stream.CurrentDoc);
    2398             :   case Token::TK_Error:
    2399             :     return nullptr;
    2400             :   }
    2401             :   llvm_unreachable("Control flow shouldn't reach here.");
    2402             :   return nullptr;
    2403             : }
    2404             : 
    2405        9551 : bool Document::parseDirectives() {
    2406             :   bool isDirective = false;
    2407             :   while (true) {
    2408        9569 :     Token T = peekNext();
    2409        9569 :     if (T.Kind == Token::TK_TagDirective) {
    2410          13 :       parseTAGDirective();
    2411             :       isDirective = true;
    2412        9556 :     } else if (T.Kind == Token::TK_VersionDirective) {
    2413           5 :       parseYAMLDirective();
    2414             :       isDirective = true;
    2415             :     } else
    2416             :       break;
    2417             :   }
    2418        9551 :   return isDirective;
    2419             : }
    2420             : 
    2421           5 : void Document::parseYAMLDirective() {
    2422           5 :   getNext(); // Eat %YAML <version>
    2423           5 : }
    2424             : 
    2425          13 : void Document::parseTAGDirective() {
    2426          13 :   Token Tag = getNext(); // %TAG <handle> <prefix>
    2427          13 :   StringRef T = Tag.Range;
    2428             :   // Strip %TAG
    2429          39 :   T = T.substr(T.find_first_of(" \t")).ltrim(" \t");
    2430          13 :   std::size_t HandleEnd = T.find_first_of(" \t");
    2431          13 :   StringRef TagHandle = T.substr(0, HandleEnd);
    2432          26 :   StringRef TagPrefix = T.substr(HandleEnd).ltrim(" \t");
    2433          13 :   TagMap[TagHandle] = TagPrefix;
    2434          13 : }
    2435             : 
    2436          13 : bool Document::expectToken(int TK) {
    2437          13 :   Token T = getNext();
    2438          13 :   if (T.Kind != TK) {
    2439           2 :     setError("Unexpected token", T);
    2440           2 :     return false;
    2441             :   }
    2442             :   return true;
    2443             : }

Generated by: LCOV version 1.13