LLVM  16.0.0git
YAMLParser.cpp
Go to the documentation of this file.
1 //===- YAMLParser.cpp - Simple YAML parser --------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a YAML parser.
10 //
11 //===----------------------------------------------------------------------===//
12 
14 #include "llvm/ADT/AllocatorList.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/None.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/SmallString.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/Support/Compiler.h"
26 #include "llvm/Support/SMLoc.h"
27 #include "llvm/Support/SourceMgr.h"
28 #include "llvm/Support/Unicode.h"
30 #include <cassert>
31 #include <cstddef>
32 #include <cstdint>
33 #include <map>
34 #include <memory>
35 #include <string>
36 #include <system_error>
37 #include <utility>
38 
39 using namespace llvm;
40 using namespace yaml;
41 
43  UEF_UTF32_LE, ///< UTF-32 Little Endian
44  UEF_UTF32_BE, ///< UTF-32 Big Endian
45  UEF_UTF16_LE, ///< UTF-16 Little Endian
46  UEF_UTF16_BE, ///< UTF-16 Big Endian
47  UEF_UTF8, ///< UTF-8 or ascii.
48  UEF_Unknown ///< Not a valid Unicode encoding.
49 };
50 
51 /// EncodingInfo - Holds the encoding type and length of the byte order mark if
52 /// it exists. Length is in {0, 2, 3, 4}.
53 using EncodingInfo = std::pair<UnicodeEncodingForm, unsigned>;
54 
55 /// getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode
56 /// encoding form of \a Input.
57 ///
58 /// @param Input A string of length 0 or more.
59 /// @returns An EncodingInfo indicating the Unicode encoding form of the input
60 /// and how long the byte order mark is if one exists.
62  if (Input.empty())
63  return std::make_pair(UEF_Unknown, 0);
64 
65  switch (uint8_t(Input[0])) {
66  case 0x00:
67  if (Input.size() >= 4) {
68  if ( Input[1] == 0
69  && uint8_t(Input[2]) == 0xFE
70  && uint8_t(Input[3]) == 0xFF)
71  return std::make_pair(UEF_UTF32_BE, 4);
72  if (Input[1] == 0 && Input[2] == 0 && Input[3] != 0)
73  return std::make_pair(UEF_UTF32_BE, 0);
74  }
75 
76  if (Input.size() >= 2 && Input[1] != 0)
77  return std::make_pair(UEF_UTF16_BE, 0);
78  return std::make_pair(UEF_Unknown, 0);
79  case 0xFF:
80  if ( Input.size() >= 4
81  && uint8_t(Input[1]) == 0xFE
82  && Input[2] == 0
83  && Input[3] == 0)
84  return std::make_pair(UEF_UTF32_LE, 4);
85 
86  if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFE)
87  return std::make_pair(UEF_UTF16_LE, 2);
88  return std::make_pair(UEF_Unknown, 0);
89  case 0xFE:
90  if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFF)
91  return std::make_pair(UEF_UTF16_BE, 2);
92  return std::make_pair(UEF_Unknown, 0);
93  case 0xEF:
94  if ( Input.size() >= 3
95  && uint8_t(Input[1]) == 0xBB
96  && uint8_t(Input[2]) == 0xBF)
97  return std::make_pair(UEF_UTF8, 3);
98  return std::make_pair(UEF_Unknown, 0);
99  }
100 
101  // It could still be utf-32 or utf-16.
102  if (Input.size() >= 4 && Input[1] == 0 && Input[2] == 0 && Input[3] == 0)
103  return std::make_pair(UEF_UTF32_LE, 0);
104 
105  if (Input.size() >= 2 && Input[1] == 0)
106  return std::make_pair(UEF_UTF16_LE, 0);
107 
108  return std::make_pair(UEF_UTF8, 0);
109 }
110 
111 /// Pin the vtables to this file.
112 void Node::anchor() {}
113 void NullNode::anchor() {}
114 void ScalarNode::anchor() {}
115 void BlockScalarNode::anchor() {}
116 void KeyValueNode::anchor() {}
117 void MappingNode::anchor() {}
118 void SequenceNode::anchor() {}
119 void AliasNode::anchor() {}
120 
121 namespace llvm {
122 namespace yaml {
123 
124 /// Token - A single YAML token.
125 struct Token {
126  enum TokenKind {
127  TK_Error, // Uninitialized token.
149  TK_Tag
150  } Kind = TK_Error;
151 
152  /// A string of length 0 or more whose begin() points to the logical location
153  /// of the token in the input.
155 
156  /// The value of a block scalar node.
157  std::string Value;
158 
159  Token() = default;
160 };
161 
162 } // end namespace yaml
163 } // end namespace llvm
164 
166 
167 namespace {
168 
169 /// This struct is used to track simple keys.
170 ///
171 /// Simple keys are handled by creating an entry in SimpleKeys for each Token
172 /// which could legally be the start of a simple key. When peekNext is called,
173 /// if the Token to be returned is referenced by a SimpleKey, we continue
174 /// tokenizing until that potential simple key has either been found to not be
175 /// a simple key (we moved on to the next line or went further than 1024 chars).
176 /// Or when we run into a Value, and then insert a Key token (and possibly
177 /// others) before the SimpleKey's Tok.
178 struct SimpleKey {
180  unsigned Column = 0;
181  unsigned Line = 0;
182  unsigned FlowLevel = 0;
183  bool IsRequired = false;
184 
185  bool operator ==(const SimpleKey &Other) {
186  return Tok == Other.Tok;
187  }
188 };
189 
190 } // end anonymous namespace
191 
192 /// The Unicode scalar value of a UTF-8 minimal well-formed code unit
193 /// subsequence and the subsequence's length in code units (uint8_t).
194 /// A length of 0 represents an error.
195 using UTF8Decoded = std::pair<uint32_t, unsigned>;
196 
198  StringRef::iterator Position= Range.begin();
199  StringRef::iterator End = Range.end();
200  // 1 byte: [0x00, 0x7f]
201  // Bit pattern: 0xxxxxxx
202  if (Position < End && (*Position & 0x80) == 0) {
203  return std::make_pair(*Position, 1);
204  }
205  // 2 bytes: [0x80, 0x7ff]
206  // Bit pattern: 110xxxxx 10xxxxxx
207  if (Position + 1 < End && ((*Position & 0xE0) == 0xC0) &&
208  ((*(Position + 1) & 0xC0) == 0x80)) {
209  uint32_t codepoint = ((*Position & 0x1F) << 6) |
210  (*(Position + 1) & 0x3F);
211  if (codepoint >= 0x80)
212  return std::make_pair(codepoint, 2);
213  }
214  // 3 bytes: [0x800, 0xffff]
215  // Bit pattern: 1110xxxx 10xxxxxx 10xxxxxx
216  if (Position + 2 < End && ((*Position & 0xF0) == 0xE0) &&
217  ((*(Position + 1) & 0xC0) == 0x80) &&
218  ((*(Position + 2) & 0xC0) == 0x80)) {
219  uint32_t codepoint = ((*Position & 0x0F) << 12) |
220  ((*(Position + 1) & 0x3F) << 6) |
221  (*(Position + 2) & 0x3F);
222  // Codepoints between 0xD800 and 0xDFFF are invalid, as
223  // they are high / low surrogate halves used by UTF-16.
224  if (codepoint >= 0x800 &&
225  (codepoint < 0xD800 || codepoint > 0xDFFF))
226  return std::make_pair(codepoint, 3);
227  }
228  // 4 bytes: [0x10000, 0x10FFFF]
229  // Bit pattern: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
230  if (Position + 3 < End && ((*Position & 0xF8) == 0xF0) &&
231  ((*(Position + 1) & 0xC0) == 0x80) &&
232  ((*(Position + 2) & 0xC0) == 0x80) &&
233  ((*(Position + 3) & 0xC0) == 0x80)) {
234  uint32_t codepoint = ((*Position & 0x07) << 18) |
235  ((*(Position + 1) & 0x3F) << 12) |
236  ((*(Position + 2) & 0x3F) << 6) |
237  (*(Position + 3) & 0x3F);
238  if (codepoint >= 0x10000 && codepoint <= 0x10FFFF)
239  return std::make_pair(codepoint, 4);
240  }
241  return std::make_pair(0, 0);
242 }
243 
244 namespace llvm {
245 namespace yaml {
246 
247 /// Scans YAML tokens from a MemoryBuffer.
248 class Scanner {
249 public:
250  Scanner(StringRef Input, SourceMgr &SM, bool ShowColors = true,
251  std::error_code *EC = nullptr);
252  Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors = true,
253  std::error_code *EC = nullptr);
254 
255  /// Parse the next token and return it without popping it.
256  Token &peekNext();
257 
258  /// Parse the next token and pop it from the queue.
259  Token getNext();
260 
261  void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message,
262  ArrayRef<SMRange> Ranges = None) {
263  SM.PrintMessage(Loc, Kind, Message, Ranges, /* FixIts= */ None, ShowColors);
264  }
265 
266  void setError(const Twine &Message, StringRef::iterator Position) {
267  if (Position >= End)
268  Position = End - 1;
269 
270  // propagate the error if possible
271  if (EC)
272  *EC = make_error_code(std::errc::invalid_argument);
273 
274  // Don't print out more errors after the first one we encounter. The rest
275  // are just the result of the first, and have no meaning.
276  if (!Failed)
277  printError(SMLoc::getFromPointer(Position), SourceMgr::DK_Error, Message);
278  Failed = true;
279  }
280 
281  /// Returns true if an error occurred while parsing.
282  bool failed() {
283  return Failed;
284  }
285 
286 private:
287  void init(MemoryBufferRef Buffer);
288 
289  StringRef currentInput() {
290  return StringRef(Current, End - Current);
291  }
292 
293  /// Decode a UTF-8 minimal well-formed code unit subsequence starting
294  /// at \a Position.
295  ///
296  /// If the UTF-8 code units starting at Position do not form a well-formed
297  /// code unit subsequence, then the Unicode scalar value is 0, and the length
298  /// is 0.
300  return ::decodeUTF8(StringRef(Position, End - Position));
301  }
302 
303  // The following functions are based on the grammar rules in the YAML spec. The
304  // style of the function names is meant to closely match how they are written
305  // in the spec. The number within the [] is the number of the grammar rule in
306  // the spec.
307  //
308  // See 4.2 [Production Naming Conventions] for the meaning of the prefixes.
309  //
310  // c-
311  // A production starting and ending with a special character.
312  // b-
313  // A production matching a single line break.
314  // nb-
315  // A production starting and ending with a non-break character.
316  // s-
317  // A production starting and ending with a white space character.
318  // ns-
319  // A production starting and ending with a non-space character.
320  // l-
321  // A production matching complete line(s).
322 
323  /// Skip a single nb-char[27] starting at Position.
324  ///
325  /// A nb-char is 0x9 | [0x20-0x7E] | 0x85 | [0xA0-0xD7FF] | [0xE000-0xFEFE]
326  /// | [0xFF00-0xFFFD] | [0x10000-0x10FFFF]
327  ///
328  /// @returns The code unit after the nb-char, or Position if it's not an
329  /// nb-char.
330  StringRef::iterator skip_nb_char(StringRef::iterator Position);
331 
332  /// Skip a single b-break[28] starting at Position.
333  ///
334  /// A b-break is 0xD 0xA | 0xD | 0xA
335  ///
336  /// @returns The code unit after the b-break, or Position if it's not a
337  /// b-break.
338  StringRef::iterator skip_b_break(StringRef::iterator Position);
339 
340  /// Skip a single s-space[31] starting at Position.
341  ///
342  /// An s-space is 0x20
343  ///
344  /// @returns The code unit after the s-space, or Position if it's not a
345  /// s-space.
346  StringRef::iterator skip_s_space(StringRef::iterator Position);
347 
348  /// Skip a single s-white[33] starting at Position.
349  ///
350  /// A s-white is 0x20 | 0x9
351  ///
352  /// @returns The code unit after the s-white, or Position if it's not a
353  /// s-white.
354  StringRef::iterator skip_s_white(StringRef::iterator Position);
355 
356  /// Skip a single ns-char[34] starting at Position.
357  ///
358  /// A ns-char is nb-char - s-white
359  ///
360  /// @returns The code unit after the ns-char, or Position if it's not a
361  /// ns-char.
362  StringRef::iterator skip_ns_char(StringRef::iterator Position);
363 
364  using SkipWhileFunc = StringRef::iterator (Scanner::*)(StringRef::iterator);
365 
366  /// Skip minimal well-formed code unit subsequences until Func
367  /// returns its input.
368  ///
369  /// @returns The code unit after the last minimal well-formed code unit
370  /// subsequence that Func accepted.
371  StringRef::iterator skip_while( SkipWhileFunc Func
372  , StringRef::iterator Position);
373 
374  /// Skip minimal well-formed code unit subsequences until Func returns its
375  /// input.
376  void advanceWhile(SkipWhileFunc Func);
377 
378  /// Scan ns-uri-char[39]s starting at Current.
379  ///
380  /// This updates Current and Column while scanning.
381  void scan_ns_uri_char();
382 
383  /// Consume a minimal well-formed code unit subsequence starting at
384  /// \a Current. Return false if it is not the same Unicode scalar value as
385  /// \a Expected. This updates \a Column.
386  bool consume(uint32_t Expected);
387 
388  /// Skip \a Distance UTF-8 code units. Updates \a Current and \a Column.
389  void skip(uint32_t Distance);
390 
391  /// Return true if the minimal well-formed code unit subsequence at
392  /// Position is whitespace or a new line.
393  bool isBlankOrBreak(StringRef::iterator Position);
394 
395  /// Return true if \a Line consists only of blanks and line breaks (or is empty), false otherwise.
396  bool isLineEmpty(StringRef Line);
397 
398  /// Consume a single b-break[28] if it's present at the current position.
399  ///
400  /// Return false if the code unit at the current position isn't a line break.
401  bool consumeLineBreakIfPresent();
402 
403  /// If IsSimpleKeyAllowed, create and push_back a new SimpleKey.
404  void saveSimpleKeyCandidate( TokenQueueT::iterator Tok
405  , unsigned AtColumn
406  , bool IsRequired);
407 
408  /// Remove simple keys that can no longer be valid simple keys.
409  ///
410  /// Invalid simple keys are not on the current line or are further than 1024
411  /// columns back.
412  void removeStaleSimpleKeyCandidates();
413 
414  /// Remove the most recently saved simple key candidate if it is on FlowLevel \a Level.
415  void removeSimpleKeyCandidatesOnFlowLevel(unsigned Level);
416 
417  /// Unroll indentation in \a Indents back to \a ToColumn. Creates BlockEnd
418  /// tokens if needed.
419  bool unrollIndent(int ToColumn);
420 
421  /// Increase indent to \a ToColumn. Creates \a Kind token at \a InsertPoint
422  /// if needed.
423  bool rollIndent( int ToColumn
424  , Token::TokenKind Kind
425  , TokenQueueT::iterator InsertPoint);
426 
427  /// Skip a single-line comment when the comment starts at the current
428  /// position of the scanner.
429  void skipComment();
430 
431  /// Skip whitespace and comments until the start of the next token.
432  void scanToNextToken();
433 
434  /// Must be the first token generated.
435  bool scanStreamStart();
436 
437  /// Generate tokens needed to close out the stream.
438  bool scanStreamEnd();
439 
440  /// Scan a %BLAH directive.
441  bool scanDirective();
442 
443  /// Scan a ... or ---.
444  bool scanDocumentIndicator(bool IsStart);
445 
446  /// Scan a [ or { and generate the proper flow collection start token.
447  bool scanFlowCollectionStart(bool IsSequence);
448 
449  /// Scan a ] or } and generate the proper flow collection end token.
450  bool scanFlowCollectionEnd(bool IsSequence);
451 
452  /// Scan the , that separates entries in a flow collection.
453  bool scanFlowEntry();
454 
455  /// Scan the - that starts block sequence entries.
456  bool scanBlockEntry();
457 
458  /// Scan an explicit ? indicating a key.
459  bool scanKey();
460 
461  /// Scan an explicit : indicating a value.
462  bool scanValue();
463 
464  /// Scan a quoted scalar.
465  bool scanFlowScalar(bool IsDoubleQuoted);
466 
467  /// Scan an unquoted scalar.
468  bool scanPlainScalar();
469 
470  /// Scan an Alias or Anchor starting with * or &.
471  bool scanAliasOrAnchor(bool IsAlias);
472 
473  /// Scan a block scalar starting with | or >.
474  bool scanBlockScalar(bool IsLiteral);
475 
476  /// Scan a block scalar style indicator and header.
477  ///
478  /// Note: This is distinct from scanBlockScalarHeader to mirror the fact that
479  /// YAML does not consider the style indicator to be a part of the header.
480  ///
481  /// Return false if an error occurred.
482  bool scanBlockScalarIndicators(char &StyleIndicator, char &ChompingIndicator,
483  unsigned &IndentIndicator, bool &IsDone);
484 
485  /// Scan a style indicator in a block scalar header.
486  char scanBlockStyleIndicator();
487 
488  /// Scan a chomping indicator in a block scalar header.
489  char scanBlockChompingIndicator();
490 
491  /// Scan an indentation indicator in a block scalar header.
492  unsigned scanBlockIndentationIndicator();
493 
494  /// Scan a block scalar header.
495  ///
496  /// Return false if an error occurred.
497  bool scanBlockScalarHeader(char &ChompingIndicator, unsigned &IndentIndicator,
498  bool &IsDone);
499 
500  /// Look for the indentation level of a block scalar.
501  ///
502  /// Return false if an error occurred.
503  bool findBlockScalarIndent(unsigned &BlockIndent, unsigned BlockExitIndent,
504  unsigned &LineBreaks, bool &IsDone);
505 
506  /// Scan the indentation of a text line in a block scalar.
507  ///
508  /// Return false if an error occurred.
509  bool scanBlockScalarIndent(unsigned BlockIndent, unsigned BlockExitIndent,
510  bool &IsDone);
511 
512  /// Scan a tag of the form !stuff.
513  bool scanTag();
514 
515  /// Dispatch to the next scanning function based on \a *Current.
516  bool fetchMoreTokens();
517 
518  /// The SourceMgr used for diagnostics and buffer management.
519  SourceMgr &SM;
520 
521  /// The original input.
522  MemoryBufferRef InputBuffer;
523 
524  /// The current position of the scanner.
525  StringRef::iterator Current;
526 
527  /// The end of the input (one past the last character).
529 
530  /// Current YAML indentation level in spaces.
531  int Indent;
532 
533  /// Current column number in Unicode code points.
534  unsigned Column;
535 
536  /// Current line number.
537  unsigned Line;
538 
539  /// How deep we are in flow style containers. 0 Means at block level.
540  unsigned FlowLevel;
541 
542  /// Are we at the start of the stream?
543  bool IsStartOfStream;
544 
545  /// Can the next token be the start of a simple key?
546  bool IsSimpleKeyAllowed;
547 
548  /// True if an error has occurred.
549  bool Failed;
550 
551  /// Should colors be used when printing out the diagnostic messages?
552  bool ShowColors;
553 
554  /// Queue of tokens. This is required to queue up tokens while looking
555  /// for the end of a simple key. And for cases where a single character
556  /// can produce multiple tokens (e.g. BlockEnd).
557  TokenQueueT TokenQueue;
558 
559  /// Indentation levels.
560  SmallVector<int, 4> Indents;
561 
562  /// Potential simple keys.
563  SmallVector<SimpleKey, 4> SimpleKeys;
564 
565  std::error_code *EC;
566 };
567 
568 } // end namespace yaml
569 } // end namespace llvm
570 
571 /// encodeUTF8 - Encode \a UnicodeScalarValue in UTF-8 and append it to result.
572 static void encodeUTF8( uint32_t UnicodeScalarValue
573  , SmallVectorImpl<char> &Result) {
574  if (UnicodeScalarValue <= 0x7F) {
575  Result.push_back(UnicodeScalarValue & 0x7F);
576  } else if (UnicodeScalarValue <= 0x7FF) {
577  uint8_t FirstByte = 0xC0 | ((UnicodeScalarValue & 0x7C0) >> 6);
578  uint8_t SecondByte = 0x80 | (UnicodeScalarValue & 0x3F);
579  Result.push_back(FirstByte);
580  Result.push_back(SecondByte);
581  } else if (UnicodeScalarValue <= 0xFFFF) {
582  uint8_t FirstByte = 0xE0 | ((UnicodeScalarValue & 0xF000) >> 12);
583  uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
584  uint8_t ThirdByte = 0x80 | (UnicodeScalarValue & 0x3F);
585  Result.push_back(FirstByte);
586  Result.push_back(SecondByte);
587  Result.push_back(ThirdByte);
588  } else if (UnicodeScalarValue <= 0x10FFFF) {
589  uint8_t FirstByte = 0xF0 | ((UnicodeScalarValue & 0x1F0000) >> 18);
590  uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0x3F000) >> 12);
591  uint8_t ThirdByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
592  uint8_t FourthByte = 0x80 | (UnicodeScalarValue & 0x3F);
593  Result.push_back(FirstByte);
594  Result.push_back(SecondByte);
595  Result.push_back(ThirdByte);
596  Result.push_back(FourthByte);
597  }
598 }
599 
601  SourceMgr SM;
602  Scanner scanner(Input, SM);
603  while (true) {
604  Token T = scanner.getNext();
605  switch (T.Kind) {
607  OS << "Stream-Start: ";
608  break;
609  case Token::TK_StreamEnd:
610  OS << "Stream-End: ";
611  break;
613  OS << "Version-Directive: ";
614  break;
616  OS << "Tag-Directive: ";
617  break;
619  OS << "Document-Start: ";
620  break;
622  OS << "Document-End: ";
623  break;
625  OS << "Block-Entry: ";
626  break;
627  case Token::TK_BlockEnd:
628  OS << "Block-End: ";
629  break;
631  OS << "Block-Sequence-Start: ";
632  break;
634  OS << "Block-Mapping-Start: ";
635  break;
636  case Token::TK_FlowEntry:
637  OS << "Flow-Entry: ";
638  break;
640  OS << "Flow-Sequence-Start: ";
641  break;
643  OS << "Flow-Sequence-End: ";
644  break;
646  OS << "Flow-Mapping-Start: ";
647  break;
649  OS << "Flow-Mapping-End: ";
650  break;
651  case Token::TK_Key:
652  OS << "Key: ";
653  break;
654  case Token::TK_Value:
655  OS << "Value: ";
656  break;
657  case Token::TK_Scalar:
658  OS << "Scalar: ";
659  break;
661  OS << "Block Scalar: ";
662  break;
663  case Token::TK_Alias:
664  OS << "Alias: ";
665  break;
666  case Token::TK_Anchor:
667  OS << "Anchor: ";
668  break;
669  case Token::TK_Tag:
670  OS << "Tag: ";
671  break;
672  case Token::TK_Error:
673  break;
674  }
675  OS << T.Range << "\n";
676  if (T.Kind == Token::TK_StreamEnd)
677  break;
678  else if (T.Kind == Token::TK_Error)
679  return false;
680  }
681  return true;
682 }
683 
685  SourceMgr SM;
686  Scanner scanner(Input, SM);
687  while (true) {
688  Token T = scanner.getNext();
689  if (T.Kind == Token::TK_StreamEnd)
690  break;
691  else if (T.Kind == Token::TK_Error)
692  return false;
693  }
694  return true;
695 }
696 
697 std::string yaml::escape(StringRef Input, bool EscapePrintable) {
698  std::string EscapedInput;
699  for (StringRef::iterator i = Input.begin(), e = Input.end(); i != e; ++i) {
700  if (*i == '\\')
701  EscapedInput += "\\\\";
702  else if (*i == '"')
703  EscapedInput += "\\\"";
704  else if (*i == 0)
705  EscapedInput += "\\0";
706  else if (*i == 0x07)
707  EscapedInput += "\\a";
708  else if (*i == 0x08)
709  EscapedInput += "\\b";
710  else if (*i == 0x09)
711  EscapedInput += "\\t";
712  else if (*i == 0x0A)
713  EscapedInput += "\\n";
714  else if (*i == 0x0B)
715  EscapedInput += "\\v";
716  else if (*i == 0x0C)
717  EscapedInput += "\\f";
718  else if (*i == 0x0D)
719  EscapedInput += "\\r";
720  else if (*i == 0x1B)
721  EscapedInput += "\\e";
722  else if ((unsigned char)*i < 0x20) { // Control characters not handled above.
723  std::string HexStr = utohexstr(*i);
724  EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr;
725  } else if (*i & 0x80) { // UTF-8 multiple code unit subsequence.
726  UTF8Decoded UnicodeScalarValue
727  = decodeUTF8(StringRef(i, Input.end() - i));
728  if (UnicodeScalarValue.second == 0) {
729  // Found invalid char.
730  SmallString<4> Val;
731  encodeUTF8(0xFFFD, Val);
732  llvm::append_range(EscapedInput, Val);
733  // FIXME: Error reporting.
734  return EscapedInput;
735  }
736  if (UnicodeScalarValue.first == 0x85)
737  EscapedInput += "\\N";
738  else if (UnicodeScalarValue.first == 0xA0)
739  EscapedInput += "\\_";
740  else if (UnicodeScalarValue.first == 0x2028)
741  EscapedInput += "\\L";
742  else if (UnicodeScalarValue.first == 0x2029)
743  EscapedInput += "\\P";
744  else if (!EscapePrintable &&
745  sys::unicode::isPrintable(UnicodeScalarValue.first))
746  EscapedInput += StringRef(i, UnicodeScalarValue.second);
747  else {
748  std::string HexStr = utohexstr(UnicodeScalarValue.first);
749  if (HexStr.size() <= 2)
750  EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr;
751  else if (HexStr.size() <= 4)
752  EscapedInput += "\\u" + std::string(4 - HexStr.size(), '0') + HexStr;
753  else if (HexStr.size() <= 8)
754  EscapedInput += "\\U" + std::string(8 - HexStr.size(), '0') + HexStr;
755  }
756  i += UnicodeScalarValue.second - 1;
757  } else
758  EscapedInput.push_back(*i);
759  }
760  return EscapedInput;
761 }
762 
764  switch (S.size()) {
765  case 1:
766  switch (S.front()) {
767  case 'y':
768  case 'Y':
769  return true;
770  case 'n':
771  case 'N':
772  return false;
773  default:
774  return None;
775  }
776  case 2:
777  switch (S.front()) {
778  case 'O':
779  if (S[1] == 'N') // ON
780  return true;
781  [[fallthrough]];
782  case 'o':
783  if (S[1] == 'n') //[Oo]n
784  return true;
785  return None;
786  case 'N':
787  if (S[1] == 'O') // NO
788  return false;
789  [[fallthrough]];
790  case 'n':
791  if (S[1] == 'o') //[Nn]o
792  return false;
793  return None;
794  default:
795  return None;
796  }
797  case 3:
798  switch (S.front()) {
799  case 'O':
800  if (S.drop_front() == "FF") // OFF
801  return false;
802  [[fallthrough]];
803  case 'o':
804  if (S.drop_front() == "ff") //[Oo]ff
805  return false;
806  return None;
807  case 'Y':
808  if (S.drop_front() == "ES") // YES
809  return true;
810  [[fallthrough]];
811  case 'y':
812  if (S.drop_front() == "es") //[Yy]es
813  return true;
814  return None;
815  default:
816  return None;
817  }
818  case 4:
819  switch (S.front()) {
820  case 'T':
821  if (S.drop_front() == "RUE") // TRUE
822  return true;
823  [[fallthrough]];
824  case 't':
825  if (S.drop_front() == "rue") //[Tt]rue
826  return true;
827  return None;
828  default:
829  return None;
830  }
831  case 5:
832  switch (S.front()) {
833  case 'F':
834  if (S.drop_front() == "ALSE") // FALSE
835  return false;
836  [[fallthrough]];
837  case 'f':
838  if (S.drop_front() == "alse") //[Ff]alse
839  return false;
840  return None;
841  default:
842  return None;
843  }
844  default:
845  return None;
846  }
847 }
848 
849 Scanner::Scanner(StringRef Input, SourceMgr &sm, bool ShowColors,
850  std::error_code *EC)
851  : SM(sm), ShowColors(ShowColors), EC(EC) {
852  init(MemoryBufferRef(Input, "YAML"));
853 }
854 
855 Scanner::Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors,
856  std::error_code *EC)
857  : SM(SM_), ShowColors(ShowColors), EC(EC) {
858  init(Buffer);
859 }
860 
861 void Scanner::init(MemoryBufferRef Buffer) {
862  InputBuffer = Buffer;
863  Current = InputBuffer.getBufferStart();
864  End = InputBuffer.getBufferEnd();
865  Indent = -1;
866  Column = 0;
867  Line = 0;
868  FlowLevel = 0;
869  IsStartOfStream = true;
870  IsSimpleKeyAllowed = true;
871  Failed = false;
872  std::unique_ptr<MemoryBuffer> InputBufferOwner =
873  MemoryBuffer::getMemBuffer(Buffer, /*RequiresNullTerminator=*/false);
874  SM.AddNewSourceBuffer(std::move(InputBufferOwner), SMLoc());
875 }
876 
878  // If the current token is a possible simple key, keep parsing until we
879  // can confirm.
880  bool NeedMore = false;
881  while (true) {
882  if (TokenQueue.empty() || NeedMore) {
883  if (!fetchMoreTokens()) {
884  TokenQueue.clear();
885  SimpleKeys.clear();
886  TokenQueue.push_back(Token());
887  return TokenQueue.front();
888  }
889  }
890  assert(!TokenQueue.empty() &&
891  "fetchMoreTokens lied about getting tokens!");
892 
893  removeStaleSimpleKeyCandidates();
894  SimpleKey SK;
895  SK.Tok = TokenQueue.begin();
896  if (!is_contained(SimpleKeys, SK))
897  break;
898  else
899  NeedMore = true;
900  }
901  return TokenQueue.front();
902 }
903 
905  Token Ret = peekNext();
906  // TokenQueue can be empty if there was an error getting the next token.
907  if (!TokenQueue.empty())
908  TokenQueue.pop_front();
909 
910  // There cannot be any referenced Token's if the TokenQueue is empty. So do a
911  // quick deallocation of them all.
912  if (TokenQueue.empty())
913  TokenQueue.resetAlloc();
914 
915  return Ret;
916 }
917 
918 StringRef::iterator Scanner::skip_nb_char(StringRef::iterator Position) {
919  if (Position == End)
920  return Position;
921  // Check 7 bit c-printable - b-char.
922  if ( *Position == 0x09
923  || (*Position >= 0x20 && *Position <= 0x7E))
924  return Position + 1;
925 
926  // Check for valid UTF-8.
927  if (uint8_t(*Position) & 0x80) {
928  UTF8Decoded u8d = decodeUTF8(Position);
929  if ( u8d.second != 0
930  && u8d.first != 0xFEFF
931  && ( u8d.first == 0x85
932  || ( u8d.first >= 0xA0
933  && u8d.first <= 0xD7FF)
934  || ( u8d.first >= 0xE000
935  && u8d.first <= 0xFFFD)
936  || ( u8d.first >= 0x10000
937  && u8d.first <= 0x10FFFF)))
938  return Position + u8d.second;
939  }
940  return Position;
941 }
942 
943 StringRef::iterator Scanner::skip_b_break(StringRef::iterator Position) {
944  if (Position == End)
945  return Position;
946  if (*Position == 0x0D) {
947  if (Position + 1 != End && *(Position + 1) == 0x0A)
948  return Position + 2;
949  return Position + 1;
950  }
951 
952  if (*Position == 0x0A)
953  return Position + 1;
954  return Position;
955 }
956 
957 StringRef::iterator Scanner::skip_s_space(StringRef::iterator Position) {
958  if (Position == End)
959  return Position;
960  if (*Position == ' ')
961  return Position + 1;
962  return Position;
963 }
964 
965 StringRef::iterator Scanner::skip_s_white(StringRef::iterator Position) {
966  if (Position == End)
967  return Position;
968  if (*Position == ' ' || *Position == '\t')
969  return Position + 1;
970  return Position;
971 }
972 
973 StringRef::iterator Scanner::skip_ns_char(StringRef::iterator Position) {
974  if (Position == End)
975  return Position;
976  if (*Position == ' ' || *Position == '\t')
977  return Position;
978  return skip_nb_char(Position);
979 }
980 
981 StringRef::iterator Scanner::skip_while( SkipWhileFunc Func
982  , StringRef::iterator Position) {
983  while (true) {
984  StringRef::iterator i = (this->*Func)(Position);
985  if (i == Position)
986  break;
987  Position = i;
988  }
989  return Position;
990 }
991 
992 void Scanner::advanceWhile(SkipWhileFunc Func) {
993  auto Final = skip_while(Func, Current);
994  Column += Final - Current;
995  Current = Final;
996 }
997 
/// Return true if \a C is an ns-hex-digit[36]: an ASCII decimal digit or a
/// letter in [A-F] / [a-f]. Letters beyond 'f' are not hexadecimal digits.
static bool is_ns_hex_digit(const char C) {
  return (C >= '0' && C <= '9') || (C >= 'a' && C <= 'f') ||
         (C >= 'A' && C <= 'F');
}
999 
/// Return true if \a C is an ns-word-char[38]: an ASCII decimal digit, an
/// ASCII letter, or '-'.
static bool is_ns_word_char(const char C) {
  return C == '-' || (C >= '0' && C <= '9') || (C >= 'a' && C <= 'z') ||
         (C >= 'A' && C <= 'Z');
}
1001 
1002 void Scanner::scan_ns_uri_char() {
1003  while (true) {
1004  if (Current == End)
1005  break;
1006  if (( *Current == '%'
1007  && Current + 2 < End
1008  && is_ns_hex_digit(*(Current + 1))
1009  && is_ns_hex_digit(*(Current + 2)))
1010  || is_ns_word_char(*Current)
1011  || StringRef(Current, 1).find_first_of("#;/?:@&=+$,_.!~*'()[]")
1012  != StringRef::npos) {
1013  ++Current;
1014  ++Column;
1015  } else
1016  break;
1017  }
1018 }
1019 
1020 bool Scanner::consume(uint32_t Expected) {
1021  if (Expected >= 0x80) {
1022  setError("Cannot consume non-ascii characters", Current);
1023  return false;
1024  }
1025  if (Current == End)
1026  return false;
1027  if (uint8_t(*Current) >= 0x80) {
1028  setError("Cannot consume non-ascii characters", Current);
1029  return false;
1030  }
1031  if (uint8_t(*Current) == Expected) {
1032  ++Current;
1033  ++Column;
1034  return true;
1035  }
1036  return false;
1037 }
1038 
1039 void Scanner::skip(uint32_t Distance) {
1040  Current += Distance;
1041  Column += Distance;
1042  assert(Current <= End && "Skipped past the end");
1043 }
1044 
1045 bool Scanner::isBlankOrBreak(StringRef::iterator Position) {
1046  if (Position == End)
1047  return false;
1048  return *Position == ' ' || *Position == '\t' || *Position == '\r' ||
1049  *Position == '\n';
1050 }
1051 
1052 bool Scanner::isLineEmpty(StringRef Line) {
1053  for (const auto *Position = Line.begin(); Position != Line.end(); ++Position)
1054  if (!isBlankOrBreak(Position))
1055  return false;
1056  return true;
1057 }
1058 
1059 bool Scanner::consumeLineBreakIfPresent() {
1060  auto Next = skip_b_break(Current);
1061  if (Next == Current)
1062  return false;
1063  Column = 0;
1064  ++Line;
1065  Current = Next;
1066  return true;
1067 }
1068 
1069 void Scanner::saveSimpleKeyCandidate( TokenQueueT::iterator Tok
1070  , unsigned AtColumn
1071  , bool IsRequired) {
1072  if (IsSimpleKeyAllowed) {
1073  SimpleKey SK;
1074  SK.Tok = Tok;
1075  SK.Line = Line;
1076  SK.Column = AtColumn;
1077  SK.IsRequired = IsRequired;
1078  SK.FlowLevel = FlowLevel;
1079  SimpleKeys.push_back(SK);
1080  }
1081 }
1082 
1083 void Scanner::removeStaleSimpleKeyCandidates() {
1084  for (SmallVectorImpl<SimpleKey>::iterator i = SimpleKeys.begin();
1085  i != SimpleKeys.end();) {
1086  if (i->Line != Line || i->Column + 1024 < Column) {
1087  if (i->IsRequired)
1088  setError( "Could not find expected : for simple key"
1089  , i->Tok->Range.begin());
1090  i = SimpleKeys.erase(i);
1091  } else
1092  ++i;
1093  }
1094 }
1095 
1096 void Scanner::removeSimpleKeyCandidatesOnFlowLevel(unsigned Level) {
1097  if (!SimpleKeys.empty() && (SimpleKeys.end() - 1)->FlowLevel == Level)
1098  SimpleKeys.pop_back();
1099 }
1100 
1101 bool Scanner::unrollIndent(int ToColumn) {
1102  Token T;
1103  // Indentation is ignored in flow.
1104  if (FlowLevel != 0)
1105  return true;
1106 
1107  while (Indent > ToColumn) {
1108  T.Kind = Token::TK_BlockEnd;
1109  T.Range = StringRef(Current, 1);
1110  TokenQueue.push_back(T);
1111  Indent = Indents.pop_back_val();
1112  }
1113 
1114  return true;
1115 }
1116 
1117 bool Scanner::rollIndent( int ToColumn
1118  , Token::TokenKind Kind
1119  , TokenQueueT::iterator InsertPoint) {
1120  if (FlowLevel)
1121  return true;
1122  if (Indent < ToColumn) {
1123  Indents.push_back(Indent);
1124  Indent = ToColumn;
1125 
1126  Token T;
1127  T.Kind = Kind;
1128  T.Range = StringRef(Current, 0);
1129  TokenQueue.insert(InsertPoint, T);
1130  }
1131  return true;
1132 }
1133 
1134 void Scanner::skipComment() {
1135  if (Current == End || *Current != '#')
1136  return;
1137  while (true) {
1138  // This may skip more than one byte, thus Column is only incremented
1139  // for code points.
1140  StringRef::iterator I = skip_nb_char(Current);
1141  if (I == Current)
1142  break;
1143  Current = I;
1144  ++Column;
1145  }
1146 }
1147 
1148 void Scanner::scanToNextToken() {
1149  while (true) {
1150  while (Current != End && (*Current == ' ' || *Current == '\t')) {
1151  skip(1);
1152  }
1153 
1154  skipComment();
1155 
1156  // Skip EOL.
1157  StringRef::iterator i = skip_b_break(Current);
1158  if (i == Current)
1159  break;
1160  Current = i;
1161  ++Line;
1162  Column = 0;
1163  // New lines may start a simple key.
1164  if (!FlowLevel)
1165  IsSimpleKeyAllowed = true;
1166  }
1167 }
1168 
1169 bool Scanner::scanStreamStart() {
1170  IsStartOfStream = false;
1171 
1172  EncodingInfo EI = getUnicodeEncoding(currentInput());
1173 
1174  Token T;
1175  T.Kind = Token::TK_StreamStart;
1176  T.Range = StringRef(Current, EI.second);
1177  TokenQueue.push_back(T);
1178  Current += EI.second;
1179  return true;
1180 }
1181 
1182 bool Scanner::scanStreamEnd() {
1183  // Force an ending new line if one isn't present.
1184  if (Column != 0) {
1185  Column = 0;
1186  ++Line;
1187  }
1188 
1189  unrollIndent(-1);
1190  SimpleKeys.clear();
1191  IsSimpleKeyAllowed = false;
1192 
1193  Token T;
1194  T.Kind = Token::TK_StreamEnd;
1195  T.Range = StringRef(Current, 0);
1196  TokenQueue.push_back(T);
1197  return true;
1198 }
1199 
1200 bool Scanner::scanDirective() {
1201  // Reset the indentation level.
1202  unrollIndent(-1);
1203  SimpleKeys.clear();
1204  IsSimpleKeyAllowed = false;
1205 
1206  StringRef::iterator Start = Current;
1207  consume('%');
1208  StringRef::iterator NameStart = Current;
1209  Current = skip_while(&Scanner::skip_ns_char, Current);
1210  StringRef Name(NameStart, Current - NameStart);
1211  Current = skip_while(&Scanner::skip_s_white, Current);
1212 
1213  Token T;
1214  if (Name == "YAML") {
1215  Current = skip_while(&Scanner::skip_ns_char, Current);
1217  T.Range = StringRef(Start, Current - Start);
1218  TokenQueue.push_back(T);
1219  return true;
1220  } else if(Name == "TAG") {
1221  Current = skip_while(&Scanner::skip_ns_char, Current);
1222  Current = skip_while(&Scanner::skip_s_white, Current);
1223  Current = skip_while(&Scanner::skip_ns_char, Current);
1224  T.Kind = Token::TK_TagDirective;
1225  T.Range = StringRef(Start, Current - Start);
1226  TokenQueue.push_back(T);
1227  return true;
1228  }
1229  return false;
1230 }
1231 
1232 bool Scanner::scanDocumentIndicator(bool IsStart) {
1233  unrollIndent(-1);
1234  SimpleKeys.clear();
1235  IsSimpleKeyAllowed = false;
1236 
1237  Token T;
1238  T.Kind = IsStart ? Token::TK_DocumentStart : Token::TK_DocumentEnd;
1239  T.Range = StringRef(Current, 3);
1240  skip(3);
1241  TokenQueue.push_back(T);
1242  return true;
1243 }
1244 
1245 bool Scanner::scanFlowCollectionStart(bool IsSequence) {
1246  Token T;
1247  T.Kind = IsSequence ? Token::TK_FlowSequenceStart
1249  T.Range = StringRef(Current, 1);
1250  skip(1);
1251  TokenQueue.push_back(T);
1252 
1253  // [ and { may begin a simple key.
1254  saveSimpleKeyCandidate(--TokenQueue.end(), Column - 1, false);
1255 
1256  // And may also be followed by a simple key.
1257  IsSimpleKeyAllowed = true;
1258  ++FlowLevel;
1259  return true;
1260 }
1261 
1262 bool Scanner::scanFlowCollectionEnd(bool IsSequence) {
1263  removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1264  IsSimpleKeyAllowed = false;
1265  Token T;
1266  T.Kind = IsSequence ? Token::TK_FlowSequenceEnd
1268  T.Range = StringRef(Current, 1);
1269  skip(1);
1270  TokenQueue.push_back(T);
1271  if (FlowLevel)
1272  --FlowLevel;
1273  return true;
1274 }
1275 
1276 bool Scanner::scanFlowEntry() {
1277  removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1278  IsSimpleKeyAllowed = true;
1279  Token T;
1280  T.Kind = Token::TK_FlowEntry;
1281  T.Range = StringRef(Current, 1);
1282  skip(1);
1283  TokenQueue.push_back(T);
1284  return true;
1285 }
1286 
1287 bool Scanner::scanBlockEntry() {
1288  rollIndent(Column, Token::TK_BlockSequenceStart, TokenQueue.end());
1289  removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1290  IsSimpleKeyAllowed = true;
1291  Token T;
1292  T.Kind = Token::TK_BlockEntry;
1293  T.Range = StringRef(Current, 1);
1294  skip(1);
1295  TokenQueue.push_back(T);
1296  return true;
1297 }
1298 
1299 bool Scanner::scanKey() {
1300  if (!FlowLevel)
1301  rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end());
1302 
1303  removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1304  IsSimpleKeyAllowed = !FlowLevel;
1305 
1306  Token T;
1307  T.Kind = Token::TK_Key;
1308  T.Range = StringRef(Current, 1);
1309  skip(1);
1310  TokenQueue.push_back(T);
1311  return true;
1312 }
1313 
1314 bool Scanner::scanValue() {
1315  // If the previous token could have been a simple key, insert the key token
1316  // into the token queue.
1317  if (!SimpleKeys.empty()) {
1318  SimpleKey SK = SimpleKeys.pop_back_val();
1319  Token T;
1320  T.Kind = Token::TK_Key;
1321  T.Range = SK.Tok->Range;
1323  for (i = TokenQueue.begin(), e = TokenQueue.end(); i != e; ++i) {
1324  if (i == SK.Tok)
1325  break;
1326  }
1327  if (i == e) {
1328  Failed = true;
1329  return false;
1330  }
1331  i = TokenQueue.insert(i, T);
1332 
1333  // We may also need to add a Block-Mapping-Start token.
1334  rollIndent(SK.Column, Token::TK_BlockMappingStart, i);
1335 
1336  IsSimpleKeyAllowed = false;
1337  } else {
1338  if (!FlowLevel)
1339  rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end());
1340  IsSimpleKeyAllowed = !FlowLevel;
1341  }
1342 
1343  Token T;
1344  T.Kind = Token::TK_Value;
1345  T.Range = StringRef(Current, 1);
1346  skip(1);
1347  TokenQueue.push_back(T);
1348  return true;
1349 }
1350 
1351 // Forbidding inlining improves performance by roughly 20%.
1352 // FIXME: Remove once llvm optimizes this to the faster version without hints.
1353 LLVM_ATTRIBUTE_NOINLINE static bool
1355 
1356 // Returns whether a character at 'Position' was escaped with a leading '\'.
1357 // 'First' specifies the position of the first character in the string.
1359  StringRef::iterator Position) {
1360  assert(Position - 1 >= First);
1361  StringRef::iterator I = Position - 1;
1362  // We calculate the number of consecutive '\'s before the current position
1363  // by iterating backwards through our string.
1364  while (I >= First && *I == '\\') --I;
1365  // (Position - 1 - I) now contains the number of '\'s before the current
1366  // position. If it is odd, the character at 'Position' was escaped.
1367  return (Position - 1 - I) % 2 == 1;
1368 }
1369 
1370 bool Scanner::scanFlowScalar(bool IsDoubleQuoted) {
1371  StringRef::iterator Start = Current;
1372  unsigned ColStart = Column;
1373  if (IsDoubleQuoted) {
1374  do {
1375  ++Current;
1376  while (Current != End && *Current != '"')
1377  ++Current;
1378  // Repeat until the previous character was not a '\' or was an escaped
1379  // backslash.
1380  } while ( Current != End
1381  && *(Current - 1) == '\\'
1382  && wasEscaped(Start + 1, Current));
1383  } else {
1384  skip(1);
1385  while (Current != End) {
1386  // Skip a ' followed by another '.
1387  if (Current + 1 < End && *Current == '\'' && *(Current + 1) == '\'') {
1388  skip(2);
1389  continue;
1390  } else if (*Current == '\'')
1391  break;
1392  StringRef::iterator i = skip_nb_char(Current);
1393  if (i == Current) {
1394  i = skip_b_break(Current);
1395  if (i == Current)
1396  break;
1397  Current = i;
1398  Column = 0;
1399  ++Line;
1400  } else {
1401  if (i == End)
1402  break;
1403  Current = i;
1404  ++Column;
1405  }
1406  }
1407  }
1408 
1409  if (Current == End) {
1410  setError("Expected quote at end of scalar", Current);
1411  return false;
1412  }
1413 
1414  skip(1); // Skip ending quote.
1415  Token T;
1416  T.Kind = Token::TK_Scalar;
1417  T.Range = StringRef(Start, Current - Start);
1418  TokenQueue.push_back(T);
1419 
1420  saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
1421 
1422  IsSimpleKeyAllowed = false;
1423 
1424  return true;
1425 }
1426 
1427 bool Scanner::scanPlainScalar() {
1428  StringRef::iterator Start = Current;
1429  unsigned ColStart = Column;
1430  unsigned LeadingBlanks = 0;
1431  assert(Indent >= -1 && "Indent must be >= -1 !");
1432  unsigned indent = static_cast<unsigned>(Indent + 1);
1433  while (Current != End) {
1434  if (*Current == '#')
1435  break;
1436 
1437  while (Current != End && !isBlankOrBreak(Current)) {
1438  if (FlowLevel && *Current == ':' &&
1439  (Current + 1 == End ||
1440  !(isBlankOrBreak(Current + 1) || *(Current + 1) == ','))) {
1441  setError("Found unexpected ':' while scanning a plain scalar", Current);
1442  return false;
1443  }
1444 
1445  // Check for the end of the plain scalar.
1446  if ( (*Current == ':' && isBlankOrBreak(Current + 1))
1447  || ( FlowLevel
1448  && (StringRef(Current, 1).find_first_of(",:?[]{}")
1449  != StringRef::npos)))
1450  break;
1451 
1452  StringRef::iterator i = skip_nb_char(Current);
1453  if (i == Current)
1454  break;
1455  Current = i;
1456  ++Column;
1457  }
1458 
1459  // Are we at the end?
1460  if (!isBlankOrBreak(Current))
1461  break;
1462 
1463  // Eat blanks.
1464  StringRef::iterator Tmp = Current;
1465  while (isBlankOrBreak(Tmp)) {
1466  StringRef::iterator i = skip_s_white(Tmp);
1467  if (i != Tmp) {
1468  if (LeadingBlanks && (Column < indent) && *Tmp == '\t') {
1469  setError("Found invalid tab character in indentation", Tmp);
1470  return false;
1471  }
1472  Tmp = i;
1473  ++Column;
1474  } else {
1475  i = skip_b_break(Tmp);
1476  if (!LeadingBlanks)
1477  LeadingBlanks = 1;
1478  Tmp = i;
1479  Column = 0;
1480  ++Line;
1481  }
1482  }
1483 
1484  if (!FlowLevel && Column < indent)
1485  break;
1486 
1487  Current = Tmp;
1488  }
1489  if (Start == Current) {
1490  setError("Got empty plain scalar", Start);
1491  return false;
1492  }
1493  Token T;
1494  T.Kind = Token::TK_Scalar;
1495  T.Range = StringRef(Start, Current - Start);
1496  TokenQueue.push_back(T);
1497 
1498  // Plain scalars can be simple keys.
1499  saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
1500 
1501  IsSimpleKeyAllowed = false;
1502 
1503  return true;
1504 }
1505 
1506 bool Scanner::scanAliasOrAnchor(bool IsAlias) {
1507  StringRef::iterator Start = Current;
1508  unsigned ColStart = Column;
1509  skip(1);
1510  while (Current != End) {
1511  if ( *Current == '[' || *Current == ']'
1512  || *Current == '{' || *Current == '}'
1513  || *Current == ','
1514  || *Current == ':')
1515  break;
1516  StringRef::iterator i = skip_ns_char(Current);
1517  if (i == Current)
1518  break;
1519  Current = i;
1520  ++Column;
1521  }
1522 
1523  if (Start + 1 == Current) {
1524  setError("Got empty alias or anchor", Start);
1525  return false;
1526  }
1527 
1528  Token T;
1530  T.Range = StringRef(Start, Current - Start);
1531  TokenQueue.push_back(T);
1532 
1533  // Alias and anchors can be simple keys.
1534  saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
1535 
1536  IsSimpleKeyAllowed = false;
1537 
1538  return true;
1539 }
1540 
1541 bool Scanner::scanBlockScalarIndicators(char &StyleIndicator,
1542  char &ChompingIndicator,
1543  unsigned &IndentIndicator,
1544  bool &IsDone) {
1545  StyleIndicator = scanBlockStyleIndicator();
1546  if (!scanBlockScalarHeader(ChompingIndicator, IndentIndicator, IsDone))
1547  return false;
1548  return true;
1549 }
1550 
1551 char Scanner::scanBlockStyleIndicator() {
1552  char Indicator = ' ';
1553  if (Current != End && (*Current == '>' || *Current == '|')) {
1554  Indicator = *Current;
1555  skip(1);
1556  }
1557  return Indicator;
1558 }
1559 
1560 char Scanner::scanBlockChompingIndicator() {
1561  char Indicator = ' ';
1562  if (Current != End && (*Current == '+' || *Current == '-')) {
1563  Indicator = *Current;
1564  skip(1);
1565  }
1566  return Indicator;
1567 }
1568 
1569 /// Get the number of line breaks after chomping.
1570 ///
1571 /// Return the number of trailing line breaks to emit, depending on
1572 /// \p ChompingIndicator.
1573 static unsigned getChompedLineBreaks(char ChompingIndicator,
1574  unsigned LineBreaks, StringRef Str) {
1575  if (ChompingIndicator == '-') // Strip all line breaks.
1576  return 0;
1577  if (ChompingIndicator == '+') // Keep all line breaks.
1578  return LineBreaks;
1579  // Clip trailing lines.
1580  return Str.empty() ? 0 : 1;
1581 }
1582 
1583 unsigned Scanner::scanBlockIndentationIndicator() {
1584  unsigned Indent = 0;
1585  if (Current != End && (*Current >= '1' && *Current <= '9')) {
1586  Indent = unsigned(*Current - '0');
1587  skip(1);
1588  }
1589  return Indent;
1590 }
1591 
1592 bool Scanner::scanBlockScalarHeader(char &ChompingIndicator,
1593  unsigned &IndentIndicator, bool &IsDone) {
1594  auto Start = Current;
1595 
1596  ChompingIndicator = scanBlockChompingIndicator();
1597  IndentIndicator = scanBlockIndentationIndicator();
1598  // Check for the chomping indicator once again.
1599  if (ChompingIndicator == ' ')
1600  ChompingIndicator = scanBlockChompingIndicator();
1601  Current = skip_while(&Scanner::skip_s_white, Current);
1602  skipComment();
1603 
1604  if (Current == End) { // EOF, we have an empty scalar.
1605  Token T;
1606  T.Kind = Token::TK_BlockScalar;
1607  T.Range = StringRef(Start, Current - Start);
1608  TokenQueue.push_back(T);
1609  IsDone = true;
1610  return true;
1611  }
1612 
1613  if (!consumeLineBreakIfPresent()) {
1614  setError("Expected a line break after block scalar header", Current);
1615  return false;
1616  }
1617  return true;
1618 }
1619 
1620 bool Scanner::findBlockScalarIndent(unsigned &BlockIndent,
1621  unsigned BlockExitIndent,
1622  unsigned &LineBreaks, bool &IsDone) {
1623  unsigned MaxAllSpaceLineCharacters = 0;
1624  StringRef::iterator LongestAllSpaceLine;
1625 
1626  while (true) {
1627  advanceWhile(&Scanner::skip_s_space);
1628  if (skip_nb_char(Current) != Current) {
1629  // This line isn't empty, so try and find the indentation.
1630  if (Column <= BlockExitIndent) { // End of the block literal.
1631  IsDone = true;
1632  return true;
1633  }
1634  // We found the block's indentation.
1635  BlockIndent = Column;
1636  if (MaxAllSpaceLineCharacters > BlockIndent) {
1637  setError(
1638  "Leading all-spaces line must be smaller than the block indent",
1639  LongestAllSpaceLine);
1640  return false;
1641  }
1642  return true;
1643  }
1644  if (skip_b_break(Current) != Current &&
1645  Column > MaxAllSpaceLineCharacters) {
1646  // Record the longest all-space line in case it's longer than the
1647  // discovered block indent.
1648  MaxAllSpaceLineCharacters = Column;
1649  LongestAllSpaceLine = Current;
1650  }
1651 
1652  // Check for EOF.
1653  if (Current == End) {
1654  IsDone = true;
1655  return true;
1656  }
1657 
1658  if (!consumeLineBreakIfPresent()) {
1659  IsDone = true;
1660  return true;
1661  }
1662  ++LineBreaks;
1663  }
1664  return true;
1665 }
1666 
1667 bool Scanner::scanBlockScalarIndent(unsigned BlockIndent,
1668  unsigned BlockExitIndent, bool &IsDone) {
1669  // Skip the indentation.
1670  while (Column < BlockIndent) {
1671  auto I = skip_s_space(Current);
1672  if (I == Current)
1673  break;
1674  Current = I;
1675  ++Column;
1676  }
1677 
1678  if (skip_nb_char(Current) == Current)
1679  return true;
1680 
1681  if (Column <= BlockExitIndent) { // End of the block literal.
1682  IsDone = true;
1683  return true;
1684  }
1685 
1686  if (Column < BlockIndent) {
1687  if (Current != End && *Current == '#') { // Trailing comment.
1688  IsDone = true;
1689  return true;
1690  }
1691  setError("A text line is less indented than the block scalar", Current);
1692  return false;
1693  }
1694  return true; // A normal text line.
1695 }
1696 
1697 bool Scanner::scanBlockScalar(bool IsLiteral) {
1698  assert(*Current == '|' || *Current == '>');
1699  char StyleIndicator;
1700  char ChompingIndicator;
1701  unsigned BlockIndent;
1702  bool IsDone = false;
1703  if (!scanBlockScalarIndicators(StyleIndicator, ChompingIndicator, BlockIndent,
1704  IsDone))
1705  return false;
1706  if (IsDone)
1707  return true;
1708  bool IsFolded = StyleIndicator == '>';
1709 
1710  const auto *Start = Current;
1711  unsigned BlockExitIndent = Indent < 0 ? 0 : (unsigned)Indent;
1712  unsigned LineBreaks = 0;
1713  if (BlockIndent == 0) {
1714  if (!findBlockScalarIndent(BlockIndent, BlockExitIndent, LineBreaks,
1715  IsDone))
1716  return false;
1717  }
1718 
1719  // Scan the block's scalars body.
1720  SmallString<256> Str;
1721  while (!IsDone) {
1722  if (!scanBlockScalarIndent(BlockIndent, BlockExitIndent, IsDone))
1723  return false;
1724  if (IsDone)
1725  break;
1726 
1727  // Parse the current line.
1728  auto LineStart = Current;
1729  advanceWhile(&Scanner::skip_nb_char);
1730  if (LineStart != Current) {
1731  if (LineBreaks && IsFolded && !Scanner::isLineEmpty(Str)) {
1732  // The folded style "folds" any single line break between content into a
1733  // single space, except when that content is "empty" (only contains
1734  // whitespace) in which case the line break is left as-is.
1735  if (LineBreaks == 1) {
1736  Str.append(LineBreaks,
1737  isLineEmpty(StringRef(LineStart, Current - LineStart))
1738  ? '\n'
1739  : ' ');
1740  }
1741  // If we saw a single line break, we are completely replacing it and so
1742  // want `LineBreaks == 0`. Otherwise this decrement accounts for the
1743  // fact that the first line break is "trimmed", only being used to
1744  // signal a sequence of line breaks which should not be folded.
1745  LineBreaks--;
1746  }
1747  Str.append(LineBreaks, '\n');
1748  Str.append(StringRef(LineStart, Current - LineStart));
1749  LineBreaks = 0;
1750  }
1751 
1752  // Check for EOF.
1753  if (Current == End)
1754  break;
1755 
1756  if (!consumeLineBreakIfPresent())
1757  break;
1758  ++LineBreaks;
1759  }
1760 
1761  if (Current == End && !LineBreaks)
1762  // Ensure that there is at least one line break before the end of file.
1763  LineBreaks = 1;
1764  Str.append(getChompedLineBreaks(ChompingIndicator, LineBreaks, Str), '\n');
1765 
1766  // New lines may start a simple key.
1767  if (!FlowLevel)
1768  IsSimpleKeyAllowed = true;
1769 
1770  Token T;
1771  T.Kind = Token::TK_BlockScalar;
1772  T.Range = StringRef(Start, Current - Start);
1773  T.Value = std::string(Str);
1774  TokenQueue.push_back(T);
1775  return true;
1776 }
1777 
1778 bool Scanner::scanTag() {
1779  StringRef::iterator Start = Current;
1780  unsigned ColStart = Column;
1781  skip(1); // Eat !.
1782  if (Current == End || isBlankOrBreak(Current)); // An empty tag.
1783  else if (*Current == '<') {
1784  skip(1);
1785  scan_ns_uri_char();
1786  if (!consume('>'))
1787  return false;
1788  } else {
1789  // FIXME: Actually parse the c-ns-shorthand-tag rule.
1790  Current = skip_while(&Scanner::skip_ns_char, Current);
1791  }
1792 
1793  Token T;
1794  T.Kind = Token::TK_Tag;
1795  T.Range = StringRef(Start, Current - Start);
1796  TokenQueue.push_back(T);
1797 
1798  // Tags can be simple keys.
1799  saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
1800 
1801  IsSimpleKeyAllowed = false;
1802 
1803  return true;
1804 }
1805 
1806 bool Scanner::fetchMoreTokens() {
1807  if (IsStartOfStream)
1808  return scanStreamStart();
1809 
1810  scanToNextToken();
1811 
1812  if (Current == End)
1813  return scanStreamEnd();
1814 
1815  removeStaleSimpleKeyCandidates();
1816 
1817  unrollIndent(Column);
1818 
1819  if (Column == 0 && *Current == '%')
1820  return scanDirective();
1821 
1822  if (Column == 0 && Current + 4 <= End
1823  && *Current == '-'
1824  && *(Current + 1) == '-'
1825  && *(Current + 2) == '-'
1826  && (Current + 3 == End || isBlankOrBreak(Current + 3)))
1827  return scanDocumentIndicator(true);
1828 
1829  if (Column == 0 && Current + 4 <= End
1830  && *Current == '.'
1831  && *(Current + 1) == '.'
1832  && *(Current + 2) == '.'
1833  && (Current + 3 == End || isBlankOrBreak(Current + 3)))
1834  return scanDocumentIndicator(false);
1835 
1836  if (*Current == '[')
1837  return scanFlowCollectionStart(true);
1838 
1839  if (*Current == '{')
1840  return scanFlowCollectionStart(false);
1841 
1842  if (*Current == ']')
1843  return scanFlowCollectionEnd(true);
1844 
1845  if (*Current == '}')
1846  return scanFlowCollectionEnd(false);
1847 
1848  if (*Current == ',')
1849  return scanFlowEntry();
1850 
1851  if (*Current == '-' && isBlankOrBreak(Current + 1))
1852  return scanBlockEntry();
1853 
1854  if (*Current == '?' && (FlowLevel || isBlankOrBreak(Current + 1)))
1855  return scanKey();
1856 
1857  if (*Current == ':' && (FlowLevel || isBlankOrBreak(Current + 1)))
1858  return scanValue();
1859 
1860  if (*Current == '*')
1861  return scanAliasOrAnchor(true);
1862 
1863  if (*Current == '&')
1864  return scanAliasOrAnchor(false);
1865 
1866  if (*Current == '!')
1867  return scanTag();
1868 
1869  if (*Current == '|' && !FlowLevel)
1870  return scanBlockScalar(true);
1871 
1872  if (*Current == '>' && !FlowLevel)
1873  return scanBlockScalar(false);
1874 
1875  if (*Current == '\'')
1876  return scanFlowScalar(false);
1877 
1878  if (*Current == '"')
1879  return scanFlowScalar(true);
1880 
1881  // Get a plain scalar.
1882  StringRef FirstChar(Current, 1);
1883  if (!(isBlankOrBreak(Current)
1884  || FirstChar.find_first_of("-?:,[]{}#&*!|>'\"%@`") != StringRef::npos)
1885  || (*Current == '-' && !isBlankOrBreak(Current + 1))
1886  || (!FlowLevel && (*Current == '?' || *Current == ':')
1887  && isBlankOrBreak(Current + 1))
1888  || (!FlowLevel && *Current == ':'
1889  && Current + 2 < End
1890  && *(Current + 1) == ':'
1891  && !isBlankOrBreak(Current + 2)))
1892  return scanPlainScalar();
1893 
1894  setError("Unrecognized character while tokenizing.", Current);
1895  return false;
1896 }
1897 
1898 Stream::Stream(StringRef Input, SourceMgr &SM, bool ShowColors,
1899  std::error_code *EC)
1900  : scanner(new Scanner(Input, SM, ShowColors, EC)) {}
1901 
1902 Stream::Stream(MemoryBufferRef InputBuffer, SourceMgr &SM, bool ShowColors,
1903  std::error_code *EC)
1904  : scanner(new Scanner(InputBuffer, SM, ShowColors, EC)) {}
1905 
1906 Stream::~Stream() = default;
1907 
1908 bool Stream::failed() { return scanner->failed(); }
1909 
1911  printError(N ? N->getSourceRange() : SMRange(), Msg, Kind);
1912 }
1913 
1914 void Stream::printError(const SMRange &Range, const Twine &Msg,
1915  SourceMgr::DiagKind Kind) {
1916  scanner->printError(Range.Start, Kind, Msg, Range);
1917 }
1918 
1920  if (CurrentDoc)
1921  report_fatal_error("Can only iterate over the stream once");
1922 
1923  // Skip Stream-Start.
1924  scanner->getNext();
1925 
1926  CurrentDoc.reset(new Document(*this));
1927  return document_iterator(CurrentDoc);
1928 }
1929 
1931  return document_iterator();
1932 }
1933 
1935  for (Document &Doc : *this)
1936  Doc.skip();
1937 }
1938 
1939 Node::Node(unsigned int Type, std::unique_ptr<Document> &D, StringRef A,
1940  StringRef T)
1941  : Doc(D), TypeID(Type), Anchor(A), Tag(T) {
1942  SMLoc Start = SMLoc::getFromPointer(peekNext().Range.begin());
1943  SourceRange = SMRange(Start, Start);
1944 }
1945 
1946 std::string Node::getVerbatimTag() const {
1947  StringRef Raw = getRawTag();
1948  if (!Raw.empty() && Raw != "!") {
1949  std::string Ret;
1950  if (Raw.find_last_of('!') == 0) {
1951  Ret = std::string(Doc->getTagMap().find("!")->second);
1952  Ret += Raw.substr(1);
1953  return Ret;
1954  } else if (Raw.startswith("!!")) {
1955  Ret = std::string(Doc->getTagMap().find("!!")->second);
1956  Ret += Raw.substr(2);
1957  return Ret;
1958  } else {
1959  StringRef TagHandle = Raw.substr(0, Raw.find_last_of('!') + 1);
1960  std::map<StringRef, StringRef>::const_iterator It =
1961  Doc->getTagMap().find(TagHandle);
1962  if (It != Doc->getTagMap().end())
1963  Ret = std::string(It->second);
1964  else {
1965  Token T;
1966  T.Kind = Token::TK_Tag;
1967  T.Range = TagHandle;
1968  setError(Twine("Unknown tag handle ") + TagHandle, T);
1969  }
1970  Ret += Raw.substr(Raw.find_last_of('!') + 1);
1971  return Ret;
1972  }
1973  }
1974 
1975  switch (getType()) {
1976  case NK_Null:
1977  return "tag:yaml.org,2002:null";
1978  case NK_Scalar:
1979  case NK_BlockScalar:
1980  // TODO: Tag resolution.
1981  return "tag:yaml.org,2002:str";
1982  case NK_Mapping:
1983  return "tag:yaml.org,2002:map";
1984  case NK_Sequence:
1985  return "tag:yaml.org,2002:seq";
1986  }
1987 
1988  return "";
1989 }
1990 
1992  return Doc->peekNext();
1993 }
1994 
1996  return Doc->getNext();
1997 }
1998 
2000  return Doc->parseBlockNode();
2001 }
2002 
2004  return Doc->NodeAllocator;
2005 }
2006 
2007 void Node::setError(const Twine &Msg, Token &Tok) const {
2008  Doc->setError(Msg, Tok);
2009 }
2010 
2011 bool Node::failed() const {
2012  return Doc->failed();
2013 }
2014 
2016  // TODO: Handle newlines properly. We need to remove leading whitespace.
2017  if (Value[0] == '"') { // Double quoted.
2018  // Pull off the leading and trailing "s.
2019  StringRef UnquotedValue = Value.substr(1, Value.size() - 2);
2020  // Search for characters that would require unescaping the value.
2021  StringRef::size_type i = UnquotedValue.find_first_of("\\\r\n");
2022  if (i != StringRef::npos)
2023  return unescapeDoubleQuoted(UnquotedValue, i, Storage);
2024  return UnquotedValue;
2025  } else if (Value[0] == '\'') { // Single quoted.
2026  // Pull off the leading and trailing 's.
2027  StringRef UnquotedValue = Value.substr(1, Value.size() - 2);
2028  StringRef::size_type i = UnquotedValue.find('\'');
2029  if (i != StringRef::npos) {
2030  // We're going to need Storage.
2031  Storage.clear();
2032  Storage.reserve(UnquotedValue.size());
2033  for (; i != StringRef::npos; i = UnquotedValue.find('\'')) {
2034  StringRef Valid(UnquotedValue.begin(), i);
2035  llvm::append_range(Storage, Valid);
2036  Storage.push_back('\'');
2037  UnquotedValue = UnquotedValue.substr(i + 2);
2038  }
2039  llvm::append_range(Storage, UnquotedValue);
2040  return StringRef(Storage.begin(), Storage.size());
2041  }
2042  return UnquotedValue;
2043  }
2044  // Plain or block.
2045  return Value.rtrim(' ');
2046 }
2047 
2048 StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue
2050  , SmallVectorImpl<char> &Storage)
2051  const {
2052  // Use Storage to build proper value.
2053  Storage.clear();
2054  Storage.reserve(UnquotedValue.size());
2055  for (; i != StringRef::npos; i = UnquotedValue.find_first_of("\\\r\n")) {
2056  // Insert all previous chars into Storage.
2057  StringRef Valid(UnquotedValue.begin(), i);
2058  llvm::append_range(Storage, Valid);
2059  // Chop off inserted chars.
2060  UnquotedValue = UnquotedValue.substr(i);
2061 
2062  assert(!UnquotedValue.empty() && "Can't be empty!");
2063 
2064  // Parse escape or line break.
2065  switch (UnquotedValue[0]) {
2066  case '\r':
2067  case '\n':
2068  Storage.push_back('\n');
2069  if ( UnquotedValue.size() > 1
2070  && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n'))
2071  UnquotedValue = UnquotedValue.substr(1);
2072  UnquotedValue = UnquotedValue.substr(1);
2073  break;
2074  default:
2075  if (UnquotedValue.size() == 1) {
2076  Token T;
2077  T.Range = StringRef(UnquotedValue.begin(), 1);
2078  setError("Unrecognized escape code", T);
2079  return "";
2080  }
2081  UnquotedValue = UnquotedValue.substr(1);
2082  switch (UnquotedValue[0]) {
2083  default: {
2084  Token T;
2085  T.Range = StringRef(UnquotedValue.begin(), 1);
2086  setError("Unrecognized escape code", T);
2087  return "";
2088  }
2089  case '\r':
2090  case '\n':
2091  // Remove the new line.
2092  if ( UnquotedValue.size() > 1
2093  && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n'))
2094  UnquotedValue = UnquotedValue.substr(1);
2095  // If this was just a single byte newline, it will get skipped
2096  // below.
2097  break;
2098  case '0':
2099  Storage.push_back(0x00);
2100  break;
2101  case 'a':
2102  Storage.push_back(0x07);
2103  break;
2104  case 'b':
2105  Storage.push_back(0x08);
2106  break;
2107  case 't':
2108  case 0x09:
2109  Storage.push_back(0x09);
2110  break;
2111  case 'n':
2112  Storage.push_back(0x0A);
2113  break;
2114  case 'v':
2115  Storage.push_back(0x0B);
2116  break;
2117  case 'f':
2118  Storage.push_back(0x0C);
2119  break;
2120  case 'r':
2121  Storage.push_back(0x0D);
2122  break;
2123  case 'e':
2124  Storage.push_back(0x1B);
2125  break;
2126  case ' ':
2127  Storage.push_back(0x20);
2128  break;
2129  case '"':
2130  Storage.push_back(0x22);
2131  break;
2132  case '/':
2133  Storage.push_back(0x2F);
2134  break;
2135  case '\\':
2136  Storage.push_back(0x5C);
2137  break;
2138  case 'N':
2139  encodeUTF8(0x85, Storage);
2140  break;
2141  case '_':
2142  encodeUTF8(0xA0, Storage);
2143  break;
2144  case 'L':
2145  encodeUTF8(0x2028, Storage);
2146  break;
2147  case 'P':
2148  encodeUTF8(0x2029, Storage);
2149  break;
2150  case 'x': {
2151  if (UnquotedValue.size() < 3)
2152  // TODO: Report error.
2153  break;
2154  unsigned int UnicodeScalarValue;
2155  if (UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue))
2156  // TODO: Report error.
2157  UnicodeScalarValue = 0xFFFD;
2158  encodeUTF8(UnicodeScalarValue, Storage);
2159  UnquotedValue = UnquotedValue.substr(2);
2160  break;
2161  }
2162  case 'u': {
2163  if (UnquotedValue.size() < 5)
2164  // TODO: Report error.
2165  break;
2166  unsigned int UnicodeScalarValue;
2167  if (UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue))
2168  // TODO: Report error.
2169  UnicodeScalarValue = 0xFFFD;
2170  encodeUTF8(UnicodeScalarValue, Storage);
2171  UnquotedValue = UnquotedValue.substr(4);
2172  break;
2173  }
2174  case 'U': {
2175  if (UnquotedValue.size() < 9)
2176  // TODO: Report error.
2177  break;
2178  unsigned int UnicodeScalarValue;
2179  if (UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue))
2180  // TODO: Report error.
2181  UnicodeScalarValue = 0xFFFD;
2182  encodeUTF8(UnicodeScalarValue, Storage);
2183  UnquotedValue = UnquotedValue.substr(8);
2184  break;
2185  }
2186  }
2187  UnquotedValue = UnquotedValue.substr(1);
2188  }
2189  }
2190  llvm::append_range(Storage, UnquotedValue);
2191  return StringRef(Storage.begin(), Storage.size());
2192 }
2193 
2195  if (Key)
2196  return Key;
2197  // Handle implicit null keys.
2198  {
2199  Token &t = peekNext();
2200  if ( t.Kind == Token::TK_BlockEnd
2201  || t.Kind == Token::TK_Value
2202  || t.Kind == Token::TK_Error) {
2203  return Key = new (getAllocator()) NullNode(Doc);
2204  }
2205  if (t.Kind == Token::TK_Key)
2206  getNext(); // skip TK_Key.
2207  }
2208 
2209  // Handle explicit null keys.
2210  Token &t = peekNext();
2211  if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Value) {
2212  return Key = new (getAllocator()) NullNode(Doc);
2213  }
2214 
2215  // We've got a normal key.
2216  return Key = parseBlockNode();
2217 }
2218 
2220  if (Value)
2221  return Value;
2222 
2223  if (Node* Key = getKey())
2224  Key->skip();
2225  else {
2226  setError("Null key in Key Value.", peekNext());
2227  return Value = new (getAllocator()) NullNode(Doc);
2228  }
2229 
2230  if (failed())
2231  return Value = new (getAllocator()) NullNode(Doc);
2232 
2233  // Handle implicit null values.
2234  {
2235  Token &t = peekNext();
2236  if ( t.Kind == Token::TK_BlockEnd
2237  || t.Kind == Token::TK_FlowMappingEnd
2238  || t.Kind == Token::TK_Key
2239  || t.Kind == Token::TK_FlowEntry
2240  || t.Kind == Token::TK_Error) {
2241  return Value = new (getAllocator()) NullNode(Doc);
2242  }
2243 
2244  if (t.Kind != Token::TK_Value) {
2245  setError("Unexpected token in Key Value.", t);
2246  return Value = new (getAllocator()) NullNode(Doc);
2247  }
2248  getNext(); // skip TK_Value.
2249  }
2250 
2251  // Handle explicit null values.
2252  Token &t = peekNext();
2253  if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Key) {
2254  return Value = new (getAllocator()) NullNode(Doc);
2255  }
2256 
2257  // We got a normal value.
2258  return Value = parseBlockNode();
2259 }
2260 
2261 void MappingNode::increment() {
2262  if (failed()) {
2263  IsAtEnd = true;
2264  CurrentEntry = nullptr;
2265  return;
2266  }
2267  if (CurrentEntry) {
2268  CurrentEntry->skip();
2269  if (Type == MT_Inline) {
2270  IsAtEnd = true;
2271  CurrentEntry = nullptr;
2272  return;
2273  }
2274  }
2275  Token T = peekNext();
2276  if (T.Kind == Token::TK_Key || T.Kind == Token::TK_Scalar) {
2277  // KeyValueNode eats the TK_Key. That way it can detect null keys.
2278  CurrentEntry = new (getAllocator()) KeyValueNode(Doc);
2279  } else if (Type == MT_Block) {
2280  switch (T.Kind) {
2281  case Token::TK_BlockEnd:
2282  getNext();
2283  IsAtEnd = true;
2284  CurrentEntry = nullptr;
2285  break;
2286  default:
2287  setError("Unexpected token. Expected Key or Block End", T);
2288  [[fallthrough]];
2289  case Token::TK_Error:
2290  IsAtEnd = true;
2291  CurrentEntry = nullptr;
2292  }
2293  } else {
2294  switch (T.Kind) {
2295  case Token::TK_FlowEntry:
2296  // Eat the flow entry and recurse.
2297  getNext();
2298  return increment();
2300  getNext();
2301  [[fallthrough]];
2302  case Token::TK_Error:
2303  // Set this to end iterator.
2304  IsAtEnd = true;
2305  CurrentEntry = nullptr;
2306  break;
2307  default:
2308  setError( "Unexpected token. Expected Key, Flow Entry, or Flow "
2309  "Mapping End."
2310  , T);
2311  IsAtEnd = true;
2312  CurrentEntry = nullptr;
2313  }
2314  }
2315 }
2316 
2318  if (failed()) {
2319  IsAtEnd = true;
2320  CurrentEntry = nullptr;
2321  return;
2322  }
2323  if (CurrentEntry)
2324  CurrentEntry->skip();
2325  Token T = peekNext();
2326  if (SeqType == ST_Block) {
2327  switch (T.Kind) {
2328  case Token::TK_BlockEntry:
2329  getNext();
2330  CurrentEntry = parseBlockNode();
2331  if (!CurrentEntry) { // An error occurred.
2332  IsAtEnd = true;
2333  CurrentEntry = nullptr;
2334  }
2335  break;
2336  case Token::TK_BlockEnd:
2337  getNext();
2338  IsAtEnd = true;
2339  CurrentEntry = nullptr;
2340  break;
2341  default:
2342  setError( "Unexpected token. Expected Block Entry or Block End."
2343  , T);
2344  [[fallthrough]];
2345  case Token::TK_Error:
2346  IsAtEnd = true;
2347  CurrentEntry = nullptr;
2348  }
2349  } else if (SeqType == ST_Indentless) {
2350  switch (T.Kind) {
2351  case Token::TK_BlockEntry:
2352  getNext();
2353  CurrentEntry = parseBlockNode();
2354  if (!CurrentEntry) { // An error occurred.
2355  IsAtEnd = true;
2356  CurrentEntry = nullptr;
2357  }
2358  break;
2359  default:
2360  case Token::TK_Error:
2361  IsAtEnd = true;
2362  CurrentEntry = nullptr;
2363  }
2364  } else if (SeqType == ST_Flow) {
2365  switch (T.Kind) {
2366  case Token::TK_FlowEntry:
2367  // Eat the flow entry and recurse.
2368  getNext();
2369  WasPreviousTokenFlowEntry = true;
2370  return increment();
2372  getNext();
2373  [[fallthrough]];
2374  case Token::TK_Error:
2375  // Set this to end iterator.
2376  IsAtEnd = true;
2377  CurrentEntry = nullptr;
2378  break;
2379  case Token::TK_StreamEnd:
2380  case Token::TK_DocumentEnd:
2382  setError("Could not find closing ]!", T);
2383  // Set this to end iterator.
2384  IsAtEnd = true;
2385  CurrentEntry = nullptr;
2386  break;
2387  default:
2388  if (!WasPreviousTokenFlowEntry) {
2389  setError("Expected , between entries!", T);
2390  IsAtEnd = true;
2391  CurrentEntry = nullptr;
2392  break;
2393  }
2394  // Otherwise it must be a flow entry.
2395  CurrentEntry = parseBlockNode();
2396  if (!CurrentEntry) {
2397  IsAtEnd = true;
2398  }
2399  WasPreviousTokenFlowEntry = false;
2400  break;
2401  }
2402  }
2403 }
2404 
2405 Document::Document(Stream &S) : stream(S), Root(nullptr) {
2406  // Tag maps starts with two default mappings.
2407  TagMap["!"] = "!";
2408  TagMap["!!"] = "tag:yaml.org,2002:";
2409 
2410  if (parseDirectives())
2411  expectToken(Token::TK_DocumentStart);
2412  Token &T = peekNext();
2413  if (T.Kind == Token::TK_DocumentStart)
2414  getNext();
2415 }
2416 
2418  if (stream.scanner->failed())
2419  return false;
2420  if (!Root && !getRoot())
2421  return false;
2422  Root->skip();
2423  Token &T = peekNext();
2424  if (T.Kind == Token::TK_StreamEnd)
2425  return false;
2426  if (T.Kind == Token::TK_DocumentEnd) {
2427  getNext();
2428  return skip();
2429  }
2430  return true;
2431 }
2432 
2433 Token &Document::peekNext() {
2434  return stream.scanner->peekNext();
2435 }
2436 
2437 Token Document::getNext() {
2438  return stream.scanner->getNext();
2439 }
2440 
2441 void Document::setError(const Twine &Message, Token &Location) const {
2442  stream.scanner->setError(Message, Location.Range.begin());
2443 }
2444 
2445 bool Document::failed() const {
2446  return stream.scanner->failed();
2447 }
2448 
2450  Token T = peekNext();
2451  // Handle properties.
2452  Token AnchorInfo;
2453  Token TagInfo;
2454 parse_property:
2455  switch (T.Kind) {
2456  case Token::TK_Alias:
2457  getNext();
2458  return new (NodeAllocator) AliasNode(stream.CurrentDoc, T.Range.substr(1));
2459  case Token::TK_Anchor:
2460  if (AnchorInfo.Kind == Token::TK_Anchor) {
2461  setError("Already encountered an anchor for this node!", T);
2462  return nullptr;
2463  }
2464  AnchorInfo = getNext(); // Consume TK_Anchor.
2465  T = peekNext();
2466  goto parse_property;
2467  case Token::TK_Tag:
2468  if (TagInfo.Kind == Token::TK_Tag) {
2469  setError("Already encountered a tag for this node!", T);
2470  return nullptr;
2471  }
2472  TagInfo = getNext(); // Consume TK_Tag.
2473  T = peekNext();
2474  goto parse_property;
2475  default:
2476  break;
2477  }
2478 
2479  switch (T.Kind) {
2480  case Token::TK_BlockEntry:
2481  // We got an unindented BlockEntry sequence. This is not terminated with
2482  // a BlockEnd.
2483  // Don't eat the TK_BlockEntry, SequenceNode needs it.
2484  return new (NodeAllocator) SequenceNode( stream.CurrentDoc
2485  , AnchorInfo.Range.substr(1)
2486  , TagInfo.Range
2489  getNext();
2490  return new (NodeAllocator)
2491  SequenceNode( stream.CurrentDoc
2492  , AnchorInfo.Range.substr(1)
2493  , TagInfo.Range
2496  getNext();
2497  return new (NodeAllocator)
2498  MappingNode( stream.CurrentDoc
2499  , AnchorInfo.Range.substr(1)
2500  , TagInfo.Range
2503  getNext();
2504  return new (NodeAllocator)
2505  SequenceNode( stream.CurrentDoc
2506  , AnchorInfo.Range.substr(1)
2507  , TagInfo.Range
2510  getNext();
2511  return new (NodeAllocator)
2512  MappingNode( stream.CurrentDoc
2513  , AnchorInfo.Range.substr(1)
2514  , TagInfo.Range
2516  case Token::TK_Scalar:
2517  getNext();
2518  return new (NodeAllocator)
2519  ScalarNode( stream.CurrentDoc
2520  , AnchorInfo.Range.substr(1)
2521  , TagInfo.Range
2522  , T.Range);
2523  case Token::TK_BlockScalar: {
2524  getNext();
2525  StringRef NullTerminatedStr(T.Value.c_str(), T.Value.length() + 1);
2526  StringRef StrCopy = NullTerminatedStr.copy(NodeAllocator).drop_back();
2527  return new (NodeAllocator)
2528  BlockScalarNode(stream.CurrentDoc, AnchorInfo.Range.substr(1),
2529  TagInfo.Range, StrCopy, T.Range);
2530  }
2531  case Token::TK_Key:
2532  // Don't eat the TK_Key, KeyValueNode expects it.
2533  return new (NodeAllocator)
2534  MappingNode( stream.CurrentDoc
2535  , AnchorInfo.Range.substr(1)
2536  , TagInfo.Range
2539  case Token::TK_DocumentEnd:
2540  case Token::TK_StreamEnd:
2541  default:
2542  // TODO: Properly handle tags. "[!!str ]" should resolve to !!str "", not
2543  // !!null null.
2544  return new (NodeAllocator) NullNode(stream.CurrentDoc);
2547  case Token::TK_FlowEntry: {
2548  if (Root && (isa<MappingNode>(Root) || isa<SequenceNode>(Root)))
2549  return new (NodeAllocator) NullNode(stream.CurrentDoc);
2550 
2551  setError("Unexpected token", T);
2552  return nullptr;
2553  }
2554  case Token::TK_Error:
2555  return nullptr;
2556  }
2557  llvm_unreachable("Control flow shouldn't reach here.");
2558  return nullptr;
2559 }
2560 
2561 bool Document::parseDirectives() {
2562  bool isDirective = false;
2563  while (true) {
2564  Token T = peekNext();
2565  if (T.Kind == Token::TK_TagDirective) {
2566  parseTAGDirective();
2567  isDirective = true;
2568  } else if (T.Kind == Token::TK_VersionDirective) {
2569  parseYAMLDirective();
2570  isDirective = true;
2571  } else
2572  break;
2573  }
2574  return isDirective;
2575 }
2576 
2577 void Document::parseYAMLDirective() {
2578  getNext(); // Eat %YAML <version>
2579 }
2580 
2581 void Document::parseTAGDirective() {
2582  Token Tag = getNext(); // %TAG <handle> <prefix>
2583  StringRef T = Tag.Range;
2584  // Strip %TAG
2585  T = T.substr(T.find_first_of(" \t")).ltrim(" \t");
2586  std::size_t HandleEnd = T.find_first_of(" \t");
2587  StringRef TagHandle = T.substr(0, HandleEnd);
2588  StringRef TagPrefix = T.substr(HandleEnd).ltrim(" \t");
2589  TagMap[TagHandle] = TagPrefix;
2590 }
2591 
2592 bool Document::expectToken(int TK) {
2593  Token T = getNext();
2594  if (T.Kind != TK) {
2595  setError("Unexpected token", T);
2596  return false;
2597  }
2598  return true;
2599 }
i
i
Definition: README.txt:29
MemoryBuffer.h
llvm::yaml::AliasNode
Represents an alias to a Node with an anchor.
Definition: YAMLParser.h:513
llvm::yaml::Document
A YAML Stream is a sequence of Documents.
Definition: YAMLParser.h:530
llvm::AllocatorList::front
T & front()
Definition: AllocatorList.h:175
llvm::AllocatorList::iterator
IteratorImpl< T, typename list_type::iterator > iterator
Definition: AllocatorList.h:123
llvm::yaml::Token::TK_DocumentStart
@ TK_DocumentStart
Definition: YAMLParser.cpp:132
llvm::yaml::Token::TK_TagDirective
@ TK_TagDirective
Definition: YAMLParser.cpp:131
llvm::MemoryBufferRef::getBufferStart
const char * getBufferStart() const
Definition: MemoryBufferRef.h:35
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::yaml::Node::NK_Sequence
@ NK_Sequence
Definition: YAMLParser.h:128
llvm::SmallVectorImpl::erase
iterator erase(const_iterator CI)
Definition: SmallVector.h:741
llvm::yaml::Token::Kind
enum llvm::yaml::Token::TokenKind Kind
llvm::yaml::Scanner::failed
bool failed()
Returns true if an error occurred while parsing.
Definition: YAMLParser.cpp:282
llvm::yaml::KeyValueNode::skip
void skip() override
Definition: YAMLParser.h:305
llvm::yaml::Node::getRawTag
StringRef getRawTag() const
Get the tag as it was written in the document.
Definition: YAMLParser.h:158
llvm::yaml::Token::TK_DocumentEnd
@ TK_DocumentEnd
Definition: YAMLParser.cpp:133
llvm::yaml::SequenceNode::increment
void increment()
Definition: YAMLParser.cpp:2317
llvm::yaml::operator==
bool operator==(const BinaryRef &LHS, const BinaryRef &RHS)
Definition: YAML.h:98
llvm::yaml::escape
std::string escape(StringRef Input, bool EscapePrintable=true)
Escape Input for a double quoted scalar; if EscapePrintable is true, all UTF8 sequences will be escap...
Definition: YAMLParser.cpp:697
StringRef.h
llvm::yaml::Node
Abstract base class for all Nodes.
Definition: YAMLParser.h:118
YAMLParser.h
UEF_UTF16_LE
@ UEF_UTF16_LE
UTF-16 Little Endian.
Definition: YAMLParser.cpp:45
llvm::StringRef::npos
static constexpr size_t npos
Definition: StringRef.h:52
llvm::SmallVector< int, 4 >
llvm::yaml::Scanner
Scans YAML tokens from a MemoryBuffer.
Definition: YAMLParser.cpp:248
ErrorHandling.h
llvm::yaml::Token::TK_BlockMappingStart
@ TK_BlockMappingStart
Definition: YAMLParser.cpp:137
llvm::yaml::KeyValueNode::getValue
Node * getValue()
Parse and return the value.
Definition: YAMLParser.cpp:2219
llvm::yaml::Token::TK_Error
@ TK_Error
Definition: YAMLParser.cpp:127
llvm::StringRef::find_first_of
size_t find_first_of(char C, size_t From=0) const
Find the first character in the string that is C, or npos if not found.
Definition: StringRef.h:381
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::yaml::ScalarNode
A scalar node is an opaque datum that can be presented as a series of zero or more Unicode scalar val...
Definition: YAMLParser.h:211
decodeUTF8
static UTF8Decoded decodeUTF8(StringRef Range)
Definition: YAMLParser.cpp:197
llvm::Optional< bool >
llvm::StringRef::substr
StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:564
llvm::yaml::MappingNode
Represents a YAML map created from either a block map for a flow map.
Definition: YAMLParser.h:413
T
#define T
Definition: Mips16ISelLowering.cpp:341
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:119
llvm::Expected
Tagged union holding either a T or a Error.
Definition: APFloat.h:41
STLExtras.h
llvm::yaml::Document::Document
Document(Stream &ParentStream)
Definition: YAMLParser.cpp:2405
llvm::dwarf::Tag
Tag
Definition: Dwarf.h:105
llvm::yaml::skip
void skip(CollectionType &C)
Definition: YAMLParser.h:397
llvm::MemoryBufferRef
Definition: MemoryBufferRef.h:22
llvm::yaml::scanTokens
bool scanTokens(StringRef Input)
Scans all tokens in input without outputting anything.
Definition: YAMLParser.cpp:684
size_t
new
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM ID Predecessors according to mbb< bb27, 0x8b0a7c0 > Note ADDri is not a two address instruction its result reg1037 is an operand of the PHI node in bb76 and its operand reg1039 is the result of the PHI node We should treat it as a two address code and make sure the ADDri is scheduled after any node that reads reg1039 Use info(i.e. register scavenger) to assign it a free register to allow reuse the collector could move the objects and invalidate the derived pointer This is bad enough in the first but safe points can crop up unpredictably **array_addr i32 n y store obj * new
Definition: README.txt:125
encodeUTF8
static void encodeUTF8(uint32_t UnicodeScalarValue, SmallVectorImpl< char > &Result)
encodeUTF8 - Encode UnicodeScalarValue in UTF-8 and append it to result.
Definition: YAMLParser.cpp:572
llvm::yaml::Token::TokenKind
TokenKind
Definition: YAMLParser.cpp:126
llvm::yaml::Node::failed
bool failed() const
Definition: YAMLParser.cpp:2011
llvm::yaml::Stream::Document
friend class Document
Definition: YAMLParser.h:111
llvm::yaml::NullNode
A null value.
Definition: YAMLParser.h:196
llvm::MemoryBuffer::getMemBuffer
static std::unique_ptr< MemoryBuffer > getMemBuffer(StringRef InputData, StringRef BufferName="", bool RequiresNullTerminator=true)
Open the specified memory range as a MemoryBuffer.
Definition: MemoryBuffer.cpp:117
llvm::yaml::KeyValueNode::getKey
Node * getKey()
Parse and return the key.
Definition: YAMLParser.cpp:2194
llvm::SMLoc
Represents a location in source code.
Definition: SMLoc.h:23
llvm::StringRef::startswith
bool startswith(StringRef Prefix) const
Definition: StringRef.h:260
llvm::Failed
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:198
llvm::AllocatorList::resetAlloc
void resetAlloc()
Reset the underlying allocator.
Definition: AllocatorList.h:222
SmallString.h
llvm::yaml::Token::TK_Anchor
@ TK_Anchor
Definition: YAMLParser.cpp:148
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::yaml::Token::TK_FlowEntry
@ TK_FlowEntry
Definition: YAMLParser.cpp:138
Twine.h
llvm::yaml::Token::TK_Value
@ TK_Value
Definition: YAMLParser.cpp:144
llvm::yaml::Token::TK_VersionDirective
@ TK_VersionDirective
Definition: YAMLParser.cpp:130
t
bitcast float %x to i32 %s=and i32 %t, 2147483647 %d=bitcast i32 %s to float ret float %d } declare float @fabsf(float %n) define float @bar(float %x) nounwind { %d=call float @fabsf(float %x) ret float %d } This IR(from PR6194):target datalayout="e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple="x86_64-apple-darwin10.0.0" %0=type { double, double } %struct.float3=type { float, float, float } define void @test(%0, %struct.float3 *nocapture %res) nounwind noinline ssp { entry:%tmp18=extractvalue %0 %0, 0 t
Definition: README-SSE.txt:788
llvm::StringRef::getAsInteger
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:474
llvm::StringRef::iterator
const char * iterator
Definition: StringRef.h:54
llvm::yaml::SequenceNode
Represents a YAML sequence created from either a block sequence for a flow sequence.
Definition: YAMLParser.h:461
llvm::yaml::Node::Node
Node(unsigned int Type, std::unique_ptr< Document > &, StringRef Anchor, StringRef Tag)
Definition: YAMLParser.cpp:1939
llvm::yaml::Stream::end
document_iterator end()
Definition: YAMLParser.cpp:1930
llvm::yaml::dumpTokens
bool dumpTokens(StringRef Input, raw_ostream &)
Dump all the tokens in this stream to OS.
Definition: YAMLParser.cpp:600
First
into llvm powi allowing the code generator to produce balanced multiplication trees First
Definition: README.txt:54
llvm::logicalview::LVSortMode::Line
@ Line
UEF_Unknown
@ UEF_Unknown
Not a valid Unicode encoding.
Definition: YAMLParser.cpp:48
SMLoc.h
llvm::yaml::Stream::printError
void printError(Node *N, const Twine &Msg, SourceMgr::DiagKind Kind=SourceMgr::DK_Error)
Definition: YAMLParser.cpp:1910
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:145
llvm::yaml::Node::NK_Mapping
@ NK_Mapping
Definition: YAMLParser.h:127
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
llvm::yaml::Node::SourceRange
SMRange SourceRange
Definition: YAMLParser.h:181
llvm::yaml::parseBool
llvm::Optional< bool > parseBool(StringRef S)
Parse S as a bool according to https://yaml.org/type/bool.html.
Definition: YAMLParser.cpp:763
llvm::yaml::Token::TK_StreamStart
@ TK_StreamStart
Definition: YAMLParser.cpp:128
llvm::AllocatorList::begin
iterator begin()
Definition: AllocatorList.h:161
llvm::yaml::Node::skip
virtual void skip()
Definition: YAMLParser.h:175
llvm::lltok::Kind
Kind
Definition: LLToken.h:18
SourceMgr.h
UnicodeEncodingForm
UnicodeEncodingForm
Definition: YAMLParser.cpp:42
llvm::yaml::MappingNode::MT_Inline
@ MT_Inline
An inline mapping node is used for "[key: value]".
Definition: YAMLParser.h:420
llvm::SmallString< 4 >
llvm::yaml::Stream::skip
void skip()
Definition: YAMLParser.cpp:1934
llvm::yaml::BlockScalarNode
A block scalar node is an opaque datum that can be presented as a series of zero or more Unicode scal...
Definition: YAMLParser.h:254
llvm::StringRef::empty
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:134
llvm::yaml::Scanner::getNext
Token getNext()
Parse the next token and pop it from the queue.
Definition: YAMLParser.cpp:904
llvm::AllocatorList::end
iterator end()
Definition: AllocatorList.h:162
llvm::AllocatorList::pop_front
void pop_front()
Definition: AllocatorList.h:207
llvm::yaml::Node::getAllocator
BumpPtrAllocator & getAllocator()
Definition: YAMLParser.cpp:2003
llvm::yaml::Node::Doc
std::unique_ptr< Document > & Doc
Definition: YAMLParser.h:180
llvm::yaml::Stream::~Stream
~Stream()
llvm::yaml::Document::parseBlockNode
Node * parseBlockNode()
Root for parsing a node. Returns a single node.
Definition: YAMLParser.cpp:2449
llvm::yaml::Scanner::printError
void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message, ArrayRef< SMRange > Ranges=None)
Definition: YAMLParser.cpp:261
llvm::MemoryBufferRef::getBufferEnd
const char * getBufferEnd() const
Definition: MemoryBufferRef.h:36
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
llvm::yaml::Node::getNext
Token getNext()
Definition: YAMLParser.cpp:1995
llvm::StringRef::end
iterator end() const
Definition: StringRef.h:113
llvm::yaml::SequenceNode::ST_Flow
@ ST_Flow
Definition: YAMLParser.h:467
llvm::yaml::Token::TK_Tag
@ TK_Tag
Definition: YAMLParser.cpp:149
llvm::SourceMgr::PrintMessage
void PrintMessage(raw_ostream &OS, SMLoc Loc, DiagKind Kind, const Twine &Msg, ArrayRef< SMRange > Ranges={}, ArrayRef< SMFixIt > FixIts={}, bool ShowColors=true) const
Emit a message about the specified location with the specified string.
Definition: SourceMgr.cpp:348
llvm::BumpPtrAllocatorImpl
Allocate memory in an ever growing pool, as if by bump-pointer.
Definition: Allocator.h:63
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::yaml::Token::TK_BlockSequenceStart
@ TK_BlockSequenceStart
Definition: YAMLParser.cpp:136
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:53
is_ns_word_char
static bool is_ns_word_char(const char C)
Definition: YAMLParser.cpp:1000
EncodingInfo
std::pair< UnicodeEncodingForm, unsigned > EncodingInfo
EncodingInfo - Holds the encoding type and length of the byte order mark if it exists.
Definition: YAMLParser.cpp:53
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::yaml::MappingNode::MT_Block
@ MT_Block
Definition: YAMLParser.h:418
UEF_UTF16_BE
@ UEF_UTF16_BE
UTF-16 Big Endian.
Definition: YAMLParser.cpp:46
StringExtras.h
llvm::yaml::Token
Token - A single YAML token.
Definition: YAMLParser.cpp:125
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:447
llvm::AllocatorList
A linked-list with a custom, local allocator.
Definition: AllocatorList.h:33
llvm::yaml::Token::Range
StringRef Range
A string of length 0 or more whose begin() points to the logical location of the token in the input.
Definition: YAMLParser.cpp:154
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1843
ArrayRef.h
llvm::yaml::Node::NK_BlockScalar
@ NK_BlockScalar
Definition: YAMLParser.h:125
wasEscaped
static LLVM_ATTRIBUTE_NOINLINE bool wasEscaped(StringRef::iterator First, StringRef::iterator Position)
Definition: YAMLParser.cpp:1358
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::yaml::Token::TK_FlowSequenceEnd
@ TK_FlowSequenceEnd
Definition: YAMLParser.cpp:140
llvm::yaml::Token::TK_Alias
@ TK_Alias
Definition: YAMLParser.cpp:147
llvm::yaml::Token::TK_BlockEntry
@ TK_BlockEntry
Definition: YAMLParser.cpp:134
llvm::yaml::Token::TK_Scalar
@ TK_Scalar
Definition: YAMLParser.cpp:145
llvm::yaml::Document::getRoot
Node * getRoot()
Parse and return the root level node.
Definition: YAMLParser.h:542
llvm::yaml::Scanner::setError
void setError(const Twine &Message, StringRef::iterator Position)
Definition: YAMLParser.cpp:266
llvm::yaml::Stream::failed
bool failed()
Definition: YAMLParser.cpp:1908
llvm::yaml::SequenceNode::ST_Indentless
@ ST_Indentless
Definition: YAMLParser.h:475
UEF_UTF8
@ UEF_UTF8
UTF-8 or ascii.
Definition: YAMLParser.cpp:47
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
None.h
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::yaml::Token::TK_BlockScalar
@ TK_BlockScalar
Definition: YAMLParser.cpp:146
llvm::AllocatorList::empty
bool empty()
Definition: AllocatorList.h:158
llvm::yaml::Node::getVerbatimTag
std::string getVerbatimTag() const
Get the verbatium tag for a given Node.
Definition: YAMLParser.cpp:1946
llvm::SourceMgr
This owns the files read by a parser, handles include stacks, and handles diagnostic wrangling.
Definition: SourceMgr.h:31
llvm::yaml::KeyValueNode
A key and value pair.
Definition: YAMLParser.h:284
uint32_t
Compiler.h
llvm::append_range
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
Definition: STLExtras.h:1988
UEF_UTF32_LE
@ UEF_UTF32_LE
UTF-32 Little Endian.
Definition: YAMLParser.cpp:43
llvm::yaml::Stream
This class represents a YAML stream potentially containing multiple documents.
Definition: YAMLParser.h:85
llvm::yaml::Stream::Stream
Stream(StringRef Input, SourceMgr &, bool ShowColors=true, std::error_code *EC=nullptr)
This keeps a reference to the string referenced by Input.
Definition: YAMLParser.cpp:1898
llvm::AllocatorList::insert
iterator insert(iterator I, T &&V)
Definition: AllocatorList.h:183
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
is_ns_hex_digit
static bool is_ns_hex_digit(const char C)
Definition: YAMLParser.cpp:998
llvm::StringRef::find_last_of
size_t find_last_of(char C, size_t From=npos) const
Find the last character in the string that is C, or npos if not found.
Definition: StringRef.h:404
llvm::yaml::Token::TK_BlockEnd
@ TK_BlockEnd
Definition: YAMLParser.cpp:135
llvm::yaml::SequenceNode::ST_Block
@ ST_Block
Definition: YAMLParser.h:466
Unicode.h
llvm::yaml::Document::skip
bool skip()
Finish parsing the current document and return true if there are more.
Definition: YAMLParser.cpp:2417
llvm::make_error_code
std::error_code make_error_code(BitcodeError E)
Definition: BitcodeReader.h:274
llvm::AMDGPU::SendMsg::Msg
const CustomOperand< const MCSubtargetInfo & > Msg[]
Definition: AMDGPUAsmUtils.cpp:39
skipComment
static Cursor skipComment(Cursor C)
Skip a line comment and return the updated cursor.
Definition: MILexer.cpp:95
llvm::StringRef::size
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
llvm::AllocatorList::clear
void clear()
Definition: AllocatorList.h:205
llvm::yaml::Stream::begin
document_iterator begin()
Definition: YAMLParser.cpp:1919
llvm::yaml::ScalarNode::getValue
StringRef getValue(SmallVectorImpl< char > &Storage) const
Gets the value of this node as a StringRef.
Definition: YAMLParser.cpp:2015
llvm::SourceMgr::DiagKind
DiagKind
Definition: SourceMgr.h:33
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:50
llvm::None
constexpr std::nullopt_t None
Definition: None.h:27
getUnicodeEncoding
static EncodingInfo getUnicodeEncoding(StringRef Input)
getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode encoding form of Input.
Definition: YAMLParser.cpp:61
llvm::yaml::Token::TK_StreamEnd
@ TK_StreamEnd
Definition: YAMLParser.cpp:129
llvm::yaml::Token::Value
std::string Value
The value of a block scalar node.
Definition: YAMLParser.cpp:157
llvm::yaml::Node::peekNext
Token & peekNext()
Definition: YAMLParser.cpp:1991
llvm::SourceMgr::DK_Error
@ DK_Error
Definition: SourceMgr.h:34
llvm::yaml::Node::NK_Null
@ NK_Null
Definition: YAMLParser.h:123
llvm::SourceMgr::AddNewSourceBuffer
unsigned AddNewSourceBuffer(std::unique_ptr< MemoryBuffer > F, SMLoc IncludeLoc)
Add a new source buffer to this source manager.
Definition: SourceMgr.h:144
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:614
llvm::yaml::MappingNode::MT_Flow
@ MT_Flow
Definition: YAMLParser.h:419
LLVM_ATTRIBUTE_NOINLINE
#define LLVM_ATTRIBUTE_NOINLINE
LLVM_ATTRIBUTE_NOINLINE - On compilers where we have a directive to do so, mark a method "not for inl...
Definition: Compiler.h:220
UEF_UTF32_BE
@ UEF_UTF32_BE
UTF-32 Big Endian.
Definition: YAMLParser.cpp:44
SmallVector.h
llvm::SMRange
Represents a range in source code.
Definition: SMLoc.h:48
llvm::yaml::Node::setError
void setError(const Twine &Message, Token &Location) const
Definition: YAMLParser.cpp:2007
N
#define N
llvm::yaml::Scanner::peekNext
Token & peekNext()
Parse the next token and return it without popping it.
Definition: YAMLParser.cpp:877
llvm::yaml::Token::TK_Key
@ TK_Key
Definition: YAMLParser.cpp:143
llvm::SmallVectorImpl::pop_back_val
T pop_back_val()
Definition: SmallVector.h:677
llvm::SmallVectorImpl< char >
llvm::SMLoc::getFromPointer
static SMLoc getFromPointer(const char *Ptr)
Definition: SMLoc.h:36
llvm::yaml::Token::TK_FlowMappingEnd
@ TK_FlowMappingEnd
Definition: YAMLParser.cpp:142
llvm::yaml::Scanner::Scanner
Scanner(StringRef Input, SourceMgr &SM, bool ShowColors=true, std::error_code *EC=nullptr)
Definition: YAMLParser.cpp:849
llvm::yaml::document_iterator
Iterator abstraction for Documents over a Stream.
Definition: YAMLParser.h:587
llvm::StringRef::find
size_t find(char C, size_t From=0) const
Search for the first character C in the string.
Definition: StringRef.h:294
AllocatorList.h
llvm::sys::unicode::isPrintable
bool isPrintable(int UCS)
Determines if a character is likely to be displayed correctly on the terminal.
Definition: Unicode.cpp:27
llvm::codeview::LocalSymFlags::IsAlias
@ IsAlias
llvm::Type::TypeID
TypeID
Definitions of all of the base types for the Type system.
Definition: Type.h:54
llvm::yaml::Token::TK_FlowSequenceStart
@ TK_FlowSequenceStart
Definition: YAMLParser.cpp:139
llvm::yaml::Node::parseBlockNode
Node * parseBlockNode()
Definition: YAMLParser.cpp:1999
raw_ostream.h
llvm::SmallVectorImpl::reserve
void reserve(size_type N)
Definition: SmallVector.h:667
llvm::StringRef::begin
iterator begin() const
Definition: StringRef.h:111
llvm::yaml::Token::TK_FlowMappingStart
@ TK_FlowMappingStart
Definition: YAMLParser.cpp:141
llvm::StringRef::copy
StringRef copy(Allocator &A) const
Definition: StringRef.h:153
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::yaml::Node::NK_Scalar
@ NK_Scalar
Definition: YAMLParser.h:124
llvm::yaml::Node::getType
unsigned int getType() const
Definition: YAMLParser.h:177
llvm::AllocatorList::push_back
void push_back(T &&V)
Definition: AllocatorList.h:208
UTF8Decoded
std::pair< uint32_t, unsigned > UTF8Decoded
The Unicode scalar value of a UTF-8 minimal well-formed code unit subsequence and the subsequence's l...
Definition: YAMLParser.cpp:195
llvm::StringRef::drop_back
StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Definition: StringRef.h:608
Other
Optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1251
getChompedLineBreaks
static unsigned getChompedLineBreaks(char ChompingIndicator, unsigned LineBreaks, StringRef Str)
Get the number of line breaks after chomping.
Definition: YAMLParser.cpp:1573