File: | build/source/llvm/lib/Support/YAMLParser.cpp |
Warning: | line 1637, column 9 2nd function call argument is an uninitialized value |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===- YAMLParser.cpp - Simple YAML parser --------------------------------===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | // | |||
9 | // This file implements a YAML parser. | |||
10 | // | |||
11 | //===----------------------------------------------------------------------===// | |||
12 | ||||
13 | #include "llvm/Support/YAMLParser.h" | |||
14 | #include "llvm/ADT/AllocatorList.h" | |||
15 | #include "llvm/ADT/ArrayRef.h" | |||
16 | #include "llvm/ADT/STLExtras.h" | |||
17 | #include "llvm/ADT/SmallString.h" | |||
18 | #include "llvm/ADT/SmallVector.h" | |||
19 | #include "llvm/ADT/StringExtras.h" | |||
20 | #include "llvm/ADT/StringRef.h" | |||
21 | #include "llvm/ADT/Twine.h" | |||
22 | #include "llvm/Support/Compiler.h" | |||
23 | #include "llvm/Support/ErrorHandling.h" | |||
24 | #include "llvm/Support/MemoryBuffer.h" | |||
25 | #include "llvm/Support/SMLoc.h" | |||
26 | #include "llvm/Support/SourceMgr.h" | |||
27 | #include "llvm/Support/Unicode.h" | |||
28 | #include "llvm/Support/raw_ostream.h" | |||
29 | #include <cassert> | |||
30 | #include <cstddef> | |||
31 | #include <cstdint> | |||
32 | #include <map> | |||
33 | #include <memory> | |||
34 | #include <string> | |||
35 | #include <system_error> | |||
36 | #include <utility> | |||
37 | ||||
38 | using namespace llvm; | |||
39 | using namespace yaml; | |||
40 | ||||
/// The Unicode encoding forms that can be reported for a piece of input.
enum UnicodeEncodingForm {
  /// UTF-32 Little Endian
  UEF_UTF32_LE,
  /// UTF-32 Big Endian
  UEF_UTF32_BE,
  /// UTF-16 Little Endian
  UEF_UTF16_LE,
  /// UTF-16 Big Endian
  UEF_UTF16_BE,
  /// UTF-8 or ascii.
  UEF_UTF8,
  /// Not a valid Unicode encoding.
  UEF_Unknown
};
49 | ||||
/// EncodingInfo - Holds the encoding type (first) and the length in bytes of
/// the byte order mark (second). The length is 0 when no byte order mark
/// exists; otherwise it is one of 2, 3 or 4.
using EncodingInfo = std::pair<UnicodeEncodingForm, unsigned>;
53 | ||||
54 | /// getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode | |||
55 | /// encoding form of \a Input. | |||
56 | /// | |||
57 | /// @param Input A string of length 0 or more. | |||
58 | /// @returns An EncodingInfo indicating the Unicode encoding form of the input | |||
59 | /// and how long the byte order mark is if one exists. | |||
60 | static EncodingInfo getUnicodeEncoding(StringRef Input) { | |||
61 | if (Input.empty()) | |||
62 | return std::make_pair(UEF_Unknown, 0); | |||
63 | ||||
64 | switch (uint8_t(Input[0])) { | |||
65 | case 0x00: | |||
66 | if (Input.size() >= 4) { | |||
67 | if ( Input[1] == 0 | |||
68 | && uint8_t(Input[2]) == 0xFE | |||
69 | && uint8_t(Input[3]) == 0xFF) | |||
70 | return std::make_pair(UEF_UTF32_BE, 4); | |||
71 | if (Input[1] == 0 && Input[2] == 0 && Input[3] != 0) | |||
72 | return std::make_pair(UEF_UTF32_BE, 0); | |||
73 | } | |||
74 | ||||
75 | if (Input.size() >= 2 && Input[1] != 0) | |||
76 | return std::make_pair(UEF_UTF16_BE, 0); | |||
77 | return std::make_pair(UEF_Unknown, 0); | |||
78 | case 0xFF: | |||
79 | if ( Input.size() >= 4 | |||
80 | && uint8_t(Input[1]) == 0xFE | |||
81 | && Input[2] == 0 | |||
82 | && Input[3] == 0) | |||
83 | return std::make_pair(UEF_UTF32_LE, 4); | |||
84 | ||||
85 | if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFE) | |||
86 | return std::make_pair(UEF_UTF16_LE, 2); | |||
87 | return std::make_pair(UEF_Unknown, 0); | |||
88 | case 0xFE: | |||
89 | if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFF) | |||
90 | return std::make_pair(UEF_UTF16_BE, 2); | |||
91 | return std::make_pair(UEF_Unknown, 0); | |||
92 | case 0xEF: | |||
93 | if ( Input.size() >= 3 | |||
94 | && uint8_t(Input[1]) == 0xBB | |||
95 | && uint8_t(Input[2]) == 0xBF) | |||
96 | return std::make_pair(UEF_UTF8, 3); | |||
97 | return std::make_pair(UEF_Unknown, 0); | |||
98 | } | |||
99 | ||||
100 | // It could still be utf-32 or utf-16. | |||
101 | if (Input.size() >= 4 && Input[1] == 0 && Input[2] == 0 && Input[3] == 0) | |||
102 | return std::make_pair(UEF_UTF32_LE, 0); | |||
103 | ||||
104 | if (Input.size() >= 2 && Input[1] == 0) | |||
105 | return std::make_pair(UEF_UTF16_LE, 0); | |||
106 | ||||
107 | return std::make_pair(UEF_UTF8, 0); | |||
108 | } | |||
109 | ||||
/// Pin the vtables to this file.
///
/// The bodies are intentionally empty; what matters is that these
/// out-of-line definitions live in this file.
void Node::anchor() {}
void NullNode::anchor() {}
void ScalarNode::anchor() {}
void BlockScalarNode::anchor() {}
void KeyValueNode::anchor() {}
void MappingNode::anchor() {}
void SequenceNode::anchor() {}
void AliasNode::anchor() {}
119 | ||||
120 | namespace llvm { | |||
121 | namespace yaml { | |||
122 | ||||
/// Token - A single YAML token.
///
/// A default-constructed token has kind TK_Error, an empty Range and an empty
/// Value.
struct Token {
  /// The kind of this token. Defaults to TK_Error, which marks a token that
  /// has not been filled in.
  enum TokenKind {
    TK_Error, // Uninitialized token.
    TK_StreamStart,
    TK_StreamEnd,
    TK_VersionDirective,
    TK_TagDirective,
    TK_DocumentStart,
    TK_DocumentEnd,
    TK_BlockEntry,
    TK_BlockEnd,
    TK_BlockSequenceStart,
    TK_BlockMappingStart,
    TK_FlowEntry,
    TK_FlowSequenceStart,
    TK_FlowSequenceEnd,
    TK_FlowMappingStart,
    TK_FlowMappingEnd,
    TK_Key,
    TK_Value,
    TK_Scalar,
    TK_BlockScalar,
    TK_Alias,
    TK_Anchor,
    TK_Tag
  } Kind = TK_Error;

  /// A string of length 0 or more whose begin() points to the logical location
  /// of the token in the input.
  StringRef Range;

  /// The value of a block scalar node.
  std::string Value;

  Token() = default;
};
160 | ||||
161 | } // end namespace yaml | |||
162 | } // end namespace llvm | |||
163 | ||||
/// The list type used to queue up Tokens.
using TokenQueueT = BumpPtrList<Token>;
165 | ||||
166 | namespace { | |||
167 | ||||
168 | /// This struct is used to track simple keys. | |||
169 | /// | |||
170 | /// Simple keys are handled by creating an entry in SimpleKeys for each Token | |||
171 | /// which could legally be the start of a simple key. When peekNext is called, | |||
172 | /// if the Token To be returned is referenced by a SimpleKey, we continue | |||
173 | /// tokenizing until that potential simple key has either been found to not be | |||
174 | /// a simple key (we moved on to the next line or went further than 1024 chars). | |||
175 | /// Or when we run into a Value, and then insert a Key token (and possibly | |||
176 | /// others) before the SimpleKey's Tok. | |||
177 | struct SimpleKey { | |||
178 | TokenQueueT::iterator Tok; | |||
179 | unsigned Column = 0; | |||
180 | unsigned Line = 0; | |||
181 | unsigned FlowLevel = 0; | |||
182 | bool IsRequired = false; | |||
183 | ||||
184 | bool operator ==(const SimpleKey &Other) { | |||
185 | return Tok == Other.Tok; | |||
186 | } | |||
187 | }; | |||
188 | ||||
189 | } // end anonymous namespace | |||
190 | ||||
/// The Unicode scalar value of a UTF-8 minimal well-formed code unit
/// subsequence (first) and the subsequence's length in code units (uint8_t)
/// (second). A length of 0 represents an error.
using UTF8Decoded = std::pair<uint32_t, unsigned>;
195 | ||||
196 | static UTF8Decoded decodeUTF8(StringRef Range) { | |||
197 | StringRef::iterator Position= Range.begin(); | |||
198 | StringRef::iterator End = Range.end(); | |||
199 | // 1 byte: [0x00, 0x7f] | |||
200 | // Bit pattern: 0xxxxxxx | |||
201 | if (Position < End && (*Position & 0x80) == 0) { | |||
202 | return std::make_pair(*Position, 1); | |||
203 | } | |||
204 | // 2 bytes: [0x80, 0x7ff] | |||
205 | // Bit pattern: 110xxxxx 10xxxxxx | |||
206 | if (Position + 1 < End && ((*Position & 0xE0) == 0xC0) && | |||
207 | ((*(Position + 1) & 0xC0) == 0x80)) { | |||
208 | uint32_t codepoint = ((*Position & 0x1F) << 6) | | |||
209 | (*(Position + 1) & 0x3F); | |||
210 | if (codepoint >= 0x80) | |||
211 | return std::make_pair(codepoint, 2); | |||
212 | } | |||
213 | // 3 bytes: [0x8000, 0xffff] | |||
214 | // Bit pattern: 1110xxxx 10xxxxxx 10xxxxxx | |||
215 | if (Position + 2 < End && ((*Position & 0xF0) == 0xE0) && | |||
216 | ((*(Position + 1) & 0xC0) == 0x80) && | |||
217 | ((*(Position + 2) & 0xC0) == 0x80)) { | |||
218 | uint32_t codepoint = ((*Position & 0x0F) << 12) | | |||
219 | ((*(Position + 1) & 0x3F) << 6) | | |||
220 | (*(Position + 2) & 0x3F); | |||
221 | // Codepoints between 0xD800 and 0xDFFF are invalid, as | |||
222 | // they are high / low surrogate halves used by UTF-16. | |||
223 | if (codepoint >= 0x800 && | |||
224 | (codepoint < 0xD800 || codepoint > 0xDFFF)) | |||
225 | return std::make_pair(codepoint, 3); | |||
226 | } | |||
227 | // 4 bytes: [0x10000, 0x10FFFF] | |||
228 | // Bit pattern: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx | |||
229 | if (Position + 3 < End && ((*Position & 0xF8) == 0xF0) && | |||
230 | ((*(Position + 1) & 0xC0) == 0x80) && | |||
231 | ((*(Position + 2) & 0xC0) == 0x80) && | |||
232 | ((*(Position + 3) & 0xC0) == 0x80)) { | |||
233 | uint32_t codepoint = ((*Position & 0x07) << 18) | | |||
234 | ((*(Position + 1) & 0x3F) << 12) | | |||
235 | ((*(Position + 2) & 0x3F) << 6) | | |||
236 | (*(Position + 3) & 0x3F); | |||
237 | if (codepoint >= 0x10000 && codepoint <= 0x10FFFF) | |||
238 | return std::make_pair(codepoint, 4); | |||
239 | } | |||
240 | return std::make_pair(0, 0); | |||
241 | } | |||
242 | ||||
243 | namespace llvm { | |||
244 | namespace yaml { | |||
245 | ||||
246 | /// Scans YAML tokens from a MemoryBuffer. | |||
247 | class Scanner { | |||
248 | public: | |||
249 | Scanner(StringRef Input, SourceMgr &SM, bool ShowColors = true, | |||
250 | std::error_code *EC = nullptr); | |||
251 | Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors = true, | |||
252 | std::error_code *EC = nullptr); | |||
253 | ||||
254 | /// Parse the next token and return it without popping it. | |||
255 | Token &peekNext(); | |||
256 | ||||
257 | /// Parse the next token and pop it from the queue. | |||
258 | Token getNext(); | |||
259 | ||||
260 | void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message, | |||
261 | ArrayRef<SMRange> Ranges = std::nullopt) { | |||
262 | SM.PrintMessage(Loc, Kind, Message, Ranges, /* FixIts= */ std::nullopt, | |||
263 | ShowColors); | |||
264 | } | |||
265 | ||||
266 | void setError(const Twine &Message, StringRef::iterator Position) { | |||
267 | if (Position >= End) | |||
268 | Position = End - 1; | |||
269 | ||||
270 | // propagate the error if possible | |||
271 | if (EC) | |||
272 | *EC = make_error_code(std::errc::invalid_argument); | |||
273 | ||||
274 | // Don't print out more errors after the first one we encounter. The rest | |||
275 | // are just the result of the first, and have no meaning. | |||
276 | if (!Failed) | |||
277 | printError(SMLoc::getFromPointer(Position), SourceMgr::DK_Error, Message); | |||
278 | Failed = true; | |||
279 | } | |||
280 | ||||
281 | /// Returns true if an error occurred while parsing. | |||
282 | bool failed() { | |||
283 | return Failed; | |||
284 | } | |||
285 | ||||
286 | private: | |||
287 | void init(MemoryBufferRef Buffer); | |||
288 | ||||
289 | StringRef currentInput() { | |||
290 | return StringRef(Current, End - Current); | |||
291 | } | |||
292 | ||||
293 | /// Decode a UTF-8 minimal well-formed code unit subsequence starting | |||
294 | /// at \a Position. | |||
295 | /// | |||
296 | /// If the UTF-8 code units starting at Position do not form a well-formed | |||
297 | /// code unit subsequence, then the Unicode scalar value is 0, and the length | |||
298 | /// is 0. | |||
299 | UTF8Decoded decodeUTF8(StringRef::iterator Position) { | |||
300 | return ::decodeUTF8(StringRef(Position, End - Position)); | |||
301 | } | |||
302 | ||||
303 | // The following functions are based on the gramar rules in the YAML spec. The | |||
304 | // style of the function names it meant to closely match how they are written | |||
305 | // in the spec. The number within the [] is the number of the grammar rule in | |||
306 | // the spec. | |||
307 | // | |||
308 | // See 4.2 [Production Naming Conventions] for the meaning of the prefixes. | |||
309 | // | |||
310 | // c- | |||
311 | // A production starting and ending with a special character. | |||
312 | // b- | |||
313 | // A production matching a single line break. | |||
314 | // nb- | |||
315 | // A production starting and ending with a non-break character. | |||
316 | // s- | |||
317 | // A production starting and ending with a white space character. | |||
318 | // ns- | |||
319 | // A production starting and ending with a non-space character. | |||
320 | // l- | |||
321 | // A production matching complete line(s). | |||
322 | ||||
323 | /// Skip a single nb-char[27] starting at Position. | |||
324 | /// | |||
325 | /// A nb-char is 0x9 | [0x20-0x7E] | 0x85 | [0xA0-0xD7FF] | [0xE000-0xFEFE] | |||
326 | /// | [0xFF00-0xFFFD] | [0x10000-0x10FFFF] | |||
327 | /// | |||
328 | /// @returns The code unit after the nb-char, or Position if it's not an | |||
329 | /// nb-char. | |||
330 | StringRef::iterator skip_nb_char(StringRef::iterator Position); | |||
331 | ||||
332 | /// Skip a single b-break[28] starting at Position. | |||
333 | /// | |||
334 | /// A b-break is 0xD 0xA | 0xD | 0xA | |||
335 | /// | |||
336 | /// @returns The code unit after the b-break, or Position if it's not a | |||
337 | /// b-break. | |||
338 | StringRef::iterator skip_b_break(StringRef::iterator Position); | |||
339 | ||||
340 | /// Skip a single s-space[31] starting at Position. | |||
341 | /// | |||
342 | /// An s-space is 0x20 | |||
343 | /// | |||
344 | /// @returns The code unit after the s-space, or Position if it's not a | |||
345 | /// s-space. | |||
346 | StringRef::iterator skip_s_space(StringRef::iterator Position); | |||
347 | ||||
348 | /// Skip a single s-white[33] starting at Position. | |||
349 | /// | |||
350 | /// A s-white is 0x20 | 0x9 | |||
351 | /// | |||
352 | /// @returns The code unit after the s-white, or Position if it's not a | |||
353 | /// s-white. | |||
354 | StringRef::iterator skip_s_white(StringRef::iterator Position); | |||
355 | ||||
356 | /// Skip a single ns-char[34] starting at Position. | |||
357 | /// | |||
358 | /// A ns-char is nb-char - s-white | |||
359 | /// | |||
360 | /// @returns The code unit after the ns-char, or Position if it's not a | |||
361 | /// ns-char. | |||
362 | StringRef::iterator skip_ns_char(StringRef::iterator Position); | |||
363 | ||||
364 | using SkipWhileFunc = StringRef::iterator (Scanner::*)(StringRef::iterator); | |||
365 | ||||
366 | /// Skip minimal well-formed code unit subsequences until Func | |||
367 | /// returns its input. | |||
368 | /// | |||
369 | /// @returns The code unit after the last minimal well-formed code unit | |||
370 | /// subsequence that Func accepted. | |||
371 | StringRef::iterator skip_while( SkipWhileFunc Func | |||
372 | , StringRef::iterator Position); | |||
373 | ||||
374 | /// Skip minimal well-formed code unit subsequences until Func returns its | |||
375 | /// input. | |||
376 | void advanceWhile(SkipWhileFunc Func); | |||
377 | ||||
378 | /// Scan ns-uri-char[39]s starting at Cur. | |||
379 | /// | |||
380 | /// This updates Cur and Column while scanning. | |||
381 | void scan_ns_uri_char(); | |||
382 | ||||
383 | /// Consume a minimal well-formed code unit subsequence starting at | |||
384 | /// \a Cur. Return false if it is not the same Unicode scalar value as | |||
385 | /// \a Expected. This updates \a Column. | |||
386 | bool consume(uint32_t Expected); | |||
387 | ||||
388 | /// Skip \a Distance UTF-8 code units. Updates \a Cur and \a Column. | |||
389 | void skip(uint32_t Distance); | |||
390 | ||||
391 | /// Return true if the minimal well-formed code unit subsequence at | |||
392 | /// Pos is whitespace or a new line | |||
393 | bool isBlankOrBreak(StringRef::iterator Position); | |||
394 | ||||
395 | /// Return true if the line is a line break, false otherwise. | |||
396 | bool isLineEmpty(StringRef Line); | |||
397 | ||||
398 | /// Consume a single b-break[28] if it's present at the current position. | |||
399 | /// | |||
400 | /// Return false if the code unit at the current position isn't a line break. | |||
401 | bool consumeLineBreakIfPresent(); | |||
402 | ||||
403 | /// If IsSimpleKeyAllowed, create and push_back a new SimpleKey. | |||
404 | void saveSimpleKeyCandidate( TokenQueueT::iterator Tok | |||
405 | , unsigned AtColumn | |||
406 | , bool IsRequired); | |||
407 | ||||
408 | /// Remove simple keys that can no longer be valid simple keys. | |||
409 | /// | |||
410 | /// Invalid simple keys are not on the current line or are further than 1024 | |||
411 | /// columns back. | |||
412 | void removeStaleSimpleKeyCandidates(); | |||
413 | ||||
414 | /// Remove all simple keys on FlowLevel \a Level. | |||
415 | void removeSimpleKeyCandidatesOnFlowLevel(unsigned Level); | |||
416 | ||||
417 | /// Unroll indentation in \a Indents back to \a Col. Creates BlockEnd | |||
418 | /// tokens if needed. | |||
419 | bool unrollIndent(int ToColumn); | |||
420 | ||||
421 | /// Increase indent to \a Col. Creates \a Kind token at \a InsertPoint | |||
422 | /// if needed. | |||
423 | bool rollIndent( int ToColumn | |||
424 | , Token::TokenKind Kind | |||
425 | , TokenQueueT::iterator InsertPoint); | |||
426 | ||||
427 | /// Skip a single-line comment when the comment starts at the current | |||
428 | /// position of the scanner. | |||
429 | void skipComment(); | |||
430 | ||||
431 | /// Skip whitespace and comments until the start of the next token. | |||
432 | void scanToNextToken(); | |||
433 | ||||
434 | /// Must be the first token generated. | |||
435 | bool scanStreamStart(); | |||
436 | ||||
437 | /// Generate tokens needed to close out the stream. | |||
438 | bool scanStreamEnd(); | |||
439 | ||||
440 | /// Scan a %BLAH directive. | |||
441 | bool scanDirective(); | |||
442 | ||||
443 | /// Scan a ... or ---. | |||
444 | bool scanDocumentIndicator(bool IsStart); | |||
445 | ||||
446 | /// Scan a [ or { and generate the proper flow collection start token. | |||
447 | bool scanFlowCollectionStart(bool IsSequence); | |||
448 | ||||
449 | /// Scan a ] or } and generate the proper flow collection end token. | |||
450 | bool scanFlowCollectionEnd(bool IsSequence); | |||
451 | ||||
452 | /// Scan the , that separates entries in a flow collection. | |||
453 | bool scanFlowEntry(); | |||
454 | ||||
455 | /// Scan the - that starts block sequence entries. | |||
456 | bool scanBlockEntry(); | |||
457 | ||||
458 | /// Scan an explicit ? indicating a key. | |||
459 | bool scanKey(); | |||
460 | ||||
461 | /// Scan an explicit : indicating a value. | |||
462 | bool scanValue(); | |||
463 | ||||
464 | /// Scan a quoted scalar. | |||
465 | bool scanFlowScalar(bool IsDoubleQuoted); | |||
466 | ||||
467 | /// Scan an unquoted scalar. | |||
468 | bool scanPlainScalar(); | |||
469 | ||||
470 | /// Scan an Alias or Anchor starting with * or &. | |||
471 | bool scanAliasOrAnchor(bool IsAlias); | |||
472 | ||||
473 | /// Scan a block scalar starting with | or >. | |||
474 | bool scanBlockScalar(bool IsLiteral); | |||
475 | ||||
476 | /// Scan a block scalar style indicator and header. | |||
477 | /// | |||
478 | /// Note: This is distinct from scanBlockScalarHeader to mirror the fact that | |||
479 | /// YAML does not consider the style indicator to be a part of the header. | |||
480 | /// | |||
481 | /// Return false if an error occurred. | |||
482 | bool scanBlockScalarIndicators(char &StyleIndicator, char &ChompingIndicator, | |||
483 | unsigned &IndentIndicator, bool &IsDone); | |||
484 | ||||
485 | /// Scan a style indicator in a block scalar header. | |||
486 | char scanBlockStyleIndicator(); | |||
487 | ||||
488 | /// Scan a chomping indicator in a block scalar header. | |||
489 | char scanBlockChompingIndicator(); | |||
490 | ||||
491 | /// Scan an indentation indicator in a block scalar header. | |||
492 | unsigned scanBlockIndentationIndicator(); | |||
493 | ||||
494 | /// Scan a block scalar header. | |||
495 | /// | |||
496 | /// Return false if an error occurred. | |||
497 | bool scanBlockScalarHeader(char &ChompingIndicator, unsigned &IndentIndicator, | |||
498 | bool &IsDone); | |||
499 | ||||
500 | /// Look for the indentation level of a block scalar. | |||
501 | /// | |||
502 | /// Return false if an error occurred. | |||
503 | bool findBlockScalarIndent(unsigned &BlockIndent, unsigned BlockExitIndent, | |||
504 | unsigned &LineBreaks, bool &IsDone); | |||
505 | ||||
506 | /// Scan the indentation of a text line in a block scalar. | |||
507 | /// | |||
508 | /// Return false if an error occurred. | |||
509 | bool scanBlockScalarIndent(unsigned BlockIndent, unsigned BlockExitIndent, | |||
510 | bool &IsDone); | |||
511 | ||||
512 | /// Scan a tag of the form !stuff. | |||
513 | bool scanTag(); | |||
514 | ||||
515 | /// Dispatch to the next scanning function based on \a *Cur. | |||
516 | bool fetchMoreTokens(); | |||
517 | ||||
518 | /// The SourceMgr used for diagnostics and buffer management. | |||
519 | SourceMgr &SM; | |||
520 | ||||
521 | /// The original input. | |||
522 | MemoryBufferRef InputBuffer; | |||
523 | ||||
524 | /// The current position of the scanner. | |||
525 | StringRef::iterator Current; | |||
526 | ||||
527 | /// The end of the input (one past the last character). | |||
528 | StringRef::iterator End; | |||
529 | ||||
530 | /// Current YAML indentation level in spaces. | |||
531 | int Indent; | |||
532 | ||||
533 | /// Current column number in Unicode code points. | |||
534 | unsigned Column; | |||
535 | ||||
536 | /// Current line number. | |||
537 | unsigned Line; | |||
538 | ||||
539 | /// How deep we are in flow style containers. 0 Means at block level. | |||
540 | unsigned FlowLevel; | |||
541 | ||||
542 | /// Are we at the start of the stream? | |||
543 | bool IsStartOfStream; | |||
544 | ||||
545 | /// Can the next token be the start of a simple key? | |||
546 | bool IsSimpleKeyAllowed; | |||
547 | ||||
548 | /// True if an error has occurred. | |||
549 | bool Failed; | |||
550 | ||||
551 | /// Should colors be used when printing out the diagnostic messages? | |||
552 | bool ShowColors; | |||
553 | ||||
554 | /// Queue of tokens. This is required to queue up tokens while looking | |||
555 | /// for the end of a simple key. And for cases where a single character | |||
556 | /// can produce multiple tokens (e.g. BlockEnd). | |||
557 | TokenQueueT TokenQueue; | |||
558 | ||||
559 | /// Indentation levels. | |||
560 | SmallVector<int, 4> Indents; | |||
561 | ||||
562 | /// Potential simple keys. | |||
563 | SmallVector<SimpleKey, 4> SimpleKeys; | |||
564 | ||||
565 | std::error_code *EC; | |||
566 | }; | |||
567 | ||||
568 | } // end namespace yaml | |||
569 | } // end namespace llvm | |||
570 | ||||
571 | /// encodeUTF8 - Encode \a UnicodeScalarValue in UTF-8 and append it to result. | |||
572 | static void encodeUTF8( uint32_t UnicodeScalarValue | |||
573 | , SmallVectorImpl<char> &Result) { | |||
574 | if (UnicodeScalarValue <= 0x7F) { | |||
575 | Result.push_back(UnicodeScalarValue & 0x7F); | |||
576 | } else if (UnicodeScalarValue <= 0x7FF) { | |||
577 | uint8_t FirstByte = 0xC0 | ((UnicodeScalarValue & 0x7C0) >> 6); | |||
578 | uint8_t SecondByte = 0x80 | (UnicodeScalarValue & 0x3F); | |||
579 | Result.push_back(FirstByte); | |||
580 | Result.push_back(SecondByte); | |||
581 | } else if (UnicodeScalarValue <= 0xFFFF) { | |||
582 | uint8_t FirstByte = 0xE0 | ((UnicodeScalarValue & 0xF000) >> 12); | |||
583 | uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6); | |||
584 | uint8_t ThirdByte = 0x80 | (UnicodeScalarValue & 0x3F); | |||
585 | Result.push_back(FirstByte); | |||
586 | Result.push_back(SecondByte); | |||
587 | Result.push_back(ThirdByte); | |||
588 | } else if (UnicodeScalarValue <= 0x10FFFF) { | |||
589 | uint8_t FirstByte = 0xF0 | ((UnicodeScalarValue & 0x1F0000) >> 18); | |||
590 | uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0x3F000) >> 12); | |||
591 | uint8_t ThirdByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6); | |||
592 | uint8_t FourthByte = 0x80 | (UnicodeScalarValue & 0x3F); | |||
593 | Result.push_back(FirstByte); | |||
594 | Result.push_back(SecondByte); | |||
595 | Result.push_back(ThirdByte); | |||
596 | Result.push_back(FourthByte); | |||
597 | } | |||
598 | } | |||
599 | ||||
600 | bool yaml::dumpTokens(StringRef Input, raw_ostream &OS) { | |||
601 | SourceMgr SM; | |||
602 | Scanner scanner(Input, SM); | |||
603 | while (true) { | |||
604 | Token T = scanner.getNext(); | |||
605 | switch (T.Kind) { | |||
606 | case Token::TK_StreamStart: | |||
607 | OS << "Stream-Start: "; | |||
608 | break; | |||
609 | case Token::TK_StreamEnd: | |||
610 | OS << "Stream-End: "; | |||
611 | break; | |||
612 | case Token::TK_VersionDirective: | |||
613 | OS << "Version-Directive: "; | |||
614 | break; | |||
615 | case Token::TK_TagDirective: | |||
616 | OS << "Tag-Directive: "; | |||
617 | break; | |||
618 | case Token::TK_DocumentStart: | |||
619 | OS << "Document-Start: "; | |||
620 | break; | |||
621 | case Token::TK_DocumentEnd: | |||
622 | OS << "Document-End: "; | |||
623 | break; | |||
624 | case Token::TK_BlockEntry: | |||
625 | OS << "Block-Entry: "; | |||
626 | break; | |||
627 | case Token::TK_BlockEnd: | |||
628 | OS << "Block-End: "; | |||
629 | break; | |||
630 | case Token::TK_BlockSequenceStart: | |||
631 | OS << "Block-Sequence-Start: "; | |||
632 | break; | |||
633 | case Token::TK_BlockMappingStart: | |||
634 | OS << "Block-Mapping-Start: "; | |||
635 | break; | |||
636 | case Token::TK_FlowEntry: | |||
637 | OS << "Flow-Entry: "; | |||
638 | break; | |||
639 | case Token::TK_FlowSequenceStart: | |||
640 | OS << "Flow-Sequence-Start: "; | |||
641 | break; | |||
642 | case Token::TK_FlowSequenceEnd: | |||
643 | OS << "Flow-Sequence-End: "; | |||
644 | break; | |||
645 | case Token::TK_FlowMappingStart: | |||
646 | OS << "Flow-Mapping-Start: "; | |||
647 | break; | |||
648 | case Token::TK_FlowMappingEnd: | |||
649 | OS << "Flow-Mapping-End: "; | |||
650 | break; | |||
651 | case Token::TK_Key: | |||
652 | OS << "Key: "; | |||
653 | break; | |||
654 | case Token::TK_Value: | |||
655 | OS << "Value: "; | |||
656 | break; | |||
657 | case Token::TK_Scalar: | |||
658 | OS << "Scalar: "; | |||
659 | break; | |||
660 | case Token::TK_BlockScalar: | |||
661 | OS << "Block Scalar: "; | |||
662 | break; | |||
663 | case Token::TK_Alias: | |||
664 | OS << "Alias: "; | |||
665 | break; | |||
666 | case Token::TK_Anchor: | |||
667 | OS << "Anchor: "; | |||
668 | break; | |||
669 | case Token::TK_Tag: | |||
670 | OS << "Tag: "; | |||
671 | break; | |||
672 | case Token::TK_Error: | |||
673 | break; | |||
674 | } | |||
675 | OS << T.Range << "\n"; | |||
676 | if (T.Kind == Token::TK_StreamEnd) | |||
677 | break; | |||
678 | else if (T.Kind == Token::TK_Error) | |||
679 | return false; | |||
680 | } | |||
681 | return true; | |||
682 | } | |||
683 | ||||
684 | bool yaml::scanTokens(StringRef Input) { | |||
685 | SourceMgr SM; | |||
686 | Scanner scanner(Input, SM); | |||
687 | while (true) { | |||
688 | Token T = scanner.getNext(); | |||
689 | if (T.Kind == Token::TK_StreamEnd) | |||
690 | break; | |||
691 | else if (T.Kind == Token::TK_Error) | |||
692 | return false; | |||
693 | } | |||
694 | return true; | |||
695 | } | |||
696 | ||||
697 | std::string yaml::escape(StringRef Input, bool EscapePrintable) { | |||
698 | std::string EscapedInput; | |||
699 | for (StringRef::iterator i = Input.begin(), e = Input.end(); i != e; ++i) { | |||
700 | if (*i == '\\') | |||
701 | EscapedInput += "\\\\"; | |||
702 | else if (*i == '"') | |||
703 | EscapedInput += "\\\""; | |||
704 | else if (*i == 0) | |||
705 | EscapedInput += "\\0"; | |||
706 | else if (*i == 0x07) | |||
707 | EscapedInput += "\\a"; | |||
708 | else if (*i == 0x08) | |||
709 | EscapedInput += "\\b"; | |||
710 | else if (*i == 0x09) | |||
711 | EscapedInput += "\\t"; | |||
712 | else if (*i == 0x0A) | |||
713 | EscapedInput += "\\n"; | |||
714 | else if (*i == 0x0B) | |||
715 | EscapedInput += "\\v"; | |||
716 | else if (*i == 0x0C) | |||
717 | EscapedInput += "\\f"; | |||
718 | else if (*i == 0x0D) | |||
719 | EscapedInput += "\\r"; | |||
720 | else if (*i == 0x1B) | |||
721 | EscapedInput += "\\e"; | |||
722 | else if ((unsigned char)*i < 0x20) { // Control characters not handled above. | |||
723 | std::string HexStr = utohexstr(*i); | |||
724 | EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr; | |||
725 | } else if (*i & 0x80) { // UTF-8 multiple code unit subsequence. | |||
726 | UTF8Decoded UnicodeScalarValue | |||
727 | = decodeUTF8(StringRef(i, Input.end() - i)); | |||
728 | if (UnicodeScalarValue.second == 0) { | |||
729 | // Found invalid char. | |||
730 | SmallString<4> Val; | |||
731 | encodeUTF8(0xFFFD, Val); | |||
732 | llvm::append_range(EscapedInput, Val); | |||
733 | // FIXME: Error reporting. | |||
734 | return EscapedInput; | |||
735 | } | |||
736 | if (UnicodeScalarValue.first == 0x85) | |||
737 | EscapedInput += "\\N"; | |||
738 | else if (UnicodeScalarValue.first == 0xA0) | |||
739 | EscapedInput += "\\_"; | |||
740 | else if (UnicodeScalarValue.first == 0x2028) | |||
741 | EscapedInput += "\\L"; | |||
742 | else if (UnicodeScalarValue.first == 0x2029) | |||
743 | EscapedInput += "\\P"; | |||
744 | else if (!EscapePrintable && | |||
745 | sys::unicode::isPrintable(UnicodeScalarValue.first)) | |||
746 | EscapedInput += StringRef(i, UnicodeScalarValue.second); | |||
747 | else { | |||
748 | std::string HexStr = utohexstr(UnicodeScalarValue.first); | |||
749 | if (HexStr.size() <= 2) | |||
750 | EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr; | |||
751 | else if (HexStr.size() <= 4) | |||
752 | EscapedInput += "\\u" + std::string(4 - HexStr.size(), '0') + HexStr; | |||
753 | else if (HexStr.size() <= 8) | |||
754 | EscapedInput += "\\U" + std::string(8 - HexStr.size(), '0') + HexStr; | |||
755 | } | |||
756 | i += UnicodeScalarValue.second - 1; | |||
757 | } else | |||
758 | EscapedInput.push_back(*i); | |||
759 | } | |||
760 | return EscapedInput; | |||
761 | } | |||
762 | ||||
763 | std::optional<bool> yaml::parseBool(StringRef S) { | |||
764 | switch (S.size()) { | |||
765 | case 1: | |||
766 | switch (S.front()) { | |||
767 | case 'y': | |||
768 | case 'Y': | |||
769 | return true; | |||
770 | case 'n': | |||
771 | case 'N': | |||
772 | return false; | |||
773 | default: | |||
774 | return std::nullopt; | |||
775 | } | |||
776 | case 2: | |||
777 | switch (S.front()) { | |||
778 | case 'O': | |||
779 | if (S[1] == 'N') // ON | |||
780 | return true; | |||
781 | [[fallthrough]]; | |||
782 | case 'o': | |||
783 | if (S[1] == 'n') //[Oo]n | |||
784 | return true; | |||
785 | return std::nullopt; | |||
786 | case 'N': | |||
787 | if (S[1] == 'O') // NO | |||
788 | return false; | |||
789 | [[fallthrough]]; | |||
790 | case 'n': | |||
791 | if (S[1] == 'o') //[Nn]o | |||
792 | return false; | |||
793 | return std::nullopt; | |||
794 | default: | |||
795 | return std::nullopt; | |||
796 | } | |||
797 | case 3: | |||
798 | switch (S.front()) { | |||
799 | case 'O': | |||
800 | if (S.drop_front() == "FF") // OFF | |||
801 | return false; | |||
802 | [[fallthrough]]; | |||
803 | case 'o': | |||
804 | if (S.drop_front() == "ff") //[Oo]ff | |||
805 | return false; | |||
806 | return std::nullopt; | |||
807 | case 'Y': | |||
808 | if (S.drop_front() == "ES") // YES | |||
809 | return true; | |||
810 | [[fallthrough]]; | |||
811 | case 'y': | |||
812 | if (S.drop_front() == "es") //[Yy]es | |||
813 | return true; | |||
814 | return std::nullopt; | |||
815 | default: | |||
816 | return std::nullopt; | |||
817 | } | |||
818 | case 4: | |||
819 | switch (S.front()) { | |||
820 | case 'T': | |||
821 | if (S.drop_front() == "RUE") // TRUE | |||
822 | return true; | |||
823 | [[fallthrough]]; | |||
824 | case 't': | |||
825 | if (S.drop_front() == "rue") //[Tt]rue | |||
826 | return true; | |||
827 | return std::nullopt; | |||
828 | default: | |||
829 | return std::nullopt; | |||
830 | } | |||
831 | case 5: | |||
832 | switch (S.front()) { | |||
833 | case 'F': | |||
834 | if (S.drop_front() == "ALSE") // FALSE | |||
835 | return false; | |||
836 | [[fallthrough]]; | |||
837 | case 'f': | |||
838 | if (S.drop_front() == "alse") //[Ff]alse | |||
839 | return false; | |||
840 | return std::nullopt; | |||
841 | default: | |||
842 | return std::nullopt; | |||
843 | } | |||
844 | default: | |||
845 | return std::nullopt; | |||
846 | } | |||
847 | } | |||
848 | ||||
849 | Scanner::Scanner(StringRef Input, SourceMgr &sm, bool ShowColors, | |||
850 | std::error_code *EC) | |||
851 | : SM(sm), ShowColors(ShowColors), EC(EC) { | |||
852 | init(MemoryBufferRef(Input, "YAML")); | |||
853 | } | |||
854 | ||||
855 | Scanner::Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors, | |||
856 | std::error_code *EC) | |||
857 | : SM(SM_), ShowColors(ShowColors), EC(EC) { | |||
858 | init(Buffer); | |||
859 | } | |||
860 | ||||
861 | void Scanner::init(MemoryBufferRef Buffer) { | |||
862 | InputBuffer = Buffer; | |||
863 | Current = InputBuffer.getBufferStart(); | |||
864 | End = InputBuffer.getBufferEnd(); | |||
865 | Indent = -1; | |||
866 | Column = 0; | |||
867 | Line = 0; | |||
868 | FlowLevel = 0; | |||
869 | IsStartOfStream = true; | |||
870 | IsSimpleKeyAllowed = true; | |||
871 | Failed = false; | |||
872 | std::unique_ptr<MemoryBuffer> InputBufferOwner = | |||
873 | MemoryBuffer::getMemBuffer(Buffer, /*RequiresNullTerminator=*/false); | |||
874 | SM.AddNewSourceBuffer(std::move(InputBufferOwner), SMLoc()); | |||
875 | } | |||
876 | ||||
877 | Token &Scanner::peekNext() { | |||
878 | // If the current token is a possible simple key, keep parsing until we | |||
879 | // can confirm. | |||
880 | bool NeedMore = false; | |||
881 | while (true) { | |||
882 | if (TokenQueue.empty() || NeedMore) { | |||
883 | if (!fetchMoreTokens()) { | |||
884 | TokenQueue.clear(); | |||
885 | SimpleKeys.clear(); | |||
886 | TokenQueue.push_back(Token()); | |||
887 | return TokenQueue.front(); | |||
888 | } | |||
889 | } | |||
890 | assert(!TokenQueue.empty() &&(static_cast <bool> (!TokenQueue.empty() && "fetchMoreTokens lied about getting tokens!" ) ? void (0) : __assert_fail ("!TokenQueue.empty() && \"fetchMoreTokens lied about getting tokens!\"" , "llvm/lib/Support/YAMLParser.cpp", 891, __extension__ __PRETTY_FUNCTION__ )) | |||
891 | "fetchMoreTokens lied about getting tokens!")(static_cast <bool> (!TokenQueue.empty() && "fetchMoreTokens lied about getting tokens!" ) ? void (0) : __assert_fail ("!TokenQueue.empty() && \"fetchMoreTokens lied about getting tokens!\"" , "llvm/lib/Support/YAMLParser.cpp", 891, __extension__ __PRETTY_FUNCTION__ )); | |||
892 | ||||
893 | removeStaleSimpleKeyCandidates(); | |||
894 | SimpleKey SK; | |||
895 | SK.Tok = TokenQueue.begin(); | |||
896 | if (!is_contained(SimpleKeys, SK)) | |||
897 | break; | |||
898 | else | |||
899 | NeedMore = true; | |||
900 | } | |||
901 | return TokenQueue.front(); | |||
902 | } | |||
903 | ||||
904 | Token Scanner::getNext() { | |||
905 | Token Ret = peekNext(); | |||
906 | // TokenQueue can be empty if there was an error getting the next token. | |||
907 | if (!TokenQueue.empty()) | |||
908 | TokenQueue.pop_front(); | |||
909 | ||||
910 | // There cannot be any referenced Token's if the TokenQueue is empty. So do a | |||
911 | // quick deallocation of them all. | |||
912 | if (TokenQueue.empty()) | |||
913 | TokenQueue.resetAlloc(); | |||
914 | ||||
915 | return Ret; | |||
916 | } | |||
917 | ||||
918 | StringRef::iterator Scanner::skip_nb_char(StringRef::iterator Position) { | |||
919 | if (Position == End) | |||
920 | return Position; | |||
921 | // Check 7 bit c-printable - b-char. | |||
922 | if ( *Position == 0x09 | |||
923 | || (*Position >= 0x20 && *Position <= 0x7E)) | |||
924 | return Position + 1; | |||
925 | ||||
926 | // Check for valid UTF-8. | |||
927 | if (uint8_t(*Position) & 0x80) { | |||
928 | UTF8Decoded u8d = decodeUTF8(Position); | |||
929 | if ( u8d.second != 0 | |||
930 | && u8d.first != 0xFEFF | |||
931 | && ( u8d.first == 0x85 | |||
932 | || ( u8d.first >= 0xA0 | |||
933 | && u8d.first <= 0xD7FF) | |||
934 | || ( u8d.first >= 0xE000 | |||
935 | && u8d.first <= 0xFFFD) | |||
936 | || ( u8d.first >= 0x10000 | |||
937 | && u8d.first <= 0x10FFFF))) | |||
938 | return Position + u8d.second; | |||
939 | } | |||
940 | return Position; | |||
941 | } | |||
942 | ||||
943 | StringRef::iterator Scanner::skip_b_break(StringRef::iterator Position) { | |||
944 | if (Position == End) | |||
945 | return Position; | |||
946 | if (*Position == 0x0D) { | |||
947 | if (Position + 1 != End && *(Position + 1) == 0x0A) | |||
948 | return Position + 2; | |||
949 | return Position + 1; | |||
950 | } | |||
951 | ||||
952 | if (*Position == 0x0A) | |||
953 | return Position + 1; | |||
954 | return Position; | |||
955 | } | |||
956 | ||||
957 | StringRef::iterator Scanner::skip_s_space(StringRef::iterator Position) { | |||
958 | if (Position == End) | |||
959 | return Position; | |||
960 | if (*Position == ' ') | |||
961 | return Position + 1; | |||
962 | return Position; | |||
963 | } | |||
964 | ||||
965 | StringRef::iterator Scanner::skip_s_white(StringRef::iterator Position) { | |||
966 | if (Position == End) | |||
967 | return Position; | |||
968 | if (*Position == ' ' || *Position == '\t') | |||
969 | return Position + 1; | |||
970 | return Position; | |||
971 | } | |||
972 | ||||
973 | StringRef::iterator Scanner::skip_ns_char(StringRef::iterator Position) { | |||
974 | if (Position == End) | |||
975 | return Position; | |||
976 | if (*Position == ' ' || *Position == '\t') | |||
977 | return Position; | |||
978 | return skip_nb_char(Position); | |||
979 | } | |||
980 | ||||
981 | StringRef::iterator Scanner::skip_while( SkipWhileFunc Func | |||
982 | , StringRef::iterator Position) { | |||
983 | while (true) { | |||
984 | StringRef::iterator i = (this->*Func)(Position); | |||
985 | if (i == Position) | |||
986 | break; | |||
987 | Position = i; | |||
988 | } | |||
989 | return Position; | |||
990 | } | |||
991 | ||||
992 | void Scanner::advanceWhile(SkipWhileFunc Func) { | |||
993 | auto Final = skip_while(Func, Current); | |||
994 | Column += Final - Current; | |||
995 | Current = Final; | |||
996 | } | |||
997 | ||||
/// Returns true if \p C is an ASCII hexadecimal digit ([0-9A-Fa-f]).
///
/// Used to validate the two characters following '%' in a URI escape. Only
/// genuine hex digits are accepted, so a sequence such as "%zz" is no longer
/// treated as an escape.
static bool is_ns_hex_digit(const char C) {
  return (C >= '0' && C <= '9') || (C >= 'a' && C <= 'f') ||
         (C >= 'A' && C <= 'F');
}
999 | ||||
/// Returns true if \p C is a YAML word character: an ASCII decimal digit, an
/// ASCII letter, or '-'.
///
/// Decimal digits are part of the YAML ns-word-char production; without them
/// a URI such as "tag:yaml.org,2002:str" stops being scanned at the first
/// digit.
static bool is_ns_word_char(const char C) {
  return C == '-' || (C >= '0' && C <= '9') || (C >= 'a' && C <= 'z') ||
         (C >= 'A' && C <= 'Z');
}
1001 | ||||
1002 | void Scanner::scan_ns_uri_char() { | |||
1003 | while (true) { | |||
1004 | if (Current == End) | |||
1005 | break; | |||
1006 | if (( *Current == '%' | |||
1007 | && Current + 2 < End | |||
1008 | && is_ns_hex_digit(*(Current + 1)) | |||
1009 | && is_ns_hex_digit(*(Current + 2))) | |||
1010 | || is_ns_word_char(*Current) | |||
1011 | || StringRef(Current, 1).find_first_of("#;/?:@&=+$,_.!~*'()[]") | |||
1012 | != StringRef::npos) { | |||
1013 | ++Current; | |||
1014 | ++Column; | |||
1015 | } else | |||
1016 | break; | |||
1017 | } | |||
1018 | } | |||
1019 | ||||
1020 | bool Scanner::consume(uint32_t Expected) { | |||
1021 | if (Expected >= 0x80) { | |||
1022 | setError("Cannot consume non-ascii characters", Current); | |||
1023 | return false; | |||
1024 | } | |||
1025 | if (Current == End) | |||
1026 | return false; | |||
1027 | if (uint8_t(*Current) >= 0x80) { | |||
1028 | setError("Cannot consume non-ascii characters", Current); | |||
1029 | return false; | |||
1030 | } | |||
1031 | if (uint8_t(*Current) == Expected) { | |||
1032 | ++Current; | |||
1033 | ++Column; | |||
1034 | return true; | |||
1035 | } | |||
1036 | return false; | |||
1037 | } | |||
1038 | ||||
1039 | void Scanner::skip(uint32_t Distance) { | |||
1040 | Current += Distance; | |||
1041 | Column += Distance; | |||
1042 | assert(Current <= End && "Skipped past the end")(static_cast <bool> (Current <= End && "Skipped past the end" ) ? void (0) : __assert_fail ("Current <= End && \"Skipped past the end\"" , "llvm/lib/Support/YAMLParser.cpp", 1042, __extension__ __PRETTY_FUNCTION__ )); | |||
1043 | } | |||
1044 | ||||
1045 | bool Scanner::isBlankOrBreak(StringRef::iterator Position) { | |||
1046 | if (Position == End) | |||
1047 | return false; | |||
1048 | return *Position == ' ' || *Position == '\t' || *Position == '\r' || | |||
1049 | *Position == '\n'; | |||
1050 | } | |||
1051 | ||||
1052 | bool Scanner::isLineEmpty(StringRef Line) { | |||
1053 | for (const auto *Position = Line.begin(); Position != Line.end(); ++Position) | |||
1054 | if (!isBlankOrBreak(Position)) | |||
1055 | return false; | |||
1056 | return true; | |||
1057 | } | |||
1058 | ||||
1059 | bool Scanner::consumeLineBreakIfPresent() { | |||
1060 | auto Next = skip_b_break(Current); | |||
1061 | if (Next == Current) | |||
1062 | return false; | |||
1063 | Column = 0; | |||
1064 | ++Line; | |||
1065 | Current = Next; | |||
1066 | return true; | |||
1067 | } | |||
1068 | ||||
1069 | void Scanner::saveSimpleKeyCandidate( TokenQueueT::iterator Tok | |||
1070 | , unsigned AtColumn | |||
1071 | , bool IsRequired) { | |||
1072 | if (IsSimpleKeyAllowed) { | |||
1073 | SimpleKey SK; | |||
1074 | SK.Tok = Tok; | |||
1075 | SK.Line = Line; | |||
1076 | SK.Column = AtColumn; | |||
1077 | SK.IsRequired = IsRequired; | |||
1078 | SK.FlowLevel = FlowLevel; | |||
1079 | SimpleKeys.push_back(SK); | |||
1080 | } | |||
1081 | } | |||
1082 | ||||
1083 | void Scanner::removeStaleSimpleKeyCandidates() { | |||
1084 | for (SmallVectorImpl<SimpleKey>::iterator i = SimpleKeys.begin(); | |||
1085 | i != SimpleKeys.end();) { | |||
1086 | if (i->Line != Line || i->Column + 1024 < Column) { | |||
1087 | if (i->IsRequired) | |||
1088 | setError( "Could not find expected : for simple key" | |||
1089 | , i->Tok->Range.begin()); | |||
1090 | i = SimpleKeys.erase(i); | |||
1091 | } else | |||
1092 | ++i; | |||
1093 | } | |||
1094 | } | |||
1095 | ||||
1096 | void Scanner::removeSimpleKeyCandidatesOnFlowLevel(unsigned Level) { | |||
1097 | if (!SimpleKeys.empty() && (SimpleKeys.end() - 1)->FlowLevel == Level) | |||
1098 | SimpleKeys.pop_back(); | |||
1099 | } | |||
1100 | ||||
1101 | bool Scanner::unrollIndent(int ToColumn) { | |||
1102 | Token T; | |||
1103 | // Indentation is ignored in flow. | |||
1104 | if (FlowLevel != 0) | |||
1105 | return true; | |||
1106 | ||||
1107 | while (Indent > ToColumn) { | |||
1108 | T.Kind = Token::TK_BlockEnd; | |||
1109 | T.Range = StringRef(Current, 1); | |||
1110 | TokenQueue.push_back(T); | |||
1111 | Indent = Indents.pop_back_val(); | |||
1112 | } | |||
1113 | ||||
1114 | return true; | |||
1115 | } | |||
1116 | ||||
1117 | bool Scanner::rollIndent( int ToColumn | |||
1118 | , Token::TokenKind Kind | |||
1119 | , TokenQueueT::iterator InsertPoint) { | |||
1120 | if (FlowLevel) | |||
1121 | return true; | |||
1122 | if (Indent < ToColumn) { | |||
1123 | Indents.push_back(Indent); | |||
1124 | Indent = ToColumn; | |||
1125 | ||||
1126 | Token T; | |||
1127 | T.Kind = Kind; | |||
1128 | T.Range = StringRef(Current, 0); | |||
1129 | TokenQueue.insert(InsertPoint, T); | |||
1130 | } | |||
1131 | return true; | |||
1132 | } | |||
1133 | ||||
1134 | void Scanner::skipComment() { | |||
1135 | if (Current == End || *Current != '#') | |||
1136 | return; | |||
1137 | while (true) { | |||
1138 | // This may skip more than one byte, thus Column is only incremented | |||
1139 | // for code points. | |||
1140 | StringRef::iterator I = skip_nb_char(Current); | |||
1141 | if (I == Current) | |||
1142 | break; | |||
1143 | Current = I; | |||
1144 | ++Column; | |||
1145 | } | |||
1146 | } | |||
1147 | ||||
1148 | void Scanner::scanToNextToken() { | |||
1149 | while (true) { | |||
1150 | while (Current != End && (*Current == ' ' || *Current == '\t')) { | |||
1151 | skip(1); | |||
1152 | } | |||
1153 | ||||
1154 | skipComment(); | |||
1155 | ||||
1156 | // Skip EOL. | |||
1157 | StringRef::iterator i = skip_b_break(Current); | |||
1158 | if (i == Current) | |||
1159 | break; | |||
1160 | Current = i; | |||
1161 | ++Line; | |||
1162 | Column = 0; | |||
1163 | // New lines may start a simple key. | |||
1164 | if (!FlowLevel) | |||
1165 | IsSimpleKeyAllowed = true; | |||
1166 | } | |||
1167 | } | |||
1168 | ||||
1169 | bool Scanner::scanStreamStart() { | |||
1170 | IsStartOfStream = false; | |||
1171 | ||||
1172 | EncodingInfo EI = getUnicodeEncoding(currentInput()); | |||
1173 | ||||
1174 | Token T; | |||
1175 | T.Kind = Token::TK_StreamStart; | |||
1176 | T.Range = StringRef(Current, EI.second); | |||
1177 | TokenQueue.push_back(T); | |||
1178 | Current += EI.second; | |||
1179 | return true; | |||
1180 | } | |||
1181 | ||||
1182 | bool Scanner::scanStreamEnd() { | |||
1183 | // Force an ending new line if one isn't present. | |||
1184 | if (Column != 0) { | |||
1185 | Column = 0; | |||
1186 | ++Line; | |||
1187 | } | |||
1188 | ||||
1189 | unrollIndent(-1); | |||
1190 | SimpleKeys.clear(); | |||
1191 | IsSimpleKeyAllowed = false; | |||
1192 | ||||
1193 | Token T; | |||
1194 | T.Kind = Token::TK_StreamEnd; | |||
1195 | T.Range = StringRef(Current, 0); | |||
1196 | TokenQueue.push_back(T); | |||
1197 | return true; | |||
1198 | } | |||
1199 | ||||
1200 | bool Scanner::scanDirective() { | |||
1201 | // Reset the indentation level. | |||
1202 | unrollIndent(-1); | |||
1203 | SimpleKeys.clear(); | |||
1204 | IsSimpleKeyAllowed = false; | |||
1205 | ||||
1206 | StringRef::iterator Start = Current; | |||
1207 | consume('%'); | |||
1208 | StringRef::iterator NameStart = Current; | |||
1209 | Current = skip_while(&Scanner::skip_ns_char, Current); | |||
1210 | StringRef Name(NameStart, Current - NameStart); | |||
1211 | Current = skip_while(&Scanner::skip_s_white, Current); | |||
1212 | ||||
1213 | Token T; | |||
1214 | if (Name == "YAML") { | |||
1215 | Current = skip_while(&Scanner::skip_ns_char, Current); | |||
1216 | T.Kind = Token::TK_VersionDirective; | |||
1217 | T.Range = StringRef(Start, Current - Start); | |||
1218 | TokenQueue.push_back(T); | |||
1219 | return true; | |||
1220 | } else if(Name == "TAG") { | |||
1221 | Current = skip_while(&Scanner::skip_ns_char, Current); | |||
1222 | Current = skip_while(&Scanner::skip_s_white, Current); | |||
1223 | Current = skip_while(&Scanner::skip_ns_char, Current); | |||
1224 | T.Kind = Token::TK_TagDirective; | |||
1225 | T.Range = StringRef(Start, Current - Start); | |||
1226 | TokenQueue.push_back(T); | |||
1227 | return true; | |||
1228 | } | |||
1229 | return false; | |||
1230 | } | |||
1231 | ||||
1232 | bool Scanner::scanDocumentIndicator(bool IsStart) { | |||
1233 | unrollIndent(-1); | |||
1234 | SimpleKeys.clear(); | |||
1235 | IsSimpleKeyAllowed = false; | |||
1236 | ||||
1237 | Token T; | |||
1238 | T.Kind = IsStart ? Token::TK_DocumentStart : Token::TK_DocumentEnd; | |||
1239 | T.Range = StringRef(Current, 3); | |||
1240 | skip(3); | |||
1241 | TokenQueue.push_back(T); | |||
1242 | return true; | |||
1243 | } | |||
1244 | ||||
1245 | bool Scanner::scanFlowCollectionStart(bool IsSequence) { | |||
1246 | Token T; | |||
1247 | T.Kind = IsSequence ? Token::TK_FlowSequenceStart | |||
1248 | : Token::TK_FlowMappingStart; | |||
1249 | T.Range = StringRef(Current, 1); | |||
1250 | skip(1); | |||
1251 | TokenQueue.push_back(T); | |||
1252 | ||||
1253 | // [ and { may begin a simple key. | |||
1254 | saveSimpleKeyCandidate(--TokenQueue.end(), Column - 1, false); | |||
1255 | ||||
1256 | // And may also be followed by a simple key. | |||
1257 | IsSimpleKeyAllowed = true; | |||
1258 | ++FlowLevel; | |||
1259 | return true; | |||
1260 | } | |||
1261 | ||||
1262 | bool Scanner::scanFlowCollectionEnd(bool IsSequence) { | |||
1263 | removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); | |||
1264 | IsSimpleKeyAllowed = false; | |||
1265 | Token T; | |||
1266 | T.Kind = IsSequence ? Token::TK_FlowSequenceEnd | |||
1267 | : Token::TK_FlowMappingEnd; | |||
1268 | T.Range = StringRef(Current, 1); | |||
1269 | skip(1); | |||
1270 | TokenQueue.push_back(T); | |||
1271 | if (FlowLevel) | |||
1272 | --FlowLevel; | |||
1273 | return true; | |||
1274 | } | |||
1275 | ||||
1276 | bool Scanner::scanFlowEntry() { | |||
1277 | removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); | |||
1278 | IsSimpleKeyAllowed = true; | |||
1279 | Token T; | |||
1280 | T.Kind = Token::TK_FlowEntry; | |||
1281 | T.Range = StringRef(Current, 1); | |||
1282 | skip(1); | |||
1283 | TokenQueue.push_back(T); | |||
1284 | return true; | |||
1285 | } | |||
1286 | ||||
1287 | bool Scanner::scanBlockEntry() { | |||
1288 | rollIndent(Column, Token::TK_BlockSequenceStart, TokenQueue.end()); | |||
1289 | removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); | |||
1290 | IsSimpleKeyAllowed = true; | |||
1291 | Token T; | |||
1292 | T.Kind = Token::TK_BlockEntry; | |||
1293 | T.Range = StringRef(Current, 1); | |||
1294 | skip(1); | |||
1295 | TokenQueue.push_back(T); | |||
1296 | return true; | |||
1297 | } | |||
1298 | ||||
1299 | bool Scanner::scanKey() { | |||
1300 | if (!FlowLevel) | |||
1301 | rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end()); | |||
1302 | ||||
1303 | removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); | |||
1304 | IsSimpleKeyAllowed = !FlowLevel; | |||
1305 | ||||
1306 | Token T; | |||
1307 | T.Kind = Token::TK_Key; | |||
1308 | T.Range = StringRef(Current, 1); | |||
1309 | skip(1); | |||
1310 | TokenQueue.push_back(T); | |||
1311 | return true; | |||
1312 | } | |||
1313 | ||||
1314 | bool Scanner::scanValue() { | |||
1315 | // If the previous token could have been a simple key, insert the key token | |||
1316 | // into the token queue. | |||
1317 | if (!SimpleKeys.empty()) { | |||
1318 | SimpleKey SK = SimpleKeys.pop_back_val(); | |||
1319 | Token T; | |||
1320 | T.Kind = Token::TK_Key; | |||
1321 | T.Range = SK.Tok->Range; | |||
1322 | TokenQueueT::iterator i, e; | |||
1323 | for (i = TokenQueue.begin(), e = TokenQueue.end(); i != e; ++i) { | |||
1324 | if (i == SK.Tok) | |||
1325 | break; | |||
1326 | } | |||
1327 | if (i == e) { | |||
1328 | Failed = true; | |||
1329 | return false; | |||
1330 | } | |||
1331 | i = TokenQueue.insert(i, T); | |||
1332 | ||||
1333 | // We may also need to add a Block-Mapping-Start token. | |||
1334 | rollIndent(SK.Column, Token::TK_BlockMappingStart, i); | |||
1335 | ||||
1336 | IsSimpleKeyAllowed = false; | |||
1337 | } else { | |||
1338 | if (!FlowLevel) | |||
1339 | rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end()); | |||
1340 | IsSimpleKeyAllowed = !FlowLevel; | |||
1341 | } | |||
1342 | ||||
1343 | Token T; | |||
1344 | T.Kind = Token::TK_Value; | |||
1345 | T.Range = StringRef(Current, 1); | |||
1346 | skip(1); | |||
1347 | TokenQueue.push_back(T); | |||
1348 | return true; | |||
1349 | } | |||
1350 | ||||
1351 | // Forbidding inlining improves performance by roughly 20%. | |||
1352 | // FIXME: Remove once llvm optimizes this to the faster version without hints. | |||
1353 | LLVM_ATTRIBUTE_NOINLINE__attribute__((noinline)) static bool | |||
1354 | wasEscaped(StringRef::iterator First, StringRef::iterator Position); | |||
1355 | ||||
1356 | // Returns whether a character at 'Position' was escaped with a leading '\'. | |||
1357 | // 'First' specifies the position of the first character in the string. | |||
1358 | static bool wasEscaped(StringRef::iterator First, | |||
1359 | StringRef::iterator Position) { | |||
1360 | assert(Position - 1 >= First)(static_cast <bool> (Position - 1 >= First) ? void ( 0) : __assert_fail ("Position - 1 >= First", "llvm/lib/Support/YAMLParser.cpp" , 1360, __extension__ __PRETTY_FUNCTION__)); | |||
1361 | StringRef::iterator I = Position - 1; | |||
1362 | // We calculate the number of consecutive '\'s before the current position | |||
1363 | // by iterating backwards through our string. | |||
1364 | while (I >= First && *I == '\\') --I; | |||
1365 | // (Position - 1 - I) now contains the number of '\'s before the current | |||
1366 | // position. If it is odd, the character at 'Position' was escaped. | |||
1367 | return (Position - 1 - I) % 2 == 1; | |||
1368 | } | |||
1369 | ||||
1370 | bool Scanner::scanFlowScalar(bool IsDoubleQuoted) { | |||
1371 | StringRef::iterator Start = Current; | |||
1372 | unsigned ColStart = Column; | |||
1373 | if (IsDoubleQuoted) { | |||
1374 | do { | |||
1375 | ++Current; | |||
1376 | while (Current != End && *Current != '"') | |||
1377 | ++Current; | |||
1378 | // Repeat until the previous character was not a '\' or was an escaped | |||
1379 | // backslash. | |||
1380 | } while ( Current != End | |||
1381 | && *(Current - 1) == '\\' | |||
1382 | && wasEscaped(Start + 1, Current)); | |||
1383 | } else { | |||
1384 | skip(1); | |||
1385 | while (Current != End) { | |||
1386 | // Skip a ' followed by another '. | |||
1387 | if (Current + 1 < End && *Current == '\'' && *(Current + 1) == '\'') { | |||
1388 | skip(2); | |||
1389 | continue; | |||
1390 | } else if (*Current == '\'') | |||
1391 | break; | |||
1392 | StringRef::iterator i = skip_nb_char(Current); | |||
1393 | if (i == Current) { | |||
1394 | i = skip_b_break(Current); | |||
1395 | if (i == Current) | |||
1396 | break; | |||
1397 | Current = i; | |||
1398 | Column = 0; | |||
1399 | ++Line; | |||
1400 | } else { | |||
1401 | if (i == End) | |||
1402 | break; | |||
1403 | Current = i; | |||
1404 | ++Column; | |||
1405 | } | |||
1406 | } | |||
1407 | } | |||
1408 | ||||
1409 | if (Current == End) { | |||
1410 | setError("Expected quote at end of scalar", Current); | |||
1411 | return false; | |||
1412 | } | |||
1413 | ||||
1414 | skip(1); // Skip ending quote. | |||
1415 | Token T; | |||
1416 | T.Kind = Token::TK_Scalar; | |||
1417 | T.Range = StringRef(Start, Current - Start); | |||
1418 | TokenQueue.push_back(T); | |||
1419 | ||||
1420 | saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false); | |||
1421 | ||||
1422 | IsSimpleKeyAllowed = false; | |||
1423 | ||||
1424 | return true; | |||
1425 | } | |||
1426 | ||||
1427 | bool Scanner::scanPlainScalar() { | |||
1428 | StringRef::iterator Start = Current; | |||
1429 | unsigned ColStart = Column; | |||
1430 | unsigned LeadingBlanks = 0; | |||
1431 | assert(Indent >= -1 && "Indent must be >= -1 !")(static_cast <bool> (Indent >= -1 && "Indent must be >= -1 !" ) ? void (0) : __assert_fail ("Indent >= -1 && \"Indent must be >= -1 !\"" , "llvm/lib/Support/YAMLParser.cpp", 1431, __extension__ __PRETTY_FUNCTION__ )); | |||
1432 | unsigned indent = static_cast<unsigned>(Indent + 1); | |||
1433 | while (Current != End) { | |||
1434 | if (*Current == '#') | |||
1435 | break; | |||
1436 | ||||
1437 | while (Current != End && !isBlankOrBreak(Current)) { | |||
1438 | if (FlowLevel && *Current == ':' && | |||
1439 | (Current + 1 == End || | |||
1440 | !(isBlankOrBreak(Current + 1) || *(Current + 1) == ','))) { | |||
1441 | setError("Found unexpected ':' while scanning a plain scalar", Current); | |||
1442 | return false; | |||
1443 | } | |||
1444 | ||||
1445 | // Check for the end of the plain scalar. | |||
1446 | if ( (*Current == ':' && isBlankOrBreak(Current + 1)) | |||
1447 | || ( FlowLevel | |||
1448 | && (StringRef(Current, 1).find_first_of(",:?[]{}") | |||
1449 | != StringRef::npos))) | |||
1450 | break; | |||
1451 | ||||
1452 | StringRef::iterator i = skip_nb_char(Current); | |||
1453 | if (i == Current) | |||
1454 | break; | |||
1455 | Current = i; | |||
1456 | ++Column; | |||
1457 | } | |||
1458 | ||||
1459 | // Are we at the end? | |||
1460 | if (!isBlankOrBreak(Current)) | |||
1461 | break; | |||
1462 | ||||
1463 | // Eat blanks. | |||
1464 | StringRef::iterator Tmp = Current; | |||
1465 | while (isBlankOrBreak(Tmp)) { | |||
1466 | StringRef::iterator i = skip_s_white(Tmp); | |||
1467 | if (i != Tmp) { | |||
1468 | if (LeadingBlanks && (Column < indent) && *Tmp == '\t') { | |||
1469 | setError("Found invalid tab character in indentation", Tmp); | |||
1470 | return false; | |||
1471 | } | |||
1472 | Tmp = i; | |||
1473 | ++Column; | |||
1474 | } else { | |||
1475 | i = skip_b_break(Tmp); | |||
1476 | if (!LeadingBlanks) | |||
1477 | LeadingBlanks = 1; | |||
1478 | Tmp = i; | |||
1479 | Column = 0; | |||
1480 | ++Line; | |||
1481 | } | |||
1482 | } | |||
1483 | ||||
1484 | if (!FlowLevel && Column < indent) | |||
1485 | break; | |||
1486 | ||||
1487 | Current = Tmp; | |||
1488 | } | |||
1489 | if (Start == Current) { | |||
1490 | setError("Got empty plain scalar", Start); | |||
1491 | return false; | |||
1492 | } | |||
1493 | Token T; | |||
1494 | T.Kind = Token::TK_Scalar; | |||
1495 | T.Range = StringRef(Start, Current - Start); | |||
1496 | TokenQueue.push_back(T); | |||
1497 | ||||
1498 | // Plain scalars can be simple keys. | |||
1499 | saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false); | |||
1500 | ||||
1501 | IsSimpleKeyAllowed = false; | |||
1502 | ||||
1503 | return true; | |||
1504 | } | |||
1505 | ||||
1506 | bool Scanner::scanAliasOrAnchor(bool IsAlias) { | |||
1507 | StringRef::iterator Start = Current; | |||
1508 | unsigned ColStart = Column; | |||
1509 | skip(1); | |||
1510 | while (Current != End) { | |||
1511 | if ( *Current == '[' || *Current == ']' | |||
1512 | || *Current == '{' || *Current == '}' | |||
1513 | || *Current == ',' | |||
1514 | || *Current == ':') | |||
1515 | break; | |||
1516 | StringRef::iterator i = skip_ns_char(Current); | |||
1517 | if (i == Current) | |||
1518 | break; | |||
1519 | Current = i; | |||
1520 | ++Column; | |||
1521 | } | |||
1522 | ||||
1523 | if (Start + 1 == Current) { | |||
1524 | setError("Got empty alias or anchor", Start); | |||
1525 | return false; | |||
1526 | } | |||
1527 | ||||
1528 | Token T; | |||
1529 | T.Kind = IsAlias ? Token::TK_Alias : Token::TK_Anchor; | |||
1530 | T.Range = StringRef(Start, Current - Start); | |||
1531 | TokenQueue.push_back(T); | |||
1532 | ||||
1533 | // Alias and anchors can be simple keys. | |||
1534 | saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false); | |||
1535 | ||||
1536 | IsSimpleKeyAllowed = false; | |||
1537 | ||||
1538 | return true; | |||
1539 | } | |||
1540 | ||||
1541 | bool Scanner::scanBlockScalarIndicators(char &StyleIndicator, | |||
1542 | char &ChompingIndicator, | |||
1543 | unsigned &IndentIndicator, | |||
1544 | bool &IsDone) { | |||
1545 | StyleIndicator = scanBlockStyleIndicator(); | |||
1546 | if (!scanBlockScalarHeader(ChompingIndicator, IndentIndicator, IsDone)) | |||
1547 | return false; | |||
1548 | return true; | |||
1549 | } | |||
1550 | ||||
1551 | char Scanner::scanBlockStyleIndicator() { | |||
1552 | char Indicator = ' '; | |||
1553 | if (Current != End && (*Current == '>' || *Current == '|')) { | |||
1554 | Indicator = *Current; | |||
1555 | skip(1); | |||
1556 | } | |||
1557 | return Indicator; | |||
1558 | } | |||
1559 | ||||
1560 | char Scanner::scanBlockChompingIndicator() { | |||
1561 | char Indicator = ' '; | |||
1562 | if (Current != End && (*Current == '+' || *Current == '-')) { | |||
1563 | Indicator = *Current; | |||
1564 | skip(1); | |||
1565 | } | |||
1566 | return Indicator; | |||
1567 | } | |||
1568 | ||||
1569 | /// Get the number of line breaks after chomping. | |||
1570 | /// | |||
1571 | /// Return the number of trailing line breaks to emit, depending on | |||
1572 | /// \p ChompingIndicator. | |||
1573 | static unsigned getChompedLineBreaks(char ChompingIndicator, | |||
1574 | unsigned LineBreaks, StringRef Str) { | |||
1575 | if (ChompingIndicator == '-') // Strip all line breaks. | |||
1576 | return 0; | |||
1577 | if (ChompingIndicator == '+') // Keep all line breaks. | |||
1578 | return LineBreaks; | |||
1579 | // Clip trailing lines. | |||
1580 | return Str.empty() ? 0 : 1; | |||
1581 | } | |||
1582 | ||||
1583 | unsigned Scanner::scanBlockIndentationIndicator() { | |||
1584 | unsigned Indent = 0; | |||
1585 | if (Current != End && (*Current >= '1' && *Current <= '9')) { | |||
1586 | Indent = unsigned(*Current - '0'); | |||
1587 | skip(1); | |||
1588 | } | |||
1589 | return Indent; | |||
1590 | } | |||
1591 | ||||
1592 | bool Scanner::scanBlockScalarHeader(char &ChompingIndicator, | |||
1593 | unsigned &IndentIndicator, bool &IsDone) { | |||
1594 | auto Start = Current; | |||
1595 | ||||
1596 | ChompingIndicator = scanBlockChompingIndicator(); | |||
1597 | IndentIndicator = scanBlockIndentationIndicator(); | |||
1598 | // Check for the chomping indicator once again. | |||
1599 | if (ChompingIndicator == ' ') | |||
1600 | ChompingIndicator = scanBlockChompingIndicator(); | |||
1601 | Current = skip_while(&Scanner::skip_s_white, Current); | |||
1602 | skipComment(); | |||
1603 | ||||
1604 | if (Current == End) { // EOF, we have an empty scalar. | |||
1605 | Token T; | |||
1606 | T.Kind = Token::TK_BlockScalar; | |||
1607 | T.Range = StringRef(Start, Current - Start); | |||
1608 | TokenQueue.push_back(T); | |||
1609 | IsDone = true; | |||
1610 | return true; | |||
1611 | } | |||
1612 | ||||
1613 | if (!consumeLineBreakIfPresent()) { | |||
1614 | setError("Expected a line break after block scalar header", Current); | |||
1615 | return false; | |||
1616 | } | |||
1617 | return true; | |||
1618 | } | |||
1619 | ||||
1620 | bool Scanner::findBlockScalarIndent(unsigned &BlockIndent, | |||
1621 | unsigned BlockExitIndent, | |||
1622 | unsigned &LineBreaks, bool &IsDone) { | |||
1623 | unsigned MaxAllSpaceLineCharacters = 0; | |||
1624 | StringRef::iterator LongestAllSpaceLine; | |||
| ||||
1625 | ||||
1626 | while (true) { | |||
1627 | advanceWhile(&Scanner::skip_s_space); | |||
1628 | if (skip_nb_char(Current) != Current) { | |||
1629 | // This line isn't empty, so try and find the indentation. | |||
1630 | if (Column <= BlockExitIndent) { // End of the block literal. | |||
1631 | IsDone = true; | |||
1632 | return true; | |||
1633 | } | |||
1634 | // We found the block's indentation. | |||
1635 | BlockIndent = Column; | |||
1636 | if (MaxAllSpaceLineCharacters > BlockIndent) { | |||
1637 | setError( | |||
| ||||
1638 | "Leading all-spaces line must be smaller than the block indent", | |||
1639 | LongestAllSpaceLine); | |||
1640 | return false; | |||
1641 | } | |||
1642 | return true; | |||
1643 | } | |||
1644 | if (skip_b_break(Current) != Current && | |||
1645 | Column > MaxAllSpaceLineCharacters) { | |||
1646 | // Record the longest all-space line in case it's longer than the | |||
1647 | // discovered block indent. | |||
1648 | MaxAllSpaceLineCharacters = Column; | |||
1649 | LongestAllSpaceLine = Current; | |||
1650 | } | |||
1651 | ||||
1652 | // Check for EOF. | |||
1653 | if (Current == End) { | |||
1654 | IsDone = true; | |||
1655 | return true; | |||
1656 | } | |||
1657 | ||||
1658 | if (!consumeLineBreakIfPresent()) { | |||
1659 | IsDone = true; | |||
1660 | return true; | |||
1661 | } | |||
1662 | ++LineBreaks; | |||
1663 | } | |||
1664 | return true; | |||
1665 | } | |||
1666 | ||||
1667 | bool Scanner::scanBlockScalarIndent(unsigned BlockIndent, | |||
1668 | unsigned BlockExitIndent, bool &IsDone) { | |||
1669 | // Skip the indentation. | |||
1670 | while (Column < BlockIndent) { | |||
1671 | auto I = skip_s_space(Current); | |||
1672 | if (I == Current) | |||
1673 | break; | |||
1674 | Current = I; | |||
1675 | ++Column; | |||
1676 | } | |||
1677 | ||||
1678 | if (skip_nb_char(Current) == Current) | |||
1679 | return true; | |||
1680 | ||||
1681 | if (Column <= BlockExitIndent) { // End of the block literal. | |||
1682 | IsDone = true; | |||
1683 | return true; | |||
1684 | } | |||
1685 | ||||
1686 | if (Column < BlockIndent) { | |||
1687 | if (Current != End && *Current == '#') { // Trailing comment. | |||
1688 | IsDone = true; | |||
1689 | return true; | |||
1690 | } | |||
1691 | setError("A text line is less indented than the block scalar", Current); | |||
1692 | return false; | |||
1693 | } | |||
1694 | return true; // A normal text line. | |||
1695 | } | |||
1696 | ||||
1697 | bool Scanner::scanBlockScalar(bool IsLiteral) { | |||
1698 | assert(*Current == '|' || *Current == '>')(static_cast <bool> (*Current == '|' || *Current == '>' ) ? void (0) : __assert_fail ("*Current == '|' || *Current == '>'" , "llvm/lib/Support/YAMLParser.cpp", 1698, __extension__ __PRETTY_FUNCTION__ )); | |||
1699 | char StyleIndicator; | |||
1700 | char ChompingIndicator; | |||
1701 | unsigned BlockIndent; | |||
1702 | bool IsDone = false; | |||
1703 | if (!scanBlockScalarIndicators(StyleIndicator, ChompingIndicator, BlockIndent, | |||
1704 | IsDone)) | |||
1705 | return false; | |||
1706 | if (IsDone) | |||
1707 | return true; | |||
1708 | bool IsFolded = StyleIndicator == '>'; | |||
1709 | ||||
1710 | const auto *Start = Current; | |||
1711 | unsigned BlockExitIndent = Indent < 0 ? 0 : (unsigned)Indent; | |||
1712 | unsigned LineBreaks = 0; | |||
1713 | if (BlockIndent == 0) { | |||
1714 | if (!findBlockScalarIndent(BlockIndent, BlockExitIndent, LineBreaks, | |||
1715 | IsDone)) | |||
1716 | return false; | |||
1717 | } | |||
1718 | ||||
1719 | // Scan the block's scalars body. | |||
1720 | SmallString<256> Str; | |||
1721 | while (!IsDone) { | |||
1722 | if (!scanBlockScalarIndent(BlockIndent, BlockExitIndent, IsDone)) | |||
1723 | return false; | |||
1724 | if (IsDone) | |||
1725 | break; | |||
1726 | ||||
1727 | // Parse the current line. | |||
1728 | auto LineStart = Current; | |||
1729 | advanceWhile(&Scanner::skip_nb_char); | |||
1730 | if (LineStart != Current) { | |||
1731 | if (LineBreaks && IsFolded && !Scanner::isLineEmpty(Str)) { | |||
1732 | // The folded style "folds" any single line break between content into a | |||
1733 | // single space, except when that content is "empty" (only contains | |||
1734 | // whitespace) in which case the line break is left as-is. | |||
1735 | if (LineBreaks == 1) { | |||
1736 | Str.append(LineBreaks, | |||
1737 | isLineEmpty(StringRef(LineStart, Current - LineStart)) | |||
1738 | ? '\n' | |||
1739 | : ' '); | |||
1740 | } | |||
1741 | // If we saw a single line break, we are completely replacing it and so | |||
1742 | // want `LineBreaks == 0`. Otherwise this decrement accounts for the | |||
1743 | // fact that the first line break is "trimmed", only being used to | |||
1744 | // signal a sequence of line breaks which should not be folded. | |||
1745 | LineBreaks--; | |||
1746 | } | |||
1747 | Str.append(LineBreaks, '\n'); | |||
1748 | Str.append(StringRef(LineStart, Current - LineStart)); | |||
1749 | LineBreaks = 0; | |||
1750 | } | |||
1751 | ||||
1752 | // Check for EOF. | |||
1753 | if (Current == End) | |||
1754 | break; | |||
1755 | ||||
1756 | if (!consumeLineBreakIfPresent()) | |||
1757 | break; | |||
1758 | ++LineBreaks; | |||
1759 | } | |||
1760 | ||||
1761 | if (Current == End && !LineBreaks) | |||
1762 | // Ensure that there is at least one line break before the end of file. | |||
1763 | LineBreaks = 1; | |||
1764 | Str.append(getChompedLineBreaks(ChompingIndicator, LineBreaks, Str), '\n'); | |||
1765 | ||||
1766 | // New lines may start a simple key. | |||
1767 | if (!FlowLevel) | |||
1768 | IsSimpleKeyAllowed = true; | |||
1769 | ||||
1770 | Token T; | |||
1771 | T.Kind = Token::TK_BlockScalar; | |||
1772 | T.Range = StringRef(Start, Current - Start); | |||
1773 | T.Value = std::string(Str); | |||
1774 | TokenQueue.push_back(T); | |||
1775 | return true; | |||
1776 | } | |||
1777 | ||||
1778 | bool Scanner::scanTag() { | |||
1779 | StringRef::iterator Start = Current; | |||
1780 | unsigned ColStart = Column; | |||
1781 | skip(1); // Eat !. | |||
1782 | if (Current == End || isBlankOrBreak(Current)); // An empty tag. | |||
1783 | else if (*Current == '<') { | |||
1784 | skip(1); | |||
1785 | scan_ns_uri_char(); | |||
1786 | if (!consume('>')) | |||
1787 | return false; | |||
1788 | } else { | |||
1789 | // FIXME: Actually parse the c-ns-shorthand-tag rule. | |||
1790 | Current = skip_while(&Scanner::skip_ns_char, Current); | |||
1791 | } | |||
1792 | ||||
1793 | Token T; | |||
1794 | T.Kind = Token::TK_Tag; | |||
1795 | T.Range = StringRef(Start, Current - Start); | |||
1796 | TokenQueue.push_back(T); | |||
1797 | ||||
1798 | // Tags can be simple keys. | |||
1799 | saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false); | |||
1800 | ||||
1801 | IsSimpleKeyAllowed = false; | |||
1802 | ||||
1803 | return true; | |||
1804 | } | |||
1805 | ||||
1806 | bool Scanner::fetchMoreTokens() { | |||
1807 | if (IsStartOfStream) | |||
1808 | return scanStreamStart(); | |||
1809 | ||||
1810 | scanToNextToken(); | |||
1811 | ||||
1812 | if (Current == End) | |||
1813 | return scanStreamEnd(); | |||
1814 | ||||
1815 | removeStaleSimpleKeyCandidates(); | |||
1816 | ||||
1817 | unrollIndent(Column); | |||
1818 | ||||
1819 | if (Column == 0 && *Current == '%') | |||
1820 | return scanDirective(); | |||
1821 | ||||
1822 | if (Column == 0 && Current + 4 <= End | |||
1823 | && *Current == '-' | |||
1824 | && *(Current + 1) == '-' | |||
1825 | && *(Current + 2) == '-' | |||
1826 | && (Current + 3 == End || isBlankOrBreak(Current + 3))) | |||
1827 | return scanDocumentIndicator(true); | |||
1828 | ||||
1829 | if (Column == 0 && Current + 4 <= End | |||
1830 | && *Current == '.' | |||
1831 | && *(Current + 1) == '.' | |||
1832 | && *(Current + 2) == '.' | |||
1833 | && (Current + 3 == End || isBlankOrBreak(Current + 3))) | |||
1834 | return scanDocumentIndicator(false); | |||
1835 | ||||
1836 | if (*Current == '[') | |||
1837 | return scanFlowCollectionStart(true); | |||
1838 | ||||
1839 | if (*Current == '{') | |||
1840 | return scanFlowCollectionStart(false); | |||
1841 | ||||
1842 | if (*Current == ']') | |||
1843 | return scanFlowCollectionEnd(true); | |||
1844 | ||||
1845 | if (*Current == '}') | |||
1846 | return scanFlowCollectionEnd(false); | |||
1847 | ||||
1848 | if (*Current == ',') | |||
1849 | return scanFlowEntry(); | |||
1850 | ||||
1851 | if (*Current == '-' && isBlankOrBreak(Current + 1)) | |||
1852 | return scanBlockEntry(); | |||
1853 | ||||
1854 | if (*Current == '?' && (FlowLevel || isBlankOrBreak(Current + 1))) | |||
1855 | return scanKey(); | |||
1856 | ||||
1857 | if (*Current == ':' && (FlowLevel || isBlankOrBreak(Current + 1))) | |||
1858 | return scanValue(); | |||
1859 | ||||
1860 | if (*Current == '*') | |||
1861 | return scanAliasOrAnchor(true); | |||
1862 | ||||
1863 | if (*Current == '&') | |||
1864 | return scanAliasOrAnchor(false); | |||
1865 | ||||
1866 | if (*Current == '!') | |||
1867 | return scanTag(); | |||
1868 | ||||
1869 | if (*Current == '|' && !FlowLevel) | |||
1870 | return scanBlockScalar(true); | |||
1871 | ||||
1872 | if (*Current == '>' && !FlowLevel) | |||
1873 | return scanBlockScalar(false); | |||
1874 | ||||
1875 | if (*Current == '\'') | |||
1876 | return scanFlowScalar(false); | |||
1877 | ||||
1878 | if (*Current == '"') | |||
1879 | return scanFlowScalar(true); | |||
1880 | ||||
1881 | // Get a plain scalar. | |||
1882 | StringRef FirstChar(Current, 1); | |||
1883 | if (!(isBlankOrBreak(Current) | |||
1884 | || FirstChar.find_first_of("-?:,[]{}#&*!|>'\"%@`") != StringRef::npos) | |||
1885 | || (*Current == '-' && !isBlankOrBreak(Current + 1)) | |||
1886 | || (!FlowLevel && (*Current == '?' || *Current == ':') | |||
1887 | && isBlankOrBreak(Current + 1)) | |||
1888 | || (!FlowLevel && *Current == ':' | |||
1889 | && Current + 2 < End | |||
1890 | && *(Current + 1) == ':' | |||
1891 | && !isBlankOrBreak(Current + 2))) | |||
1892 | return scanPlainScalar(); | |||
1893 | ||||
1894 | setError("Unrecognized character while tokenizing.", Current); | |||
1895 | return false; | |||
1896 | } | |||
1897 | ||||
1898 | Stream::Stream(StringRef Input, SourceMgr &SM, bool ShowColors, | |||
1899 | std::error_code *EC) | |||
1900 | : scanner(new Scanner(Input, SM, ShowColors, EC)) {} | |||
1901 | ||||
1902 | Stream::Stream(MemoryBufferRef InputBuffer, SourceMgr &SM, bool ShowColors, | |||
1903 | std::error_code *EC) | |||
1904 | : scanner(new Scanner(InputBuffer, SM, ShowColors, EC)) {} | |||
1905 | ||||
1906 | Stream::~Stream() = default; | |||
1907 | ||||
1908 | bool Stream::failed() { return scanner->failed(); } | |||
1909 | ||||
1910 | void Stream::printError(Node *N, const Twine &Msg, SourceMgr::DiagKind Kind) { | |||
1911 | printError(N ? N->getSourceRange() : SMRange(), Msg, Kind); | |||
1912 | } | |||
1913 | ||||
1914 | void Stream::printError(const SMRange &Range, const Twine &Msg, | |||
1915 | SourceMgr::DiagKind Kind) { | |||
1916 | scanner->printError(Range.Start, Kind, Msg, Range); | |||
1917 | } | |||
1918 | ||||
1919 | document_iterator Stream::begin() { | |||
1920 | if (CurrentDoc) | |||
1921 | report_fatal_error("Can only iterate over the stream once"); | |||
1922 | ||||
1923 | // Skip Stream-Start. | |||
1924 | scanner->getNext(); | |||
1925 | ||||
1926 | CurrentDoc.reset(new Document(*this)); | |||
1927 | return document_iterator(CurrentDoc); | |||
1928 | } | |||
1929 | ||||
1930 | document_iterator Stream::end() { | |||
1931 | return document_iterator(); | |||
1932 | } | |||
1933 | ||||
1934 | void Stream::skip() { | |||
1935 | for (Document &Doc : *this) | |||
1936 | Doc.skip(); | |||
1937 | } | |||
1938 | ||||
1939 | Node::Node(unsigned int Type, std::unique_ptr<Document> &D, StringRef A, | |||
1940 | StringRef T) | |||
1941 | : Doc(D), TypeID(Type), Anchor(A), Tag(T) { | |||
1942 | SMLoc Start = SMLoc::getFromPointer(peekNext().Range.begin()); | |||
1943 | SourceRange = SMRange(Start, Start); | |||
1944 | } | |||
1945 | ||||
1946 | std::string Node::getVerbatimTag() const { | |||
1947 | StringRef Raw = getRawTag(); | |||
1948 | if (!Raw.empty() && Raw != "!") { | |||
1949 | std::string Ret; | |||
1950 | if (Raw.find_last_of('!') == 0) { | |||
1951 | Ret = std::string(Doc->getTagMap().find("!")->second); | |||
1952 | Ret += Raw.substr(1); | |||
1953 | return Ret; | |||
1954 | } else if (Raw.startswith("!!")) { | |||
1955 | Ret = std::string(Doc->getTagMap().find("!!")->second); | |||
1956 | Ret += Raw.substr(2); | |||
1957 | return Ret; | |||
1958 | } else { | |||
1959 | StringRef TagHandle = Raw.substr(0, Raw.find_last_of('!') + 1); | |||
1960 | std::map<StringRef, StringRef>::const_iterator It = | |||
1961 | Doc->getTagMap().find(TagHandle); | |||
1962 | if (It != Doc->getTagMap().end()) | |||
1963 | Ret = std::string(It->second); | |||
1964 | else { | |||
1965 | Token T; | |||
1966 | T.Kind = Token::TK_Tag; | |||
1967 | T.Range = TagHandle; | |||
1968 | setError(Twine("Unknown tag handle ") + TagHandle, T); | |||
1969 | } | |||
1970 | Ret += Raw.substr(Raw.find_last_of('!') + 1); | |||
1971 | return Ret; | |||
1972 | } | |||
1973 | } | |||
1974 | ||||
1975 | switch (getType()) { | |||
1976 | case NK_Null: | |||
1977 | return "tag:yaml.org,2002:null"; | |||
1978 | case NK_Scalar: | |||
1979 | case NK_BlockScalar: | |||
1980 | // TODO: Tag resolution. | |||
1981 | return "tag:yaml.org,2002:str"; | |||
1982 | case NK_Mapping: | |||
1983 | return "tag:yaml.org,2002:map"; | |||
1984 | case NK_Sequence: | |||
1985 | return "tag:yaml.org,2002:seq"; | |||
1986 | } | |||
1987 | ||||
1988 | return ""; | |||
1989 | } | |||
1990 | ||||
1991 | Token &Node::peekNext() { | |||
1992 | return Doc->peekNext(); | |||
1993 | } | |||
1994 | ||||
1995 | Token Node::getNext() { | |||
1996 | return Doc->getNext(); | |||
1997 | } | |||
1998 | ||||
1999 | Node *Node::parseBlockNode() { | |||
2000 | return Doc->parseBlockNode(); | |||
2001 | } | |||
2002 | ||||
2003 | BumpPtrAllocator &Node::getAllocator() { | |||
2004 | return Doc->NodeAllocator; | |||
2005 | } | |||
2006 | ||||
2007 | void Node::setError(const Twine &Msg, Token &Tok) const { | |||
2008 | Doc->setError(Msg, Tok); | |||
2009 | } | |||
2010 | ||||
2011 | bool Node::failed() const { | |||
2012 | return Doc->failed(); | |||
2013 | } | |||
2014 | ||||
2015 | StringRef ScalarNode::getValue(SmallVectorImpl<char> &Storage) const { | |||
2016 | // TODO: Handle newlines properly. We need to remove leading whitespace. | |||
2017 | if (Value[0] == '"') { // Double quoted. | |||
2018 | // Pull off the leading and trailing "s. | |||
2019 | StringRef UnquotedValue = Value.substr(1, Value.size() - 2); | |||
2020 | // Search for characters that would require unescaping the value. | |||
2021 | StringRef::size_type i = UnquotedValue.find_first_of("\\\r\n"); | |||
2022 | if (i != StringRef::npos) | |||
2023 | return unescapeDoubleQuoted(UnquotedValue, i, Storage); | |||
2024 | return UnquotedValue; | |||
2025 | } else if (Value[0] == '\'') { // Single quoted. | |||
2026 | // Pull off the leading and trailing 's. | |||
2027 | StringRef UnquotedValue = Value.substr(1, Value.size() - 2); | |||
2028 | StringRef::size_type i = UnquotedValue.find('\''); | |||
2029 | if (i != StringRef::npos) { | |||
2030 | // We're going to need Storage. | |||
2031 | Storage.clear(); | |||
2032 | Storage.reserve(UnquotedValue.size()); | |||
2033 | for (; i != StringRef::npos; i = UnquotedValue.find('\'')) { | |||
2034 | StringRef Valid(UnquotedValue.begin(), i); | |||
2035 | llvm::append_range(Storage, Valid); | |||
2036 | Storage.push_back('\''); | |||
2037 | UnquotedValue = UnquotedValue.substr(i + 2); | |||
2038 | } | |||
2039 | llvm::append_range(Storage, UnquotedValue); | |||
2040 | return StringRef(Storage.begin(), Storage.size()); | |||
2041 | } | |||
2042 | return UnquotedValue; | |||
2043 | } | |||
2044 | // Plain. | |||
2045 | // Trim whitespace ('b-char' and 's-white'). | |||
2046 | // NOTE: Alternatively we could change the scanner to not include whitespace | |||
2047 | // here in the first place. | |||
2048 | return Value.rtrim("\x0A\x0D\x20\x09"); | |||
2049 | } | |||
2050 | ||||
2051 | StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue | |||
2052 | , StringRef::size_type i | |||
2053 | , SmallVectorImpl<char> &Storage) | |||
2054 | const { | |||
2055 | // Use Storage to build proper value. | |||
2056 | Storage.clear(); | |||
2057 | Storage.reserve(UnquotedValue.size()); | |||
2058 | for (; i != StringRef::npos; i = UnquotedValue.find_first_of("\\\r\n")) { | |||
2059 | // Insert all previous chars into Storage. | |||
2060 | StringRef Valid(UnquotedValue.begin(), i); | |||
2061 | llvm::append_range(Storage, Valid); | |||
2062 | // Chop off inserted chars. | |||
2063 | UnquotedValue = UnquotedValue.substr(i); | |||
2064 | ||||
2065 | assert(!UnquotedValue.empty() && "Can't be empty!")(static_cast <bool> (!UnquotedValue.empty() && "Can't be empty!" ) ? void (0) : __assert_fail ("!UnquotedValue.empty() && \"Can't be empty!\"" , "llvm/lib/Support/YAMLParser.cpp", 2065, __extension__ __PRETTY_FUNCTION__ )); | |||
2066 | ||||
2067 | // Parse escape or line break. | |||
2068 | switch (UnquotedValue[0]) { | |||
2069 | case '\r': | |||
2070 | case '\n': | |||
2071 | Storage.push_back('\n'); | |||
2072 | if ( UnquotedValue.size() > 1 | |||
2073 | && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n')) | |||
2074 | UnquotedValue = UnquotedValue.substr(1); | |||
2075 | UnquotedValue = UnquotedValue.substr(1); | |||
2076 | break; | |||
2077 | default: | |||
2078 | if (UnquotedValue.size() == 1) { | |||
2079 | Token T; | |||
2080 | T.Range = StringRef(UnquotedValue.begin(), 1); | |||
2081 | setError("Unrecognized escape code", T); | |||
2082 | return ""; | |||
2083 | } | |||
2084 | UnquotedValue = UnquotedValue.substr(1); | |||
2085 | switch (UnquotedValue[0]) { | |||
2086 | default: { | |||
2087 | Token T; | |||
2088 | T.Range = StringRef(UnquotedValue.begin(), 1); | |||
2089 | setError("Unrecognized escape code", T); | |||
2090 | return ""; | |||
2091 | } | |||
2092 | case '\r': | |||
2093 | case '\n': | |||
2094 | // Remove the new line. | |||
2095 | if ( UnquotedValue.size() > 1 | |||
2096 | && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n')) | |||
2097 | UnquotedValue = UnquotedValue.substr(1); | |||
2098 | // If this was just a single byte newline, it will get skipped | |||
2099 | // below. | |||
2100 | break; | |||
2101 | case '0': | |||
2102 | Storage.push_back(0x00); | |||
2103 | break; | |||
2104 | case 'a': | |||
2105 | Storage.push_back(0x07); | |||
2106 | break; | |||
2107 | case 'b': | |||
2108 | Storage.push_back(0x08); | |||
2109 | break; | |||
2110 | case 't': | |||
2111 | case 0x09: | |||
2112 | Storage.push_back(0x09); | |||
2113 | break; | |||
2114 | case 'n': | |||
2115 | Storage.push_back(0x0A); | |||
2116 | break; | |||
2117 | case 'v': | |||
2118 | Storage.push_back(0x0B); | |||
2119 | break; | |||
2120 | case 'f': | |||
2121 | Storage.push_back(0x0C); | |||
2122 | break; | |||
2123 | case 'r': | |||
2124 | Storage.push_back(0x0D); | |||
2125 | break; | |||
2126 | case 'e': | |||
2127 | Storage.push_back(0x1B); | |||
2128 | break; | |||
2129 | case ' ': | |||
2130 | Storage.push_back(0x20); | |||
2131 | break; | |||
2132 | case '"': | |||
2133 | Storage.push_back(0x22); | |||
2134 | break; | |||
2135 | case '/': | |||
2136 | Storage.push_back(0x2F); | |||
2137 | break; | |||
2138 | case '\\': | |||
2139 | Storage.push_back(0x5C); | |||
2140 | break; | |||
2141 | case 'N': | |||
2142 | encodeUTF8(0x85, Storage); | |||
2143 | break; | |||
2144 | case '_': | |||
2145 | encodeUTF8(0xA0, Storage); | |||
2146 | break; | |||
2147 | case 'L': | |||
2148 | encodeUTF8(0x2028, Storage); | |||
2149 | break; | |||
2150 | case 'P': | |||
2151 | encodeUTF8(0x2029, Storage); | |||
2152 | break; | |||
2153 | case 'x': { | |||
2154 | if (UnquotedValue.size() < 3) | |||
2155 | // TODO: Report error. | |||
2156 | break; | |||
2157 | unsigned int UnicodeScalarValue; | |||
2158 | if (UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue)) | |||
2159 | // TODO: Report error. | |||
2160 | UnicodeScalarValue = 0xFFFD; | |||
2161 | encodeUTF8(UnicodeScalarValue, Storage); | |||
2162 | UnquotedValue = UnquotedValue.substr(2); | |||
2163 | break; | |||
2164 | } | |||
2165 | case 'u': { | |||
2166 | if (UnquotedValue.size() < 5) | |||
2167 | // TODO: Report error. | |||
2168 | break; | |||
2169 | unsigned int UnicodeScalarValue; | |||
2170 | if (UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue)) | |||
2171 | // TODO: Report error. | |||
2172 | UnicodeScalarValue = 0xFFFD; | |||
2173 | encodeUTF8(UnicodeScalarValue, Storage); | |||
2174 | UnquotedValue = UnquotedValue.substr(4); | |||
2175 | break; | |||
2176 | } | |||
2177 | case 'U': { | |||
2178 | if (UnquotedValue.size() < 9) | |||
2179 | // TODO: Report error. | |||
2180 | break; | |||
2181 | unsigned int UnicodeScalarValue; | |||
2182 | if (UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue)) | |||
2183 | // TODO: Report error. | |||
2184 | UnicodeScalarValue = 0xFFFD; | |||
2185 | encodeUTF8(UnicodeScalarValue, Storage); | |||
2186 | UnquotedValue = UnquotedValue.substr(8); | |||
2187 | break; | |||
2188 | } | |||
2189 | } | |||
2190 | UnquotedValue = UnquotedValue.substr(1); | |||
2191 | } | |||
2192 | } | |||
2193 | llvm::append_range(Storage, UnquotedValue); | |||
2194 | return StringRef(Storage.begin(), Storage.size()); | |||
2195 | } | |||
2196 | ||||
2197 | Node *KeyValueNode::getKey() { | |||
2198 | if (Key) | |||
2199 | return Key; | |||
2200 | // Handle implicit null keys. | |||
2201 | { | |||
2202 | Token &t = peekNext(); | |||
2203 | if ( t.Kind == Token::TK_BlockEnd | |||
2204 | || t.Kind == Token::TK_Value | |||
2205 | || t.Kind == Token::TK_Error) { | |||
2206 | return Key = new (getAllocator()) NullNode(Doc); | |||
2207 | } | |||
2208 | if (t.Kind == Token::TK_Key) | |||
2209 | getNext(); // skip TK_Key. | |||
2210 | } | |||
2211 | ||||
2212 | // Handle explicit null keys. | |||
2213 | Token &t = peekNext(); | |||
2214 | if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Value) { | |||
2215 | return Key = new (getAllocator()) NullNode(Doc); | |||
2216 | } | |||
2217 | ||||
2218 | // We've got a normal key. | |||
2219 | return Key = parseBlockNode(); | |||
2220 | } | |||
2221 | ||||
2222 | Node *KeyValueNode::getValue() { | |||
2223 | if (Value) | |||
2224 | return Value; | |||
2225 | ||||
2226 | if (Node* Key = getKey()) | |||
2227 | Key->skip(); | |||
2228 | else { | |||
2229 | setError("Null key in Key Value.", peekNext()); | |||
2230 | return Value = new (getAllocator()) NullNode(Doc); | |||
2231 | } | |||
2232 | ||||
2233 | if (failed()) | |||
2234 | return Value = new (getAllocator()) NullNode(Doc); | |||
2235 | ||||
2236 | // Handle implicit null values. | |||
2237 | { | |||
2238 | Token &t = peekNext(); | |||
2239 | if ( t.Kind == Token::TK_BlockEnd | |||
2240 | || t.Kind == Token::TK_FlowMappingEnd | |||
2241 | || t.Kind == Token::TK_Key | |||
2242 | || t.Kind == Token::TK_FlowEntry | |||
2243 | || t.Kind == Token::TK_Error) { | |||
2244 | return Value = new (getAllocator()) NullNode(Doc); | |||
2245 | } | |||
2246 | ||||
2247 | if (t.Kind != Token::TK_Value) { | |||
2248 | setError("Unexpected token in Key Value.", t); | |||
2249 | return Value = new (getAllocator()) NullNode(Doc); | |||
2250 | } | |||
2251 | getNext(); // skip TK_Value. | |||
2252 | } | |||
2253 | ||||
2254 | // Handle explicit null values. | |||
2255 | Token &t = peekNext(); | |||
2256 | if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Key) { | |||
2257 | return Value = new (getAllocator()) NullNode(Doc); | |||
2258 | } | |||
2259 | ||||
2260 | // We got a normal value. | |||
2261 | return Value = parseBlockNode(); | |||
2262 | } | |||
2263 | ||||
2264 | void MappingNode::increment() { | |||
2265 | if (failed()) { | |||
2266 | IsAtEnd = true; | |||
2267 | CurrentEntry = nullptr; | |||
2268 | return; | |||
2269 | } | |||
2270 | if (CurrentEntry) { | |||
2271 | CurrentEntry->skip(); | |||
2272 | if (Type == MT_Inline) { | |||
2273 | IsAtEnd = true; | |||
2274 | CurrentEntry = nullptr; | |||
2275 | return; | |||
2276 | } | |||
2277 | } | |||
2278 | Token T = peekNext(); | |||
2279 | if (T.Kind == Token::TK_Key || T.Kind == Token::TK_Scalar) { | |||
2280 | // KeyValueNode eats the TK_Key. That way it can detect null keys. | |||
2281 | CurrentEntry = new (getAllocator()) KeyValueNode(Doc); | |||
2282 | } else if (Type == MT_Block) { | |||
2283 | switch (T.Kind) { | |||
2284 | case Token::TK_BlockEnd: | |||
2285 | getNext(); | |||
2286 | IsAtEnd = true; | |||
2287 | CurrentEntry = nullptr; | |||
2288 | break; | |||
2289 | default: | |||
2290 | setError("Unexpected token. Expected Key or Block End", T); | |||
2291 | [[fallthrough]]; | |||
2292 | case Token::TK_Error: | |||
2293 | IsAtEnd = true; | |||
2294 | CurrentEntry = nullptr; | |||
2295 | } | |||
2296 | } else { | |||
2297 | switch (T.Kind) { | |||
2298 | case Token::TK_FlowEntry: | |||
2299 | // Eat the flow entry and recurse. | |||
2300 | getNext(); | |||
2301 | return increment(); | |||
2302 | case Token::TK_FlowMappingEnd: | |||
2303 | getNext(); | |||
2304 | [[fallthrough]]; | |||
2305 | case Token::TK_Error: | |||
2306 | // Set this to end iterator. | |||
2307 | IsAtEnd = true; | |||
2308 | CurrentEntry = nullptr; | |||
2309 | break; | |||
2310 | default: | |||
2311 | setError( "Unexpected token. Expected Key, Flow Entry, or Flow " | |||
2312 | "Mapping End." | |||
2313 | , T); | |||
2314 | IsAtEnd = true; | |||
2315 | CurrentEntry = nullptr; | |||
2316 | } | |||
2317 | } | |||
2318 | } | |||
2319 | ||||
2320 | void SequenceNode::increment() { | |||
2321 | if (failed()) { | |||
2322 | IsAtEnd = true; | |||
2323 | CurrentEntry = nullptr; | |||
2324 | return; | |||
2325 | } | |||
2326 | if (CurrentEntry) | |||
2327 | CurrentEntry->skip(); | |||
2328 | Token T = peekNext(); | |||
2329 | if (SeqType == ST_Block) { | |||
2330 | switch (T.Kind) { | |||
2331 | case Token::TK_BlockEntry: | |||
2332 | getNext(); | |||
2333 | CurrentEntry = parseBlockNode(); | |||
2334 | if (!CurrentEntry) { // An error occurred. | |||
2335 | IsAtEnd = true; | |||
2336 | CurrentEntry = nullptr; | |||
2337 | } | |||
2338 | break; | |||
2339 | case Token::TK_BlockEnd: | |||
2340 | getNext(); | |||
2341 | IsAtEnd = true; | |||
2342 | CurrentEntry = nullptr; | |||
2343 | break; | |||
2344 | default: | |||
2345 | setError( "Unexpected token. Expected Block Entry or Block End." | |||
2346 | , T); | |||
2347 | [[fallthrough]]; | |||
2348 | case Token::TK_Error: | |||
2349 | IsAtEnd = true; | |||
2350 | CurrentEntry = nullptr; | |||
2351 | } | |||
2352 | } else if (SeqType == ST_Indentless) { | |||
2353 | switch (T.Kind) { | |||
2354 | case Token::TK_BlockEntry: | |||
2355 | getNext(); | |||
2356 | CurrentEntry = parseBlockNode(); | |||
2357 | if (!CurrentEntry) { // An error occurred. | |||
2358 | IsAtEnd = true; | |||
2359 | CurrentEntry = nullptr; | |||
2360 | } | |||
2361 | break; | |||
2362 | default: | |||
2363 | case Token::TK_Error: | |||
2364 | IsAtEnd = true; | |||
2365 | CurrentEntry = nullptr; | |||
2366 | } | |||
2367 | } else if (SeqType == ST_Flow) { | |||
2368 | switch (T.Kind) { | |||
2369 | case Token::TK_FlowEntry: | |||
2370 | // Eat the flow entry and recurse. | |||
2371 | getNext(); | |||
2372 | WasPreviousTokenFlowEntry = true; | |||
2373 | return increment(); | |||
2374 | case Token::TK_FlowSequenceEnd: | |||
2375 | getNext(); | |||
2376 | [[fallthrough]]; | |||
2377 | case Token::TK_Error: | |||
2378 | // Set this to end iterator. | |||
2379 | IsAtEnd = true; | |||
2380 | CurrentEntry = nullptr; | |||
2381 | break; | |||
2382 | case Token::TK_StreamEnd: | |||
2383 | case Token::TK_DocumentEnd: | |||
2384 | case Token::TK_DocumentStart: | |||
2385 | setError("Could not find closing ]!", T); | |||
2386 | // Set this to end iterator. | |||
2387 | IsAtEnd = true; | |||
2388 | CurrentEntry = nullptr; | |||
2389 | break; | |||
2390 | default: | |||
2391 | if (!WasPreviousTokenFlowEntry) { | |||
2392 | setError("Expected , between entries!", T); | |||
2393 | IsAtEnd = true; | |||
2394 | CurrentEntry = nullptr; | |||
2395 | break; | |||
2396 | } | |||
2397 | // Otherwise it must be a flow entry. | |||
2398 | CurrentEntry = parseBlockNode(); | |||
2399 | if (!CurrentEntry) { | |||
2400 | IsAtEnd = true; | |||
2401 | } | |||
2402 | WasPreviousTokenFlowEntry = false; | |||
2403 | break; | |||
2404 | } | |||
2405 | } | |||
2406 | } | |||
2407 | ||||
2408 | Document::Document(Stream &S) : stream(S), Root(nullptr) { | |||
2409 | // Tag maps starts with two default mappings. | |||
2410 | TagMap["!"] = "!"; | |||
2411 | TagMap["!!"] = "tag:yaml.org,2002:"; | |||
2412 | ||||
2413 | if (parseDirectives()) | |||
2414 | expectToken(Token::TK_DocumentStart); | |||
2415 | Token &T = peekNext(); | |||
2416 | if (T.Kind == Token::TK_DocumentStart) | |||
2417 | getNext(); | |||
2418 | } | |||
2419 | ||||
2420 | bool Document::skip() { | |||
2421 | if (stream.scanner->failed()) | |||
2422 | return false; | |||
2423 | if (!Root && !getRoot()) | |||
2424 | return false; | |||
2425 | Root->skip(); | |||
2426 | Token &T = peekNext(); | |||
2427 | if (T.Kind == Token::TK_StreamEnd) | |||
2428 | return false; | |||
2429 | if (T.Kind == Token::TK_DocumentEnd) { | |||
2430 | getNext(); | |||
2431 | return skip(); | |||
2432 | } | |||
2433 | return true; | |||
2434 | } | |||
2435 | ||||
2436 | Token &Document::peekNext() { | |||
2437 | return stream.scanner->peekNext(); | |||
2438 | } | |||
2439 | ||||
2440 | Token Document::getNext() { | |||
2441 | return stream.scanner->getNext(); | |||
2442 | } | |||
2443 | ||||
2444 | void Document::setError(const Twine &Message, Token &Location) const { | |||
2445 | stream.scanner->setError(Message, Location.Range.begin()); | |||
2446 | } | |||
2447 | ||||
2448 | bool Document::failed() const { | |||
2449 | return stream.scanner->failed(); | |||
2450 | } | |||
2451 | ||||
2452 | Node *Document::parseBlockNode() { | |||
2453 | Token T = peekNext(); | |||
2454 | // Handle properties. | |||
2455 | Token AnchorInfo; | |||
2456 | Token TagInfo; | |||
2457 | parse_property: | |||
2458 | switch (T.Kind) { | |||
2459 | case Token::TK_Alias: | |||
2460 | getNext(); | |||
2461 | return new (NodeAllocator) AliasNode(stream.CurrentDoc, T.Range.substr(1)); | |||
2462 | case Token::TK_Anchor: | |||
2463 | if (AnchorInfo.Kind == Token::TK_Anchor) { | |||
2464 | setError("Already encountered an anchor for this node!", T); | |||
2465 | return nullptr; | |||
2466 | } | |||
2467 | AnchorInfo = getNext(); // Consume TK_Anchor. | |||
2468 | T = peekNext(); | |||
2469 | goto parse_property; | |||
2470 | case Token::TK_Tag: | |||
2471 | if (TagInfo.Kind == Token::TK_Tag) { | |||
2472 | setError("Already encountered a tag for this node!", T); | |||
2473 | return nullptr; | |||
2474 | } | |||
2475 | TagInfo = getNext(); // Consume TK_Tag. | |||
2476 | T = peekNext(); | |||
2477 | goto parse_property; | |||
2478 | default: | |||
2479 | break; | |||
2480 | } | |||
2481 | ||||
2482 | switch (T.Kind) { | |||
2483 | case Token::TK_BlockEntry: | |||
2484 | // We got an unindented BlockEntry sequence. This is not terminated with | |||
2485 | // a BlockEnd. | |||
2486 | // Don't eat the TK_BlockEntry, SequenceNode needs it. | |||
2487 | return new (NodeAllocator) SequenceNode( stream.CurrentDoc | |||
2488 | , AnchorInfo.Range.substr(1) | |||
2489 | , TagInfo.Range | |||
2490 | , SequenceNode::ST_Indentless); | |||
2491 | case Token::TK_BlockSequenceStart: | |||
2492 | getNext(); | |||
2493 | return new (NodeAllocator) | |||
2494 | SequenceNode( stream.CurrentDoc | |||
2495 | , AnchorInfo.Range.substr(1) | |||
2496 | , TagInfo.Range | |||
2497 | , SequenceNode::ST_Block); | |||
2498 | case Token::TK_BlockMappingStart: | |||
2499 | getNext(); | |||
2500 | return new (NodeAllocator) | |||
2501 | MappingNode( stream.CurrentDoc | |||
2502 | , AnchorInfo.Range.substr(1) | |||
2503 | , TagInfo.Range | |||
2504 | , MappingNode::MT_Block); | |||
2505 | case Token::TK_FlowSequenceStart: | |||
2506 | getNext(); | |||
2507 | return new (NodeAllocator) | |||
2508 | SequenceNode( stream.CurrentDoc | |||
2509 | , AnchorInfo.Range.substr(1) | |||
2510 | , TagInfo.Range | |||
2511 | , SequenceNode::ST_Flow); | |||
2512 | case Token::TK_FlowMappingStart: | |||
2513 | getNext(); | |||
2514 | return new (NodeAllocator) | |||
2515 | MappingNode( stream.CurrentDoc | |||
2516 | , AnchorInfo.Range.substr(1) | |||
2517 | , TagInfo.Range | |||
2518 | , MappingNode::MT_Flow); | |||
2519 | case Token::TK_Scalar: | |||
2520 | getNext(); | |||
2521 | return new (NodeAllocator) | |||
2522 | ScalarNode( stream.CurrentDoc | |||
2523 | , AnchorInfo.Range.substr(1) | |||
2524 | , TagInfo.Range | |||
2525 | , T.Range); | |||
2526 | case Token::TK_BlockScalar: { | |||
2527 | getNext(); | |||
2528 | StringRef NullTerminatedStr(T.Value.c_str(), T.Value.length() + 1); | |||
2529 | StringRef StrCopy = NullTerminatedStr.copy(NodeAllocator).drop_back(); | |||
2530 | return new (NodeAllocator) | |||
2531 | BlockScalarNode(stream.CurrentDoc, AnchorInfo.Range.substr(1), | |||
2532 | TagInfo.Range, StrCopy, T.Range); | |||
2533 | } | |||
2534 | case Token::TK_Key: | |||
2535 | // Don't eat the TK_Key, KeyValueNode expects it. | |||
2536 | return new (NodeAllocator) | |||
2537 | MappingNode( stream.CurrentDoc | |||
2538 | , AnchorInfo.Range.substr(1) | |||
2539 | , TagInfo.Range | |||
2540 | , MappingNode::MT_Inline); | |||
2541 | case Token::TK_DocumentStart: | |||
2542 | case Token::TK_DocumentEnd: | |||
2543 | case Token::TK_StreamEnd: | |||
2544 | default: | |||
2545 | // TODO: Properly handle tags. "[!!str ]" should resolve to !!str "", not | |||
2546 | // !!null null. | |||
2547 | return new (NodeAllocator) NullNode(stream.CurrentDoc); | |||
2548 | case Token::TK_FlowMappingEnd: | |||
2549 | case Token::TK_FlowSequenceEnd: | |||
2550 | case Token::TK_FlowEntry: { | |||
2551 | if (Root && (isa<MappingNode>(Root) || isa<SequenceNode>(Root))) | |||
2552 | return new (NodeAllocator) NullNode(stream.CurrentDoc); | |||
2553 | ||||
2554 | setError("Unexpected token", T); | |||
2555 | return nullptr; | |||
2556 | } | |||
2557 | case Token::TK_Error: | |||
2558 | return nullptr; | |||
2559 | } | |||
2560 | llvm_unreachable("Control flow shouldn't reach here.")::llvm::llvm_unreachable_internal("Control flow shouldn't reach here." , "llvm/lib/Support/YAMLParser.cpp", 2560); | |||
2561 | return nullptr; | |||
2562 | } | |||
2563 | ||||
2564 | bool Document::parseDirectives() { | |||
2565 | bool isDirective = false; | |||
2566 | while (true) { | |||
2567 | Token T = peekNext(); | |||
2568 | if (T.Kind == Token::TK_TagDirective) { | |||
2569 | parseTAGDirective(); | |||
2570 | isDirective = true; | |||
2571 | } else if (T.Kind == Token::TK_VersionDirective) { | |||
2572 | parseYAMLDirective(); | |||
2573 | isDirective = true; | |||
2574 | } else | |||
2575 | break; | |||
2576 | } | |||
2577 | return isDirective; | |||
2578 | } | |||
2579 | ||||
2580 | void Document::parseYAMLDirective() { | |||
2581 | getNext(); // Eat %YAML <version> | |||
2582 | } | |||
2583 | ||||
2584 | void Document::parseTAGDirective() { | |||
2585 | Token Tag = getNext(); // %TAG <handle> <prefix> | |||
2586 | StringRef T = Tag.Range; | |||
2587 | // Strip %TAG | |||
2588 | T = T.substr(T.find_first_of(" \t")).ltrim(" \t"); | |||
2589 | std::size_t HandleEnd = T.find_first_of(" \t"); | |||
2590 | StringRef TagHandle = T.substr(0, HandleEnd); | |||
2591 | StringRef TagPrefix = T.substr(HandleEnd).ltrim(" \t"); | |||
2592 | TagMap[TagHandle] = TagPrefix; | |||
2593 | } | |||
2594 | ||||
2595 | bool Document::expectToken(int TK) { | |||
2596 | Token T = getNext(); | |||
2597 | if (T.Kind != TK) { | |||
2598 | setError("Unexpected token", T); | |||
2599 | return false; | |||
2600 | } | |||
2601 | return true; | |||
2602 | } |