File: | llvm/lib/Support/YAMLParser.cpp |
Warning: | line 1597, column 9 2nd function call argument is an uninitialized value |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===- YAMLParser.cpp - Simple YAML parser --------------------------------===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | // | |||
9 | // This file implements a YAML parser. | |||
10 | // | |||
11 | //===----------------------------------------------------------------------===// | |||
12 | ||||
13 | #include "llvm/Support/YAMLParser.h" | |||
14 | #include "llvm/ADT/AllocatorList.h" | |||
15 | #include "llvm/ADT/ArrayRef.h" | |||
16 | #include "llvm/ADT/None.h" | |||
17 | #include "llvm/ADT/STLExtras.h" | |||
18 | #include "llvm/ADT/SmallString.h" | |||
19 | #include "llvm/ADT/SmallVector.h" | |||
20 | #include "llvm/ADT/StringExtras.h" | |||
21 | #include "llvm/ADT/StringRef.h" | |||
22 | #include "llvm/ADT/Twine.h" | |||
23 | #include "llvm/Support/Compiler.h" | |||
24 | #include "llvm/Support/ErrorHandling.h" | |||
25 | #include "llvm/Support/MemoryBuffer.h" | |||
26 | #include "llvm/Support/SMLoc.h" | |||
27 | #include "llvm/Support/SourceMgr.h" | |||
28 | #include "llvm/Support/Unicode.h" | |||
29 | #include "llvm/Support/raw_ostream.h" | |||
30 | #include <algorithm> | |||
31 | #include <cassert> | |||
32 | #include <cstddef> | |||
33 | #include <cstdint> | |||
34 | #include <map> | |||
35 | #include <memory> | |||
36 | #include <string> | |||
37 | #include <system_error> | |||
38 | #include <utility> | |||
39 | ||||
40 | using namespace llvm; | |||
41 | using namespace yaml; | |||
42 | ||||
43 | enum UnicodeEncodingForm { | |||
44 | UEF_UTF32_LE, ///< UTF-32 Little Endian | |||
45 | UEF_UTF32_BE, ///< UTF-32 Big Endian | |||
46 | UEF_UTF16_LE, ///< UTF-16 Little Endian | |||
47 | UEF_UTF16_BE, ///< UTF-16 Big Endian | |||
48 | UEF_UTF8, ///< UTF-8 or ascii. | |||
49 | UEF_Unknown ///< Not a valid Unicode encoding. | |||
50 | }; | |||
51 | ||||
52 | /// EncodingInfo - Holds the encoding type and length of the byte order mark if | |||
53 | /// it exists. Length is in {0, 2, 3, 4}. | |||
54 | using EncodingInfo = std::pair<UnicodeEncodingForm, unsigned>; | |||
55 | ||||
56 | /// getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode | |||
57 | /// encoding form of \a Input. | |||
58 | /// | |||
59 | /// @param Input A string of length 0 or more. | |||
60 | /// @returns An EncodingInfo indicating the Unicode encoding form of the input | |||
61 | /// and how long the byte order mark is if one exists. | |||
62 | static EncodingInfo getUnicodeEncoding(StringRef Input) { | |||
63 | if (Input.empty()) | |||
64 | return std::make_pair(UEF_Unknown, 0); | |||
65 | ||||
66 | switch (uint8_t(Input[0])) { | |||
67 | case 0x00: | |||
68 | if (Input.size() >= 4) { | |||
69 | if ( Input[1] == 0 | |||
70 | && uint8_t(Input[2]) == 0xFE | |||
71 | && uint8_t(Input[3]) == 0xFF) | |||
72 | return std::make_pair(UEF_UTF32_BE, 4); | |||
73 | if (Input[1] == 0 && Input[2] == 0 && Input[3] != 0) | |||
74 | return std::make_pair(UEF_UTF32_BE, 0); | |||
75 | } | |||
76 | ||||
77 | if (Input.size() >= 2 && Input[1] != 0) | |||
78 | return std::make_pair(UEF_UTF16_BE, 0); | |||
79 | return std::make_pair(UEF_Unknown, 0); | |||
80 | case 0xFF: | |||
81 | if ( Input.size() >= 4 | |||
82 | && uint8_t(Input[1]) == 0xFE | |||
83 | && Input[2] == 0 | |||
84 | && Input[3] == 0) | |||
85 | return std::make_pair(UEF_UTF32_LE, 4); | |||
86 | ||||
87 | if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFE) | |||
88 | return std::make_pair(UEF_UTF16_LE, 2); | |||
89 | return std::make_pair(UEF_Unknown, 0); | |||
90 | case 0xFE: | |||
91 | if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFF) | |||
92 | return std::make_pair(UEF_UTF16_BE, 2); | |||
93 | return std::make_pair(UEF_Unknown, 0); | |||
94 | case 0xEF: | |||
95 | if ( Input.size() >= 3 | |||
96 | && uint8_t(Input[1]) == 0xBB | |||
97 | && uint8_t(Input[2]) == 0xBF) | |||
98 | return std::make_pair(UEF_UTF8, 3); | |||
99 | return std::make_pair(UEF_Unknown, 0); | |||
100 | } | |||
101 | ||||
102 | // It could still be utf-32 or utf-16. | |||
103 | if (Input.size() >= 4 && Input[1] == 0 && Input[2] == 0 && Input[3] == 0) | |||
104 | return std::make_pair(UEF_UTF32_LE, 0); | |||
105 | ||||
106 | if (Input.size() >= 2 && Input[1] == 0) | |||
107 | return std::make_pair(UEF_UTF16_LE, 0); | |||
108 | ||||
109 | return std::make_pair(UEF_UTF8, 0); | |||
110 | } | |||
111 | ||||
112 | /// Pin the vtables to this file. | |||
113 | void Node::anchor() {} | |||
114 | void NullNode::anchor() {} | |||
115 | void ScalarNode::anchor() {} | |||
116 | void BlockScalarNode::anchor() {} | |||
117 | void KeyValueNode::anchor() {} | |||
118 | void MappingNode::anchor() {} | |||
119 | void SequenceNode::anchor() {} | |||
120 | void AliasNode::anchor() {} | |||
121 | ||||
122 | namespace llvm { | |||
123 | namespace yaml { | |||
124 | ||||
125 | /// Token - A single YAML token. | |||
126 | struct Token { | |||
127 | enum TokenKind { | |||
128 | TK_Error, // Uninitialized token. | |||
129 | TK_StreamStart, | |||
130 | TK_StreamEnd, | |||
131 | TK_VersionDirective, | |||
132 | TK_TagDirective, | |||
133 | TK_DocumentStart, | |||
134 | TK_DocumentEnd, | |||
135 | TK_BlockEntry, | |||
136 | TK_BlockEnd, | |||
137 | TK_BlockSequenceStart, | |||
138 | TK_BlockMappingStart, | |||
139 | TK_FlowEntry, | |||
140 | TK_FlowSequenceStart, | |||
141 | TK_FlowSequenceEnd, | |||
142 | TK_FlowMappingStart, | |||
143 | TK_FlowMappingEnd, | |||
144 | TK_Key, | |||
145 | TK_Value, | |||
146 | TK_Scalar, | |||
147 | TK_BlockScalar, | |||
148 | TK_Alias, | |||
149 | TK_Anchor, | |||
150 | TK_Tag | |||
151 | } Kind = TK_Error; | |||
152 | ||||
153 | /// A string of length 0 or more whose begin() points to the logical location | |||
154 | /// of the token in the input. | |||
155 | StringRef Range; | |||
156 | ||||
157 | /// The value of a block scalar node. | |||
158 | std::string Value; | |||
159 | ||||
160 | Token() = default; | |||
161 | }; | |||
162 | ||||
163 | } // end namespace yaml | |||
164 | } // end namespace llvm | |||
165 | ||||
166 | using TokenQueueT = BumpPtrList<Token>; | |||
167 | ||||
168 | namespace { | |||
169 | ||||
170 | /// This struct is used to track simple keys. | |||
171 | /// | |||
172 | /// Simple keys are handled by creating an entry in SimpleKeys for each Token | |||
173 | /// which could legally be the start of a simple key. When peekNext is called, | |||
174 | /// if the Token To be returned is referenced by a SimpleKey, we continue | |||
175 | /// tokenizing until that potential simple key has either been found to not be | |||
176 | /// a simple key (we moved on to the next line or went further than 1024 chars). | |||
177 | /// Or when we run into a Value, and then insert a Key token (and possibly | |||
178 | /// others) before the SimpleKey's Tok. | |||
179 | struct SimpleKey { | |||
180 | TokenQueueT::iterator Tok; | |||
181 | unsigned Column = 0; | |||
182 | unsigned Line = 0; | |||
183 | unsigned FlowLevel = 0; | |||
184 | bool IsRequired = false; | |||
185 | ||||
186 | bool operator ==(const SimpleKey &Other) { | |||
187 | return Tok == Other.Tok; | |||
188 | } | |||
189 | }; | |||
190 | ||||
191 | } // end anonymous namespace | |||
192 | ||||
193 | /// The Unicode scalar value of a UTF-8 minimal well-formed code unit | |||
194 | /// subsequence and the subsequence's length in code units (uint8_t). | |||
195 | /// A length of 0 represents an error. | |||
196 | using UTF8Decoded = std::pair<uint32_t, unsigned>; | |||
197 | ||||
198 | static UTF8Decoded decodeUTF8(StringRef Range) { | |||
199 | StringRef::iterator Position= Range.begin(); | |||
200 | StringRef::iterator End = Range.end(); | |||
201 | // 1 byte: [0x00, 0x7f] | |||
202 | // Bit pattern: 0xxxxxxx | |||
203 | if (Position < End && (*Position & 0x80) == 0) { | |||
204 | return std::make_pair(*Position, 1); | |||
205 | } | |||
206 | // 2 bytes: [0x80, 0x7ff] | |||
207 | // Bit pattern: 110xxxxx 10xxxxxx | |||
208 | if (Position + 1 < End && ((*Position & 0xE0) == 0xC0) && | |||
209 | ((*(Position + 1) & 0xC0) == 0x80)) { | |||
210 | uint32_t codepoint = ((*Position & 0x1F) << 6) | | |||
211 | (*(Position + 1) & 0x3F); | |||
212 | if (codepoint >= 0x80) | |||
213 | return std::make_pair(codepoint, 2); | |||
214 | } | |||
215 | // 3 bytes: [0x8000, 0xffff] | |||
216 | // Bit pattern: 1110xxxx 10xxxxxx 10xxxxxx | |||
217 | if (Position + 2 < End && ((*Position & 0xF0) == 0xE0) && | |||
218 | ((*(Position + 1) & 0xC0) == 0x80) && | |||
219 | ((*(Position + 2) & 0xC0) == 0x80)) { | |||
220 | uint32_t codepoint = ((*Position & 0x0F) << 12) | | |||
221 | ((*(Position + 1) & 0x3F) << 6) | | |||
222 | (*(Position + 2) & 0x3F); | |||
223 | // Codepoints between 0xD800 and 0xDFFF are invalid, as | |||
224 | // they are high / low surrogate halves used by UTF-16. | |||
225 | if (codepoint >= 0x800 && | |||
226 | (codepoint < 0xD800 || codepoint > 0xDFFF)) | |||
227 | return std::make_pair(codepoint, 3); | |||
228 | } | |||
229 | // 4 bytes: [0x10000, 0x10FFFF] | |||
230 | // Bit pattern: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx | |||
231 | if (Position + 3 < End && ((*Position & 0xF8) == 0xF0) && | |||
232 | ((*(Position + 1) & 0xC0) == 0x80) && | |||
233 | ((*(Position + 2) & 0xC0) == 0x80) && | |||
234 | ((*(Position + 3) & 0xC0) == 0x80)) { | |||
235 | uint32_t codepoint = ((*Position & 0x07) << 18) | | |||
236 | ((*(Position + 1) & 0x3F) << 12) | | |||
237 | ((*(Position + 2) & 0x3F) << 6) | | |||
238 | (*(Position + 3) & 0x3F); | |||
239 | if (codepoint >= 0x10000 && codepoint <= 0x10FFFF) | |||
240 | return std::make_pair(codepoint, 4); | |||
241 | } | |||
242 | return std::make_pair(0, 0); | |||
243 | } | |||
244 | ||||
245 | namespace llvm { | |||
246 | namespace yaml { | |||
247 | ||||
248 | /// Scans YAML tokens from a MemoryBuffer. | |||
249 | class Scanner { | |||
250 | public: | |||
251 | Scanner(StringRef Input, SourceMgr &SM, bool ShowColors = true, | |||
252 | std::error_code *EC = nullptr); | |||
253 | Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors = true, | |||
254 | std::error_code *EC = nullptr); | |||
255 | ||||
256 | /// Parse the next token and return it without popping it. | |||
257 | Token &peekNext(); | |||
258 | ||||
259 | /// Parse the next token and pop it from the queue. | |||
260 | Token getNext(); | |||
261 | ||||
262 | void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message, | |||
263 | ArrayRef<SMRange> Ranges = None) { | |||
264 | SM.PrintMessage(Loc, Kind, Message, Ranges, /* FixIts= */ None, ShowColors); | |||
265 | } | |||
266 | ||||
267 | void setError(const Twine &Message, StringRef::iterator Position) { | |||
268 | if (Position >= End) | |||
269 | Position = End - 1; | |||
270 | ||||
271 | // propagate the error if possible | |||
272 | if (EC) | |||
273 | *EC = make_error_code(std::errc::invalid_argument); | |||
274 | ||||
275 | // Don't print out more errors after the first one we encounter. The rest | |||
276 | // are just the result of the first, and have no meaning. | |||
277 | if (!Failed) | |||
278 | printError(SMLoc::getFromPointer(Position), SourceMgr::DK_Error, Message); | |||
279 | Failed = true; | |||
280 | } | |||
281 | ||||
282 | /// Returns true if an error occurred while parsing. | |||
283 | bool failed() { | |||
284 | return Failed; | |||
285 | } | |||
286 | ||||
287 | private: | |||
288 | void init(MemoryBufferRef Buffer); | |||
289 | ||||
290 | StringRef currentInput() { | |||
291 | return StringRef(Current, End - Current); | |||
292 | } | |||
293 | ||||
294 | /// Decode a UTF-8 minimal well-formed code unit subsequence starting | |||
295 | /// at \a Position. | |||
296 | /// | |||
297 | /// If the UTF-8 code units starting at Position do not form a well-formed | |||
298 | /// code unit subsequence, then the Unicode scalar value is 0, and the length | |||
299 | /// is 0. | |||
300 | UTF8Decoded decodeUTF8(StringRef::iterator Position) { | |||
301 | return ::decodeUTF8(StringRef(Position, End - Position)); | |||
302 | } | |||
303 | ||||
304 | // The following functions are based on the gramar rules in the YAML spec. The | |||
305 | // style of the function names it meant to closely match how they are written | |||
306 | // in the spec. The number within the [] is the number of the grammar rule in | |||
307 | // the spec. | |||
308 | // | |||
309 | // See 4.2 [Production Naming Conventions] for the meaning of the prefixes. | |||
310 | // | |||
311 | // c- | |||
312 | // A production starting and ending with a special character. | |||
313 | // b- | |||
314 | // A production matching a single line break. | |||
315 | // nb- | |||
316 | // A production starting and ending with a non-break character. | |||
317 | // s- | |||
318 | // A production starting and ending with a white space character. | |||
319 | // ns- | |||
320 | // A production starting and ending with a non-space character. | |||
321 | // l- | |||
322 | // A production matching complete line(s). | |||
323 | ||||
324 | /// Skip a single nb-char[27] starting at Position. | |||
325 | /// | |||
326 | /// A nb-char is 0x9 | [0x20-0x7E] | 0x85 | [0xA0-0xD7FF] | [0xE000-0xFEFE] | |||
327 | /// | [0xFF00-0xFFFD] | [0x10000-0x10FFFF] | |||
328 | /// | |||
329 | /// @returns The code unit after the nb-char, or Position if it's not an | |||
330 | /// nb-char. | |||
331 | StringRef::iterator skip_nb_char(StringRef::iterator Position); | |||
332 | ||||
333 | /// Skip a single b-break[28] starting at Position. | |||
334 | /// | |||
335 | /// A b-break is 0xD 0xA | 0xD | 0xA | |||
336 | /// | |||
337 | /// @returns The code unit after the b-break, or Position if it's not a | |||
338 | /// b-break. | |||
339 | StringRef::iterator skip_b_break(StringRef::iterator Position); | |||
340 | ||||
341 | /// Skip a single s-space[31] starting at Position. | |||
342 | /// | |||
343 | /// An s-space is 0x20 | |||
344 | /// | |||
345 | /// @returns The code unit after the s-space, or Position if it's not a | |||
346 | /// s-space. | |||
347 | StringRef::iterator skip_s_space(StringRef::iterator Position); | |||
348 | ||||
349 | /// Skip a single s-white[33] starting at Position. | |||
350 | /// | |||
351 | /// A s-white is 0x20 | 0x9 | |||
352 | /// | |||
353 | /// @returns The code unit after the s-white, or Position if it's not a | |||
354 | /// s-white. | |||
355 | StringRef::iterator skip_s_white(StringRef::iterator Position); | |||
356 | ||||
357 | /// Skip a single ns-char[34] starting at Position. | |||
358 | /// | |||
359 | /// A ns-char is nb-char - s-white | |||
360 | /// | |||
361 | /// @returns The code unit after the ns-char, or Position if it's not a | |||
362 | /// ns-char. | |||
363 | StringRef::iterator skip_ns_char(StringRef::iterator Position); | |||
364 | ||||
365 | using SkipWhileFunc = StringRef::iterator (Scanner::*)(StringRef::iterator); | |||
366 | ||||
367 | /// Skip minimal well-formed code unit subsequences until Func | |||
368 | /// returns its input. | |||
369 | /// | |||
370 | /// @returns The code unit after the last minimal well-formed code unit | |||
371 | /// subsequence that Func accepted. | |||
372 | StringRef::iterator skip_while( SkipWhileFunc Func | |||
373 | , StringRef::iterator Position); | |||
374 | ||||
375 | /// Skip minimal well-formed code unit subsequences until Func returns its | |||
376 | /// input. | |||
377 | void advanceWhile(SkipWhileFunc Func); | |||
378 | ||||
379 | /// Scan ns-uri-char[39]s starting at Cur. | |||
380 | /// | |||
381 | /// This updates Cur and Column while scanning. | |||
382 | void scan_ns_uri_char(); | |||
383 | ||||
384 | /// Consume a minimal well-formed code unit subsequence starting at | |||
385 | /// \a Cur. Return false if it is not the same Unicode scalar value as | |||
386 | /// \a Expected. This updates \a Column. | |||
387 | bool consume(uint32_t Expected); | |||
388 | ||||
389 | /// Skip \a Distance UTF-8 code units. Updates \a Cur and \a Column. | |||
390 | void skip(uint32_t Distance); | |||
391 | ||||
392 | /// Return true if the minimal well-formed code unit subsequence at | |||
393 | /// Pos is whitespace or a new line | |||
394 | bool isBlankOrBreak(StringRef::iterator Position); | |||
395 | ||||
396 | /// Consume a single b-break[28] if it's present at the current position. | |||
397 | /// | |||
398 | /// Return false if the code unit at the current position isn't a line break. | |||
399 | bool consumeLineBreakIfPresent(); | |||
400 | ||||
401 | /// If IsSimpleKeyAllowed, create and push_back a new SimpleKey. | |||
402 | void saveSimpleKeyCandidate( TokenQueueT::iterator Tok | |||
403 | , unsigned AtColumn | |||
404 | , bool IsRequired); | |||
405 | ||||
406 | /// Remove simple keys that can no longer be valid simple keys. | |||
407 | /// | |||
408 | /// Invalid simple keys are not on the current line or are further than 1024 | |||
409 | /// columns back. | |||
410 | void removeStaleSimpleKeyCandidates(); | |||
411 | ||||
412 | /// Remove all simple keys on FlowLevel \a Level. | |||
413 | void removeSimpleKeyCandidatesOnFlowLevel(unsigned Level); | |||
414 | ||||
415 | /// Unroll indentation in \a Indents back to \a Col. Creates BlockEnd | |||
416 | /// tokens if needed. | |||
417 | bool unrollIndent(int ToColumn); | |||
418 | ||||
419 | /// Increase indent to \a Col. Creates \a Kind token at \a InsertPoint | |||
420 | /// if needed. | |||
421 | bool rollIndent( int ToColumn | |||
422 | , Token::TokenKind Kind | |||
423 | , TokenQueueT::iterator InsertPoint); | |||
424 | ||||
425 | /// Skip a single-line comment when the comment starts at the current | |||
426 | /// position of the scanner. | |||
427 | void skipComment(); | |||
428 | ||||
429 | /// Skip whitespace and comments until the start of the next token. | |||
430 | void scanToNextToken(); | |||
431 | ||||
432 | /// Must be the first token generated. | |||
433 | bool scanStreamStart(); | |||
434 | ||||
435 | /// Generate tokens needed to close out the stream. | |||
436 | bool scanStreamEnd(); | |||
437 | ||||
438 | /// Scan a %BLAH directive. | |||
439 | bool scanDirective(); | |||
440 | ||||
441 | /// Scan a ... or ---. | |||
442 | bool scanDocumentIndicator(bool IsStart); | |||
443 | ||||
444 | /// Scan a [ or { and generate the proper flow collection start token. | |||
445 | bool scanFlowCollectionStart(bool IsSequence); | |||
446 | ||||
447 | /// Scan a ] or } and generate the proper flow collection end token. | |||
448 | bool scanFlowCollectionEnd(bool IsSequence); | |||
449 | ||||
450 | /// Scan the , that separates entries in a flow collection. | |||
451 | bool scanFlowEntry(); | |||
452 | ||||
453 | /// Scan the - that starts block sequence entries. | |||
454 | bool scanBlockEntry(); | |||
455 | ||||
456 | /// Scan an explicit ? indicating a key. | |||
457 | bool scanKey(); | |||
458 | ||||
459 | /// Scan an explicit : indicating a value. | |||
460 | bool scanValue(); | |||
461 | ||||
462 | /// Scan a quoted scalar. | |||
463 | bool scanFlowScalar(bool IsDoubleQuoted); | |||
464 | ||||
465 | /// Scan an unquoted scalar. | |||
466 | bool scanPlainScalar(); | |||
467 | ||||
468 | /// Scan an Alias or Anchor starting with * or &. | |||
469 | bool scanAliasOrAnchor(bool IsAlias); | |||
470 | ||||
471 | /// Scan a block scalar starting with | or >. | |||
472 | bool scanBlockScalar(bool IsLiteral); | |||
473 | ||||
474 | /// Scan a chomping indicator in a block scalar header. | |||
475 | char scanBlockChompingIndicator(); | |||
476 | ||||
477 | /// Scan an indentation indicator in a block scalar header. | |||
478 | unsigned scanBlockIndentationIndicator(); | |||
479 | ||||
480 | /// Scan a block scalar header. | |||
481 | /// | |||
482 | /// Return false if an error occurred. | |||
483 | bool scanBlockScalarHeader(char &ChompingIndicator, unsigned &IndentIndicator, | |||
484 | bool &IsDone); | |||
485 | ||||
486 | /// Look for the indentation level of a block scalar. | |||
487 | /// | |||
488 | /// Return false if an error occurred. | |||
489 | bool findBlockScalarIndent(unsigned &BlockIndent, unsigned BlockExitIndent, | |||
490 | unsigned &LineBreaks, bool &IsDone); | |||
491 | ||||
492 | /// Scan the indentation of a text line in a block scalar. | |||
493 | /// | |||
494 | /// Return false if an error occurred. | |||
495 | bool scanBlockScalarIndent(unsigned BlockIndent, unsigned BlockExitIndent, | |||
496 | bool &IsDone); | |||
497 | ||||
498 | /// Scan a tag of the form !stuff. | |||
499 | bool scanTag(); | |||
500 | ||||
501 | /// Dispatch to the next scanning function based on \a *Cur. | |||
502 | bool fetchMoreTokens(); | |||
503 | ||||
504 | /// The SourceMgr used for diagnostics and buffer management. | |||
505 | SourceMgr &SM; | |||
506 | ||||
507 | /// The original input. | |||
508 | MemoryBufferRef InputBuffer; | |||
509 | ||||
510 | /// The current position of the scanner. | |||
511 | StringRef::iterator Current; | |||
512 | ||||
513 | /// The end of the input (one past the last character). | |||
514 | StringRef::iterator End; | |||
515 | ||||
516 | /// Current YAML indentation level in spaces. | |||
517 | int Indent; | |||
518 | ||||
519 | /// Current column number in Unicode code points. | |||
520 | unsigned Column; | |||
521 | ||||
522 | /// Current line number. | |||
523 | unsigned Line; | |||
524 | ||||
525 | /// How deep we are in flow style containers. 0 Means at block level. | |||
526 | unsigned FlowLevel; | |||
527 | ||||
528 | /// Are we at the start of the stream? | |||
529 | bool IsStartOfStream; | |||
530 | ||||
531 | /// Can the next token be the start of a simple key? | |||
532 | bool IsSimpleKeyAllowed; | |||
533 | ||||
534 | /// True if an error has occurred. | |||
535 | bool Failed; | |||
536 | ||||
537 | /// Should colors be used when printing out the diagnostic messages? | |||
538 | bool ShowColors; | |||
539 | ||||
540 | /// Queue of tokens. This is required to queue up tokens while looking | |||
541 | /// for the end of a simple key. And for cases where a single character | |||
542 | /// can produce multiple tokens (e.g. BlockEnd). | |||
543 | TokenQueueT TokenQueue; | |||
544 | ||||
545 | /// Indentation levels. | |||
546 | SmallVector<int, 4> Indents; | |||
547 | ||||
548 | /// Potential simple keys. | |||
549 | SmallVector<SimpleKey, 4> SimpleKeys; | |||
550 | ||||
551 | std::error_code *EC; | |||
552 | }; | |||
553 | ||||
554 | } // end namespace yaml | |||
555 | } // end namespace llvm | |||
556 | ||||
557 | /// encodeUTF8 - Encode \a UnicodeScalarValue in UTF-8 and append it to result. | |||
558 | static void encodeUTF8( uint32_t UnicodeScalarValue | |||
559 | , SmallVectorImpl<char> &Result) { | |||
560 | if (UnicodeScalarValue <= 0x7F) { | |||
561 | Result.push_back(UnicodeScalarValue & 0x7F); | |||
562 | } else if (UnicodeScalarValue <= 0x7FF) { | |||
563 | uint8_t FirstByte = 0xC0 | ((UnicodeScalarValue & 0x7C0) >> 6); | |||
564 | uint8_t SecondByte = 0x80 | (UnicodeScalarValue & 0x3F); | |||
565 | Result.push_back(FirstByte); | |||
566 | Result.push_back(SecondByte); | |||
567 | } else if (UnicodeScalarValue <= 0xFFFF) { | |||
568 | uint8_t FirstByte = 0xE0 | ((UnicodeScalarValue & 0xF000) >> 12); | |||
569 | uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6); | |||
570 | uint8_t ThirdByte = 0x80 | (UnicodeScalarValue & 0x3F); | |||
571 | Result.push_back(FirstByte); | |||
572 | Result.push_back(SecondByte); | |||
573 | Result.push_back(ThirdByte); | |||
574 | } else if (UnicodeScalarValue <= 0x10FFFF) { | |||
575 | uint8_t FirstByte = 0xF0 | ((UnicodeScalarValue & 0x1F0000) >> 18); | |||
576 | uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0x3F000) >> 12); | |||
577 | uint8_t ThirdByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6); | |||
578 | uint8_t FourthByte = 0x80 | (UnicodeScalarValue & 0x3F); | |||
579 | Result.push_back(FirstByte); | |||
580 | Result.push_back(SecondByte); | |||
581 | Result.push_back(ThirdByte); | |||
582 | Result.push_back(FourthByte); | |||
583 | } | |||
584 | } | |||
585 | ||||
586 | bool yaml::dumpTokens(StringRef Input, raw_ostream &OS) { | |||
587 | SourceMgr SM; | |||
588 | Scanner scanner(Input, SM); | |||
589 | while (true) { | |||
590 | Token T = scanner.getNext(); | |||
591 | switch (T.Kind) { | |||
592 | case Token::TK_StreamStart: | |||
593 | OS << "Stream-Start: "; | |||
594 | break; | |||
595 | case Token::TK_StreamEnd: | |||
596 | OS << "Stream-End: "; | |||
597 | break; | |||
598 | case Token::TK_VersionDirective: | |||
599 | OS << "Version-Directive: "; | |||
600 | break; | |||
601 | case Token::TK_TagDirective: | |||
602 | OS << "Tag-Directive: "; | |||
603 | break; | |||
604 | case Token::TK_DocumentStart: | |||
605 | OS << "Document-Start: "; | |||
606 | break; | |||
607 | case Token::TK_DocumentEnd: | |||
608 | OS << "Document-End: "; | |||
609 | break; | |||
610 | case Token::TK_BlockEntry: | |||
611 | OS << "Block-Entry: "; | |||
612 | break; | |||
613 | case Token::TK_BlockEnd: | |||
614 | OS << "Block-End: "; | |||
615 | break; | |||
616 | case Token::TK_BlockSequenceStart: | |||
617 | OS << "Block-Sequence-Start: "; | |||
618 | break; | |||
619 | case Token::TK_BlockMappingStart: | |||
620 | OS << "Block-Mapping-Start: "; | |||
621 | break; | |||
622 | case Token::TK_FlowEntry: | |||
623 | OS << "Flow-Entry: "; | |||
624 | break; | |||
625 | case Token::TK_FlowSequenceStart: | |||
626 | OS << "Flow-Sequence-Start: "; | |||
627 | break; | |||
628 | case Token::TK_FlowSequenceEnd: | |||
629 | OS << "Flow-Sequence-End: "; | |||
630 | break; | |||
631 | case Token::TK_FlowMappingStart: | |||
632 | OS << "Flow-Mapping-Start: "; | |||
633 | break; | |||
634 | case Token::TK_FlowMappingEnd: | |||
635 | OS << "Flow-Mapping-End: "; | |||
636 | break; | |||
637 | case Token::TK_Key: | |||
638 | OS << "Key: "; | |||
639 | break; | |||
640 | case Token::TK_Value: | |||
641 | OS << "Value: "; | |||
642 | break; | |||
643 | case Token::TK_Scalar: | |||
644 | OS << "Scalar: "; | |||
645 | break; | |||
646 | case Token::TK_BlockScalar: | |||
647 | OS << "Block Scalar: "; | |||
648 | break; | |||
649 | case Token::TK_Alias: | |||
650 | OS << "Alias: "; | |||
651 | break; | |||
652 | case Token::TK_Anchor: | |||
653 | OS << "Anchor: "; | |||
654 | break; | |||
655 | case Token::TK_Tag: | |||
656 | OS << "Tag: "; | |||
657 | break; | |||
658 | case Token::TK_Error: | |||
659 | break; | |||
660 | } | |||
661 | OS << T.Range << "\n"; | |||
662 | if (T.Kind == Token::TK_StreamEnd) | |||
663 | break; | |||
664 | else if (T.Kind == Token::TK_Error) | |||
665 | return false; | |||
666 | } | |||
667 | return true; | |||
668 | } | |||
669 | ||||
670 | bool yaml::scanTokens(StringRef Input) { | |||
671 | SourceMgr SM; | |||
672 | Scanner scanner(Input, SM); | |||
673 | while (true) { | |||
674 | Token T = scanner.getNext(); | |||
675 | if (T.Kind == Token::TK_StreamEnd) | |||
676 | break; | |||
677 | else if (T.Kind == Token::TK_Error) | |||
678 | return false; | |||
679 | } | |||
680 | return true; | |||
681 | } | |||
682 | ||||
683 | std::string yaml::escape(StringRef Input, bool EscapePrintable) { | |||
684 | std::string EscapedInput; | |||
685 | for (StringRef::iterator i = Input.begin(), e = Input.end(); i != e; ++i) { | |||
686 | if (*i == '\\') | |||
687 | EscapedInput += "\\\\"; | |||
688 | else if (*i == '"') | |||
689 | EscapedInput += "\\\""; | |||
690 | else if (*i == 0) | |||
691 | EscapedInput += "\\0"; | |||
692 | else if (*i == 0x07) | |||
693 | EscapedInput += "\\a"; | |||
694 | else if (*i == 0x08) | |||
695 | EscapedInput += "\\b"; | |||
696 | else if (*i == 0x09) | |||
697 | EscapedInput += "\\t"; | |||
698 | else if (*i == 0x0A) | |||
699 | EscapedInput += "\\n"; | |||
700 | else if (*i == 0x0B) | |||
701 | EscapedInput += "\\v"; | |||
702 | else if (*i == 0x0C) | |||
703 | EscapedInput += "\\f"; | |||
704 | else if (*i == 0x0D) | |||
705 | EscapedInput += "\\r"; | |||
706 | else if (*i == 0x1B) | |||
707 | EscapedInput += "\\e"; | |||
708 | else if ((unsigned char)*i < 0x20) { // Control characters not handled above. | |||
709 | std::string HexStr = utohexstr(*i); | |||
710 | EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr; | |||
711 | } else if (*i & 0x80) { // UTF-8 multiple code unit subsequence. | |||
712 | UTF8Decoded UnicodeScalarValue | |||
713 | = decodeUTF8(StringRef(i, Input.end() - i)); | |||
714 | if (UnicodeScalarValue.second == 0) { | |||
715 | // Found invalid char. | |||
716 | SmallString<4> Val; | |||
717 | encodeUTF8(0xFFFD, Val); | |||
718 | llvm::append_range(EscapedInput, Val); | |||
719 | // FIXME: Error reporting. | |||
720 | return EscapedInput; | |||
721 | } | |||
722 | if (UnicodeScalarValue.first == 0x85) | |||
723 | EscapedInput += "\\N"; | |||
724 | else if (UnicodeScalarValue.first == 0xA0) | |||
725 | EscapedInput += "\\_"; | |||
726 | else if (UnicodeScalarValue.first == 0x2028) | |||
727 | EscapedInput += "\\L"; | |||
728 | else if (UnicodeScalarValue.first == 0x2029) | |||
729 | EscapedInput += "\\P"; | |||
730 | else if (!EscapePrintable && | |||
731 | sys::unicode::isPrintable(UnicodeScalarValue.first)) | |||
732 | EscapedInput += StringRef(i, UnicodeScalarValue.second); | |||
733 | else { | |||
734 | std::string HexStr = utohexstr(UnicodeScalarValue.first); | |||
735 | if (HexStr.size() <= 2) | |||
736 | EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr; | |||
737 | else if (HexStr.size() <= 4) | |||
738 | EscapedInput += "\\u" + std::string(4 - HexStr.size(), '0') + HexStr; | |||
739 | else if (HexStr.size() <= 8) | |||
740 | EscapedInput += "\\U" + std::string(8 - HexStr.size(), '0') + HexStr; | |||
741 | } | |||
742 | i += UnicodeScalarValue.second - 1; | |||
743 | } else | |||
744 | EscapedInput.push_back(*i); | |||
745 | } | |||
746 | return EscapedInput; | |||
747 | } | |||
748 | ||||
749 | llvm::Optional<bool> yaml::parseBool(StringRef S) { | |||
750 | switch (S.size()) { | |||
751 | case 1: | |||
752 | switch (S.front()) { | |||
753 | case 'y': | |||
754 | case 'Y': | |||
755 | return true; | |||
756 | case 'n': | |||
757 | case 'N': | |||
758 | return false; | |||
759 | default: | |||
760 | return None; | |||
761 | } | |||
762 | case 2: | |||
763 | switch (S.front()) { | |||
764 | case 'O': | |||
765 | if (S[1] == 'N') // ON | |||
766 | return true; | |||
767 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | |||
768 | case 'o': | |||
769 | if (S[1] == 'n') //[Oo]n | |||
770 | return true; | |||
771 | return None; | |||
772 | case 'N': | |||
773 | if (S[1] == 'O') // NO | |||
774 | return false; | |||
775 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | |||
776 | case 'n': | |||
777 | if (S[1] == 'o') //[Nn]o | |||
778 | return false; | |||
779 | return None; | |||
780 | default: | |||
781 | return None; | |||
782 | } | |||
783 | case 3: | |||
784 | switch (S.front()) { | |||
785 | case 'O': | |||
786 | if (S.drop_front() == "FF") // OFF | |||
787 | return false; | |||
788 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | |||
789 | case 'o': | |||
790 | if (S.drop_front() == "ff") //[Oo]ff | |||
791 | return false; | |||
792 | return None; | |||
793 | case 'Y': | |||
794 | if (S.drop_front() == "ES") // YES | |||
795 | return true; | |||
796 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | |||
797 | case 'y': | |||
798 | if (S.drop_front() == "es") //[Yy]es | |||
799 | return true; | |||
800 | return None; | |||
801 | default: | |||
802 | return None; | |||
803 | } | |||
804 | case 4: | |||
805 | switch (S.front()) { | |||
806 | case 'T': | |||
807 | if (S.drop_front() == "RUE") // TRUE | |||
808 | return true; | |||
809 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | |||
810 | case 't': | |||
811 | if (S.drop_front() == "rue") //[Tt]rue | |||
812 | return true; | |||
813 | return None; | |||
814 | default: | |||
815 | return None; | |||
816 | } | |||
817 | case 5: | |||
818 | switch (S.front()) { | |||
819 | case 'F': | |||
820 | if (S.drop_front() == "ALSE") // FALSE | |||
821 | return false; | |||
822 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | |||
823 | case 'f': | |||
824 | if (S.drop_front() == "alse") //[Ff]alse | |||
825 | return false; | |||
826 | return None; | |||
827 | default: | |||
828 | return None; | |||
829 | } | |||
830 | default: | |||
831 | return None; | |||
832 | } | |||
833 | } | |||
834 | ||||
835 | Scanner::Scanner(StringRef Input, SourceMgr &sm, bool ShowColors, | |||
836 | std::error_code *EC) | |||
837 | : SM(sm), ShowColors(ShowColors), EC(EC) { | |||
838 | init(MemoryBufferRef(Input, "YAML")); | |||
839 | } | |||
840 | ||||
841 | Scanner::Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors, | |||
842 | std::error_code *EC) | |||
843 | : SM(SM_), ShowColors(ShowColors), EC(EC) { | |||
844 | init(Buffer); | |||
845 | } | |||
846 | ||||
847 | void Scanner::init(MemoryBufferRef Buffer) { | |||
848 | InputBuffer = Buffer; | |||
849 | Current = InputBuffer.getBufferStart(); | |||
850 | End = InputBuffer.getBufferEnd(); | |||
851 | Indent = -1; | |||
852 | Column = 0; | |||
853 | Line = 0; | |||
854 | FlowLevel = 0; | |||
855 | IsStartOfStream = true; | |||
856 | IsSimpleKeyAllowed = true; | |||
857 | Failed = false; | |||
858 | std::unique_ptr<MemoryBuffer> InputBufferOwner = | |||
859 | MemoryBuffer::getMemBuffer(Buffer, /*RequiresNullTerminator=*/false); | |||
860 | SM.AddNewSourceBuffer(std::move(InputBufferOwner), SMLoc()); | |||
861 | } | |||
862 | ||||
863 | Token &Scanner::peekNext() { | |||
864 | // If the current token is a possible simple key, keep parsing until we | |||
865 | // can confirm. | |||
866 | bool NeedMore = false; | |||
867 | while (true) { | |||
868 | if (TokenQueue.empty() || NeedMore) { | |||
869 | if (!fetchMoreTokens()) { | |||
870 | TokenQueue.clear(); | |||
871 | SimpleKeys.clear(); | |||
872 | TokenQueue.push_back(Token()); | |||
873 | return TokenQueue.front(); | |||
874 | } | |||
875 | } | |||
876 | assert(!TokenQueue.empty() &&(static_cast <bool> (!TokenQueue.empty() && "fetchMoreTokens lied about getting tokens!" ) ? void (0) : __assert_fail ("!TokenQueue.empty() && \"fetchMoreTokens lied about getting tokens!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Support/YAMLParser.cpp" , 877, __extension__ __PRETTY_FUNCTION__)) | |||
877 | "fetchMoreTokens lied about getting tokens!")(static_cast <bool> (!TokenQueue.empty() && "fetchMoreTokens lied about getting tokens!" ) ? void (0) : __assert_fail ("!TokenQueue.empty() && \"fetchMoreTokens lied about getting tokens!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Support/YAMLParser.cpp" , 877, __extension__ __PRETTY_FUNCTION__)); | |||
878 | ||||
879 | removeStaleSimpleKeyCandidates(); | |||
880 | SimpleKey SK; | |||
881 | SK.Tok = TokenQueue.begin(); | |||
882 | if (!is_contained(SimpleKeys, SK)) | |||
883 | break; | |||
884 | else | |||
885 | NeedMore = true; | |||
886 | } | |||
887 | return TokenQueue.front(); | |||
888 | } | |||
889 | ||||
890 | Token Scanner::getNext() { | |||
891 | Token Ret = peekNext(); | |||
892 | // TokenQueue can be empty if there was an error getting the next token. | |||
893 | if (!TokenQueue.empty()) | |||
894 | TokenQueue.pop_front(); | |||
895 | ||||
896 | // There cannot be any referenced Token's if the TokenQueue is empty. So do a | |||
897 | // quick deallocation of them all. | |||
898 | if (TokenQueue.empty()) | |||
899 | TokenQueue.resetAlloc(); | |||
900 | ||||
901 | return Ret; | |||
902 | } | |||
903 | ||||
904 | StringRef::iterator Scanner::skip_nb_char(StringRef::iterator Position) { | |||
905 | if (Position == End) | |||
906 | return Position; | |||
907 | // Check 7 bit c-printable - b-char. | |||
908 | if ( *Position == 0x09 | |||
909 | || (*Position >= 0x20 && *Position <= 0x7E)) | |||
910 | return Position + 1; | |||
911 | ||||
912 | // Check for valid UTF-8. | |||
913 | if (uint8_t(*Position) & 0x80) { | |||
914 | UTF8Decoded u8d = decodeUTF8(Position); | |||
915 | if ( u8d.second != 0 | |||
916 | && u8d.first != 0xFEFF | |||
917 | && ( u8d.first == 0x85 | |||
918 | || ( u8d.first >= 0xA0 | |||
919 | && u8d.first <= 0xD7FF) | |||
920 | || ( u8d.first >= 0xE000 | |||
921 | && u8d.first <= 0xFFFD) | |||
922 | || ( u8d.first >= 0x10000 | |||
923 | && u8d.first <= 0x10FFFF))) | |||
924 | return Position + u8d.second; | |||
925 | } | |||
926 | return Position; | |||
927 | } | |||
928 | ||||
929 | StringRef::iterator Scanner::skip_b_break(StringRef::iterator Position) { | |||
930 | if (Position == End) | |||
931 | return Position; | |||
932 | if (*Position == 0x0D) { | |||
933 | if (Position + 1 != End && *(Position + 1) == 0x0A) | |||
934 | return Position + 2; | |||
935 | return Position + 1; | |||
936 | } | |||
937 | ||||
938 | if (*Position == 0x0A) | |||
939 | return Position + 1; | |||
940 | return Position; | |||
941 | } | |||
942 | ||||
943 | StringRef::iterator Scanner::skip_s_space(StringRef::iterator Position) { | |||
944 | if (Position == End) | |||
945 | return Position; | |||
946 | if (*Position == ' ') | |||
947 | return Position + 1; | |||
948 | return Position; | |||
949 | } | |||
950 | ||||
951 | StringRef::iterator Scanner::skip_s_white(StringRef::iterator Position) { | |||
952 | if (Position == End) | |||
953 | return Position; | |||
954 | if (*Position == ' ' || *Position == '\t') | |||
955 | return Position + 1; | |||
956 | return Position; | |||
957 | } | |||
958 | ||||
959 | StringRef::iterator Scanner::skip_ns_char(StringRef::iterator Position) { | |||
960 | if (Position == End) | |||
961 | return Position; | |||
962 | if (*Position == ' ' || *Position == '\t') | |||
963 | return Position; | |||
964 | return skip_nb_char(Position); | |||
965 | } | |||
966 | ||||
967 | StringRef::iterator Scanner::skip_while( SkipWhileFunc Func | |||
968 | , StringRef::iterator Position) { | |||
969 | while (true) { | |||
970 | StringRef::iterator i = (this->*Func)(Position); | |||
971 | if (i == Position) | |||
972 | break; | |||
973 | Position = i; | |||
974 | } | |||
975 | return Position; | |||
976 | } | |||
977 | ||||
978 | void Scanner::advanceWhile(SkipWhileFunc Func) { | |||
979 | auto Final = skip_while(Func, Current); | |||
980 | Column += Final - Current; | |||
981 | Current = Final; | |||
982 | } | |||
983 | ||||
984 | static bool is_ns_hex_digit(const char C) { return isAlnum(C); } | |||
985 | ||||
986 | static bool is_ns_word_char(const char C) { return C == '-' || isAlpha(C); } | |||
987 | ||||
988 | void Scanner::scan_ns_uri_char() { | |||
989 | while (true) { | |||
990 | if (Current == End) | |||
991 | break; | |||
992 | if (( *Current == '%' | |||
993 | && Current + 2 < End | |||
994 | && is_ns_hex_digit(*(Current + 1)) | |||
995 | && is_ns_hex_digit(*(Current + 2))) | |||
996 | || is_ns_word_char(*Current) | |||
997 | || StringRef(Current, 1).find_first_of("#;/?:@&=+$,_.!~*'()[]") | |||
998 | != StringRef::npos) { | |||
999 | ++Current; | |||
1000 | ++Column; | |||
1001 | } else | |||
1002 | break; | |||
1003 | } | |||
1004 | } | |||
1005 | ||||
1006 | bool Scanner::consume(uint32_t Expected) { | |||
1007 | if (Expected >= 0x80) { | |||
1008 | setError("Cannot consume non-ascii characters", Current); | |||
1009 | return false; | |||
1010 | } | |||
1011 | if (Current == End) | |||
1012 | return false; | |||
1013 | if (uint8_t(*Current) >= 0x80) { | |||
1014 | setError("Cannot consume non-ascii characters", Current); | |||
1015 | return false; | |||
1016 | } | |||
1017 | if (uint8_t(*Current) == Expected) { | |||
1018 | ++Current; | |||
1019 | ++Column; | |||
1020 | return true; | |||
1021 | } | |||
1022 | return false; | |||
1023 | } | |||
1024 | ||||
1025 | void Scanner::skip(uint32_t Distance) { | |||
1026 | Current += Distance; | |||
1027 | Column += Distance; | |||
1028 | assert(Current <= End && "Skipped past the end")(static_cast <bool> (Current <= End && "Skipped past the end" ) ? void (0) : __assert_fail ("Current <= End && \"Skipped past the end\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Support/YAMLParser.cpp" , 1028, __extension__ __PRETTY_FUNCTION__)); | |||
1029 | } | |||
1030 | ||||
1031 | bool Scanner::isBlankOrBreak(StringRef::iterator Position) { | |||
1032 | if (Position == End) | |||
1033 | return false; | |||
1034 | return *Position == ' ' || *Position == '\t' || *Position == '\r' || | |||
1035 | *Position == '\n'; | |||
1036 | } | |||
1037 | ||||
1038 | bool Scanner::consumeLineBreakIfPresent() { | |||
1039 | auto Next = skip_b_break(Current); | |||
1040 | if (Next == Current) | |||
1041 | return false; | |||
1042 | Column = 0; | |||
1043 | ++Line; | |||
1044 | Current = Next; | |||
1045 | return true; | |||
1046 | } | |||
1047 | ||||
1048 | void Scanner::saveSimpleKeyCandidate( TokenQueueT::iterator Tok | |||
1049 | , unsigned AtColumn | |||
1050 | , bool IsRequired) { | |||
1051 | if (IsSimpleKeyAllowed) { | |||
1052 | SimpleKey SK; | |||
1053 | SK.Tok = Tok; | |||
1054 | SK.Line = Line; | |||
1055 | SK.Column = AtColumn; | |||
1056 | SK.IsRequired = IsRequired; | |||
1057 | SK.FlowLevel = FlowLevel; | |||
1058 | SimpleKeys.push_back(SK); | |||
1059 | } | |||
1060 | } | |||
1061 | ||||
1062 | void Scanner::removeStaleSimpleKeyCandidates() { | |||
1063 | for (SmallVectorImpl<SimpleKey>::iterator i = SimpleKeys.begin(); | |||
1064 | i != SimpleKeys.end();) { | |||
1065 | if (i->Line != Line || i->Column + 1024 < Column) { | |||
1066 | if (i->IsRequired) | |||
1067 | setError( "Could not find expected : for simple key" | |||
1068 | , i->Tok->Range.begin()); | |||
1069 | i = SimpleKeys.erase(i); | |||
1070 | } else | |||
1071 | ++i; | |||
1072 | } | |||
1073 | } | |||
1074 | ||||
1075 | void Scanner::removeSimpleKeyCandidatesOnFlowLevel(unsigned Level) { | |||
1076 | if (!SimpleKeys.empty() && (SimpleKeys.end() - 1)->FlowLevel == Level) | |||
1077 | SimpleKeys.pop_back(); | |||
1078 | } | |||
1079 | ||||
1080 | bool Scanner::unrollIndent(int ToColumn) { | |||
1081 | Token T; | |||
1082 | // Indentation is ignored in flow. | |||
1083 | if (FlowLevel != 0) | |||
1084 | return true; | |||
1085 | ||||
1086 | while (Indent > ToColumn) { | |||
1087 | T.Kind = Token::TK_BlockEnd; | |||
1088 | T.Range = StringRef(Current, 1); | |||
1089 | TokenQueue.push_back(T); | |||
1090 | Indent = Indents.pop_back_val(); | |||
1091 | } | |||
1092 | ||||
1093 | return true; | |||
1094 | } | |||
1095 | ||||
1096 | bool Scanner::rollIndent( int ToColumn | |||
1097 | , Token::TokenKind Kind | |||
1098 | , TokenQueueT::iterator InsertPoint) { | |||
1099 | if (FlowLevel) | |||
1100 | return true; | |||
1101 | if (Indent < ToColumn) { | |||
1102 | Indents.push_back(Indent); | |||
1103 | Indent = ToColumn; | |||
1104 | ||||
1105 | Token T; | |||
1106 | T.Kind = Kind; | |||
1107 | T.Range = StringRef(Current, 0); | |||
1108 | TokenQueue.insert(InsertPoint, T); | |||
1109 | } | |||
1110 | return true; | |||
1111 | } | |||
1112 | ||||
1113 | void Scanner::skipComment() { | |||
1114 | if (Current == End || *Current != '#') | |||
1115 | return; | |||
1116 | while (true) { | |||
1117 | // This may skip more than one byte, thus Column is only incremented | |||
1118 | // for code points. | |||
1119 | StringRef::iterator I = skip_nb_char(Current); | |||
1120 | if (I == Current) | |||
1121 | break; | |||
1122 | Current = I; | |||
1123 | ++Column; | |||
1124 | } | |||
1125 | } | |||
1126 | ||||
1127 | void Scanner::scanToNextToken() { | |||
1128 | while (true) { | |||
1129 | while (Current != End && (*Current == ' ' || *Current == '\t')) { | |||
1130 | skip(1); | |||
1131 | } | |||
1132 | ||||
1133 | skipComment(); | |||
1134 | ||||
1135 | // Skip EOL. | |||
1136 | StringRef::iterator i = skip_b_break(Current); | |||
1137 | if (i == Current) | |||
1138 | break; | |||
1139 | Current = i; | |||
1140 | ++Line; | |||
1141 | Column = 0; | |||
1142 | // New lines may start a simple key. | |||
1143 | if (!FlowLevel) | |||
1144 | IsSimpleKeyAllowed = true; | |||
1145 | } | |||
1146 | } | |||
1147 | ||||
1148 | bool Scanner::scanStreamStart() { | |||
1149 | IsStartOfStream = false; | |||
1150 | ||||
1151 | EncodingInfo EI = getUnicodeEncoding(currentInput()); | |||
1152 | ||||
1153 | Token T; | |||
1154 | T.Kind = Token::TK_StreamStart; | |||
1155 | T.Range = StringRef(Current, EI.second); | |||
1156 | TokenQueue.push_back(T); | |||
1157 | Current += EI.second; | |||
1158 | return true; | |||
1159 | } | |||
1160 | ||||
1161 | bool Scanner::scanStreamEnd() { | |||
1162 | // Force an ending new line if one isn't present. | |||
1163 | if (Column != 0) { | |||
1164 | Column = 0; | |||
1165 | ++Line; | |||
1166 | } | |||
1167 | ||||
1168 | unrollIndent(-1); | |||
1169 | SimpleKeys.clear(); | |||
1170 | IsSimpleKeyAllowed = false; | |||
1171 | ||||
1172 | Token T; | |||
1173 | T.Kind = Token::TK_StreamEnd; | |||
1174 | T.Range = StringRef(Current, 0); | |||
1175 | TokenQueue.push_back(T); | |||
1176 | return true; | |||
1177 | } | |||
1178 | ||||
1179 | bool Scanner::scanDirective() { | |||
1180 | // Reset the indentation level. | |||
1181 | unrollIndent(-1); | |||
1182 | SimpleKeys.clear(); | |||
1183 | IsSimpleKeyAllowed = false; | |||
1184 | ||||
1185 | StringRef::iterator Start = Current; | |||
1186 | consume('%'); | |||
1187 | StringRef::iterator NameStart = Current; | |||
1188 | Current = skip_while(&Scanner::skip_ns_char, Current); | |||
1189 | StringRef Name(NameStart, Current - NameStart); | |||
1190 | Current = skip_while(&Scanner::skip_s_white, Current); | |||
1191 | ||||
1192 | Token T; | |||
1193 | if (Name == "YAML") { | |||
1194 | Current = skip_while(&Scanner::skip_ns_char, Current); | |||
1195 | T.Kind = Token::TK_VersionDirective; | |||
1196 | T.Range = StringRef(Start, Current - Start); | |||
1197 | TokenQueue.push_back(T); | |||
1198 | return true; | |||
1199 | } else if(Name == "TAG") { | |||
1200 | Current = skip_while(&Scanner::skip_ns_char, Current); | |||
1201 | Current = skip_while(&Scanner::skip_s_white, Current); | |||
1202 | Current = skip_while(&Scanner::skip_ns_char, Current); | |||
1203 | T.Kind = Token::TK_TagDirective; | |||
1204 | T.Range = StringRef(Start, Current - Start); | |||
1205 | TokenQueue.push_back(T); | |||
1206 | return true; | |||
1207 | } | |||
1208 | return false; | |||
1209 | } | |||
1210 | ||||
1211 | bool Scanner::scanDocumentIndicator(bool IsStart) { | |||
1212 | unrollIndent(-1); | |||
1213 | SimpleKeys.clear(); | |||
1214 | IsSimpleKeyAllowed = false; | |||
1215 | ||||
1216 | Token T; | |||
1217 | T.Kind = IsStart ? Token::TK_DocumentStart : Token::TK_DocumentEnd; | |||
1218 | T.Range = StringRef(Current, 3); | |||
1219 | skip(3); | |||
1220 | TokenQueue.push_back(T); | |||
1221 | return true; | |||
1222 | } | |||
1223 | ||||
1224 | bool Scanner::scanFlowCollectionStart(bool IsSequence) { | |||
1225 | Token T; | |||
1226 | T.Kind = IsSequence ? Token::TK_FlowSequenceStart | |||
1227 | : Token::TK_FlowMappingStart; | |||
1228 | T.Range = StringRef(Current, 1); | |||
1229 | skip(1); | |||
1230 | TokenQueue.push_back(T); | |||
1231 | ||||
1232 | // [ and { may begin a simple key. | |||
1233 | saveSimpleKeyCandidate(--TokenQueue.end(), Column - 1, false); | |||
1234 | ||||
1235 | // And may also be followed by a simple key. | |||
1236 | IsSimpleKeyAllowed = true; | |||
1237 | ++FlowLevel; | |||
1238 | return true; | |||
1239 | } | |||
1240 | ||||
1241 | bool Scanner::scanFlowCollectionEnd(bool IsSequence) { | |||
1242 | removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); | |||
1243 | IsSimpleKeyAllowed = false; | |||
1244 | Token T; | |||
1245 | T.Kind = IsSequence ? Token::TK_FlowSequenceEnd | |||
1246 | : Token::TK_FlowMappingEnd; | |||
1247 | T.Range = StringRef(Current, 1); | |||
1248 | skip(1); | |||
1249 | TokenQueue.push_back(T); | |||
1250 | if (FlowLevel) | |||
1251 | --FlowLevel; | |||
1252 | return true; | |||
1253 | } | |||
1254 | ||||
1255 | bool Scanner::scanFlowEntry() { | |||
1256 | removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); | |||
1257 | IsSimpleKeyAllowed = true; | |||
1258 | Token T; | |||
1259 | T.Kind = Token::TK_FlowEntry; | |||
1260 | T.Range = StringRef(Current, 1); | |||
1261 | skip(1); | |||
1262 | TokenQueue.push_back(T); | |||
1263 | return true; | |||
1264 | } | |||
1265 | ||||
1266 | bool Scanner::scanBlockEntry() { | |||
1267 | rollIndent(Column, Token::TK_BlockSequenceStart, TokenQueue.end()); | |||
1268 | removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); | |||
1269 | IsSimpleKeyAllowed = true; | |||
1270 | Token T; | |||
1271 | T.Kind = Token::TK_BlockEntry; | |||
1272 | T.Range = StringRef(Current, 1); | |||
1273 | skip(1); | |||
1274 | TokenQueue.push_back(T); | |||
1275 | return true; | |||
1276 | } | |||
1277 | ||||
1278 | bool Scanner::scanKey() { | |||
1279 | if (!FlowLevel) | |||
1280 | rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end()); | |||
1281 | ||||
1282 | removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); | |||
1283 | IsSimpleKeyAllowed = !FlowLevel; | |||
1284 | ||||
1285 | Token T; | |||
1286 | T.Kind = Token::TK_Key; | |||
1287 | T.Range = StringRef(Current, 1); | |||
1288 | skip(1); | |||
1289 | TokenQueue.push_back(T); | |||
1290 | return true; | |||
1291 | } | |||
1292 | ||||
1293 | bool Scanner::scanValue() { | |||
1294 | // If the previous token could have been a simple key, insert the key token | |||
1295 | // into the token queue. | |||
1296 | if (!SimpleKeys.empty()) { | |||
1297 | SimpleKey SK = SimpleKeys.pop_back_val(); | |||
1298 | Token T; | |||
1299 | T.Kind = Token::TK_Key; | |||
1300 | T.Range = SK.Tok->Range; | |||
1301 | TokenQueueT::iterator i, e; | |||
1302 | for (i = TokenQueue.begin(), e = TokenQueue.end(); i != e; ++i) { | |||
1303 | if (i == SK.Tok) | |||
1304 | break; | |||
1305 | } | |||
1306 | if (i == e) { | |||
1307 | Failed = true; | |||
1308 | return false; | |||
1309 | } | |||
1310 | i = TokenQueue.insert(i, T); | |||
1311 | ||||
1312 | // We may also need to add a Block-Mapping-Start token. | |||
1313 | rollIndent(SK.Column, Token::TK_BlockMappingStart, i); | |||
1314 | ||||
1315 | IsSimpleKeyAllowed = false; | |||
1316 | } else { | |||
1317 | if (!FlowLevel) | |||
1318 | rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end()); | |||
1319 | IsSimpleKeyAllowed = !FlowLevel; | |||
1320 | } | |||
1321 | ||||
1322 | Token T; | |||
1323 | T.Kind = Token::TK_Value; | |||
1324 | T.Range = StringRef(Current, 1); | |||
1325 | skip(1); | |||
1326 | TokenQueue.push_back(T); | |||
1327 | return true; | |||
1328 | } | |||
1329 | ||||
1330 | // Forbidding inlining improves performance by roughly 20%. | |||
1331 | // FIXME: Remove once llvm optimizes this to the faster version without hints. | |||
1332 | LLVM_ATTRIBUTE_NOINLINE__attribute__((noinline)) static bool | |||
1333 | wasEscaped(StringRef::iterator First, StringRef::iterator Position); | |||
1334 | ||||
1335 | // Returns whether a character at 'Position' was escaped with a leading '\'. | |||
1336 | // 'First' specifies the position of the first character in the string. | |||
1337 | static bool wasEscaped(StringRef::iterator First, | |||
1338 | StringRef::iterator Position) { | |||
1339 | assert(Position - 1 >= First)(static_cast <bool> (Position - 1 >= First) ? void ( 0) : __assert_fail ("Position - 1 >= First", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Support/YAMLParser.cpp" , 1339, __extension__ __PRETTY_FUNCTION__)); | |||
1340 | StringRef::iterator I = Position - 1; | |||
1341 | // We calculate the number of consecutive '\'s before the current position | |||
1342 | // by iterating backwards through our string. | |||
1343 | while (I >= First && *I == '\\') --I; | |||
1344 | // (Position - 1 - I) now contains the number of '\'s before the current | |||
1345 | // position. If it is odd, the character at 'Position' was escaped. | |||
1346 | return (Position - 1 - I) % 2 == 1; | |||
1347 | } | |||
1348 | ||||
1349 | bool Scanner::scanFlowScalar(bool IsDoubleQuoted) { | |||
1350 | StringRef::iterator Start = Current; | |||
1351 | unsigned ColStart = Column; | |||
1352 | if (IsDoubleQuoted) { | |||
1353 | do { | |||
1354 | ++Current; | |||
1355 | while (Current != End && *Current != '"') | |||
1356 | ++Current; | |||
1357 | // Repeat until the previous character was not a '\' or was an escaped | |||
1358 | // backslash. | |||
1359 | } while ( Current != End | |||
1360 | && *(Current - 1) == '\\' | |||
1361 | && wasEscaped(Start + 1, Current)); | |||
1362 | } else { | |||
1363 | skip(1); | |||
1364 | while (Current != End) { | |||
1365 | // Skip a ' followed by another '. | |||
1366 | if (Current + 1 < End && *Current == '\'' && *(Current + 1) == '\'') { | |||
1367 | skip(2); | |||
1368 | continue; | |||
1369 | } else if (*Current == '\'') | |||
1370 | break; | |||
1371 | StringRef::iterator i = skip_nb_char(Current); | |||
1372 | if (i == Current) { | |||
1373 | i = skip_b_break(Current); | |||
1374 | if (i == Current) | |||
1375 | break; | |||
1376 | Current = i; | |||
1377 | Column = 0; | |||
1378 | ++Line; | |||
1379 | } else { | |||
1380 | if (i == End) | |||
1381 | break; | |||
1382 | Current = i; | |||
1383 | ++Column; | |||
1384 | } | |||
1385 | } | |||
1386 | } | |||
1387 | ||||
1388 | if (Current == End) { | |||
1389 | setError("Expected quote at end of scalar", Current); | |||
1390 | return false; | |||
1391 | } | |||
1392 | ||||
1393 | skip(1); // Skip ending quote. | |||
1394 | Token T; | |||
1395 | T.Kind = Token::TK_Scalar; | |||
1396 | T.Range = StringRef(Start, Current - Start); | |||
1397 | TokenQueue.push_back(T); | |||
1398 | ||||
1399 | saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false); | |||
1400 | ||||
1401 | IsSimpleKeyAllowed = false; | |||
1402 | ||||
1403 | return true; | |||
1404 | } | |||
1405 | ||||
1406 | bool Scanner::scanPlainScalar() { | |||
1407 | StringRef::iterator Start = Current; | |||
1408 | unsigned ColStart = Column; | |||
1409 | unsigned LeadingBlanks = 0; | |||
1410 | assert(Indent >= -1 && "Indent must be >= -1 !")(static_cast <bool> (Indent >= -1 && "Indent must be >= -1 !" ) ? void (0) : __assert_fail ("Indent >= -1 && \"Indent must be >= -1 !\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Support/YAMLParser.cpp" , 1410, __extension__ __PRETTY_FUNCTION__)); | |||
1411 | unsigned indent = static_cast<unsigned>(Indent + 1); | |||
1412 | while (Current != End) { | |||
1413 | if (*Current == '#') | |||
1414 | break; | |||
1415 | ||||
1416 | while (Current != End && !isBlankOrBreak(Current)) { | |||
1417 | if (FlowLevel && *Current == ':' && | |||
1418 | (Current + 1 == End || | |||
1419 | !(isBlankOrBreak(Current + 1) || *(Current + 1) == ','))) { | |||
1420 | setError("Found unexpected ':' while scanning a plain scalar", Current); | |||
1421 | return false; | |||
1422 | } | |||
1423 | ||||
1424 | // Check for the end of the plain scalar. | |||
1425 | if ( (*Current == ':' && isBlankOrBreak(Current + 1)) | |||
1426 | || ( FlowLevel | |||
1427 | && (StringRef(Current, 1).find_first_of(",:?[]{}") | |||
1428 | != StringRef::npos))) | |||
1429 | break; | |||
1430 | ||||
1431 | StringRef::iterator i = skip_nb_char(Current); | |||
1432 | if (i == Current) | |||
1433 | break; | |||
1434 | Current = i; | |||
1435 | ++Column; | |||
1436 | } | |||
1437 | ||||
1438 | // Are we at the end? | |||
1439 | if (!isBlankOrBreak(Current)) | |||
1440 | break; | |||
1441 | ||||
1442 | // Eat blanks. | |||
1443 | StringRef::iterator Tmp = Current; | |||
1444 | while (isBlankOrBreak(Tmp)) { | |||
1445 | StringRef::iterator i = skip_s_white(Tmp); | |||
1446 | if (i != Tmp) { | |||
1447 | if (LeadingBlanks && (Column < indent) && *Tmp == '\t') { | |||
1448 | setError("Found invalid tab character in indentation", Tmp); | |||
1449 | return false; | |||
1450 | } | |||
1451 | Tmp = i; | |||
1452 | ++Column; | |||
1453 | } else { | |||
1454 | i = skip_b_break(Tmp); | |||
1455 | if (!LeadingBlanks) | |||
1456 | LeadingBlanks = 1; | |||
1457 | Tmp = i; | |||
1458 | Column = 0; | |||
1459 | ++Line; | |||
1460 | } | |||
1461 | } | |||
1462 | ||||
1463 | if (!FlowLevel && Column < indent) | |||
1464 | break; | |||
1465 | ||||
1466 | Current = Tmp; | |||
1467 | } | |||
1468 | if (Start == Current) { | |||
1469 | setError("Got empty plain scalar", Start); | |||
1470 | return false; | |||
1471 | } | |||
1472 | Token T; | |||
1473 | T.Kind = Token::TK_Scalar; | |||
1474 | T.Range = StringRef(Start, Current - Start); | |||
1475 | TokenQueue.push_back(T); | |||
1476 | ||||
1477 | // Plain scalars can be simple keys. | |||
1478 | saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false); | |||
1479 | ||||
1480 | IsSimpleKeyAllowed = false; | |||
1481 | ||||
1482 | return true; | |||
1483 | } | |||
1484 | ||||
1485 | bool Scanner::scanAliasOrAnchor(bool IsAlias) { | |||
1486 | StringRef::iterator Start = Current; | |||
1487 | unsigned ColStart = Column; | |||
1488 | skip(1); | |||
1489 | while (Current != End) { | |||
1490 | if ( *Current == '[' || *Current == ']' | |||
1491 | || *Current == '{' || *Current == '}' | |||
1492 | || *Current == ',' | |||
1493 | || *Current == ':') | |||
1494 | break; | |||
1495 | StringRef::iterator i = skip_ns_char(Current); | |||
1496 | if (i == Current) | |||
1497 | break; | |||
1498 | Current = i; | |||
1499 | ++Column; | |||
1500 | } | |||
1501 | ||||
1502 | if (Start + 1 == Current) { | |||
1503 | setError("Got empty alias or anchor", Start); | |||
1504 | return false; | |||
1505 | } | |||
1506 | ||||
1507 | Token T; | |||
1508 | T.Kind = IsAlias ? Token::TK_Alias : Token::TK_Anchor; | |||
1509 | T.Range = StringRef(Start, Current - Start); | |||
1510 | TokenQueue.push_back(T); | |||
1511 | ||||
1512 | // Alias and anchors can be simple keys. | |||
1513 | saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false); | |||
1514 | ||||
1515 | IsSimpleKeyAllowed = false; | |||
1516 | ||||
1517 | return true; | |||
1518 | } | |||
1519 | ||||
1520 | char Scanner::scanBlockChompingIndicator() { | |||
1521 | char Indicator = ' '; | |||
1522 | if (Current != End && (*Current == '+' || *Current == '-')) { | |||
1523 | Indicator = *Current; | |||
1524 | skip(1); | |||
1525 | } | |||
1526 | return Indicator; | |||
1527 | } | |||
1528 | ||||
1529 | /// Get the number of line breaks after chomping. | |||
1530 | /// | |||
1531 | /// Return the number of trailing line breaks to emit, depending on | |||
1532 | /// \p ChompingIndicator. | |||
1533 | static unsigned getChompedLineBreaks(char ChompingIndicator, | |||
1534 | unsigned LineBreaks, StringRef Str) { | |||
1535 | if (ChompingIndicator == '-') // Strip all line breaks. | |||
1536 | return 0; | |||
1537 | if (ChompingIndicator == '+') // Keep all line breaks. | |||
1538 | return LineBreaks; | |||
1539 | // Clip trailing lines. | |||
1540 | return Str.empty() ? 0 : 1; | |||
1541 | } | |||
1542 | ||||
1543 | unsigned Scanner::scanBlockIndentationIndicator() { | |||
1544 | unsigned Indent = 0; | |||
1545 | if (Current != End && (*Current >= '1' && *Current <= '9')) { | |||
1546 | Indent = unsigned(*Current - '0'); | |||
1547 | skip(1); | |||
1548 | } | |||
1549 | return Indent; | |||
1550 | } | |||
1551 | ||||
1552 | bool Scanner::scanBlockScalarHeader(char &ChompingIndicator, | |||
1553 | unsigned &IndentIndicator, bool &IsDone) { | |||
1554 | auto Start = Current; | |||
1555 | ||||
1556 | ChompingIndicator = scanBlockChompingIndicator(); | |||
1557 | IndentIndicator = scanBlockIndentationIndicator(); | |||
1558 | // Check for the chomping indicator once again. | |||
1559 | if (ChompingIndicator == ' ') | |||
1560 | ChompingIndicator = scanBlockChompingIndicator(); | |||
1561 | Current = skip_while(&Scanner::skip_s_white, Current); | |||
1562 | skipComment(); | |||
1563 | ||||
1564 | if (Current == End) { // EOF, we have an empty scalar. | |||
1565 | Token T; | |||
1566 | T.Kind = Token::TK_BlockScalar; | |||
1567 | T.Range = StringRef(Start, Current - Start); | |||
1568 | TokenQueue.push_back(T); | |||
1569 | IsDone = true; | |||
1570 | return true; | |||
1571 | } | |||
1572 | ||||
1573 | if (!consumeLineBreakIfPresent()) { | |||
1574 | setError("Expected a line break after block scalar header", Current); | |||
1575 | return false; | |||
1576 | } | |||
1577 | return true; | |||
1578 | } | |||
1579 | ||||
1580 | bool Scanner::findBlockScalarIndent(unsigned &BlockIndent, | |||
1581 | unsigned BlockExitIndent, | |||
1582 | unsigned &LineBreaks, bool &IsDone) { | |||
1583 | unsigned MaxAllSpaceLineCharacters = 0; | |||
1584 | StringRef::iterator LongestAllSpaceLine; | |||
| ||||
1585 | ||||
1586 | while (true) { | |||
1587 | advanceWhile(&Scanner::skip_s_space); | |||
1588 | if (skip_nb_char(Current) != Current) { | |||
1589 | // This line isn't empty, so try and find the indentation. | |||
1590 | if (Column <= BlockExitIndent) { // End of the block literal. | |||
1591 | IsDone = true; | |||
1592 | return true; | |||
1593 | } | |||
1594 | // We found the block's indentation. | |||
1595 | BlockIndent = Column; | |||
1596 | if (MaxAllSpaceLineCharacters > BlockIndent) { | |||
1597 | setError( | |||
| ||||
1598 | "Leading all-spaces line must be smaller than the block indent", | |||
1599 | LongestAllSpaceLine); | |||
1600 | return false; | |||
1601 | } | |||
1602 | return true; | |||
1603 | } | |||
1604 | if (skip_b_break(Current) != Current && | |||
1605 | Column > MaxAllSpaceLineCharacters) { | |||
1606 | // Record the longest all-space line in case it's longer than the | |||
1607 | // discovered block indent. | |||
1608 | MaxAllSpaceLineCharacters = Column; | |||
1609 | LongestAllSpaceLine = Current; | |||
1610 | } | |||
1611 | ||||
1612 | // Check for EOF. | |||
1613 | if (Current == End) { | |||
1614 | IsDone = true; | |||
1615 | return true; | |||
1616 | } | |||
1617 | ||||
1618 | if (!consumeLineBreakIfPresent()) { | |||
1619 | IsDone = true; | |||
1620 | return true; | |||
1621 | } | |||
1622 | ++LineBreaks; | |||
1623 | } | |||
1624 | return true; | |||
1625 | } | |||
1626 | ||||
1627 | bool Scanner::scanBlockScalarIndent(unsigned BlockIndent, | |||
1628 | unsigned BlockExitIndent, bool &IsDone) { | |||
1629 | // Skip the indentation. | |||
1630 | while (Column < BlockIndent) { | |||
1631 | auto I = skip_s_space(Current); | |||
1632 | if (I == Current) | |||
1633 | break; | |||
1634 | Current = I; | |||
1635 | ++Column; | |||
1636 | } | |||
1637 | ||||
1638 | if (skip_nb_char(Current) == Current) | |||
1639 | return true; | |||
1640 | ||||
1641 | if (Column <= BlockExitIndent) { // End of the block literal. | |||
1642 | IsDone = true; | |||
1643 | return true; | |||
1644 | } | |||
1645 | ||||
1646 | if (Column < BlockIndent) { | |||
1647 | if (Current != End && *Current == '#') { // Trailing comment. | |||
1648 | IsDone = true; | |||
1649 | return true; | |||
1650 | } | |||
1651 | setError("A text line is less indented than the block scalar", Current); | |||
1652 | return false; | |||
1653 | } | |||
1654 | return true; // A normal text line. | |||
1655 | } | |||
1656 | ||||
1657 | bool Scanner::scanBlockScalar(bool IsLiteral) { | |||
1658 | // Eat '|' or '>' | |||
1659 | assert(*Current == '|' || *Current == '>')(static_cast <bool> (*Current == '|' || *Current == '>' ) ? void (0) : __assert_fail ("*Current == '|' || *Current == '>'" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Support/YAMLParser.cpp" , 1659, __extension__ __PRETTY_FUNCTION__)); | |||
1660 | skip(1); | |||
1661 | ||||
1662 | char ChompingIndicator; | |||
1663 | unsigned BlockIndent; | |||
1664 | bool IsDone = false; | |||
1665 | if (!scanBlockScalarHeader(ChompingIndicator, BlockIndent, IsDone)) | |||
1666 | return false; | |||
1667 | if (IsDone) | |||
1668 | return true; | |||
1669 | ||||
1670 | auto Start = Current; | |||
1671 | unsigned BlockExitIndent = Indent < 0 ? 0 : (unsigned)Indent; | |||
1672 | unsigned LineBreaks = 0; | |||
1673 | if (BlockIndent == 0) { | |||
1674 | if (!findBlockScalarIndent(BlockIndent, BlockExitIndent, LineBreaks, | |||
1675 | IsDone)) | |||
1676 | return false; | |||
1677 | } | |||
1678 | ||||
1679 | // Scan the block's scalars body. | |||
1680 | SmallString<256> Str; | |||
1681 | while (!IsDone) { | |||
1682 | if (!scanBlockScalarIndent(BlockIndent, BlockExitIndent, IsDone)) | |||
1683 | return false; | |||
1684 | if (IsDone) | |||
1685 | break; | |||
1686 | ||||
1687 | // Parse the current line. | |||
1688 | auto LineStart = Current; | |||
1689 | advanceWhile(&Scanner::skip_nb_char); | |||
1690 | if (LineStart != Current) { | |||
1691 | Str.append(LineBreaks, '\n'); | |||
1692 | Str.append(StringRef(LineStart, Current - LineStart)); | |||
1693 | LineBreaks = 0; | |||
1694 | } | |||
1695 | ||||
1696 | // Check for EOF. | |||
1697 | if (Current == End) | |||
1698 | break; | |||
1699 | ||||
1700 | if (!consumeLineBreakIfPresent()) | |||
1701 | break; | |||
1702 | ++LineBreaks; | |||
1703 | } | |||
1704 | ||||
1705 | if (Current == End && !LineBreaks) | |||
1706 | // Ensure that there is at least one line break before the end of file. | |||
1707 | LineBreaks = 1; | |||
1708 | Str.append(getChompedLineBreaks(ChompingIndicator, LineBreaks, Str), '\n'); | |||
1709 | ||||
1710 | // New lines may start a simple key. | |||
1711 | if (!FlowLevel) | |||
1712 | IsSimpleKeyAllowed = true; | |||
1713 | ||||
1714 | Token T; | |||
1715 | T.Kind = Token::TK_BlockScalar; | |||
1716 | T.Range = StringRef(Start, Current - Start); | |||
1717 | T.Value = std::string(Str); | |||
1718 | TokenQueue.push_back(T); | |||
1719 | return true; | |||
1720 | } | |||
1721 | ||||
1722 | bool Scanner::scanTag() { | |||
1723 | StringRef::iterator Start = Current; | |||
1724 | unsigned ColStart = Column; | |||
1725 | skip(1); // Eat !. | |||
1726 | if (Current == End || isBlankOrBreak(Current)); // An empty tag. | |||
1727 | else if (*Current == '<') { | |||
1728 | skip(1); | |||
1729 | scan_ns_uri_char(); | |||
1730 | if (!consume('>')) | |||
1731 | return false; | |||
1732 | } else { | |||
1733 | // FIXME: Actually parse the c-ns-shorthand-tag rule. | |||
1734 | Current = skip_while(&Scanner::skip_ns_char, Current); | |||
1735 | } | |||
1736 | ||||
1737 | Token T; | |||
1738 | T.Kind = Token::TK_Tag; | |||
1739 | T.Range = StringRef(Start, Current - Start); | |||
1740 | TokenQueue.push_back(T); | |||
1741 | ||||
1742 | // Tags can be simple keys. | |||
1743 | saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false); | |||
1744 | ||||
1745 | IsSimpleKeyAllowed = false; | |||
1746 | ||||
1747 | return true; | |||
1748 | } | |||
1749 | ||||
1750 | bool Scanner::fetchMoreTokens() { | |||
1751 | if (IsStartOfStream) | |||
1752 | return scanStreamStart(); | |||
1753 | ||||
1754 | scanToNextToken(); | |||
1755 | ||||
1756 | if (Current == End) | |||
1757 | return scanStreamEnd(); | |||
1758 | ||||
1759 | removeStaleSimpleKeyCandidates(); | |||
1760 | ||||
1761 | unrollIndent(Column); | |||
1762 | ||||
1763 | if (Column == 0 && *Current == '%') | |||
1764 | return scanDirective(); | |||
1765 | ||||
1766 | if (Column == 0 && Current + 4 <= End | |||
1767 | && *Current == '-' | |||
1768 | && *(Current + 1) == '-' | |||
1769 | && *(Current + 2) == '-' | |||
1770 | && (Current + 3 == End || isBlankOrBreak(Current + 3))) | |||
1771 | return scanDocumentIndicator(true); | |||
1772 | ||||
1773 | if (Column == 0 && Current + 4 <= End | |||
1774 | && *Current == '.' | |||
1775 | && *(Current + 1) == '.' | |||
1776 | && *(Current + 2) == '.' | |||
1777 | && (Current + 3 == End || isBlankOrBreak(Current + 3))) | |||
1778 | return scanDocumentIndicator(false); | |||
1779 | ||||
1780 | if (*Current == '[') | |||
1781 | return scanFlowCollectionStart(true); | |||
1782 | ||||
1783 | if (*Current == '{') | |||
1784 | return scanFlowCollectionStart(false); | |||
1785 | ||||
1786 | if (*Current == ']') | |||
1787 | return scanFlowCollectionEnd(true); | |||
1788 | ||||
1789 | if (*Current == '}') | |||
1790 | return scanFlowCollectionEnd(false); | |||
1791 | ||||
1792 | if (*Current == ',') | |||
1793 | return scanFlowEntry(); | |||
1794 | ||||
1795 | if (*Current == '-' && isBlankOrBreak(Current + 1)) | |||
1796 | return scanBlockEntry(); | |||
1797 | ||||
1798 | if (*Current == '?' && (FlowLevel || isBlankOrBreak(Current + 1))) | |||
1799 | return scanKey(); | |||
1800 | ||||
1801 | if (*Current == ':' && (FlowLevel || isBlankOrBreak(Current + 1))) | |||
1802 | return scanValue(); | |||
1803 | ||||
1804 | if (*Current == '*') | |||
1805 | return scanAliasOrAnchor(true); | |||
1806 | ||||
1807 | if (*Current == '&') | |||
1808 | return scanAliasOrAnchor(false); | |||
1809 | ||||
1810 | if (*Current == '!') | |||
1811 | return scanTag(); | |||
1812 | ||||
1813 | if (*Current == '|' && !FlowLevel) | |||
1814 | return scanBlockScalar(true); | |||
1815 | ||||
1816 | if (*Current == '>' && !FlowLevel) | |||
1817 | return scanBlockScalar(false); | |||
1818 | ||||
1819 | if (*Current == '\'') | |||
1820 | return scanFlowScalar(false); | |||
1821 | ||||
1822 | if (*Current == '"') | |||
1823 | return scanFlowScalar(true); | |||
1824 | ||||
1825 | // Get a plain scalar. | |||
1826 | StringRef FirstChar(Current, 1); | |||
1827 | if (!(isBlankOrBreak(Current) | |||
1828 | || FirstChar.find_first_of("-?:,[]{}#&*!|>'\"%@`") != StringRef::npos) | |||
1829 | || (*Current == '-' && !isBlankOrBreak(Current + 1)) | |||
1830 | || (!FlowLevel && (*Current == '?' || *Current == ':') | |||
1831 | && isBlankOrBreak(Current + 1)) | |||
1832 | || (!FlowLevel && *Current == ':' | |||
1833 | && Current + 2 < End | |||
1834 | && *(Current + 1) == ':' | |||
1835 | && !isBlankOrBreak(Current + 2))) | |||
1836 | return scanPlainScalar(); | |||
1837 | ||||
1838 | setError("Unrecognized character while tokenizing.", Current); | |||
1839 | return false; | |||
1840 | } | |||
1841 | ||||
1842 | Stream::Stream(StringRef Input, SourceMgr &SM, bool ShowColors, | |||
1843 | std::error_code *EC) | |||
1844 | : scanner(new Scanner(Input, SM, ShowColors, EC)), CurrentDoc() {} | |||
1845 | ||||
1846 | Stream::Stream(MemoryBufferRef InputBuffer, SourceMgr &SM, bool ShowColors, | |||
1847 | std::error_code *EC) | |||
1848 | : scanner(new Scanner(InputBuffer, SM, ShowColors, EC)), CurrentDoc() {} | |||
1849 | ||||
1850 | Stream::~Stream() = default; | |||
1851 | ||||
1852 | bool Stream::failed() { return scanner->failed(); } | |||
1853 | ||||
1854 | void Stream::printError(Node *N, const Twine &Msg, SourceMgr::DiagKind Kind) { | |||
1855 | printError(N ? N->getSourceRange() : SMRange(), Msg, Kind); | |||
1856 | } | |||
1857 | ||||
1858 | void Stream::printError(const SMRange &Range, const Twine &Msg, | |||
1859 | SourceMgr::DiagKind Kind) { | |||
1860 | scanner->printError(Range.Start, Kind, Msg, Range); | |||
1861 | } | |||
1862 | ||||
1863 | document_iterator Stream::begin() { | |||
1864 | if (CurrentDoc) | |||
1865 | report_fatal_error("Can only iterate over the stream once"); | |||
1866 | ||||
1867 | // Skip Stream-Start. | |||
1868 | scanner->getNext(); | |||
1869 | ||||
1870 | CurrentDoc.reset(new Document(*this)); | |||
1871 | return document_iterator(CurrentDoc); | |||
1872 | } | |||
1873 | ||||
1874 | document_iterator Stream::end() { | |||
1875 | return document_iterator(); | |||
1876 | } | |||
1877 | ||||
1878 | void Stream::skip() { | |||
1879 | for (document_iterator i = begin(), e = end(); i != e; ++i) | |||
1880 | i->skip(); | |||
1881 | } | |||
1882 | ||||
1883 | Node::Node(unsigned int Type, std::unique_ptr<Document> &D, StringRef A, | |||
1884 | StringRef T) | |||
1885 | : Doc(D), TypeID(Type), Anchor(A), Tag(T) { | |||
1886 | SMLoc Start = SMLoc::getFromPointer(peekNext().Range.begin()); | |||
1887 | SourceRange = SMRange(Start, Start); | |||
1888 | } | |||
1889 | ||||
1890 | std::string Node::getVerbatimTag() const { | |||
1891 | StringRef Raw = getRawTag(); | |||
1892 | if (!Raw.empty() && Raw != "!") { | |||
1893 | std::string Ret; | |||
1894 | if (Raw.find_last_of('!') == 0) { | |||
1895 | Ret = std::string(Doc->getTagMap().find("!")->second); | |||
1896 | Ret += Raw.substr(1); | |||
1897 | return Ret; | |||
1898 | } else if (Raw.startswith("!!")) { | |||
1899 | Ret = std::string(Doc->getTagMap().find("!!")->second); | |||
1900 | Ret += Raw.substr(2); | |||
1901 | return Ret; | |||
1902 | } else { | |||
1903 | StringRef TagHandle = Raw.substr(0, Raw.find_last_of('!') + 1); | |||
1904 | std::map<StringRef, StringRef>::const_iterator It = | |||
1905 | Doc->getTagMap().find(TagHandle); | |||
1906 | if (It != Doc->getTagMap().end()) | |||
1907 | Ret = std::string(It->second); | |||
1908 | else { | |||
1909 | Token T; | |||
1910 | T.Kind = Token::TK_Tag; | |||
1911 | T.Range = TagHandle; | |||
1912 | setError(Twine("Unknown tag handle ") + TagHandle, T); | |||
1913 | } | |||
1914 | Ret += Raw.substr(Raw.find_last_of('!') + 1); | |||
1915 | return Ret; | |||
1916 | } | |||
1917 | } | |||
1918 | ||||
1919 | switch (getType()) { | |||
1920 | case NK_Null: | |||
1921 | return "tag:yaml.org,2002:null"; | |||
1922 | case NK_Scalar: | |||
1923 | case NK_BlockScalar: | |||
1924 | // TODO: Tag resolution. | |||
1925 | return "tag:yaml.org,2002:str"; | |||
1926 | case NK_Mapping: | |||
1927 | return "tag:yaml.org,2002:map"; | |||
1928 | case NK_Sequence: | |||
1929 | return "tag:yaml.org,2002:seq"; | |||
1930 | } | |||
1931 | ||||
1932 | return ""; | |||
1933 | } | |||
1934 | ||||
1935 | Token &Node::peekNext() { | |||
1936 | return Doc->peekNext(); | |||
1937 | } | |||
1938 | ||||
1939 | Token Node::getNext() { | |||
1940 | return Doc->getNext(); | |||
1941 | } | |||
1942 | ||||
1943 | Node *Node::parseBlockNode() { | |||
1944 | return Doc->parseBlockNode(); | |||
1945 | } | |||
1946 | ||||
1947 | BumpPtrAllocator &Node::getAllocator() { | |||
1948 | return Doc->NodeAllocator; | |||
1949 | } | |||
1950 | ||||
1951 | void Node::setError(const Twine &Msg, Token &Tok) const { | |||
1952 | Doc->setError(Msg, Tok); | |||
1953 | } | |||
1954 | ||||
1955 | bool Node::failed() const { | |||
1956 | return Doc->failed(); | |||
1957 | } | |||
1958 | ||||
1959 | StringRef ScalarNode::getValue(SmallVectorImpl<char> &Storage) const { | |||
1960 | // TODO: Handle newlines properly. We need to remove leading whitespace. | |||
1961 | if (Value[0] == '"') { // Double quoted. | |||
1962 | // Pull off the leading and trailing "s. | |||
1963 | StringRef UnquotedValue = Value.substr(1, Value.size() - 2); | |||
1964 | // Search for characters that would require unescaping the value. | |||
1965 | StringRef::size_type i = UnquotedValue.find_first_of("\\\r\n"); | |||
1966 | if (i != StringRef::npos) | |||
1967 | return unescapeDoubleQuoted(UnquotedValue, i, Storage); | |||
1968 | return UnquotedValue; | |||
1969 | } else if (Value[0] == '\'') { // Single quoted. | |||
1970 | // Pull off the leading and trailing 's. | |||
1971 | StringRef UnquotedValue = Value.substr(1, Value.size() - 2); | |||
1972 | StringRef::size_type i = UnquotedValue.find('\''); | |||
1973 | if (i != StringRef::npos) { | |||
1974 | // We're going to need Storage. | |||
1975 | Storage.clear(); | |||
1976 | Storage.reserve(UnquotedValue.size()); | |||
1977 | for (; i != StringRef::npos; i = UnquotedValue.find('\'')) { | |||
1978 | StringRef Valid(UnquotedValue.begin(), i); | |||
1979 | llvm::append_range(Storage, Valid); | |||
1980 | Storage.push_back('\''); | |||
1981 | UnquotedValue = UnquotedValue.substr(i + 2); | |||
1982 | } | |||
1983 | llvm::append_range(Storage, UnquotedValue); | |||
1984 | return StringRef(Storage.begin(), Storage.size()); | |||
1985 | } | |||
1986 | return UnquotedValue; | |||
1987 | } | |||
1988 | // Plain or block. | |||
1989 | return Value.rtrim(' '); | |||
1990 | } | |||
1991 | ||||
1992 | StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue | |||
1993 | , StringRef::size_type i | |||
1994 | , SmallVectorImpl<char> &Storage) | |||
1995 | const { | |||
1996 | // Use Storage to build proper value. | |||
1997 | Storage.clear(); | |||
1998 | Storage.reserve(UnquotedValue.size()); | |||
1999 | for (; i != StringRef::npos; i = UnquotedValue.find_first_of("\\\r\n")) { | |||
2000 | // Insert all previous chars into Storage. | |||
2001 | StringRef Valid(UnquotedValue.begin(), i); | |||
2002 | llvm::append_range(Storage, Valid); | |||
2003 | // Chop off inserted chars. | |||
2004 | UnquotedValue = UnquotedValue.substr(i); | |||
2005 | ||||
2006 | assert(!UnquotedValue.empty() && "Can't be empty!")(static_cast <bool> (!UnquotedValue.empty() && "Can't be empty!" ) ? void (0) : __assert_fail ("!UnquotedValue.empty() && \"Can't be empty!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Support/YAMLParser.cpp" , 2006, __extension__ __PRETTY_FUNCTION__)); | |||
2007 | ||||
2008 | // Parse escape or line break. | |||
2009 | switch (UnquotedValue[0]) { | |||
2010 | case '\r': | |||
2011 | case '\n': | |||
2012 | Storage.push_back('\n'); | |||
2013 | if ( UnquotedValue.size() > 1 | |||
2014 | && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n')) | |||
2015 | UnquotedValue = UnquotedValue.substr(1); | |||
2016 | UnquotedValue = UnquotedValue.substr(1); | |||
2017 | break; | |||
2018 | default: | |||
2019 | if (UnquotedValue.size() == 1) { | |||
2020 | Token T; | |||
2021 | T.Range = StringRef(UnquotedValue.begin(), 1); | |||
2022 | setError("Unrecognized escape code", T); | |||
2023 | return ""; | |||
2024 | } | |||
2025 | UnquotedValue = UnquotedValue.substr(1); | |||
2026 | switch (UnquotedValue[0]) { | |||
2027 | default: { | |||
2028 | Token T; | |||
2029 | T.Range = StringRef(UnquotedValue.begin(), 1); | |||
2030 | setError("Unrecognized escape code", T); | |||
2031 | return ""; | |||
2032 | } | |||
2033 | case '\r': | |||
2034 | case '\n': | |||
2035 | // Remove the new line. | |||
2036 | if ( UnquotedValue.size() > 1 | |||
2037 | && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n')) | |||
2038 | UnquotedValue = UnquotedValue.substr(1); | |||
2039 | // If this was just a single byte newline, it will get skipped | |||
2040 | // below. | |||
2041 | break; | |||
2042 | case '0': | |||
2043 | Storage.push_back(0x00); | |||
2044 | break; | |||
2045 | case 'a': | |||
2046 | Storage.push_back(0x07); | |||
2047 | break; | |||
2048 | case 'b': | |||
2049 | Storage.push_back(0x08); | |||
2050 | break; | |||
2051 | case 't': | |||
2052 | case 0x09: | |||
2053 | Storage.push_back(0x09); | |||
2054 | break; | |||
2055 | case 'n': | |||
2056 | Storage.push_back(0x0A); | |||
2057 | break; | |||
2058 | case 'v': | |||
2059 | Storage.push_back(0x0B); | |||
2060 | break; | |||
2061 | case 'f': | |||
2062 | Storage.push_back(0x0C); | |||
2063 | break; | |||
2064 | case 'r': | |||
2065 | Storage.push_back(0x0D); | |||
2066 | break; | |||
2067 | case 'e': | |||
2068 | Storage.push_back(0x1B); | |||
2069 | break; | |||
2070 | case ' ': | |||
2071 | Storage.push_back(0x20); | |||
2072 | break; | |||
2073 | case '"': | |||
2074 | Storage.push_back(0x22); | |||
2075 | break; | |||
2076 | case '/': | |||
2077 | Storage.push_back(0x2F); | |||
2078 | break; | |||
2079 | case '\\': | |||
2080 | Storage.push_back(0x5C); | |||
2081 | break; | |||
2082 | case 'N': | |||
2083 | encodeUTF8(0x85, Storage); | |||
2084 | break; | |||
2085 | case '_': | |||
2086 | encodeUTF8(0xA0, Storage); | |||
2087 | break; | |||
2088 | case 'L': | |||
2089 | encodeUTF8(0x2028, Storage); | |||
2090 | break; | |||
2091 | case 'P': | |||
2092 | encodeUTF8(0x2029, Storage); | |||
2093 | break; | |||
2094 | case 'x': { | |||
2095 | if (UnquotedValue.size() < 3) | |||
2096 | // TODO: Report error. | |||
2097 | break; | |||
2098 | unsigned int UnicodeScalarValue; | |||
2099 | if (UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue)) | |||
2100 | // TODO: Report error. | |||
2101 | UnicodeScalarValue = 0xFFFD; | |||
2102 | encodeUTF8(UnicodeScalarValue, Storage); | |||
2103 | UnquotedValue = UnquotedValue.substr(2); | |||
2104 | break; | |||
2105 | } | |||
2106 | case 'u': { | |||
2107 | if (UnquotedValue.size() < 5) | |||
2108 | // TODO: Report error. | |||
2109 | break; | |||
2110 | unsigned int UnicodeScalarValue; | |||
2111 | if (UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue)) | |||
2112 | // TODO: Report error. | |||
2113 | UnicodeScalarValue = 0xFFFD; | |||
2114 | encodeUTF8(UnicodeScalarValue, Storage); | |||
2115 | UnquotedValue = UnquotedValue.substr(4); | |||
2116 | break; | |||
2117 | } | |||
2118 | case 'U': { | |||
2119 | if (UnquotedValue.size() < 9) | |||
2120 | // TODO: Report error. | |||
2121 | break; | |||
2122 | unsigned int UnicodeScalarValue; | |||
2123 | if (UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue)) | |||
2124 | // TODO: Report error. | |||
2125 | UnicodeScalarValue = 0xFFFD; | |||
2126 | encodeUTF8(UnicodeScalarValue, Storage); | |||
2127 | UnquotedValue = UnquotedValue.substr(8); | |||
2128 | break; | |||
2129 | } | |||
2130 | } | |||
2131 | UnquotedValue = UnquotedValue.substr(1); | |||
2132 | } | |||
2133 | } | |||
2134 | llvm::append_range(Storage, UnquotedValue); | |||
2135 | return StringRef(Storage.begin(), Storage.size()); | |||
2136 | } | |||
2137 | ||||
2138 | Node *KeyValueNode::getKey() { | |||
2139 | if (Key) | |||
2140 | return Key; | |||
2141 | // Handle implicit null keys. | |||
2142 | { | |||
2143 | Token &t = peekNext(); | |||
2144 | if ( t.Kind == Token::TK_BlockEnd | |||
2145 | || t.Kind == Token::TK_Value | |||
2146 | || t.Kind == Token::TK_Error) { | |||
2147 | return Key = new (getAllocator()) NullNode(Doc); | |||
2148 | } | |||
2149 | if (t.Kind == Token::TK_Key) | |||
2150 | getNext(); // skip TK_Key. | |||
2151 | } | |||
2152 | ||||
2153 | // Handle explicit null keys. | |||
2154 | Token &t = peekNext(); | |||
2155 | if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Value) { | |||
2156 | return Key = new (getAllocator()) NullNode(Doc); | |||
2157 | } | |||
2158 | ||||
2159 | // We've got a normal key. | |||
2160 | return Key = parseBlockNode(); | |||
2161 | } | |||
2162 | ||||
2163 | Node *KeyValueNode::getValue() { | |||
2164 | if (Value) | |||
2165 | return Value; | |||
2166 | ||||
2167 | if (Node* Key = getKey()) | |||
2168 | Key->skip(); | |||
2169 | else { | |||
2170 | setError("Null key in Key Value.", peekNext()); | |||
2171 | return Value = new (getAllocator()) NullNode(Doc); | |||
2172 | } | |||
2173 | ||||
2174 | if (failed()) | |||
2175 | return Value = new (getAllocator()) NullNode(Doc); | |||
2176 | ||||
2177 | // Handle implicit null values. | |||
2178 | { | |||
2179 | Token &t = peekNext(); | |||
2180 | if ( t.Kind == Token::TK_BlockEnd | |||
2181 | || t.Kind == Token::TK_FlowMappingEnd | |||
2182 | || t.Kind == Token::TK_Key | |||
2183 | || t.Kind == Token::TK_FlowEntry | |||
2184 | || t.Kind == Token::TK_Error) { | |||
2185 | return Value = new (getAllocator()) NullNode(Doc); | |||
2186 | } | |||
2187 | ||||
2188 | if (t.Kind != Token::TK_Value) { | |||
2189 | setError("Unexpected token in Key Value.", t); | |||
2190 | return Value = new (getAllocator()) NullNode(Doc); | |||
2191 | } | |||
2192 | getNext(); // skip TK_Value. | |||
2193 | } | |||
2194 | ||||
2195 | // Handle explicit null values. | |||
2196 | Token &t = peekNext(); | |||
2197 | if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Key) { | |||
2198 | return Value = new (getAllocator()) NullNode(Doc); | |||
2199 | } | |||
2200 | ||||
2201 | // We got a normal value. | |||
2202 | return Value = parseBlockNode(); | |||
2203 | } | |||
2204 | ||||
2205 | void MappingNode::increment() { | |||
2206 | if (failed()) { | |||
2207 | IsAtEnd = true; | |||
2208 | CurrentEntry = nullptr; | |||
2209 | return; | |||
2210 | } | |||
2211 | if (CurrentEntry) { | |||
2212 | CurrentEntry->skip(); | |||
2213 | if (Type == MT_Inline) { | |||
2214 | IsAtEnd = true; | |||
2215 | CurrentEntry = nullptr; | |||
2216 | return; | |||
2217 | } | |||
2218 | } | |||
2219 | Token T = peekNext(); | |||
2220 | if (T.Kind == Token::TK_Key || T.Kind == Token::TK_Scalar) { | |||
2221 | // KeyValueNode eats the TK_Key. That way it can detect null keys. | |||
2222 | CurrentEntry = new (getAllocator()) KeyValueNode(Doc); | |||
2223 | } else if (Type == MT_Block) { | |||
2224 | switch (T.Kind) { | |||
2225 | case Token::TK_BlockEnd: | |||
2226 | getNext(); | |||
2227 | IsAtEnd = true; | |||
2228 | CurrentEntry = nullptr; | |||
2229 | break; | |||
2230 | default: | |||
2231 | setError("Unexpected token. Expected Key or Block End", T); | |||
2232 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | |||
2233 | case Token::TK_Error: | |||
2234 | IsAtEnd = true; | |||
2235 | CurrentEntry = nullptr; | |||
2236 | } | |||
2237 | } else { | |||
2238 | switch (T.Kind) { | |||
2239 | case Token::TK_FlowEntry: | |||
2240 | // Eat the flow entry and recurse. | |||
2241 | getNext(); | |||
2242 | return increment(); | |||
2243 | case Token::TK_FlowMappingEnd: | |||
2244 | getNext(); | |||
2245 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | |||
2246 | case Token::TK_Error: | |||
2247 | // Set this to end iterator. | |||
2248 | IsAtEnd = true; | |||
2249 | CurrentEntry = nullptr; | |||
2250 | break; | |||
2251 | default: | |||
2252 | setError( "Unexpected token. Expected Key, Flow Entry, or Flow " | |||
2253 | "Mapping End." | |||
2254 | , T); | |||
2255 | IsAtEnd = true; | |||
2256 | CurrentEntry = nullptr; | |||
2257 | } | |||
2258 | } | |||
2259 | } | |||
2260 | ||||
2261 | void SequenceNode::increment() { | |||
2262 | if (failed()) { | |||
2263 | IsAtEnd = true; | |||
2264 | CurrentEntry = nullptr; | |||
2265 | return; | |||
2266 | } | |||
2267 | if (CurrentEntry) | |||
2268 | CurrentEntry->skip(); | |||
2269 | Token T = peekNext(); | |||
2270 | if (SeqType == ST_Block) { | |||
2271 | switch (T.Kind) { | |||
2272 | case Token::TK_BlockEntry: | |||
2273 | getNext(); | |||
2274 | CurrentEntry = parseBlockNode(); | |||
2275 | if (!CurrentEntry) { // An error occurred. | |||
2276 | IsAtEnd = true; | |||
2277 | CurrentEntry = nullptr; | |||
2278 | } | |||
2279 | break; | |||
2280 | case Token::TK_BlockEnd: | |||
2281 | getNext(); | |||
2282 | IsAtEnd = true; | |||
2283 | CurrentEntry = nullptr; | |||
2284 | break; | |||
2285 | default: | |||
2286 | setError( "Unexpected token. Expected Block Entry or Block End." | |||
2287 | , T); | |||
2288 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | |||
2289 | case Token::TK_Error: | |||
2290 | IsAtEnd = true; | |||
2291 | CurrentEntry = nullptr; | |||
2292 | } | |||
2293 | } else if (SeqType == ST_Indentless) { | |||
2294 | switch (T.Kind) { | |||
2295 | case Token::TK_BlockEntry: | |||
2296 | getNext(); | |||
2297 | CurrentEntry = parseBlockNode(); | |||
2298 | if (!CurrentEntry) { // An error occurred. | |||
2299 | IsAtEnd = true; | |||
2300 | CurrentEntry = nullptr; | |||
2301 | } | |||
2302 | break; | |||
2303 | default: | |||
2304 | case Token::TK_Error: | |||
2305 | IsAtEnd = true; | |||
2306 | CurrentEntry = nullptr; | |||
2307 | } | |||
2308 | } else if (SeqType == ST_Flow) { | |||
2309 | switch (T.Kind) { | |||
2310 | case Token::TK_FlowEntry: | |||
2311 | // Eat the flow entry and recurse. | |||
2312 | getNext(); | |||
2313 | WasPreviousTokenFlowEntry = true; | |||
2314 | return increment(); | |||
2315 | case Token::TK_FlowSequenceEnd: | |||
2316 | getNext(); | |||
2317 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | |||
2318 | case Token::TK_Error: | |||
2319 | // Set this to end iterator. | |||
2320 | IsAtEnd = true; | |||
2321 | CurrentEntry = nullptr; | |||
2322 | break; | |||
2323 | case Token::TK_StreamEnd: | |||
2324 | case Token::TK_DocumentEnd: | |||
2325 | case Token::TK_DocumentStart: | |||
2326 | setError("Could not find closing ]!", T); | |||
2327 | // Set this to end iterator. | |||
2328 | IsAtEnd = true; | |||
2329 | CurrentEntry = nullptr; | |||
2330 | break; | |||
2331 | default: | |||
2332 | if (!WasPreviousTokenFlowEntry) { | |||
2333 | setError("Expected , between entries!", T); | |||
2334 | IsAtEnd = true; | |||
2335 | CurrentEntry = nullptr; | |||
2336 | break; | |||
2337 | } | |||
2338 | // Otherwise it must be a flow entry. | |||
2339 | CurrentEntry = parseBlockNode(); | |||
2340 | if (!CurrentEntry) { | |||
2341 | IsAtEnd = true; | |||
2342 | } | |||
2343 | WasPreviousTokenFlowEntry = false; | |||
2344 | break; | |||
2345 | } | |||
2346 | } | |||
2347 | } | |||
2348 | ||||
2349 | Document::Document(Stream &S) : stream(S), Root(nullptr) { | |||
2350 | // Tag maps starts with two default mappings. | |||
2351 | TagMap["!"] = "!"; | |||
2352 | TagMap["!!"] = "tag:yaml.org,2002:"; | |||
2353 | ||||
2354 | if (parseDirectives()) | |||
2355 | expectToken(Token::TK_DocumentStart); | |||
2356 | Token &T = peekNext(); | |||
2357 | if (T.Kind == Token::TK_DocumentStart) | |||
2358 | getNext(); | |||
2359 | } | |||
2360 | ||||
2361 | bool Document::skip() { | |||
2362 | if (stream.scanner->failed()) | |||
2363 | return false; | |||
2364 | if (!Root && !getRoot()) | |||
2365 | return false; | |||
2366 | Root->skip(); | |||
2367 | Token &T = peekNext(); | |||
2368 | if (T.Kind == Token::TK_StreamEnd) | |||
2369 | return false; | |||
2370 | if (T.Kind == Token::TK_DocumentEnd) { | |||
2371 | getNext(); | |||
2372 | return skip(); | |||
2373 | } | |||
2374 | return true; | |||
2375 | } | |||
2376 | ||||
2377 | Token &Document::peekNext() { | |||
2378 | return stream.scanner->peekNext(); | |||
2379 | } | |||
2380 | ||||
2381 | Token Document::getNext() { | |||
2382 | return stream.scanner->getNext(); | |||
2383 | } | |||
2384 | ||||
2385 | void Document::setError(const Twine &Message, Token &Location) const { | |||
2386 | stream.scanner->setError(Message, Location.Range.begin()); | |||
2387 | } | |||
2388 | ||||
2389 | bool Document::failed() const { | |||
2390 | return stream.scanner->failed(); | |||
2391 | } | |||
2392 | ||||
2393 | Node *Document::parseBlockNode() { | |||
2394 | Token T = peekNext(); | |||
2395 | // Handle properties. | |||
2396 | Token AnchorInfo; | |||
2397 | Token TagInfo; | |||
2398 | parse_property: | |||
2399 | switch (T.Kind) { | |||
2400 | case Token::TK_Alias: | |||
2401 | getNext(); | |||
2402 | return new (NodeAllocator) AliasNode(stream.CurrentDoc, T.Range.substr(1)); | |||
2403 | case Token::TK_Anchor: | |||
2404 | if (AnchorInfo.Kind == Token::TK_Anchor) { | |||
2405 | setError("Already encountered an anchor for this node!", T); | |||
2406 | return nullptr; | |||
2407 | } | |||
2408 | AnchorInfo = getNext(); // Consume TK_Anchor. | |||
2409 | T = peekNext(); | |||
2410 | goto parse_property; | |||
2411 | case Token::TK_Tag: | |||
2412 | if (TagInfo.Kind == Token::TK_Tag) { | |||
2413 | setError("Already encountered a tag for this node!", T); | |||
2414 | return nullptr; | |||
2415 | } | |||
2416 | TagInfo = getNext(); // Consume TK_Tag. | |||
2417 | T = peekNext(); | |||
2418 | goto parse_property; | |||
2419 | default: | |||
2420 | break; | |||
2421 | } | |||
2422 | ||||
2423 | switch (T.Kind) { | |||
2424 | case Token::TK_BlockEntry: | |||
2425 | // We got an unindented BlockEntry sequence. This is not terminated with | |||
2426 | // a BlockEnd. | |||
2427 | // Don't eat the TK_BlockEntry, SequenceNode needs it. | |||
2428 | return new (NodeAllocator) SequenceNode( stream.CurrentDoc | |||
2429 | , AnchorInfo.Range.substr(1) | |||
2430 | , TagInfo.Range | |||
2431 | , SequenceNode::ST_Indentless); | |||
2432 | case Token::TK_BlockSequenceStart: | |||
2433 | getNext(); | |||
2434 | return new (NodeAllocator) | |||
2435 | SequenceNode( stream.CurrentDoc | |||
2436 | , AnchorInfo.Range.substr(1) | |||
2437 | , TagInfo.Range | |||
2438 | , SequenceNode::ST_Block); | |||
2439 | case Token::TK_BlockMappingStart: | |||
2440 | getNext(); | |||
2441 | return new (NodeAllocator) | |||
2442 | MappingNode( stream.CurrentDoc | |||
2443 | , AnchorInfo.Range.substr(1) | |||
2444 | , TagInfo.Range | |||
2445 | , MappingNode::MT_Block); | |||
2446 | case Token::TK_FlowSequenceStart: | |||
2447 | getNext(); | |||
2448 | return new (NodeAllocator) | |||
2449 | SequenceNode( stream.CurrentDoc | |||
2450 | , AnchorInfo.Range.substr(1) | |||
2451 | , TagInfo.Range | |||
2452 | , SequenceNode::ST_Flow); | |||
2453 | case Token::TK_FlowMappingStart: | |||
2454 | getNext(); | |||
2455 | return new (NodeAllocator) | |||
2456 | MappingNode( stream.CurrentDoc | |||
2457 | , AnchorInfo.Range.substr(1) | |||
2458 | , TagInfo.Range | |||
2459 | , MappingNode::MT_Flow); | |||
2460 | case Token::TK_Scalar: | |||
2461 | getNext(); | |||
2462 | return new (NodeAllocator) | |||
2463 | ScalarNode( stream.CurrentDoc | |||
2464 | , AnchorInfo.Range.substr(1) | |||
2465 | , TagInfo.Range | |||
2466 | , T.Range); | |||
2467 | case Token::TK_BlockScalar: { | |||
2468 | getNext(); | |||
2469 | StringRef NullTerminatedStr(T.Value.c_str(), T.Value.length() + 1); | |||
2470 | StringRef StrCopy = NullTerminatedStr.copy(NodeAllocator).drop_back(); | |||
2471 | return new (NodeAllocator) | |||
2472 | BlockScalarNode(stream.CurrentDoc, AnchorInfo.Range.substr(1), | |||
2473 | TagInfo.Range, StrCopy, T.Range); | |||
2474 | } | |||
2475 | case Token::TK_Key: | |||
2476 | // Don't eat the TK_Key, KeyValueNode expects it. | |||
2477 | return new (NodeAllocator) | |||
2478 | MappingNode( stream.CurrentDoc | |||
2479 | , AnchorInfo.Range.substr(1) | |||
2480 | , TagInfo.Range | |||
2481 | , MappingNode::MT_Inline); | |||
2482 | case Token::TK_DocumentStart: | |||
2483 | case Token::TK_DocumentEnd: | |||
2484 | case Token::TK_StreamEnd: | |||
2485 | default: | |||
2486 | // TODO: Properly handle tags. "[!!str ]" should resolve to !!str "", not | |||
2487 | // !!null null. | |||
2488 | return new (NodeAllocator) NullNode(stream.CurrentDoc); | |||
2489 | case Token::TK_FlowMappingEnd: | |||
2490 | case Token::TK_FlowSequenceEnd: | |||
2491 | case Token::TK_FlowEntry: { | |||
2492 | if (Root && (isa<MappingNode>(Root) || isa<SequenceNode>(Root))) | |||
2493 | return new (NodeAllocator) NullNode(stream.CurrentDoc); | |||
2494 | ||||
2495 | setError("Unexpected token", T); | |||
2496 | return nullptr; | |||
2497 | } | |||
2498 | case Token::TK_Error: | |||
2499 | return nullptr; | |||
2500 | } | |||
2501 | llvm_unreachable("Control flow shouldn't reach here.")::llvm::llvm_unreachable_internal("Control flow shouldn't reach here." , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Support/YAMLParser.cpp" , 2501); | |||
2502 | return nullptr; | |||
2503 | } | |||
2504 | ||||
2505 | bool Document::parseDirectives() { | |||
2506 | bool isDirective = false; | |||
2507 | while (true) { | |||
2508 | Token T = peekNext(); | |||
2509 | if (T.Kind == Token::TK_TagDirective) { | |||
2510 | parseTAGDirective(); | |||
2511 | isDirective = true; | |||
2512 | } else if (T.Kind == Token::TK_VersionDirective) { | |||
2513 | parseYAMLDirective(); | |||
2514 | isDirective = true; | |||
2515 | } else | |||
2516 | break; | |||
2517 | } | |||
2518 | return isDirective; | |||
2519 | } | |||
2520 | ||||
2521 | void Document::parseYAMLDirective() { | |||
2522 | getNext(); // Eat %YAML <version> | |||
2523 | } | |||
2524 | ||||
2525 | void Document::parseTAGDirective() { | |||
2526 | Token Tag = getNext(); // %TAG <handle> <prefix> | |||
2527 | StringRef T = Tag.Range; | |||
2528 | // Strip %TAG | |||
2529 | T = T.substr(T.find_first_of(" \t")).ltrim(" \t"); | |||
2530 | std::size_t HandleEnd = T.find_first_of(" \t"); | |||
2531 | StringRef TagHandle = T.substr(0, HandleEnd); | |||
2532 | StringRef TagPrefix = T.substr(HandleEnd).ltrim(" \t"); | |||
2533 | TagMap[TagHandle] = TagPrefix; | |||
2534 | } | |||
2535 | ||||
2536 | bool Document::expectToken(int TK) { | |||
2537 | Token T = getNext(); | |||
2538 | if (T.Kind != TK) { | |||
2539 | setError("Unexpected token", T); | |||
2540 | return false; | |||
2541 | } | |||
2542 | return true; | |||
2543 | } |