LLVM  3.7.0
YAMLParser.h
Go to the documentation of this file.
1 //===--- YAMLParser.h - Simple YAML parser --------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This is a YAML 1.2 parser.
11 //
12 // See http://www.yaml.org/spec/1.2/spec.html for the full standard.
13 //
14 // This currently does not implement the following:
15 // * Multi-line literal folding.
16 // * Tag resolution.
17 // * UTF-16.
18 // * BOMs anywhere other than the first Unicode scalar value in the file.
19 //
20 // The most important class here is Stream. This represents a YAML stream with
21 // 0, 1, or many documents.
22 //
23 // SourceMgr sm;
24 // StringRef input = getInput();
25 // yaml::Stream stream(input, sm);
26 //
27 // for (yaml::document_iterator di = stream.begin(), de = stream.end();
28 // di != de; ++di) {
29 // yaml::Node *n = di->getRoot();
30 // if (n) {
31 // // Do something with n...
32 // } else
33 // break;
34 // }
35 //
36 //===----------------------------------------------------------------------===//
37 
38 #ifndef LLVM_SUPPORT_YAMLPARSER_H
39 #define LLVM_SUPPORT_YAMLPARSER_H
40 
41 #include "llvm/ADT/StringRef.h"
42 #include "llvm/Support/Allocator.h"
43 #include "llvm/Support/SMLoc.h"
44 #include <limits>
45 #include <map>
46 #include <utility>
47 
48 namespace llvm {
49 class MemoryBufferRef;
50 class SourceMgr;
51 class Twine;
52 class raw_ostream;
53 
54 namespace yaml {
55 
56 class document_iterator;
57 class Document;
58 class Node;
59 class Scanner;
60 struct Token;
61 
62 /// \brief Dump all the tokens in this stream to OS.
63 /// \returns true if there was an error, false otherwise.
64 bool dumpTokens(StringRef Input, raw_ostream &);
65 
66 /// \brief Scans all tokens in input without outputting anything. This is used
67 /// for benchmarking the tokenizer.
68 /// \returns true if there was an error, false otherwise.
69 bool scanTokens(StringRef Input);
70 
71 /// \brief Escape \a Input for a double quoted scalar.
72 std::string escape(StringRef Input);
73 
74 /// \brief This class represents a YAML stream potentially containing multiple
75 /// documents.
76 class Stream {
77 public:
78  /// \brief This keeps a reference to the string referenced by \p Input.
79  Stream(StringRef Input, SourceMgr &, bool ShowColors = true);
80 
81  Stream(MemoryBufferRef InputBuffer, SourceMgr &, bool ShowColors = true);
82  ~Stream();
83 
86  void skip();
87  bool failed();
88  bool validate() {
89  skip();
90  return !failed();
91  }
92 
93  void printError(Node *N, const Twine &Msg);
94 
95 private:
96  std::unique_ptr<Scanner> scanner;
97  std::unique_ptr<Document> CurrentDoc;
98 
99  friend class Document;
100 };
101 
102 /// \brief Abstract base class for all Nodes.
103 class Node {
104  virtual void anchor();
105 
106 public:
107  enum NodeKind {
115  };
116 
117  Node(unsigned int Type, std::unique_ptr<Document> &, StringRef Anchor,
118  StringRef Tag);
119 
120  /// \brief Get the value of the anchor attached to this node. If it does not
121  /// have one, getAnchor().size() will be 0.
122  StringRef getAnchor() const { return Anchor; }
123 
124  /// \brief Get the tag as it was written in the document. This does not
125  /// perform tag resolution.
126  StringRef getRawTag() const { return Tag; }
127 
128  /// \brief Get the verbatium tag for a given Node. This performs tag resoluton
129  /// and substitution.
130  std::string getVerbatimTag() const;
131 
132  SMRange getSourceRange() const { return SourceRange; }
133  void setSourceRange(SMRange SR) { SourceRange = SR; }
134 
135  // These functions forward to Document and Scanner.
136  Token &peekNext();
137  Token getNext();
138  Node *parseBlockNode();
140  void setError(const Twine &Message, Token &Location) const;
141  bool failed() const;
142 
143  virtual void skip() {}
144 
145  unsigned int getType() const { return TypeID; }
146 
147  void *operator new(size_t Size, BumpPtrAllocator &Alloc,
148  size_t Alignment = 16) throw() {
149  return Alloc.Allocate(Size, Alignment);
150  }
151 
152  void operator delete(void *Ptr, BumpPtrAllocator &Alloc, size_t Size) throw() {
153  Alloc.Deallocate(Ptr, Size);
154  }
155 
156 protected:
157  std::unique_ptr<Document> &Doc;
159 
160  void operator delete(void *) throw() {}
161 
162  ~Node() = default;
163 
164 private:
165  unsigned int TypeID;
166  StringRef Anchor;
167  /// \brief The tag as typed in the document.
168  StringRef Tag;
169 };
170 
171 /// \brief A null value.
172 ///
173 /// Example:
174 /// !!null null
175 class NullNode final : public Node {
176  void anchor() override;
177 
178 public:
179  NullNode(std::unique_ptr<Document> &D)
180  : Node(NK_Null, D, StringRef(), StringRef()) {}
181 
182  static inline bool classof(const Node *N) { return N->getType() == NK_Null; }
183 };
184 
185 /// \brief A scalar node is an opaque datum that can be presented as a
186 /// series of zero or more Unicode scalar values.
187 ///
188 /// Example:
189 /// Adena
190 class ScalarNode final : public Node {
191  void anchor() override;
192 
193 public:
194  ScalarNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
195  StringRef Val)
196  : Node(NK_Scalar, D, Anchor, Tag), Value(Val) {
197  SMLoc Start = SMLoc::getFromPointer(Val.begin());
198  SMLoc End = SMLoc::getFromPointer(Val.end());
199  SourceRange = SMRange(Start, End);
200  }
201 
202  // Return Value without any escaping or folding or other fun YAML stuff. This
203  // is the exact bytes that are contained in the file (after conversion to
204  // utf8).
205  StringRef getRawValue() const { return Value; }
206 
207  /// \brief Gets the value of this node as a StringRef.
208  ///
209  /// \param Storage is used to store the content of the returned StringRef iff
210  /// it requires any modification from how it appeared in the source.
211  /// This happens with escaped characters and multi-line literals.
212  StringRef getValue(SmallVectorImpl<char> &Storage) const;
213 
214  static inline bool classof(const Node *N) {
215  return N->getType() == NK_Scalar;
216  }
217 
218 private:
220 
221  StringRef unescapeDoubleQuoted(StringRef UnquotedValue,
222  StringRef::size_type Start,
223  SmallVectorImpl<char> &Storage) const;
224 };
225 
226 /// \brief A block scalar node is an opaque datum that can be presented as a
227 /// series of zero or more Unicode scalar values.
228 ///
229 /// Example:
230 /// |
231 /// Hello
232 /// World
233 class BlockScalarNode final : public Node {
234  void anchor() override;
235 
236 public:
237  BlockScalarNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
238  StringRef Value, StringRef RawVal)
239  : Node(NK_BlockScalar, D, Anchor, Tag), Value(Value) {
240  SMLoc Start = SMLoc::getFromPointer(RawVal.begin());
241  SMLoc End = SMLoc::getFromPointer(RawVal.end());
242  SourceRange = SMRange(Start, End);
243  }
244 
245  /// \brief Gets the value of this node as a StringRef.
246  StringRef getValue() const { return Value; }
247 
248  static inline bool classof(const Node *N) {
249  return N->getType() == NK_BlockScalar;
250  }
251 
252 private:
254 };
255 
256 /// \brief A key and value pair. While not technically a Node under the YAML
257 /// representation graph, it is easier to treat them this way.
258 ///
259 /// TODO: Consider making this not a child of Node.
260 ///
261 /// Example:
262 /// Section: .text
263 class KeyValueNode final : public Node {
264  void anchor() override;
265 
266 public:
267  KeyValueNode(std::unique_ptr<Document> &D)
268  : Node(NK_KeyValue, D, StringRef(), StringRef()), Key(nullptr),
269  Value(nullptr) {}
270 
271  /// \brief Parse and return the key.
272  ///
273  /// This may be called multiple times.
274  ///
275  /// \returns The key, or nullptr if failed() == true.
276  Node *getKey();
277 
278  /// \brief Parse and return the value.
279  ///
280  /// This may be called multiple times.
281  ///
282  /// \returns The value, or nullptr if failed() == true.
283  Node *getValue();
284 
285  void skip() override {
286  getKey()->skip();
287  if (Node *Val = getValue())
288  Val->skip();
289  }
290 
291  static inline bool classof(const Node *N) {
292  return N->getType() == NK_KeyValue;
293  }
294 
295 private:
296  Node *Key;
297  Node *Value;
298 };
299 
/// \brief This is an iterator abstraction over YAML collections shared by both
/// sequences and maps.
///
/// BaseT must have a ValueT* member named CurrentEntry and a member function
/// increment() which must set CurrentEntry to nullptr to create an end
/// iterator.
template <class BaseT, class ValueT>
class basic_collection_iterator
    : public std::iterator<std::forward_iterator_tag, ValueT> {
public:
  /// \brief Construct an end iterator (no collection attached).
  basic_collection_iterator() : Base(nullptr) {}
  /// \brief Construct an iterator that reads entries from \p B.
  basic_collection_iterator(BaseT *B) : Base(B) {}

  ValueT *operator->() const {
    assert(Base && Base->CurrentEntry && "Attempted to access end iterator!");
    return Base->CurrentEntry;
  }

  ValueT &operator*() const {
    assert(Base && Base->CurrentEntry &&
           "Attempted to dereference end iterator!");
    return *Base->CurrentEntry;
  }

  operator ValueT *() const {
    assert(Base && Base->CurrentEntry && "Attempted to access end iterator!");
    return Base->CurrentEntry;
  }

  /// \brief Two iterators differ when they are attached to different
  /// collections (an end iterator is attached to none).
  bool operator!=(const basic_collection_iterator &Other) const {
    if (Base != Other.Base)
      return true;
    return (Base && Other.Base) &&
           Base->CurrentEntry != Other.Base->CurrentEntry;
  }

  /// \brief Exact negation of operator!=.
  ///
  /// Provided explicitly so that `It == End` does not fall back to the
  /// pointer conversion above, which asserts on an end iterator.
  bool operator==(const basic_collection_iterator &Other) const {
    return !(*this != Other);
  }

  /// \brief Advance to the next entry by calling increment() on the
  /// collection; turns into an end iterator once CurrentEntry becomes null.
  basic_collection_iterator &operator++() {
    assert(Base && "Attempted to advance iterator past end!");
    Base->increment();
    // Create an end iterator.
    if (!Base->CurrentEntry)
      Base = nullptr;
    return *this;
  }

private:
  BaseT *Base;
};
347 
// The following two templates are used for both MappingNode and SequenceNode.

/// \brief Start the single permitted iteration over \p C.
///
/// Asserts that \p C has not been iterated before, clears its IsAtBeginning
/// flag, and returns an iterator attached to \p C that has already been
/// advanced once.
template <class CollectionType>
typename CollectionType::iterator begin(CollectionType &C) {
  assert(C.IsAtBeginning && "You may only iterate over a collection once!");
  C.IsAtBeginning = false;
  typename CollectionType::iterator ret(&C);
  ++ret;
  return ret;
}
357 
/// \brief Skip every entry of \p C.
///
/// Only valid when \p C is either untouched or fully consumed (asserted). An
/// untouched collection is walked from begin(C) to C.end(), calling skip() on
/// each entry; otherwise nothing is done.
template <class CollectionType> void skip(CollectionType &C) {
  // TODO: support skipping from the middle of a parsed collection ;/
  assert((C.IsAtBeginning || C.IsAtEnd) && "Cannot skip mid parse!");
  if (C.IsAtBeginning) {
    for (typename CollectionType::iterator i = begin(C), e = C.end(); i != e;
         ++i) {
      i->skip();
    }
  }
}
366 
367 /// \brief Represents a YAML map created from either a block map for a flow map.
368 ///
369 /// This parses the YAML stream as increment() is called.
370 ///
371 /// Example:
372 /// Name: _main
373 /// Scope: Global
374 class MappingNode final : public Node {
375  void anchor() override;
376 
377 public:
378  enum MappingType {
381  MT_Inline ///< An inline mapping node is used for "[key: value]".
382  };
383 
384  MappingNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
385  MappingType MT)
386  : Node(NK_Mapping, D, Anchor, Tag), Type(MT), IsAtBeginning(true),
387  IsAtEnd(false), CurrentEntry(nullptr) {}
388 
391  template <class T> friend typename T::iterator yaml::begin(T &);
392  template <class T> friend void yaml::skip(T &);
393 
394  iterator begin() { return yaml::begin(*this); }
395 
396  iterator end() { return iterator(); }
397 
398  void skip() override { yaml::skip(*this); }
399 
400  static inline bool classof(const Node *N) {
401  return N->getType() == NK_Mapping;
402  }
403 
404 private:
406  bool IsAtBeginning;
407  bool IsAtEnd;
408  KeyValueNode *CurrentEntry;
409 
410  void increment();
411 };
412 
413 /// \brief Represents a YAML sequence created from either a block sequence for a
414 /// flow sequence.
415 ///
416 /// This parses the YAML stream as increment() is called.
417 ///
418 /// Example:
419 /// - Hello
420 /// - World
421 class SequenceNode final : public Node {
422  void anchor() override;
423 
424 public:
428  // Use for:
429  //
430  // key:
431  // - val1
432  // - val2
433  //
434  // As a BlockMappingEntry and BlockEnd are not created in this case.
436  };
437 
438  SequenceNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
440  : Node(NK_Sequence, D, Anchor, Tag), SeqType(ST), IsAtBeginning(true),
441  IsAtEnd(false),
442  WasPreviousTokenFlowEntry(true), // Start with an imaginary ','.
443  CurrentEntry(nullptr) {}
444 
447  template <class T> friend typename T::iterator yaml::begin(T &);
448  template <class T> friend void yaml::skip(T &);
449 
450  void increment();
451 
452  iterator begin() { return yaml::begin(*this); }
453 
454  iterator end() { return iterator(); }
455 
456  void skip() override { yaml::skip(*this); }
457 
458  static inline bool classof(const Node *N) {
459  return N->getType() == NK_Sequence;
460  }
461 
462 private:
463  SequenceType SeqType;
464  bool IsAtBeginning;
465  bool IsAtEnd;
466  bool WasPreviousTokenFlowEntry;
467  Node *CurrentEntry;
468 };
469 
470 /// \brief Represents an alias to a Node with an anchor.
471 ///
472 /// Example:
473 /// *AnchorName
474 class AliasNode final : public Node {
475  void anchor() override;
476 
477 public:
478  AliasNode(std::unique_ptr<Document> &D, StringRef Val)
479  : Node(NK_Alias, D, StringRef(), StringRef()), Name(Val) {}
480 
481  StringRef getName() const { return Name; }
482  Node *getTarget();
483 
484  static inline bool classof(const Node *N) { return N->getType() == NK_Alias; }
485 
486 private:
487  StringRef Name;
488 };
489 
490 /// \brief A YAML Stream is a sequence of Documents. A document contains a root
491 /// node.
492 class Document {
493 public:
494  /// \brief Root for parsing a node. Returns a single node.
495  Node *parseBlockNode();
496 
497  Document(Stream &ParentStream);
498 
499  /// \brief Finish parsing the current document and return true if there are
500  /// more. Return false otherwise.
501  bool skip();
502 
503  /// \brief Parse and return the root level node.
505  if (Root)
506  return Root;
507  return Root = parseBlockNode();
508  }
509 
510  const std::map<StringRef, StringRef> &getTagMap() const { return TagMap; }
511 
512 private:
513  friend class Node;
514  friend class document_iterator;
515 
516  /// \brief Stream to read tokens from.
517  Stream &stream;
518 
519  /// \brief Used to allocate nodes to. All are destroyed without calling their
520  /// destructor when the document is destroyed.
521  BumpPtrAllocator NodeAllocator;
522 
523  /// \brief The root node. Used to support skipping a partially parsed
524  /// document.
525  Node *Root;
526 
527  /// \brief Maps tag prefixes to their expansion.
528  std::map<StringRef, StringRef> TagMap;
529 
530  Token &peekNext();
531  Token getNext();
532  void setError(const Twine &Message, Token &Location) const;
533  bool failed() const;
534 
535  /// \brief Parse %BLAH directives and return true if any were encountered.
536  bool parseDirectives();
537 
538  /// \brief Parse %YAML
539  void parseYAMLDirective();
540 
541  /// \brief Parse %TAG
542  void parseTAGDirective();
543 
544  /// \brief Consume the next token and error if it is not \a TK.
545  bool expectToken(int TK);
546 };
547 
548 /// \brief Iterator abstraction for Documents over a Stream.
550 public:
551  document_iterator() : Doc(nullptr) {}
552  document_iterator(std::unique_ptr<Document> &D) : Doc(&D) {}
553 
555  if (isAtEnd() || Other.isAtEnd())
556  return isAtEnd() && Other.isAtEnd();
557 
558  return Doc == Other.Doc;
559  }
560  bool operator!=(const document_iterator &Other) { return !(*this == Other); }
561 
563  assert(Doc && "incrementing iterator past the end.");
564  if (!(*Doc)->skip()) {
565  Doc->reset(nullptr);
566  } else {
567  Stream &S = (*Doc)->stream;
568  Doc->reset(new Document(S));
569  }
570  return *this;
571  }
572 
573  Document &operator*() { return *Doc->get(); }
574 
575  std::unique_ptr<Document> &operator->() { return *Doc; }
576 
577 private:
578  bool isAtEnd() const { return !Doc || !*Doc; }
579 
580  std::unique_ptr<Document> *Doc;
581 };
582 
583 } // End namespace yaml.
584 
585 } // End namespace llvm.
586 
587 #endif
static bool classof(const Node *N)
Definition: YAMLParser.h:400
document_iterator operator++()
Definition: YAMLParser.h:562
Represents a range in source code.
Definition: SMLoc.h:47
AliasNode(std::unique_ptr< Document > &D, StringRef Val)
Definition: YAMLParser.h:478
const std::map< StringRef, StringRef > & getTagMap() const
Definition: YAMLParser.h:510
std::unique_ptr< Document > & Doc
Definition: YAMLParser.h:157
std::string getVerbatimTag() const
Get the verbatium tag for a given Node.
SequenceNode(std::unique_ptr< Document > &D, StringRef Anchor, StringRef Tag, SequenceType ST)
Definition: YAMLParser.h:438
Node(unsigned int Type, std::unique_ptr< Document > &, StringRef Anchor, StringRef Tag)
Type::TypeID TypeID
bool scanTokens(StringRef Input)
Scans all tokens in input without outputting anything.
Definition: YAMLParser.cpp:685
This is an iterator abstraction over YAML collections shared by both sequences and maps...
Definition: YAMLParser.h:306
StringRef getValue(SmallVectorImpl< char > &Storage) const
Gets the value of this node as a StringRef.
StringRef getRawTag() const
Get the tag as it was written in the document.
Definition: YAMLParser.h:126
Represents a YAML sequence created from either a block sequence for a flow sequence.
Definition: YAMLParser.h:421
Node * getKey()
Parse and return the key.
KeyValueNode(std::unique_ptr< Document > &D)
Definition: YAMLParser.h:267
Node * parseBlockNode()
document_iterator begin()
Represents an alias to a Node with an anchor.
Definition: YAMLParser.h:474
void skip(CollectionType &C)
Definition: YAMLParser.h:358
static bool classof(const Node *N)
Definition: YAMLParser.h:458
static bool classof(const Node *N)
Definition: YAMLParser.h:484
document_iterator end()
This file defines the MallocAllocator and BumpPtrAllocator interfaces.
void setError(const Twine &Message, Token &Location) const
std::unique_ptr< Document > & operator->()
Definition: YAMLParser.h:575
StringRef getRawValue() const
Definition: YAMLParser.h:205
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:79
StringRef getValue() const
Gets the value of this node as a StringRef.
Definition: YAMLParser.h:246
document_iterator(std::unique_ptr< Document > &D)
Definition: YAMLParser.h:552
static bool classof(const Node *N)
Definition: YAMLParser.h:248
BlockScalarNode(std::unique_ptr< Document > &D, StringRef Anchor, StringRef Tag, StringRef Value, StringRef RawVal)
Definition: YAMLParser.h:237
#define false
Definition: ConvertUTF.c:65
ELFYAML::ELF_STO Other
Definition: ELFYAML.cpp:591
ScalarNode(std::unique_ptr< Document > &D, StringRef Anchor, StringRef Tag, StringRef Val)
Definition: YAMLParser.h:194
The Input class is used to parse a yaml document into in-memory structs and vectors.
Definition: YAMLTraits.h:970
CollectionType::iterator begin(CollectionType &C)
Definition: YAMLParser.h:350
virtual void skip()
Definition: YAMLParser.h:143
A key and value pair.
Definition: YAMLParser.h:263
Node * getRoot()
Parse and return the root level node.
Definition: YAMLParser.h:504
basic_collection_iterator< SequenceNode, Node > iterator
Definition: YAMLParser.h:446
static bool classof(const Node *N)
Definition: YAMLParser.h:214
iterator begin() const
Definition: StringRef.h:90
bool failed() const
bool dumpTokens(StringRef Input, raw_ostream &)
Dump all the tokens in this stream to OS.
Definition: YAMLParser.cpp:601
#define true
Definition: ConvertUTF.c:66
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
void printError(Node *N, const Twine &Msg)
Allocate memory in an ever growing pool, as if by bump-pointer.
Definition: Allocator.h:135
std::string escape(StringRef Input)
Escape Input for a double quoted scalar.
Definition: YAMLParser.cpp:698
Document(Stream &ParentStream)
This owns the files read by a parser, handles include stacks, and handles diagnostic wrangling...
Definition: SourceMgr.h:35
void skip() override
Definition: YAMLParser.h:285
BumpPtrAllocator & getAllocator()
A scalar node is an opaque datum that can be presented as a series of zero or more Unicode scalar val...
Definition: YAMLParser.h:190
A null value.
Definition: YAMLParser.h:175
static bool classof(const Node *N)
Definition: YAMLParser.h:291
void setSourceRange(SMRange SR)
Definition: YAMLParser.h:133
Node * getValue()
Parse and return the value.
void skip() override
Definition: YAMLParser.h:398
bool skip()
Finish parsing the current document and return true if there are more.
Token & peekNext()
This class represents a YAML stream potentially containing multiple documents.
Definition: YAMLParser.h:76
StringRef getAnchor() const
Get the value of the anchor attached to this node.
Definition: YAMLParser.h:122
Stream(StringRef Input, SourceMgr &, bool ShowColors=true)
This keeps a reference to the string referenced by Input.
StringRef getName() const
Definition: YAMLParser.h:481
bool operator!=(const basic_collection_iterator &Other) const
Definition: YAMLParser.h:328
static SMLoc getFromPointer(const char *Ptr)
Definition: SMLoc.h:35
SMRange getSourceRange() const
Definition: YAMLParser.h:132
static bool classof(const Node *N)
Definition: YAMLParser.h:182
size_t size_type
Definition: StringRef.h:45
void skip() override
Definition: YAMLParser.h:456
#define N
Token - A single YAML token.
Definition: YAMLParser.cpp:111
A block scalar node is an opaque datum that can be presented as a series of zero or more Unicode scal...
Definition: YAMLParser.h:233
Represents a YAML map created from either a block map for a flow map.
Definition: YAMLParser.h:374
Iterator abstraction for Documents over a Stream.
Definition: YAMLParser.h:549
unsigned int getType() const
Definition: YAMLParser.h:145
basic_collection_iterator< MappingNode, KeyValueNode > iterator
Definition: YAMLParser.h:390
SMRange SourceRange
Definition: YAMLParser.h:158
LLVM Value Representation.
Definition: Value.h:69
NullNode(std::unique_ptr< Document > &D)
Definition: YAMLParser.h:179
iterator end() const
Definition: StringRef.h:92
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:40
bool operator==(const document_iterator &Other)
Definition: YAMLParser.h:554
basic_collection_iterator & operator++()
Definition: YAMLParser.h:335
Represents a location in source code.
Definition: SMLoc.h:23
bool operator!=(const document_iterator &Other)
Definition: YAMLParser.h:560
An inline mapping node is used for "[key: value]".
Definition: YAMLParser.h:381
Node * parseBlockNode()
Root for parsing a node. Returns a single node.
MappingNode(std::unique_ptr< Document > &D, StringRef Anchor, StringRef Tag, MappingType MT)
Definition: YAMLParser.h:384
A YAML Stream is a sequence of Documents.
Definition: YAMLParser.h:492
Abstract base class for all Nodes.
Definition: YAMLParser.h:103