LCOV - code coverage report
Current view: top level - include/llvm/Support - YAMLParser.h (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 70 71 98.6 %
Date: 2017-09-14 15:23:50 Functions: 12 12 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- YAMLParser.h - Simple YAML parser ------------------------*- C++ -*-===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : //  This is a YAML 1.2 parser.
      11             : //
      12             : //  See http://www.yaml.org/spec/1.2/spec.html for the full standard.
      13             : //
      14             : //  This currently does not implement the following:
      15             : //    * Multi-line literal folding.
      16             : //    * Tag resolution.
      17             : //    * UTF-16.
      18             : //    * BOMs anywhere other than the first Unicode scalar value in the file.
      19             : //
      20             : //  The most important class here is Stream. This represents a YAML stream with
      21             : //  0, 1, or many documents.
      22             : //
      23             : //  SourceMgr sm;
      24             : //  StringRef input = getInput();
      25             : //  yaml::Stream stream(input, sm);
      26             : //
      27             : //  for (yaml::document_iterator di = stream.begin(), de = stream.end();
      28             : //       di != de; ++di) {
      29             : //    yaml::Node *n = di->getRoot();
      30             : //    if (n) {
      31             : //      // Do something with n...
      32             : //    } else
      33             : //      break;
      34             : //  }
      35             : //
      36             : //===----------------------------------------------------------------------===//
      37             : 
      38             : #ifndef LLVM_SUPPORT_YAMLPARSER_H
      39             : #define LLVM_SUPPORT_YAMLPARSER_H
      40             : 
      41             : #include "llvm/ADT/StringRef.h"
      42             : #include "llvm/Support/Allocator.h"
      43             : #include "llvm/Support/SMLoc.h"
      44             : #include <cassert>
      45             : #include <cstddef>
      46             : #include <iterator>
      47             : #include <map>
      48             : #include <memory>
      49             : #include <string>
      50             : #include <system_error>
      51             : 
      52             : namespace llvm {
      53             : 
      54             : class MemoryBufferRef;
      55             : class SourceMgr;
      56             : class raw_ostream;
      57             : class Twine;
      58             : 
      59             : namespace yaml {
      60             : 
      61             : class Document;
      62             : class document_iterator;
      63             : class Node;
      64             : class Scanner;
      65             : struct Token;
      66             : 
      67             : /// \brief Dump all the tokens in this stream to OS.
      68             : /// \returns true if there was an error, false otherwise.
      69             : bool dumpTokens(StringRef Input, raw_ostream &);
      70             : 
      71             : /// \brief Scans all tokens in input without outputting anything. This is used
      72             : ///        for benchmarking the tokenizer.
      73             : /// \returns true if there was an error, false otherwise.
      74             : bool scanTokens(StringRef Input);
      75             : 
      76             : /// \brief Escape \a Input for a double quoted scalar.
      77             : std::string escape(StringRef Input);
      78             : 
      79             : /// \brief This class represents a YAML stream potentially containing multiple
      80             : ///        documents.
      81        6756 : class Stream {
      82             : public:
      83             :   /// \brief This keeps a reference to the string referenced by \p Input.
      84             :   Stream(StringRef Input, SourceMgr &, bool ShowColors = true,
      85             :          std::error_code *EC = nullptr);
      86             : 
      87             :   Stream(MemoryBufferRef InputBuffer, SourceMgr &, bool ShowColors = true,
      88             :          std::error_code *EC = nullptr);
      89             :   ~Stream();
      90             : 
      91             :   document_iterator begin();
      92             :   document_iterator end();
      93             :   void skip();
      94             :   bool failed();
      95             : 
      96             :   bool validate() {
      97          52 :     skip();
      98          52 :     return !failed();
      99             :   }
     100             : 
     101             :   void printError(Node *N, const Twine &Msg);
     102             : 
     103             : private:
     104             :   friend class Document;
     105             : 
     106             :   std::unique_ptr<Scanner> scanner;
     107             :   std::unique_ptr<Document> CurrentDoc;
     108             : };
     109             : 
     110             : /// \brief Abstract base class for all Nodes.
     111             : class Node {
     112             :   virtual void anchor();
     113             : 
     114             : public:
     115             :   enum NodeKind {
     116             :     NK_Null,
     117             :     NK_Scalar,
     118             :     NK_BlockScalar,
     119             :     NK_KeyValue,
     120             :     NK_Mapping,
     121             :     NK_Sequence,
     122             :     NK_Alias
     123             :   };
     124             : 
     125             :   Node(unsigned int Type, std::unique_ptr<Document> &, StringRef Anchor,
     126             :        StringRef Tag);
     127             : 
     128             :   void *operator new(size_t Size, BumpPtrAllocator &Alloc,
     129             :                      size_t Alignment = 16) noexcept {
     130      622409 :     return Alloc.Allocate(Size, Alignment);
     131             :   }
     132             : 
     133             :   void operator delete(void *Ptr, BumpPtrAllocator &Alloc,
     134             :                        size_t Size) noexcept {
     135             :     Alloc.Deallocate(Ptr, Size);
     136             :   }
     137             : 
     138             :   void operator delete(void *) noexcept = delete;
     139             : 
     140             :   /// \brief Get the value of the anchor attached to this node. If it does not
     141             :   ///        have one, getAnchor().size() will be 0.
     142             :   StringRef getAnchor() const { return Anchor; }
     143             : 
     144             :   /// \brief Get the tag as it was written in the document. This does not
     145             :   ///   perform tag resolution.
     146             :   StringRef getRawTag() const { return Tag; }
     147             : 
     148             :   /// \brief Get the verbatium tag for a given Node. This performs tag resoluton
     149             :   ///   and substitution.
     150             :   std::string getVerbatimTag() const;
     151             : 
     152             :   SMRange getSourceRange() const { return SourceRange; }
     153             :   void setSourceRange(SMRange SR) { SourceRange = SR; }
     154             : 
     155             :   // These functions forward to Document and Scanner.
     156             :   Token &peekNext();
     157             :   Token getNext();
     158             :   Node *parseBlockNode();
     159             :   BumpPtrAllocator &getAllocator();
     160             :   void setError(const Twine &Message, Token &Location) const;
     161             :   bool failed() const;
     162             : 
     163      681246 :   virtual void skip() {}
     164             : 
     165             :   unsigned int getType() const { return TypeID; }
     166             : 
     167             : protected:
     168             :   std::unique_ptr<Document> &Doc;
     169             :   SMRange SourceRange;
     170             : 
     171             :   ~Node() = default;
     172             : 
     173             : private:
     174             :   unsigned int TypeID;
     175             :   StringRef Anchor;
     176             :   /// \brief The tag as typed in the document.
     177             :   StringRef Tag;
     178             : };
     179             : 
     180             : /// \brief A null value.
     181             : ///
     182             : /// Example:
     183             : ///   !!null null
     184             : class NullNode final : public Node {
     185             :   void anchor() override;
     186             : 
     187             : public:
     188         980 :   NullNode(std::unique_ptr<Document> &D)
     189         980 :       : Node(NK_Null, D, StringRef(), StringRef()) {}
     190             : 
     191        6363 :   static bool classof(const Node *N) { return N->getType() == NK_Null; }
     192             : };
     193             : 
     194             : /// \brief A scalar node is an opaque datum that can be presented as a
     195             : ///        series of zero or more Unicode scalar values.
     196             : ///
     197             : /// Example:
     198             : ///   Adena
     199             : class ScalarNode final : public Node {
     200             :   void anchor() override;
     201             : 
     202             : public:
     203             :   ScalarNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
     204             :              StringRef Val)
     205      367968 :       : Node(NK_Scalar, D, Anchor, Tag), Value(Val) {
     206      735936 :     SMLoc Start = SMLoc::getFromPointer(Val.begin());
     207      735936 :     SMLoc End = SMLoc::getFromPointer(Val.end());
     208      367968 :     SourceRange = SMRange(Start, End);
     209             :   }
     210             : 
     211             :   // Return Value without any escaping or folding or other fun YAML stuff. This
     212             :   // is the exact bytes that are contained in the file (after conversion to
     213             :   // utf8).
     214             :   StringRef getRawValue() const { return Value; }
     215             : 
     216             :   /// \brief Gets the value of this node as a StringRef.
     217             :   ///
     218             :   /// \param Storage is used to store the content of the returned StringRef iff
     219             :   ///        it requires any modification from how it appeared in the source.
     220             :   ///        This happens with escaped characters and multi-line literals.
     221             :   StringRef getValue(SmallVectorImpl<char> &Storage) const;
     222             : 
     223             :   static bool classof(const Node *N) {
     224      706894 :     return N->getType() == NK_Scalar;
     225             :   }
     226             : 
     227             : private:
     228             :   StringRef Value;
     229             : 
     230             :   StringRef unescapeDoubleQuoted(StringRef UnquotedValue,
     231             :                                  StringRef::size_type Start,
     232             :                                  SmallVectorImpl<char> &Storage) const;
     233             : };
     234             : 
     235             : /// \brief A block scalar node is an opaque datum that can be presented as a
     236             : ///        series of zero or more Unicode scalar values.
     237             : ///
     238             : /// Example:
     239             : ///   |
     240             : ///     Hello
     241             : ///     World
     242             : class BlockScalarNode final : public Node {
     243             :   void anchor() override;
     244             : 
     245             : public:
     246             :   BlockScalarNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
     247             :                   StringRef Value, StringRef RawVal)
     248        2897 :       : Node(NK_BlockScalar, D, Anchor, Tag), Value(Value) {
     249        5794 :     SMLoc Start = SMLoc::getFromPointer(RawVal.begin());
     250        5794 :     SMLoc End = SMLoc::getFromPointer(RawVal.end());
     251        2897 :     SourceRange = SMRange(Start, End);
     252             :   }
     253             : 
     254             :   /// \brief Gets the value of this node as a StringRef.
     255             :   StringRef getValue() const { return Value; }
     256             : 
     257             :   static bool classof(const Node *N) {
     258       49136 :     return N->getType() == NK_BlockScalar;
     259             :   }
     260             : 
     261             : private:
     262             :   StringRef Value;
     263             : };
     264             : 
     265             : /// \brief A key and value pair. While not technically a Node under the YAML
     266             : ///        representation graph, it is easier to treat them this way.
     267             : ///
     268             : /// TODO: Consider making this not a child of Node.
     269             : ///
     270             : /// Example:
     271             : ///   Section: .text
     272             : class KeyValueNode final : public Node {
     273             :   void anchor() override;
     274             : 
     275             : public:
     276      186221 :   KeyValueNode(std::unique_ptr<Document> &D)
     277      186221 :       : Node(NK_KeyValue, D, StringRef(), StringRef()) {}
     278             : 
     279             :   /// \brief Parse and return the key.
     280             :   ///
     281             :   /// This may be called multiple times.
     282             :   ///
     283             :   /// \returns The key, or nullptr if failed() == true.
     284             :   Node *getKey();
     285             : 
     286             :   /// \brief Parse and return the value.
     287             :   ///
     288             :   /// This may be called multiple times.
     289             :   ///
     290             :   /// \returns The value, or nullptr if failed() == true.
     291             :   Node *getValue();
     292             : 
     293      242629 :   void skip() override {
     294      242629 :     getKey()->skip();
     295      242629 :     if (Node *Val = getValue())
     296      242625 :       Val->skip();
     297      242629 :   }
     298             : 
     299             :   static bool classof(const Node *N) {
     300             :     return N->getType() == NK_KeyValue;
     301             :   }
     302             : 
     303             : private:
     304             :   Node *Key = nullptr;
     305             :   Node *Value = nullptr;
     306             : };
     307             : 
     308             : /// \brief This is an iterator abstraction over YAML collections shared by both
     309             : ///        sequences and maps.
     310             : ///
     311             : /// BaseT must have a ValueT* member named CurrentEntry and a member function
     312             : /// increment() which must set CurrentEntry to 0 to create an end iterator.
     313             : template <class BaseT, class ValueT>
     314             : class basic_collection_iterator
     315             :     : public std::iterator<std::input_iterator_tag, ValueT> {
     316             : public:
     317             :   basic_collection_iterator() = default;
     318       64312 :   basic_collection_iterator(BaseT *B) : Base(B) {}
     319             : 
     320             :   ValueT *operator->() const {
     321             :     assert(Base && Base->CurrentEntry && "Attempted to access end iterator!");
     322        6140 :     return Base->CurrentEntry;
     323             :   }
     324             : 
     325             :   ValueT &operator*() const {
     326             :     assert(Base && Base->CurrentEntry &&
     327             :            "Attempted to dereference end iterator!");
     328      241502 :     return *Base->CurrentEntry;
     329             :   }
     330             : 
     331             :   operator ValueT *() const {
     332             :     assert(Base && Base->CurrentEntry && "Attempted to access end iterator!");
     333         303 :     return Base->CurrentEntry;
     334             :   }
     335             : 
     336             :   /// Note on EqualityComparable:
     337             :   ///
     338             :   /// The iterator is not re-entrant,
     339             :   /// it is meant to be used for parsing YAML on-demand
     340             :   /// Once iteration started - it can point only to one entry at a time
     341             :   /// hence Base.CurrentEntry and Other.Base.CurrentEntry are equal
     342             :   /// iff Base and Other.Base are equal.
     343             :   bool operator==(const basic_collection_iterator &Other) const {
     344             :     if (Base && (Base == Other.Base)) {
     345             :       assert((Base->CurrentEntry == Other.Base->CurrentEntry)
     346             :              && "Equal Bases expected to point to equal Entries");
     347             :     }
     348             : 
     349             :     return Base == Other.Base;
     350             :   }
     351             : 
     352             :   bool operator!=(const basic_collection_iterator &Other) const {
     353           3 :     return !(Base == Other.Base);
     354             :   }
     355             : 
     356             :   basic_collection_iterator &operator++() {
     357             :     assert(Base && "Attempted to advance iterator past end!");
     358      308423 :     Base->increment();
     359             :     // Create an end iterator.
     360      308423 :     if (!Base->CurrentEntry)
     361       64222 :       Base = nullptr;
     362             :     return *this;
     363             :   }
     364             : 
     365             : private:
     366             :   BaseT *Base = nullptr;
     367             : };
     368             : 
     369             : // The following two templates are used for both MappingNode and Sequence Node.
     370             : template <class CollectionType>
     371             : typename CollectionType::iterator begin(CollectionType &C) {
     372             :   assert(C.IsAtBeginning && "You may only iterate over a collection once!");
     373       64312 :   C.IsAtBeginning = false;
     374       64312 :   typename CollectionType::iterator ret(&C);
     375          16 :   ++ret;
     376             :   return ret;
     377             : }
     378             : 
     379       85327 : template <class CollectionType> void skip(CollectionType &C) {
     380             :   // TODO: support skipping from the middle of a parsed collection ;/
     381             :   assert((C.IsAtBeginning || C.IsAtEnd) && "Cannot skip mid parse!");
     382       85327 :   if (C.IsAtBeginning)
     383         672 :     for (typename CollectionType::iterator i = begin(C), e = C.end(); i != e;
     384             :          ++i)
     385         472 :       i->skip();
     386       85327 : }
     387             : 
     388             : /// \brief Represents a YAML map created from either a block map for a flow map.
     389             : ///
     390             : /// This parses the YAML stream as increment() is called.
     391             : ///
     392             : /// Example:
     393             : ///   Name: _main
     394             : ///   Scope: Global
     395             : class MappingNode final : public Node {
     396             :   void anchor() override;
     397             : 
     398             : public:
     399             :   enum MappingType {
     400             :     MT_Block,
     401             :     MT_Flow,
     402             :     MT_Inline ///< An inline mapping node is used for "[key: value]".
     403             :   };
     404             : 
     405             :   MappingNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
     406             :               MappingType MT)
     407       53981 :       : Node(NK_Mapping, D, Anchor, Tag), Type(MT) {}
     408             : 
     409             :   friend class basic_collection_iterator<MappingNode, KeyValueNode>;
     410             : 
     411             :   using iterator = basic_collection_iterator<MappingNode, KeyValueNode>;
     412             : 
     413             :   template <class T> friend typename T::iterator yaml::begin(T &);
     414             :   template <class T> friend void yaml::skip(T &);
     415             : 
     416           1 :   iterator begin() { return yaml::begin(*this); }
     417             : 
     418             :   iterator end() { return iterator(); }
     419             : 
     420       75069 :   void skip() override { yaml::skip(*this); }
     421             : 
     422             :   static bool classof(const Node *N) {
     423      366154 :     return N->getType() == NK_Mapping;
     424             :   }
     425             : 
     426             : private:
     427             :   MappingType Type;
     428             :   bool IsAtBeginning = true;
     429             :   bool IsAtEnd = false;
     430             :   KeyValueNode *CurrentEntry = nullptr;
     431             : 
     432             :   void increment();
     433             : };
     434             : 
     435             : /// \brief Represents a YAML sequence created from either a block sequence for a
     436             : ///        flow sequence.
     437             : ///
     438             : /// This parses the YAML stream as increment() is called.
     439             : ///
     440             : /// Example:
     441             : ///   - Hello
     442             : ///   - World
     443             : class SequenceNode final : public Node {
     444             :   void anchor() override;
     445             : 
     446             : public:
     447             :   enum SequenceType {
     448             :     ST_Block,
     449             :     ST_Flow,
     450             :     // Use for:
     451             :     //
     452             :     // key:
     453             :     // - val1
     454             :     // - val2
     455             :     //
     456             :     // As a BlockMappingEntry and BlockEnd are not created in this case.
     457             :     ST_Indentless
     458             :   };
     459             : 
     460             :   SequenceNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
     461             :                SequenceType ST)
     462       10346 :       : Node(NK_Sequence, D, Anchor, Tag), SeqType(ST) {}
     463             : 
     464             :   friend class basic_collection_iterator<SequenceNode, Node>;
     465             : 
     466             :   using iterator = basic_collection_iterator<SequenceNode, Node>;
     467             : 
     468             :   template <class T> friend typename T::iterator yaml::begin(T &);
     469             :   template <class T> friend void yaml::skip(T &);
     470             : 
     471             :   void increment();
     472             : 
     473          15 :   iterator begin() { return yaml::begin(*this); }
     474             : 
     475           7 :   iterator end() { return iterator(); }
     476             : 
     477       10258 :   void skip() override { yaml::skip(*this); }
     478             : 
     479             :   static bool classof(const Node *N) {
     480      166848 :     return N->getType() == NK_Sequence;
     481             :   }
     482             : 
     483             : private:
     484             :   SequenceType SeqType;
     485             :   bool IsAtBeginning = true;
     486             :   bool IsAtEnd = false;
     487             :   bool WasPreviousTokenFlowEntry = true; // Start with an imaginary ','.
     488             :   Node *CurrentEntry = nullptr;
     489             : };
     490             : 
     491             : /// \brief Represents an alias to a Node with an anchor.
     492             : ///
     493             : /// Example:
     494             : ///   *AnchorName
     495             : class AliasNode final : public Node {
     496             :   void anchor() override;
     497             : 
     498             : public:
     499          16 :   AliasNode(std::unique_ptr<Document> &D, StringRef Val)
     500          16 :       : Node(NK_Alias, D, StringRef(), StringRef()), Name(Val) {}
     501             : 
     502             :   StringRef getName() const { return Name; }
     503             :   Node *getTarget();
     504             : 
     505          58 :   static bool classof(const Node *N) { return N->getType() == NK_Alias; }
     506             : 
     507             : private:
     508             :   StringRef Name;
     509             : };
     510             : 
     511             : /// \brief A YAML Stream is a sequence of Documents. A document contains a root
     512             : ///        node.
     513       11572 : class Document {
     514             : public:
     515             :   Document(Stream &ParentStream);
     516             : 
     517             :   /// \brief Root for parsing a node. Returns a single node.
     518             :   Node *parseBlockNode();
     519             : 
     520             :   /// \brief Finish parsing the current document and return true if there are
     521             :   ///        more. Return false otherwise.
     522             :   bool skip();
     523             : 
     524             :   /// \brief Parse and return the root level node.
     525             :   Node *getRoot() {
     526        5730 :     if (Root)
     527             :       return Root;
     528        5786 :     return Root = parseBlockNode();
     529             :   }
     530             : 
     531        2297 :   const std::map<StringRef, StringRef> &getTagMap() const { return TagMap; }
     532             : 
     533             : private:
     534             :   friend class Node;
     535             :   friend class document_iterator;
     536             : 
     537             :   /// \brief Stream to read tokens from.
     538             :   Stream &stream;
     539             : 
     540             :   /// \brief Used to allocate nodes to. All are destroyed without calling their
     541             :   ///        destructor when the document is destroyed.
     542             :   BumpPtrAllocator NodeAllocator;
     543             : 
     544             :   /// \brief The root node. Used to support skipping a partially parsed
     545             :   ///        document.
     546             :   Node *Root;
     547             : 
     548             :   /// \brief Maps tag prefixes to their expansion.
     549             :   std::map<StringRef, StringRef> TagMap;
     550             : 
     551             :   Token &peekNext();
     552             :   Token getNext();
     553             :   void setError(const Twine &Message, Token &Location) const;
     554             :   bool failed() const;
     555             : 
     556             :   /// \brief Parse %BLAH directives and return true if any were encountered.
     557             :   bool parseDirectives();
     558             : 
     559             :   /// \brief Parse %YAML
     560             :   void parseYAMLDirective();
     561             : 
     562             :   /// \brief Parse %TAG
     563             :   void parseTAGDirective();
     564             : 
     565             :   /// \brief Consume the next token and error if it is not \a TK.
     566             :   bool expectToken(int TK);
     567             : };
     568             : 
     569             : /// \brief Iterator abstraction for Documents over a Stream.
     570             : class document_iterator {
     571             : public:
     572        2972 :   document_iterator() = default;
     573             :   document_iterator(std::unique_ptr<Document> &D) : Doc(&D) {}
     574             : 
     575       11154 :   bool operator==(const document_iterator &Other) {
     576        8000 :     if (isAtEnd() || Other.isAtEnd())
     577             :       return isAtEnd() && Other.isAtEnd();
     578             : 
     579           0 :     return Doc == Other.Doc;
     580             :   }
     581       10991 :   bool operator!=(const document_iterator &Other) { return !(*this == Other); }
     582             : 
     583        4267 :   document_iterator operator++() {
     584             :     assert(Doc && "incrementing iterator past the end.");
     585        8534 :     if (!(*Doc)->skip()) {
     586        1861 :       Doc->reset(nullptr);
     587             :     } else {
     588        4812 :       Stream &S = (*Doc)->stream;
     589        2406 :       Doc->reset(new Document(S));
     590             :     }
     591        4267 :     return *this;
     592             :   }
     593             : 
     594         264 :   Document &operator*() { return *Doc->get(); }
     595             : 
     596        5649 :   std::unique_ptr<Document> &operator->() { return *Doc; }
     597             : 
     598             : private:
     599       52616 :   bool isAtEnd() const { return !Doc || !*Doc; }
     600             : 
     601             :   std::unique_ptr<Document> *Doc = nullptr;
     602             : };
     603             : 
     604             : } // end namespace yaml
     605             : 
     606             : } // end namespace llvm
     607             : 
     608             : #endif // LLVM_SUPPORT_YAMLPARSER_H

Generated by: LCOV version 1.13