LCOV - code coverage report
Current view: top level - include/llvm/Support - YAMLParser.h (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 101 166 60.8 %
Date: 2018-10-20 13:21:21 Functions: 12 33 36.4 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- YAMLParser.h - Simple YAML parser ------------------------*- C++ -*-===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : //  This is a YAML 1.2 parser.
      11             : //
      12             : //  See http://www.yaml.org/spec/1.2/spec.html for the full standard.
      13             : //
      14             : //  This currently does not implement the following:
      15             : //    * Multi-line literal folding.
      16             : //    * Tag resolution.
      17             : //    * UTF-16.
      18             : //    * BOMs anywhere other than the first Unicode scalar value in the file.
      19             : //
      20             : //  The most important class here is Stream. This represents a YAML stream with
      21             : //  0, 1, or many documents.
      22             : //
      23             : //  SourceMgr sm;
      24             : //  StringRef input = getInput();
      25             : //  yaml::Stream stream(input, sm);
      26             : //
      27             : //  for (yaml::document_iterator di = stream.begin(), de = stream.end();
      28             : //       di != de; ++di) {
      29             : //    yaml::Node *n = di->getRoot();
      30             : //    if (n) {
      31             : //      // Do something with n...
      32             : //    } else
      33             : //      break;
      34             : //  }
      35             : //
      36             : //===----------------------------------------------------------------------===//
      37             : 
      38             : #ifndef LLVM_SUPPORT_YAMLPARSER_H
      39             : #define LLVM_SUPPORT_YAMLPARSER_H
      40             : 
      41             : #include "llvm/ADT/StringRef.h"
      42             : #include "llvm/Support/Allocator.h"
      43             : #include "llvm/Support/SMLoc.h"
      44             : #include <cassert>
      45             : #include <cstddef>
      46             : #include <iterator>
      47             : #include <map>
      48             : #include <memory>
      49             : #include <string>
      50             : #include <system_error>
      51             : 
      52             : namespace llvm {
      53             : 
      54             : class MemoryBufferRef;
      55             : class SourceMgr;
      56             : class raw_ostream;
      57             : class Twine;
      58             : 
      59             : namespace yaml {
      60             : 
      61             : class Document;
      62             : class document_iterator;
      63             : class Node;
      64             : class Scanner;
      65             : struct Token;
      66             : 
      67             : /// Dump all the tokens in this stream to OS.
      68             : /// \returns true if there was an error, false otherwise.
      69             : bool dumpTokens(StringRef Input, raw_ostream &);
      70             : 
      71             : /// Scans all tokens in input without outputting anything. This is used
      72             : ///        for benchmarking the tokenizer.
      73             : /// \returns true if there was an error, false otherwise.
      74             : bool scanTokens(StringRef Input);
      75             : 
      76             : /// Escape \a Input for a double quoted scalar; if \p EscapePrintable
      77             : /// is true, all UTF8 sequences will be escaped, if \p EscapePrintable is
      78             : /// false, those UTF8 sequences encoding printable unicode scalars will not be
      79             : /// escaped, but emitted verbatim.
      80             : std::string escape(StringRef Input, bool EscapePrintable = true);
      81             : 
      82             : /// This class represents a YAML stream potentially containing multiple
      83             : ///        documents.
      84        6291 : class Stream {
      85             : public:
      86             :   /// This keeps a reference to the string referenced by \p Input.
      87             :   Stream(StringRef Input, SourceMgr &, bool ShowColors = true,
      88             :          std::error_code *EC = nullptr);
      89             : 
      90             :   Stream(MemoryBufferRef InputBuffer, SourceMgr &, bool ShowColors = true,
      91             :          std::error_code *EC = nullptr);
      92             :   ~Stream();
      93             : 
      94             :   document_iterator begin();
      95             :   document_iterator end();
      96             :   void skip();
      97             :   bool failed();
      98             : 
      99             :   bool validate() {
     100          53 :     skip();
     101          53 :     return !failed();
     102             :   }
     103             : 
     104             :   void printError(Node *N, const Twine &Msg);
     105             : 
     106             : private:
     107             :   friend class Document;
     108             : 
     109             :   std::unique_ptr<Scanner> scanner;
     110             :   std::unique_ptr<Document> CurrentDoc;
     111             : };
     112             : 
     113             : /// Abstract base class for all Nodes.
     114             : class Node {
     115             :   virtual void anchor();
     116             : 
     117             : public:
     118             :   enum NodeKind {
     119             :     NK_Null,
     120             :     NK_Scalar,
     121             :     NK_BlockScalar,
     122             :     NK_KeyValue,
     123             :     NK_Mapping,
     124             :     NK_Sequence,
     125             :     NK_Alias
     126             :   };
     127             : 
     128             :   Node(unsigned int Type, std::unique_ptr<Document> &, StringRef Anchor,
     129             :        StringRef Tag);
     130             : 
     131             :   // It's not safe to copy YAML nodes; the document is streamed and the position
     132             :   // is part of the state.
     133             :   Node(const Node &) = delete;
     134             :   void operator=(const Node &) = delete;
     135             : 
     136             :   void *operator new(size_t Size, BumpPtrAllocator &Alloc,
     137             :                      size_t Alignment = 16) noexcept {
     138      998782 :     return Alloc.Allocate(Size, Alignment);
     139             :   }
     140             : 
     141             :   void operator delete(void *Ptr, BumpPtrAllocator &Alloc,
     142             :                        size_t Size) noexcept {
     143             :     Alloc.Deallocate(Ptr, Size);
     144             :   }
     145             : 
     146             :   void operator delete(void *) noexcept = delete;
     147             : 
     148             :   /// Get the value of the anchor attached to this node. If it does not
     149             :   ///        have one, getAnchor().size() will be 0.
     150           0 :   StringRef getAnchor() const { return Anchor; }
     151             : 
     152             :   /// Get the tag as it was written in the document. This does not
     153             :   ///   perform tag resolution.
     154           0 :   StringRef getRawTag() const { return Tag; }
     155             : 
     156             :   /// Get the verbatim tag for a given Node. This performs tag resolution
     157             :   ///   and substitution.
     158             :   std::string getVerbatimTag() const;
     159             : 
     160           0 :   SMRange getSourceRange() const { return SourceRange; }
     161             :   void setSourceRange(SMRange SR) { SourceRange = SR; }
     162             : 
     163             :   // These functions forward to Document and Scanner.
     164             :   Token &peekNext();
     165             :   Token getNext();
     166             :   Node *parseBlockNode();
     167             :   BumpPtrAllocator &getAllocator();
     168             :   void setError(const Twine &Message, Token &Location) const;
     169             :   bool failed() const;
     170             : 
     171     1137898 :   virtual void skip() {}
     172             : 
     173           0 :   unsigned int getType() const { return TypeID; }
     174             : 
     175             : protected:
     176             :   std::unique_ptr<Document> &Doc;
     177             :   SMRange SourceRange;
     178             : 
     179             :   ~Node() = default;
     180             : 
     181             : private:
     182             :   unsigned int TypeID;
     183             :   StringRef Anchor;
     184             :   /// The tag as typed in the document.
     185             :   StringRef Tag;
     186             : };
     187             : 
     188             : /// A null value.
     189             : ///
     190             : /// Example:
     191             : ///   !!null null
     192             : class NullNode final : public Node {
     193             :   void anchor() override;
     194             : 
     195             : public:
     196        3163 :   NullNode(std::unique_ptr<Document> &D)
     197        3163 :       : Node(NK_Null, D, StringRef(), StringRef()) {}
     198             : 
     199        8407 :   static bool classof(const Node *N) { return N->getType() == NK_Null; }
     200             : };
     201             : 
     202             : /// A scalar node is an opaque datum that can be presented as a
     203             : ///        series of zero or more Unicode scalar values.
     204             : ///
     205             : /// Example:
     206             : ///   Adena
     207             : class ScalarNode final : public Node {
     208             :   void anchor() override;
     209             : 
     210             : public:
     211             :   ScalarNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
     212             :              StringRef Val)
     213      586096 :       : Node(NK_Scalar, D, Anchor, Tag), Value(Val) {
     214             :     SMLoc Start = SMLoc::getFromPointer(Val.begin());
     215             :     SMLoc End = SMLoc::getFromPointer(Val.end());
     216      586096 :     SourceRange = SMRange(Start, End);
     217             :   }
     218             : 
     219             :   // Return Value without any escaping, folding, or other YAML processing.
     220             :   // These are the exact bytes contained in the file (after conversion to
     221             :   // UTF-8).
     222           0 :   StringRef getRawValue() const { return Value; }
     223             : 
     224             :   /// Gets the value of this node as a StringRef.
     225             :   ///
     226             :   /// \param Storage is used to store the content of the returned StringRef iff
     227             :   ///        it requires any modification from how it appeared in the source.
     228             :   ///        This happens with escaped characters and multi-line literals.
     229             :   StringRef getValue(SmallVectorImpl<char> &Storage) const;
     230             : 
     231             :   static bool classof(const Node *N) {
     232     1127268 :     return N->getType() == NK_Scalar;
     233             :   }
     234             : 
     235             : private:
     236             :   StringRef Value;
     237             : 
     238             :   StringRef unescapeDoubleQuoted(StringRef UnquotedValue,
     239             :                                  StringRef::size_type Start,
     240             :                                  SmallVectorImpl<char> &Storage) const;
     241             : };
     242             : 
     243             : /// A block scalar node is an opaque datum that can be presented as a
     244             : ///        series of zero or more Unicode scalar values.
     245             : ///
     246             : /// Example:
     247             : ///   |
     248             : ///     Hello
     249             : ///     World
     250             : class BlockScalarNode final : public Node {
     251             :   void anchor() override;
     252             : 
     253             : public:
     254             :   BlockScalarNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
     255             :                   StringRef Value, StringRef RawVal)
     256        5548 :       : Node(NK_BlockScalar, D, Anchor, Tag), Value(Value) {
     257             :     SMLoc Start = SMLoc::getFromPointer(RawVal.begin());
     258             :     SMLoc End = SMLoc::getFromPointer(RawVal.end());
     259        5548 :     SourceRange = SMRange(Start, End);
     260             :   }
     261             : 
     262             :   /// Gets the value of this node as a StringRef.
     263           0 :   StringRef getValue() const { return Value; }
     264             : 
     265             :   static bool classof(const Node *N) {
     266        1256 :     return N->getType() == NK_BlockScalar;
     267             :   }
     268             : 
     269             : private:
     270             :   StringRef Value;
     271             : };
     272             : 
     273             : /// A key and value pair. While not technically a Node under the YAML
     274             : ///        representation graph, it is easier to treat them this way.
     275             : ///
     276             : /// TODO: Consider making this not a child of Node.
     277             : ///
     278             : /// Example:
     279             : ///   Section: .text
     280             : class KeyValueNode final : public Node {
     281             :   void anchor() override;
     282             : 
     283             : public:
     284      299006 :   KeyValueNode(std::unique_ptr<Document> &D)
     285      299006 :       : Node(NK_KeyValue, D, StringRef(), StringRef()) {}
     286             : 
     287             :   /// Parse and return the key.
     288             :   ///
     289             :   /// This may be called multiple times.
     290             :   ///
     291             :   /// \returns The key, or nullptr if failed() == true.
     292             :   Node *getKey();
     293             : 
     294             :   /// Parse and return the value.
     295             :   ///
     296             :   /// This may be called multiple times.
     297             :   ///
     298             :   /// \returns The value, or nullptr if failed() == true.
     299             :   Node *getValue();
     300             : 
     301      415192 :   void skip() override {
     302      415192 :     if (Node *Key = getKey()) {
     303      415191 :       Key->skip();
     304      415191 :       if (Node *Val = getValue())
     305      415187 :         Val->skip();
     306             :     }
     307      415192 :   }
     308             : 
     309             :   static bool classof(const Node *N) {
     310             :     return N->getType() == NK_KeyValue;
     311             :   }
     312             : 
     313             : private:
     314             :   Node *Key = nullptr;
     315             :   Node *Value = nullptr;
     316             : };
     317             : 
     318             : /// This is an iterator abstraction over YAML collections shared by both
     319             : ///        sequences and maps.
     320             : ///
     321             : /// BaseT must have a ValueT* member named CurrentEntry and a member function
     322             : /// increment() which must set CurrentEntry to 0 to create an end iterator.
     323             : template <class BaseT, class ValueT>
     324             : class basic_collection_iterator
     325             :     : public std::iterator<std::input_iterator_tag, ValueT> {
     326             : public:
     327             :   basic_collection_iterator() = default;
     328             :   basic_collection_iterator(BaseT *B) : Base(B) {}
     329             : 
     330           0 :   ValueT *operator->() const {
     331             :     assert(Base && Base->CurrentEntry && "Attempted to access end iterator!");
     332        5718 :     return Base->CurrentEntry;
     333             :   }
     334           0 : 
     335           0 :   ValueT &operator*() const {
     336           0 :     assert(Base && Base->CurrentEntry &&
     337             :            "Attempted to dereference end iterator!");
     338      343337 :     return *Base->CurrentEntry;
     339             :   }
     340           0 : 
     341           0 :   operator ValueT *() const {
     342             :     assert(Base && Base->CurrentEntry && "Attempted to access end iterator!");
     343         303 :     return Base->CurrentEntry;
     344             :   }
     345           0 : 
     346             :   /// Note on EqualityComparable:
     347             :   ///
     348           0 :   /// The iterator is not re-entrant;
     349             :   /// it is meant to be used for parsing YAML on demand.
     350             :   /// Once iteration has started, it can point to only one entry at a time,
     351             :   /// hence Base->CurrentEntry and Other.Base->CurrentEntry are equal
     352             :   /// iff Base and Other.Base are equal.
     353           0 :   bool operator==(const basic_collection_iterator &Other) const {
     354           0 :     if (Base && (Base == Other.Base)) {
     355             :       assert((Base->CurrentEntry == Other.Base->CurrentEntry)
     356             :              && "Equal Bases expected to point to equal Entries");
     357             :     }
     358             : 
     359           0 :     return Base == Other.Base;
     360             :   }
     361             : 
     362           0 :   bool operator!=(const basic_collection_iterator &Other) const {
     363           3 :     return !(Base == Other.Base);
     364           0 :   }
     365           0 : 
     366           0 :   basic_collection_iterator &operator++() {
     367             :     assert(Base && "Attempted to advance iterator past end!");
     368          86 :     Base->increment();
     369           0 :     // Create an end iterator.
     370         131 :     if (!Base->CurrentEntry)
     371           0 :       Base = nullptr;
     372           0 :     return *this;
     373           0 :   }
     374         679 : 
     375           0 : private:
     376         977 :   BaseT *Base = nullptr;
     377           0 : };
     378           0 : 
     379           0 : // The following two templates are used for both MappingNode and SequenceNode.
     380           0 : template <class CollectionType>
     381             : typename CollectionType::iterator begin(CollectionType &C) {
     382        4947 :   assert(C.IsAtBeginning && "You may only iterate over a collection once!");
     383          45 :   C.IsAtBeginning = false;
     384      391103 :   typename CollectionType::iterator ret(&C);
     385           0 :   ++ret;
     386      486255 :   return ret;
     387             : }
     388           0 : 
     389         298 : template <class CollectionType> void skip(CollectionType &C) {
     390             :   // TODO: support skipping from the middle of a parsed collection ;/
     391           0 :   assert((C.IsAtBeginning || C.IsAtEnd) && "Cannot skip mid parse!");
     392           0 :   if (C.IsAtBeginning)
     393             :     for (typename CollectionType::iterator i = begin(C), e = C.end(); i != e;
     394           0 :          ++i)
     395           0 :       i->skip();
     396             : }
     397        1875 : 
     398             : /// Represents a YAML map created from either a block map or a flow map.
     399      101974 : ///
     400           0 : /// This parses the YAML stream as increment() is called.
     401             : ///
     402           0 : /// Example:
     403      151055 : ///   Name: _main
     404             : ///   Scope: Global
     405             : class MappingNode final : public Node {
     406      151055 :   void anchor() override;
     407        6822 : 
     408             : public:
     409        4947 :   enum MappingType {
     410      151055 :     MT_Block,
     411       14403 :     MT_Flow,
     412             :     MT_Inline ///< An inline mapping node is used for "[key: value]".
     413             :   };
     414       14403 : 
     415        1381 :   MappingNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
     416             :               MappingType MT)
     417         709 :       : Node(NK_Mapping, D, Anchor, Tag), Type(MT) {}
     418       14403 : 
     419      136652 :   friend class basic_collection_iterator<MappingNode, KeyValueNode>;
     420             : 
     421             :   using iterator = basic_collection_iterator<MappingNode, KeyValueNode>;
     422      136652 : 
     423        5441 :   template <class T> friend typename T::iterator yaml::begin(T &);
     424             :   template <class T> friend void yaml::skip(T &);
     425        4238 : 
     426      136652 :   iterator begin() { return yaml::begin(*this); }
     427             : 
     428           0 :   iterator end() { return iterator(); }
     429             : 
     430             :   void skip() override { yaml::skip(*this); }
     431             : 
     432             :   static bool classof(const Node *N) {
     433          28 :     return N->getType() == NK_Mapping;
     434           0 :   }
     435             : 
     436             : private:
     437             :   MappingType Type;
     438             :   bool IsAtBeginning = true;
     439           0 :   bool IsAtEnd = false;
     440             :   KeyValueNode *CurrentEntry = nullptr;
     441             : 
     442             :   void increment();
     443             : };
     444           0 : 
     445             : /// Represents a YAML sequence created from either a block sequence or a
     446             : ///        flow sequence.
     447       91037 : ///
     448             : /// This parses the YAML stream as increment() is called.
     449      461151 : ///
     450             : /// Example:
     451             : ///   - Hello
     452             : ///   - World
     453             : class SequenceNode final : public Node {
     454             :   void anchor() override;
     455             : 
     456             : public:
     457             :   enum SequenceType {
     458           0 :     ST_Block,
     459             :     ST_Flow,
     460      136652 :     // Use for:
     461             :     //
     462             :     // key:
     463             :     // - val1
     464             :     // - val2
     465           0 :     //
     466             :     // As a BlockMappingEntry and BlockEnd are not created in this case.
     467             :     ST_Indentless
     468             :   };
     469             : 
     470             :   SequenceNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
     471             :                SequenceType ST)
     472             :       : Node(NK_Sequence, D, Anchor, Tag), SeqType(ST) {}
     473             : 
     474             :   friend class basic_collection_iterator<SequenceNode, Node>;
     475             : 
     476             :   using iterator = basic_collection_iterator<SequenceNode, Node>;
     477             : 
     478             :   template <class T> friend typename T::iterator yaml::begin(T &);
     479             :   template <class T> friend void yaml::skip(T &);
     480             : 
     481             :   void increment();
     482             : 
     483             :   iterator begin() { return yaml::begin(*this); }
     484             : 
     485           0 :   iterator end() { return iterator(); }
     486             : 
     487             :   void skip() override { yaml::skip(*this); }
     488             : 
     489             :   static bool classof(const Node *N) {
     490          16 :     return N->getType() == NK_Sequence;
     491           0 :   }
     492             : 
     493             : private:
     494             :   SequenceType SeqType;
     495             :   bool IsAtBeginning = true;
     496           0 :   bool IsAtEnd = false;
     497             :   bool WasPreviousTokenFlowEntry = true; // Start with an imaginary ','.
     498             :   Node *CurrentEntry = nullptr;
     499             : };
     500             : 
     501           0 : /// Represents an alias to a Node with an anchor.
     502       13916 : ///
     503             : /// Example:
     504             : ///   *AnchorName
     505             : class AliasNode final : public Node {
     506       99590 :   void anchor() override;
     507             : 
     508             : public:
     509             :   AliasNode(std::unique_ptr<Document> &D, StringRef Val)
     510             :       : Node(NK_Alias, D, StringRef(), StringRef()), Name(Val) {}
     511             : 
     512             :   StringRef getName() const { return Name; }
     513             :   Node *getTarget();
     514             : 
     515           0 :   static bool classof(const Node *N) { return N->getType() == NK_Alias; }
     516             : 
     517       14403 : private:
     518           0 :   StringRef Name;
     519             : };
     520             : 
     521           0 : /// A YAML Stream is a sequence of Documents. A document contains a root
     522           0 : ///        node.
     523           0 : class Document {
     524             : public:
     525             :   Document(Stream &ParentStream);
     526             : 
     527             :   /// Root for parsing a node. Returns a single node.
     528             :   Node *parseBlockNode();
     529           0 : 
     530             :   /// Finish parsing the current document and return true if there are
     531             :   ///        more. Return false otherwise.
     532             :   bool skip();
     533             : 
     534             :   /// Parse and return the root level node.
     535             :   Node *getRoot() {
     536          23 :     if (Root)
     537             :       return Root;
     538          21 :     return Root = parseBlockNode();
     539          16 :   }
     540          16 : 
     541             :   const std::map<StringRef, StringRef> &getTagMap() const { return TagMap; }
     542         191 : 
     543             : private:
     544         191 :   friend class Node;
     545             :   friend class document_iterator;
     546             : 
     547             :   /// Stream to read tokens from.
     548             :   Stream &stream;
     549             : 
     550             :   /// Used to allocate nodes to. All are destroyed without calling their
     551             :   ///        destructor when the document is destroyed.
     552        8659 :   BumpPtrAllocator NodeAllocator;
     553           0 : 
     554        8654 :   /// The root node. Used to support skipping a partially parsed
     555             :   ///        document.
     556             :   Node *Root;
     557             : 
     558             :   /// Maps tag prefixes to their expansion.
     559             :   std::map<StringRef, StringRef> TagMap;
     560             : 
     561             :   Token &peekNext();
     562             :   Token getNext();
     563             :   void setError(const Twine &Message, Token &Location) const;
     564             :   bool failed() const;
     565             : 
     566             :   /// Parse %BLAH directives and return true if any were encountered.
     567             :   bool parseDirectives();
     568         580 : 
     569             :   /// Parse %YAML
     570           0 :   void parseYAMLDirective();
     571             : 
     572             :   /// Parse %TAG
     573             :   void parseTAGDirective();
     574             : 
     575             :   /// Consume the next token and error if it is not \a TK.
     576             :   bool expectToken(int TK);
     577             : };
     578             : 
     579             : /// Iterator abstraction for Documents over a Stream.
     580             : class document_iterator {
     581             : public:
     582             :   document_iterator() = default;
     583             :   document_iterator(std::unique_ptr<Document> &D) : Doc(&D) {}
     584             : 
     585           4 :   bool operator==(const document_iterator &Other) const {
     586           6 :     if (isAtEnd() || Other.isAtEnd())
     587           2 :       return isAtEnd() && Other.isAtEnd();
     588             : 
     589           0 :     return Doc == Other.Doc;
     590             :   }
     591         355 :   bool operator!=(const document_iterator &Other) const {
     592         550 :     return !(*this == Other);
     593         164 :   }
     594             : 
     595           2 :   document_iterator operator++() {
     596             :     assert(Doc && "incrementing iterator past the end.");
     597           4 :     if (!(*Doc)->skip()) {
     598        4369 :       Doc->reset(nullptr);
     599             :     } else {
     600           0 :       Stream &S = (*Doc)->stream;
     601       17999 :       Doc->reset(new Document(S));
     602       31423 :     }
     603        4577 :     return *this;
     604         164 :   }
     605           0 : 
     606          22 :   Document &operator*() { return *Doc->get(); }
     607          22 : 
     608       10737 :   std::unique_ptr<Document> &operator->() { return *Doc; }
     609         186 : 
     610             : private:
     611        7407 :   bool isAtEnd() const { return !Doc || !*Doc; }
     612             : 
     613       14806 :   std::unique_ptr<Document> *Doc = nullptr;
     614        2357 : };
     615         107 : 
     616        5207 : } // end namespace yaml
     617        5454 : 
     618           0 : } // end namespace llvm
     619        7403 : 
     620             : #endif // LLVM_SUPPORT_YAMLPARSER_H

Generated by: LCOV version 1.13