// LLVM API Documentation
//===--- YAMLParser.h - Simple YAML parser --------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//  This is a YAML 1.2 parser.
//
//  See http://www.yaml.org/spec/1.2/spec.html for the full standard.
//
//  This currently does not implement the following:
//    * Multi-line literal folding.
//    * Tag resolution.
//    * UTF-16.
//    * BOMs anywhere other than the first Unicode scalar value in the file.
//
//  The most important class here is Stream. This represents a YAML stream with
//  0, 1, or many documents.
//
//  SourceMgr sm;
//  StringRef input = getInput();
//  yaml::Stream stream(input, sm);
//
//  for (yaml::document_iterator di = stream.begin(), de = stream.end();
//       di != de; ++di) {
//    yaml::Node *n = di->getRoot();
//    if (n) {
//      // Do something with n...
//    } else
//      break;
//  }
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_SUPPORT_YAMLPARSER_H
#define LLVM_SUPPORT_YAMLPARSER_H

#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/SMLoc.h"
#include <cassert>
#include <iterator>
#include <limits>
#include <string>
#include <utility>

namespace llvm {
class MemoryBuffer;
class SourceMgr;
class raw_ostream;
class Twine;

namespace yaml {

class document_iterator;
class Document;
class Node;
class Scanner;
struct Token;

/// @brief Dump all the tokens in this stream to OS.
00064 /// @returns true if there was an error, false otherwise. 00065 bool dumpTokens(StringRef Input, raw_ostream &); 00066 00067 /// @brief Scans all tokens in input without outputting anything. This is used 00068 /// for benchmarking the tokenizer. 00069 /// @returns true if there was an error, false otherwise. 00070 bool scanTokens(StringRef Input); 00071 00072 /// @brief Escape \a Input for a double quoted scalar. 00073 std::string escape(StringRef Input); 00074 00075 /// @brief This class represents a YAML stream potentially containing multiple 00076 /// documents. 00077 class Stream { 00078 public: 00079 /// @brief This keeps a reference to the string referenced by \p Input. 00080 Stream(StringRef Input, SourceMgr &); 00081 00082 /// @brief This takes ownership of \p InputBuffer. 00083 Stream(MemoryBuffer *InputBuffer, SourceMgr &); 00084 ~Stream(); 00085 00086 document_iterator begin(); 00087 document_iterator end(); 00088 void skip(); 00089 bool failed(); 00090 bool validate() { 00091 skip(); 00092 return !failed(); 00093 } 00094 00095 void printError(Node *N, const Twine &Msg); 00096 00097 private: 00098 OwningPtr<Scanner> scanner; 00099 OwningPtr<Document> CurrentDoc; 00100 00101 friend class Document; 00102 00103 /// @brief Validate a %YAML x.x directive. 00104 void handleYAMLDirective(const Token &); 00105 }; 00106 00107 /// @brief Abstract base class for all Nodes. 00108 class Node { 00109 public: 00110 enum NodeKind { 00111 NK_Null, 00112 NK_Scalar, 00113 NK_KeyValue, 00114 NK_Mapping, 00115 NK_Sequence, 00116 NK_Alias 00117 }; 00118 00119 Node(unsigned int Type, OwningPtr<Document>&, StringRef Anchor); 00120 00121 /// @brief Get the value of the anchor attached to this node. If it does not 00122 /// have one, getAnchor().size() will be 0. 
00123 StringRef getAnchor() const { return Anchor; } 00124 00125 SMRange getSourceRange() const { return SourceRange; } 00126 void setSourceRange(SMRange SR) { SourceRange = SR; } 00127 00128 // These functions forward to Document and Scanner. 00129 Token &peekNext(); 00130 Token getNext(); 00131 Node *parseBlockNode(); 00132 BumpPtrAllocator &getAllocator(); 00133 void setError(const Twine &Message, Token &Location) const; 00134 bool failed() const; 00135 00136 virtual void skip() {} 00137 00138 unsigned int getType() const { return TypeID; } 00139 00140 void *operator new ( size_t Size 00141 , BumpPtrAllocator &Alloc 00142 , size_t Alignment = 16) throw() { 00143 return Alloc.Allocate(Size, Alignment); 00144 } 00145 00146 void operator delete(void *Ptr, BumpPtrAllocator &Alloc, size_t) throw() { 00147 Alloc.Deallocate(Ptr); 00148 } 00149 00150 protected: 00151 OwningPtr<Document> &Doc; 00152 SMRange SourceRange; 00153 00154 void operator delete(void *) throw() {} 00155 00156 virtual ~Node() {} 00157 00158 private: 00159 unsigned int TypeID; 00160 StringRef Anchor; 00161 }; 00162 00163 /// @brief A null value. 00164 /// 00165 /// Example: 00166 /// !!null null 00167 class NullNode : public Node { 00168 public: 00169 NullNode(OwningPtr<Document> &D) : Node(NK_Null, D, StringRef()) {} 00170 00171 static inline bool classof(const Node *N) { 00172 return N->getType() == NK_Null; 00173 } 00174 }; 00175 00176 /// @brief A scalar node is an opaque datum that can be presented as a 00177 /// series of zero or more Unicode scalar values. 
00178 /// 00179 /// Example: 00180 /// Adena 00181 class ScalarNode : public Node { 00182 public: 00183 ScalarNode(OwningPtr<Document> &D, StringRef Anchor, StringRef Val) 00184 : Node(NK_Scalar, D, Anchor) 00185 , Value(Val) { 00186 SMLoc Start = SMLoc::getFromPointer(Val.begin()); 00187 SMLoc End = SMLoc::getFromPointer(Val.end()); 00188 SourceRange = SMRange(Start, End); 00189 } 00190 00191 // Return Value without any escaping or folding or other fun YAML stuff. This 00192 // is the exact bytes that are contained in the file (after conversion to 00193 // utf8). 00194 StringRef getRawValue() const { return Value; } 00195 00196 /// @brief Gets the value of this node as a StringRef. 00197 /// 00198 /// @param Storage is used to store the content of the returned StringRef iff 00199 /// it requires any modification from how it appeared in the source. 00200 /// This happens with escaped characters and multi-line literals. 00201 StringRef getValue(SmallVectorImpl<char> &Storage) const; 00202 00203 static inline bool classof(const Node *N) { 00204 return N->getType() == NK_Scalar; 00205 } 00206 00207 private: 00208 StringRef Value; 00209 00210 StringRef unescapeDoubleQuoted( StringRef UnquotedValue 00211 , StringRef::size_type Start 00212 , SmallVectorImpl<char> &Storage) const; 00213 }; 00214 00215 /// @brief A key and value pair. While not technically a Node under the YAML 00216 /// representation graph, it is easier to treat them this way. 00217 /// 00218 /// TODO: Consider making this not a child of Node. 00219 /// 00220 /// Example: 00221 /// Section: .text 00222 class KeyValueNode : public Node { 00223 public: 00224 KeyValueNode(OwningPtr<Document> &D) 00225 : Node(NK_KeyValue, D, StringRef()) 00226 , Key(0) 00227 , Value(0) 00228 {} 00229 00230 /// @brief Parse and return the key. 00231 /// 00232 /// This may be called multiple times. 00233 /// 00234 /// @returns The key, or nullptr if failed() == true. 
00235 Node *getKey(); 00236 00237 /// @brief Parse and return the value. 00238 /// 00239 /// This may be called multiple times. 00240 /// 00241 /// @returns The value, or nullptr if failed() == true. 00242 Node *getValue(); 00243 00244 virtual void skip() LLVM_OVERRIDE { 00245 getKey()->skip(); 00246 getValue()->skip(); 00247 } 00248 00249 static inline bool classof(const Node *N) { 00250 return N->getType() == NK_KeyValue; 00251 } 00252 00253 private: 00254 Node *Key; 00255 Node *Value; 00256 }; 00257 00258 /// @brief This is an iterator abstraction over YAML collections shared by both 00259 /// sequences and maps. 00260 /// 00261 /// BaseT must have a ValueT* member named CurrentEntry and a member function 00262 /// increment() which must set CurrentEntry to 0 to create an end iterator. 00263 template <class BaseT, class ValueT> 00264 class basic_collection_iterator 00265 : public std::iterator<std::forward_iterator_tag, ValueT> { 00266 public: 00267 basic_collection_iterator() : Base(0) {} 00268 basic_collection_iterator(BaseT *B) : Base(B) {} 00269 00270 ValueT *operator ->() const { 00271 assert(Base && Base->CurrentEntry && "Attempted to access end iterator!"); 00272 return Base->CurrentEntry; 00273 } 00274 00275 ValueT &operator *() const { 00276 assert(Base && Base->CurrentEntry && 00277 "Attempted to dereference end iterator!"); 00278 return *Base->CurrentEntry; 00279 } 00280 00281 operator ValueT*() const { 00282 assert(Base && Base->CurrentEntry && "Attempted to access end iterator!"); 00283 return Base->CurrentEntry; 00284 } 00285 00286 bool operator !=(const basic_collection_iterator &Other) const { 00287 if(Base != Other.Base) 00288 return true; 00289 return (Base && Other.Base) && Base->CurrentEntry 00290 != Other.Base->CurrentEntry; 00291 } 00292 00293 basic_collection_iterator &operator++() { 00294 assert(Base && "Attempted to advance iterator past end!"); 00295 Base->increment(); 00296 // Create an end iterator. 
00297 if (Base->CurrentEntry == 0) 00298 Base = 0; 00299 return *this; 00300 } 00301 00302 private: 00303 BaseT *Base; 00304 }; 00305 00306 // The following two templates are used for both MappingNode and Sequence Node. 00307 template <class CollectionType> 00308 typename CollectionType::iterator begin(CollectionType &C) { 00309 assert(C.IsAtBeginning && "You may only iterate over a collection once!"); 00310 C.IsAtBeginning = false; 00311 typename CollectionType::iterator ret(&C); 00312 ++ret; 00313 return ret; 00314 } 00315 00316 template <class CollectionType> 00317 void skip(CollectionType &C) { 00318 // TODO: support skipping from the middle of a parsed collection ;/ 00319 assert((C.IsAtBeginning || C.IsAtEnd) && "Cannot skip mid parse!"); 00320 if (C.IsAtBeginning) 00321 for (typename CollectionType::iterator i = begin(C), e = C.end(); 00322 i != e; ++i) 00323 i->skip(); 00324 } 00325 00326 /// @brief Represents a YAML map created from either a block map for a flow map. 00327 /// 00328 /// This parses the YAML stream as increment() is called. 00329 /// 00330 /// Example: 00331 /// Name: _main 00332 /// Scope: Global 00333 class MappingNode : public Node { 00334 public: 00335 enum MappingType { 00336 MT_Block, 00337 MT_Flow, 00338 MT_Inline ///< An inline mapping node is used for "[key: value]". 
00339 }; 00340 00341 MappingNode(OwningPtr<Document> &D, StringRef Anchor, MappingType MT) 00342 : Node(NK_Mapping, D, Anchor) 00343 , Type(MT) 00344 , IsAtBeginning(true) 00345 , IsAtEnd(false) 00346 , CurrentEntry(0) 00347 {} 00348 00349 friend class basic_collection_iterator<MappingNode, KeyValueNode>; 00350 typedef basic_collection_iterator<MappingNode, KeyValueNode> iterator; 00351 template <class T> friend typename T::iterator yaml::begin(T &); 00352 template <class T> friend void yaml::skip(T &); 00353 00354 iterator begin() { 00355 return yaml::begin(*this); 00356 } 00357 00358 iterator end() { return iterator(); } 00359 00360 virtual void skip() LLVM_OVERRIDE { 00361 yaml::skip(*this); 00362 } 00363 00364 static inline bool classof(const Node *N) { 00365 return N->getType() == NK_Mapping; 00366 } 00367 00368 private: 00369 MappingType Type; 00370 bool IsAtBeginning; 00371 bool IsAtEnd; 00372 KeyValueNode *CurrentEntry; 00373 00374 void increment(); 00375 }; 00376 00377 /// @brief Represents a YAML sequence created from either a block sequence for a 00378 /// flow sequence. 00379 /// 00380 /// This parses the YAML stream as increment() is called. 00381 /// 00382 /// Example: 00383 /// - Hello 00384 /// - World 00385 class SequenceNode : public Node { 00386 public: 00387 enum SequenceType { 00388 ST_Block, 00389 ST_Flow, 00390 // Use for: 00391 // 00392 // key: 00393 // - val1 00394 // - val2 00395 // 00396 // As a BlockMappingEntry and BlockEnd are not created in this case. 00397 ST_Indentless 00398 }; 00399 00400 SequenceNode(OwningPtr<Document> &D, StringRef Anchor, SequenceType ST) 00401 : Node(NK_Sequence, D, Anchor) 00402 , SeqType(ST) 00403 , IsAtBeginning(true) 00404 , IsAtEnd(false) 00405 , WasPreviousTokenFlowEntry(true) // Start with an imaginary ','. 
00406 , CurrentEntry(0) 00407 {} 00408 00409 friend class basic_collection_iterator<SequenceNode, Node>; 00410 typedef basic_collection_iterator<SequenceNode, Node> iterator; 00411 template <class T> friend typename T::iterator yaml::begin(T &); 00412 template <class T> friend void yaml::skip(T &); 00413 00414 void increment(); 00415 00416 iterator begin() { 00417 return yaml::begin(*this); 00418 } 00419 00420 iterator end() { return iterator(); } 00421 00422 virtual void skip() LLVM_OVERRIDE { 00423 yaml::skip(*this); 00424 } 00425 00426 static inline bool classof(const Node *N) { 00427 return N->getType() == NK_Sequence; 00428 } 00429 00430 private: 00431 SequenceType SeqType; 00432 bool IsAtBeginning; 00433 bool IsAtEnd; 00434 bool WasPreviousTokenFlowEntry; 00435 Node *CurrentEntry; 00436 }; 00437 00438 /// @brief Represents an alias to a Node with an anchor. 00439 /// 00440 /// Example: 00441 /// *AnchorName 00442 class AliasNode : public Node { 00443 public: 00444 AliasNode(OwningPtr<Document> &D, StringRef Val) 00445 : Node(NK_Alias, D, StringRef()), Name(Val) {} 00446 00447 StringRef getName() const { return Name; } 00448 Node *getTarget(); 00449 00450 static inline bool classof(const Node *N) { 00451 return N->getType() == NK_Alias; 00452 } 00453 00454 private: 00455 StringRef Name; 00456 }; 00457 00458 /// @brief A YAML Stream is a sequence of Documents. A document contains a root 00459 /// node. 00460 class Document { 00461 public: 00462 /// @brief Root for parsing a node. Returns a single node. 00463 Node *parseBlockNode(); 00464 00465 Document(Stream &ParentStream); 00466 00467 /// @brief Finish parsing the current document and return true if there are 00468 /// more. Return false otherwise. 00469 bool skip(); 00470 00471 /// @brief Parse and return the root level node. 
00472 Node *getRoot() { 00473 if (Root) 00474 return Root; 00475 return Root = parseBlockNode(); 00476 } 00477 00478 private: 00479 friend class Node; 00480 friend class document_iterator; 00481 00482 /// @brief Stream to read tokens from. 00483 Stream &stream; 00484 00485 /// @brief Used to allocate nodes to. All are destroyed without calling their 00486 /// destructor when the document is destroyed. 00487 BumpPtrAllocator NodeAllocator; 00488 00489 /// @brief The root node. Used to support skipping a partially parsed 00490 /// document. 00491 Node *Root; 00492 00493 Token &peekNext(); 00494 Token getNext(); 00495 void setError(const Twine &Message, Token &Location) const; 00496 bool failed() const; 00497 00498 void handleTagDirective(const Token &Tag) { 00499 // TODO: Track tags. 00500 } 00501 00502 /// @brief Parse %BLAH directives and return true if any were encountered. 00503 bool parseDirectives(); 00504 00505 /// @brief Consume the next token and error if it is not \a TK. 00506 bool expectToken(int TK); 00507 }; 00508 00509 /// @brief Iterator abstraction for Documents over a Stream. 
00510 class document_iterator { 00511 public: 00512 document_iterator() : Doc(0) {} 00513 document_iterator(OwningPtr<Document> &D) : Doc(&D) {} 00514 00515 bool operator ==(const document_iterator &Other) { 00516 if (isAtEnd() || Other.isAtEnd()) 00517 return isAtEnd() && Other.isAtEnd(); 00518 00519 return Doc == Other.Doc; 00520 } 00521 bool operator !=(const document_iterator &Other) { 00522 return !(*this == Other); 00523 } 00524 00525 document_iterator operator ++() { 00526 assert(Doc != 0 && "incrementing iterator past the end."); 00527 if (!(*Doc)->skip()) { 00528 Doc->reset(0); 00529 } else { 00530 Stream &S = (*Doc)->stream; 00531 Doc->reset(new Document(S)); 00532 } 00533 return *this; 00534 } 00535 00536 Document &operator *() { 00537 return *Doc->get(); 00538 } 00539 00540 OwningPtr<Document> &operator ->() { 00541 return *Doc; 00542 } 00543 00544 private: 00545 bool isAtEnd() const { 00546 return !Doc || !*Doc; 00547 } 00548 00549 OwningPtr<Document> *Doc; 00550 }; 00551 00552 } 00553 } 00554 00555 #endif