LLVM 20.0.0git
Markup.h
Go to the documentation of this file.
1//===- Markup.h -------------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file declares the log symbolizer markup data model and parser.
11///
12/// See https://llvm.org/docs/SymbolizerMarkupFormat.html
13///
14//===----------------------------------------------------------------------===//
15
16#ifndef LLVM_DEBUGINFO_SYMBOLIZE_MARKUP_H
17#define LLVM_DEBUGINFO_SYMBOLIZE_MARKUP_H
18
19#include "llvm/ADT/SmallVector.h"
20#include "llvm/ADT/StringRef.h"
21#include "llvm/ADT/StringSet.h"
22#include "llvm/Support/Regex.h"
23
24namespace llvm {
25namespace symbolize {
26
27/// A node of symbolizer markup.
28///
29/// If only the Text field is set, this represents a region of text outside a
30/// markup element. ANSI SGR control codes are also reported this way; if
31/// detected, then the control code will be the entirety of the Text field, and
32/// any surrounding text will be reported as preceding and following nodes.
33struct MarkupNode {
34 /// The full text of this node in the input.
35 StringRef Text;
36
37 /// If this represents an element, the tag. Otherwise, empty.
38 StringRef Tag;
39
40 /// If this represents an element with fields, a list of the field contents.
41 /// Otherwise, empty.
42 SmallVector<StringRef> Fields;
43
44 bool operator==(const MarkupNode &Other) const {
45 return Text == Other.Text && Tag == Other.Tag && Fields == Other.Fields;
46 }
47 bool operator!=(const MarkupNode &Other) const { return !(*this == Other); }
48};
49
50/// Parses a log containing symbolizer markup into a sequence of nodes.
51class MarkupParser {
52public:
53 MarkupParser(StringSet<> MultilineTags = {});
54
55 /// Parses an individual \p Line of input.
56 ///
57 /// Nodes from the previous parseLine() call that haven't yet been extracted
58 /// by nextNode() are discarded. The nodes returned by nextNode() may
59 /// reference the input string, so it must be retained by the caller until the
60 /// last use.
61 ///
62 /// Note that some elements may span multiple lines. If a line ends with the
63 /// start of one of these elements, then no nodes will be produced until
64 /// either the end or something that cannot be part of an element is
65 /// encountered. This may only occur after multiple calls to parseLine(),
66 /// corresponding to the lines of the multi-line element.
67 void parseLine(StringRef Line);
68
69 /// Inform the parser that the input stream has ended.
70 ///
71 /// This allows the parser to finish any deferred processing (e.g., an
72 /// in-progress multi-line element) and may cause nextNode() to return
73 /// additional nodes.
74 void flush();
75
76 /// Returns the next node in the input sequence.
77 ///
78 /// Calling nextNode() may invalidate the contents of the node returned by the
79 /// previous call.
80 ///
81 /// \returns the next markup node or std::nullopt if none remain.
82 std::optional<MarkupNode> nextNode();
83
84 bool isSGR(const MarkupNode &Node) const {
85 return SGRSyntax.match(Node.Text);
86 }
87
88private:
89 std::optional<MarkupNode> parseElement(StringRef Line);
90 void parseTextOutsideMarkup(StringRef Text);
91 std::optional<StringRef> parseMultiLineBegin(StringRef Line);
92 std::optional<StringRef> parseMultiLineEnd(StringRef Line);
93
94 // Tags of elements that can span multiple lines.
95 const StringSet<> MultilineTags;
96
97 // Contents of a multi-line element that has finished being parsed. Retained
98 // to keep returned StringRefs for the contents valid.
99 std::string FinishedMultiline;
100
101 // Contents of a multi-line element that is still in the process of receiving
102 // lines.
103 std::string InProgressMultiline;
104
105 // The line currently being parsed.
106 StringRef Line;
107
108 // Buffer for nodes parsed from the current line.
109 SmallVector<MarkupNode> Buffer; // NOTE(review): declaration restored from context; confirm against upstream Markup.h
110
111 // Next buffer index to return.
112 size_t NextIdx;
113
114 // Regular expression matching supported ANSI SGR escape sequences.
115 const Regex SGRSyntax;
116};
117
118} // end namespace symbolize
119} // end namespace llvm
120
121#endif // LLVM_DEBUGINFO_SYMBOLIZE_MARKUP_H
This file defines the SmallVector class.
StringSet - A set-like wrapper for the StringMap.
bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr, std::string *Error=nullptr) const
matches - Match the regex against a given String.
Definition: Regex.cpp:83
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition: StringSet.h:23
Parses a log containing symbolizer markup into a sequence of nodes.
Definition: Markup.h:51
std::optional< MarkupNode > nextNode()
Returns the next node in the input sequence.
Definition: Markup.cpp:45
void flush()
Inform the parser that the input stream has ended.
Definition: Markup.cpp:102
bool isSGR(const MarkupNode &Node) const
Definition: Markup.h:84
void parseLine(StringRef Line)
Parses an individual Line of input.
Definition: Markup.cpp:38
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Other
Any other memory.
A node of symbolizer markup.
Definition: Markup.h:33
StringRef Text
The full text of this node in the input.
Definition: Markup.h:35
bool operator!=(const MarkupNode &Other) const
Definition: Markup.h:47
SmallVector< StringRef > Fields
If this represents an element with fields, a list of the field contents.
Definition: Markup.h:42
StringRef Tag
If this represents an element, the tag. Otherwise, empty.
Definition: Markup.h:38
bool operator==(const MarkupNode &Other) const
Definition: Markup.h:44