LLVM 20.0.0git
Markup.cpp
Go to the documentation of this file.
1//===- lib/DebugInfo/Symbolize/Markup.cpp ------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file defines the log symbolizer markup data model and parser.
11///
12//===----------------------------------------------------------------------===//
13
15
16#include "llvm/ADT/STLExtras.h"
18
19namespace llvm {
20namespace symbolize {
21
// Regular expression for the SGR control sequences recognized in text outside
// markup elements. It accepts exactly these forms:
//   "\033[0m"
//   "\033[1m"
//   "\033[30m" through "\033[37m"
static const char SGRSyntaxStr[] = "\033\\[([0-1]|3[0-7])m";
27
29 : MultilineTags(std::move(MultilineTags)), SGRSyntax(SGRSyntaxStr) {}
30
32 return Str.take_front(Pos - Str.begin());
33}
34static void advanceTo(StringRef &Str, StringRef::iterator Pos) {
35 Str = Str.drop_front(Pos - Str.begin());
36}
37
39 Buffer.clear();
40 NextIdx = 0;
41 FinishedMultiline.clear();
42 this->Line = Line;
43}
44
45std::optional<MarkupNode> MarkupParser::nextNode() {
46 // Pull something out of the buffer if possible.
47 if (!Buffer.empty()) {
48 if (NextIdx < Buffer.size())
49 return std::move(Buffer[NextIdx++]);
50 NextIdx = 0;
51 Buffer.clear();
52 }
53
54 // The buffer is empty, so parse the next bit of the line.
55
56 if (Line.empty())
57 return std::nullopt;
58
59 if (!InProgressMultiline.empty()) {
60 if (std::optional<StringRef> MultilineEnd = parseMultiLineEnd(Line)) {
61 llvm::append_range(InProgressMultiline, *MultilineEnd);
62 assert(FinishedMultiline.empty() &&
63 "At most one multi-line element can be finished at a time.");
64 FinishedMultiline.swap(InProgressMultiline);
65 // Parse the multi-line element as if it were contiguous.
66 advanceTo(Line, MultilineEnd->end());
67 return *parseElement(FinishedMultiline);
68 }
69
70 // The whole line is part of the multi-line element.
71 llvm::append_range(InProgressMultiline, Line);
72 Line = Line.drop_front(Line.size());
73 return std::nullopt;
74 }
75
76 // Find the first valid markup element, if any.
77 if (std::optional<MarkupNode> Element = parseElement(Line)) {
78 parseTextOutsideMarkup(takeTo(Line, Element->Text.begin()));
79 Buffer.push_back(std::move(*Element));
80 advanceTo(Line, Element->Text.end());
81 return nextNode();
82 }
83
84 // Since there were no valid elements remaining, see if the line opens a
85 // multi-line element.
86 if (std::optional<StringRef> MultilineBegin = parseMultiLineBegin(Line)) {
87 // Emit any text before the element.
88 parseTextOutsideMarkup(takeTo(Line, MultilineBegin->begin()));
89
90 // Begin recording the multi-line element.
91 llvm::append_range(InProgressMultiline, *MultilineBegin);
92 Line = Line.drop_front(Line.size());
93 return nextNode();
94 }
95
96 // The line doesn't contain any more markup elements, so emit it as text.
97 parseTextOutsideMarkup(Line);
98 Line = Line.drop_front(Line.size());
99 return nextNode();
100}
101
103 Buffer.clear();
104 NextIdx = 0;
105 Line = {};
106 if (InProgressMultiline.empty())
107 return;
108 FinishedMultiline.swap(InProgressMultiline);
109 parseTextOutsideMarkup(FinishedMultiline);
110}
111
112// Finds and returns the next valid markup element in the given line. Returns
113// std::nullopt if the line contains no valid elements.
114std::optional<MarkupNode> MarkupParser::parseElement(StringRef Line) {
115 while (true) {
116 // Find next element using begin and end markers.
117 size_t BeginPos = Line.find("{{{");
118 if (BeginPos == StringRef::npos)
119 return std::nullopt;
120 size_t EndPos = Line.find("}}}", BeginPos + 3);
121 if (EndPos == StringRef::npos)
122 return std::nullopt;
123 EndPos += 3;
124 MarkupNode Element;
125 Element.Text = Line.slice(BeginPos, EndPos);
126 Line = Line.substr(EndPos);
127
128 // Parse tag.
129 StringRef Content = Element.Text.drop_front(3).drop_back(3);
130 StringRef FieldsContent;
131 std::tie(Element.Tag, FieldsContent) = Content.split(':');
132 if (Element.Tag.empty())
133 continue;
134
135 // Parse fields.
136 if (!FieldsContent.empty())
137 FieldsContent.split(Element.Fields, ":");
138 else if (Content.back() == ':')
139 Element.Fields.push_back(FieldsContent);
140
141 return Element;
142 }
143}
144
147 Node.Text = Text;
148 return Node;
149}
150
151// Parses a region of text known to be outside any markup elements. Such text
152// may still contain SGR control codes, so the region is further subdivided into
153// control codes and true text regions.
154void MarkupParser::parseTextOutsideMarkup(StringRef Text) {
155 if (Text.empty())
156 return;
158 while (SGRSyntax.match(Text, &Matches)) {
159 // Emit any text before the SGR element.
160 if (Matches.begin()->begin() != Text.begin())
161 Buffer.push_back(textNode(takeTo(Text, Matches.begin()->begin())));
162
163 Buffer.push_back(textNode(*Matches.begin()));
164 advanceTo(Text, Matches.begin()->end());
165 }
166 if (!Text.empty())
167 Buffer.push_back(textNode(Text));
168}
169
170// Given that a line doesn't contain any valid markup, see if it ends with the
171// start of a multi-line element. If so, returns the beginning.
172std::optional<StringRef> MarkupParser::parseMultiLineBegin(StringRef Line) {
173 // A multi-line begin marker must be the last one on the line.
174 size_t BeginPos = Line.rfind("{{{");
175 if (BeginPos == StringRef::npos)
176 return std::nullopt;
177 size_t BeginTagPos = BeginPos + 3;
178
179 // If there are any end markers afterwards, the begin marker cannot belong to
180 // a multi-line element.
181 size_t EndPos = Line.find("}}}", BeginTagPos);
182 if (EndPos != StringRef::npos)
183 return std::nullopt;
184
185 // Check whether the tag is registered multi-line.
186 size_t EndTagPos = Line.find(':', BeginTagPos);
187 if (EndTagPos == StringRef::npos)
188 return std::nullopt;
189 StringRef Tag = Line.slice(BeginTagPos, EndTagPos);
190 if (!MultilineTags.contains(Tag))
191 return std::nullopt;
192 return Line.substr(BeginPos);
193}
194
195// See if the line begins with the ending of an in-progress multi-line element.
196// If so, return the ending.
197std::optional<StringRef> MarkupParser::parseMultiLineEnd(StringRef Line) {
198 size_t EndPos = Line.find("}}}");
199 if (EndPos == StringRef::npos)
200 return std::nullopt;
201 return Line.take_front(EndPos + 3);
202}
203
204} // end namespace symbolize
205} // end namespace llvm
T Content
This file declares the log symbolizer markup data model and parser.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file contains some functions that are useful when dealing with strings.
bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr, std::string *Error=nullptr) const
matches - Match the regex against a given String.
Definition: Regex.cpp:83
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:685
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:134
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition: StringRef.h:594
iterator begin() const
Definition: StringRef.h:111
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
iterator end() const
Definition: StringRef.h:113
static constexpr size_t npos
Definition: StringRef.h:52
StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Definition: StringRef.h:601
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition: StringSet.h:23
bool contains(StringRef key) const
Check if the set contains the given key.
Definition: StringSet.h:55
std::optional< MarkupNode > nextNode()
Returns the next node in the input sequence.
Definition: Markup.cpp:45
void flush()
Inform the parser that the input stream has ended.
Definition: Markup.cpp:102
MarkupParser(StringSet<> MultilineTags={})
Definition: Markup.cpp:28
void parseLine(StringRef Line)
Parses an individual Line of input.
Definition: Markup.cpp:38
static MarkupNode textNode(StringRef Text)
Definition: Markup.cpp:145
static void advanceTo(StringRef &Str, StringRef::iterator Pos)
Definition: Markup.cpp:34
static const char SGRSyntaxStr[]
Definition: Markup.cpp:26
static StringRef takeTo(StringRef Str, StringRef::iterator Pos)
Definition: Markup.cpp:31
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2098
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1856
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
A node of symbolizer markup.
Definition: Markup.h:33
StringRef Text
The full text of this node in the input.
Definition: Markup.h:35
SmallVector< StringRef > Fields
If this represents an element with fields, a list of the field contents.
Definition: Markup.h:42
StringRef Tag
If this represents an element, the tag. Otherwise, empty.
Definition: Markup.h:38