LLVM 22.0.0git
Markup.cpp
Go to the documentation of this file.
1//===- lib/DebugInfo/Symbolize/Markup.cpp ------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file defines the log symbolizer markup data model and parser.
11///
12//===----------------------------------------------------------------------===//
13
15
16#include "llvm/ADT/STLExtras.h"
17
18namespace llvm {
19namespace symbolize {
20
// Regular expression source for the SGR control codes recognized in the
// input. It accepts exactly these sequences:
//   "\033[0m"
//   "\033[1m"
//   "\033[30m" through "\033[37m"
static const char SGRSyntaxStr[] = "\033\\[([0-1]|3[0-7])m";
26
28 : MultilineTags(std::move(MultilineTags)), SGRSyntax(SGRSyntaxStr) {}
29
31 return Str.take_front(Pos - Str.begin());
32}
33static void advanceTo(StringRef &Str, StringRef::iterator Pos) {
34 Str = Str.drop_front(Pos - Str.begin());
35}
36
38 Buffer.clear();
39 NextIdx = 0;
40 FinishedMultiline.clear();
41 this->Line = Line;
42}
43
44std::optional<MarkupNode> MarkupParser::nextNode() {
45 // Pull something out of the buffer if possible.
46 if (!Buffer.empty()) {
47 if (NextIdx < Buffer.size())
48 return std::move(Buffer[NextIdx++]);
49 NextIdx = 0;
50 Buffer.clear();
51 }
52
53 // The buffer is empty, so parse the next bit of the line.
54
55 if (Line.empty())
56 return std::nullopt;
57
58 if (!InProgressMultiline.empty()) {
59 if (std::optional<StringRef> MultilineEnd = parseMultiLineEnd(Line)) {
60 llvm::append_range(InProgressMultiline, *MultilineEnd);
61 assert(FinishedMultiline.empty() &&
62 "At most one multi-line element can be finished at a time.");
63 FinishedMultiline.swap(InProgressMultiline);
64 // Parse the multi-line element as if it were contiguous.
65 advanceTo(Line, MultilineEnd->end());
66 return *parseElement(FinishedMultiline);
67 }
68
69 // The whole line is part of the multi-line element.
70 llvm::append_range(InProgressMultiline, Line);
71 Line = Line.drop_front(Line.size());
72 return std::nullopt;
73 }
74
75 // Find the first valid markup element, if any.
76 if (std::optional<MarkupNode> Element = parseElement(Line)) {
77 parseTextOutsideMarkup(takeTo(Line, Element->Text.begin()));
78 Buffer.push_back(std::move(*Element));
79 advanceTo(Line, Element->Text.end());
80 return nextNode();
81 }
82
83 // Since there were no valid elements remaining, see if the line opens a
84 // multi-line element.
85 if (std::optional<StringRef> MultilineBegin = parseMultiLineBegin(Line)) {
86 // Emit any text before the element.
87 parseTextOutsideMarkup(takeTo(Line, MultilineBegin->begin()));
88
89 // Begin recording the multi-line element.
90 llvm::append_range(InProgressMultiline, *MultilineBegin);
91 Line = Line.drop_front(Line.size());
92 return nextNode();
93 }
94
95 // The line doesn't contain any more markup elements, so emit it as text.
96 parseTextOutsideMarkup(Line);
97 Line = Line.drop_front(Line.size());
98 return nextNode();
99}
100
102 Buffer.clear();
103 NextIdx = 0;
104 Line = {};
105 if (InProgressMultiline.empty())
106 return;
107 FinishedMultiline.swap(InProgressMultiline);
108 parseTextOutsideMarkup(FinishedMultiline);
109}
110
111// Finds and returns the next valid markup element in the given line. Returns
112// std::nullopt if the line contains no valid elements.
113std::optional<MarkupNode> MarkupParser::parseElement(StringRef Line) {
114 while (true) {
115 // Find next element using begin and end markers.
116 size_t BeginPos = Line.find("{{{");
117 if (BeginPos == StringRef::npos)
118 return std::nullopt;
119 size_t EndPos = Line.find("}}}", BeginPos + 3);
120 if (EndPos == StringRef::npos)
121 return std::nullopt;
122 EndPos += 3;
123 MarkupNode Element;
124 Element.Text = Line.slice(BeginPos, EndPos);
125 Line = Line.substr(EndPos);
126
127 // Parse tag.
128 StringRef Content = Element.Text.drop_front(3).drop_back(3);
129 StringRef FieldsContent;
130 std::tie(Element.Tag, FieldsContent) = Content.split(':');
131 if (Element.Tag.empty())
132 continue;
133
134 // Parse fields.
135 if (!FieldsContent.empty())
136 FieldsContent.split(Element.Fields, ":");
137 else if (Content.back() == ':')
138 Element.Fields.push_back(FieldsContent);
139
140 return Element;
141 }
142}
143
146 Node.Text = Text;
147 return Node;
148}
149
150// Parses a region of text known to be outside any markup elements. Such text
151// may still contain SGR control codes, so the region is further subdivided into
152// control codes and true text regions.
153void MarkupParser::parseTextOutsideMarkup(StringRef Text) {
154 if (Text.empty())
155 return;
157 while (SGRSyntax.match(Text, &Matches)) {
158 // Emit any text before the SGR element.
159 if (Matches.begin()->begin() != Text.begin())
160 Buffer.push_back(textNode(takeTo(Text, Matches.begin()->begin())));
161
162 Buffer.push_back(textNode(*Matches.begin()));
163 advanceTo(Text, Matches.begin()->end());
164 }
165 if (!Text.empty())
166 Buffer.push_back(textNode(Text));
167}
168
169// Given that a line doesn't contain any valid markup, see if it ends with the
170// start of a multi-line element. If so, returns the beginning.
171std::optional<StringRef> MarkupParser::parseMultiLineBegin(StringRef Line) {
172 // A multi-line begin marker must be the last one on the line.
173 size_t BeginPos = Line.rfind("{{{");
174 if (BeginPos == StringRef::npos)
175 return std::nullopt;
176 size_t BeginTagPos = BeginPos + 3;
177
178 // If there are any end markers afterwards, the begin marker cannot belong to
179 // a multi-line element.
180 size_t EndPos = Line.find("}}}", BeginTagPos);
181 if (EndPos != StringRef::npos)
182 return std::nullopt;
183
184 // Check whether the tag is registered multi-line.
185 size_t EndTagPos = Line.find(':', BeginTagPos);
186 if (EndTagPos == StringRef::npos)
187 return std::nullopt;
188 StringRef Tag = Line.slice(BeginTagPos, EndTagPos);
189 if (!MultilineTags.contains(Tag))
190 return std::nullopt;
191 return Line.substr(BeginPos);
192}
193
194// See if the line begins with the ending of an in-progress multi-line element.
195// If so, return the ending.
196std::optional<StringRef> MarkupParser::parseMultiLineEnd(StringRef Line) {
197 size_t EndPos = Line.find("}}}");
198 if (EndPos == StringRef::npos)
199 return std::nullopt;
200 return Line.take_front(EndPos + 3);
201}
202
203} // end namespace symbolize
204} // end namespace llvm
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file declares the log symbolizer markup data model and parser.
This file contains some templates that are useful if you are working with the STL at all.
LLVM_ABI bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr, std::string *Error=nullptr) const
matches - Match the regex against a given String.
Definition Regex.cpp:83
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:710
const char * iterator
Definition StringRef.h:59
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:581
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:151
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:619
char back() const
back - Get the last character in the string.
Definition StringRef.h:163
StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition StringRef.h:694
StringRef take_front(size_t N=1) const
Return a StringRef equal to 'this' but with only the first N elements remaining.
Definition StringRef.h:590
static constexpr size_t npos
Definition StringRef.h:57
StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Definition StringRef.h:626
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition StringSet.h:25
LLVM_ABI std::optional< MarkupNode > nextNode()
Returns the next node in the input sequence.
Definition Markup.cpp:44
LLVM_ABI void flush()
Inform the parser that the input stream has ended.
Definition Markup.cpp:101
LLVM_ABI MarkupParser(StringSet<> MultilineTags={})
Definition Markup.cpp:27
LLVM_ABI void parseLine(StringRef Line)
Parses an individual Line of input.
Definition Markup.cpp:37
static MarkupNode textNode(StringRef Text)
Definition Markup.cpp:144
static void advanceTo(StringRef &Str, StringRef::iterator Pos)
Definition Markup.cpp:33
static const char SGRSyntaxStr[]
Definition Markup.cpp:25
static StringRef takeTo(StringRef Str, StringRef::iterator Pos)
Definition Markup.cpp:30
This is an optimization pass for GlobalISel generic memory operations.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2118
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1849
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:851
A node of symbolizer markup.
Definition Markup.h:34
StringRef Text
The full text of this node in the input.
Definition Markup.h:36
SmallVector< StringRef > Fields
If this represents an element with fields, a list of the field contents.
Definition Markup.h:43
StringRef Tag
If this represents an element, the tag. Otherwise, empty.
Definition Markup.h:39