LLVM  15.0.0git
Markup.cpp
Go to the documentation of this file.
1 //===- lib/DebugInfo/Symbolize/Markup.cpp ------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file defines the log symbolizer markup data model and parser.
11 ///
12 //===----------------------------------------------------------------------===//
13 
15 
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/StringExtras.h"
18 
19 namespace llvm {
20 namespace symbolize {
21 
// Regular expression source for the SGR control codes the parser recognizes.
// It accepts an ESC byte (\033) and a literal '[', followed by one of
//   0 or 1        e.g. "\033[0m", "\033[1m"
//   30 through 37 e.g. "\033[30m" ... "\033[37m"
// and a terminating 'm'.
static const char SGRSyntaxStr[] = "\033\\[([0-1]|3[0-7])m";
27 
29  : MultilineTags(std::move(MultilineTags)), SGRSyntax(SGRSyntaxStr) {}
30 
32  return Str.take_front(Pos - Str.begin());
33 }
34 static void advanceTo(StringRef &Str, StringRef::iterator Pos) {
35  Str = Str.drop_front(Pos - Str.begin());
36 }
37 
39  Buffer.clear();
40  NextIdx = 0;
41  FinishedMultiline.clear();
42  this->Line = Line;
43 }
44 
46  // Pull something out of the buffer if possible.
47  if (!Buffer.empty()) {
48  if (NextIdx < Buffer.size())
49  return std::move(Buffer[NextIdx++]);
50  NextIdx = 0;
51  Buffer.clear();
52  }
53 
54  // The buffer is empty, so parse the next bit of the line.
55 
56  if (Line.empty())
57  return None;
58 
59  if (!InProgressMultiline.empty()) {
60  if (Optional<StringRef> MultilineEnd = parseMultiLineEnd(Line)) {
61  llvm::append_range(InProgressMultiline, *MultilineEnd);
62  assert(FinishedMultiline.empty() &&
63  "At most one multi-line element can be finished at a time.");
64  FinishedMultiline.swap(InProgressMultiline);
65  // Parse the multi-line element as if it were contiguous.
66  advanceTo(Line, MultilineEnd->end());
67  return *parseElement(FinishedMultiline);
68  }
69 
70  // The whole line is part of the multi-line element.
71  llvm::append_range(InProgressMultiline, Line);
72  Line = Line.drop_front(Line.size());
73  return None;
74  }
75 
76  // Find the first valid markup element, if any.
77  if (Optional<MarkupNode> Element = parseElement(Line)) {
78  parseTextOutsideMarkup(takeTo(Line, Element->Text.begin()));
79  Buffer.push_back(std::move(*Element));
80  advanceTo(Line, Element->Text.end());
81  return nextNode();
82  }
83 
84  // Since there were no valid elements remaining, see if the line opens a
85  // multi-line element.
86  if (Optional<StringRef> MultilineBegin = parseMultiLineBegin(Line)) {
87  // Emit any text before the element.
88  parseTextOutsideMarkup(takeTo(Line, MultilineBegin->begin()));
89 
90  // Begin recording the multi-line element.
91  llvm::append_range(InProgressMultiline, *MultilineBegin);
92  Line = Line.drop_front(Line.size());
93  return nextNode();
94  }
95 
96  // The line doesn't contain any more markup elements, so emit it as text.
97  parseTextOutsideMarkup(Line);
98  Line = Line.drop_front(Line.size());
99  return nextNode();
100 }
101 
103  if (InProgressMultiline.empty())
104  return;
105  FinishedMultiline.swap(InProgressMultiline);
106  parseTextOutsideMarkup(FinishedMultiline);
107 }
108 
109 // Finds and returns the next valid markup element in the given line. Returns
110 // None if the line contains no valid elements.
111 Optional<MarkupNode> MarkupParser::parseElement(StringRef Line) {
112  while (true) {
113  // Find next element using begin and end markers.
114  size_t BeginPos = Line.find("{{{");
115  if (BeginPos == StringRef::npos)
116  return None;
117  size_t EndPos = Line.find("}}}", BeginPos + 3);
118  if (EndPos == StringRef::npos)
119  return None;
120  EndPos += 3;
121  MarkupNode Element;
122  Element.Text = Line.slice(BeginPos, EndPos);
123  Line = Line.substr(EndPos);
124 
125  // Parse tag.
126  StringRef Content = Element.Text.drop_front(3).drop_back(3);
127  StringRef FieldsContent;
128  std::tie(Element.Tag, FieldsContent) = Content.split(':');
129  if (Element.Tag.empty())
130  continue;
131 
132  // Parse fields.
133  if (!FieldsContent.empty())
134  FieldsContent.split(Element.Fields, ":");
135  else if (Content.back() == ':')
136  Element.Fields.push_back(FieldsContent);
137 
138  return Element;
139  }
140 }
141 
144  Node.Text = Text;
145  return Node;
146 }
147 
148 // Parses a region of text known to be outside any markup elements. Such text
149 // may still contain SGR control codes, so the region is further subdivided into
150 // control codes and true text regions.
151 void MarkupParser::parseTextOutsideMarkup(StringRef Text) {
152  if (Text.empty())
153  return;
154  SmallVector<StringRef> Matches;
155  while (SGRSyntax.match(Text, &Matches)) {
156  // Emit any text before the SGR element.
157  if (Matches.begin()->begin() != Text.begin())
158  Buffer.push_back(textNode(takeTo(Text, Matches.begin()->begin())));
159 
160  Buffer.push_back(textNode(*Matches.begin()));
161  advanceTo(Text, Matches.begin()->end());
162  }
163  if (!Text.empty())
164  Buffer.push_back(textNode(Text));
165 }
166 
167 // Given that a line doesn't contain any valid markup, see if it ends with the
168 // start of a multi-line element. If so, returns the beginning.
169 Optional<StringRef> MarkupParser::parseMultiLineBegin(StringRef Line) {
170  // A multi-line begin marker must be the last one on the line.
171  size_t BeginPos = Line.rfind("{{{");
172  if (BeginPos == StringRef::npos)
173  return None;
174  size_t BeginTagPos = BeginPos + 3;
175 
176  // If there are any end markers afterwards, the begin marker cannot belong to
177  // a multi-line element.
178  size_t EndPos = Line.find("}}}", BeginTagPos);
179  if (EndPos != StringRef::npos)
180  return None;
181 
182  // Check whether the tag is registered multi-line.
183  size_t EndTagPos = Line.find(':', BeginTagPos);
184  if (EndTagPos == StringRef::npos)
185  return None;
186  StringRef Tag = Line.slice(BeginTagPos, EndTagPos);
187  if (!MultilineTags.contains(Tag))
188  return None;
189  return Line.substr(BeginPos);
190 }
191 
192 // See if the line begins with the ending of an in-progress multi-line element.
193 // If so, return the ending.
194 Optional<StringRef> MarkupParser::parseMultiLineEnd(StringRef Line) {
195  size_t EndPos = Line.find("}}}");
196  if (EndPos == StringRef::npos)
197  return None;
198  return Line.take_front(EndPos + 3);
199 }
200 
201 } // end namespace symbolize
202 } // end namespace llvm
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::symbolize::textNode
static MarkupNode textNode(StringRef Text)
Definition: Markup.cpp:142
llvm::StringRef::npos
static constexpr size_t npos
Definition: StringRef.h:60
llvm::StringRef::find
LLVM_NODISCARD size_t find(char C, size_t From=0) const
Search for the first character C in the string.
Definition: StringRef.h:319
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
Content
T Content
Definition: ELFObjHandler.cpp:88
llvm::Optional
Definition: APInt.h:33
STLExtras.h
llvm::StringRef::slice
LLVM_NODISCARD StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition: StringRef.h:736
llvm::dwarf::Tag
Tag
Definition: Dwarf.h:105
llvm::Regex::match
bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr, std::string *Error=nullptr) const
matches - Match the regex against a given String.
Definition: Regex.cpp:86
llvm::symbolize::SGRSyntaxStr
static const char SGRSyntaxStr[]
Definition: Markup.cpp:26
llvm::StringRef::substr
LLVM_NODISCARD StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:615
llvm::symbolize::MarkupParser::flush
void flush()
Inform the parser that the input stream has ended.
Definition: Markup.cpp:102
llvm::symbolize::MarkupNode::Fields
SmallVector< StringRef > Fields
If this represents an element with fields, a list of the field contents.
Definition: Markup.h:45
llvm::StringRef::split
LLVM_NODISCARD std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:753
llvm::StringRef::iterator
const char * iterator
Definition: StringRef.h:62
llvm::symbolize::takeTo
static StringRef takeTo(StringRef Str, StringRef::iterator Pos)
Definition: Markup.cpp:31
llvm::StringSet::contains
bool contains(StringRef key) const
Check if the set contains the given key.
Definition: StringSet.h:51
llvm::symbolize::MarkupParser::MarkupParser
MarkupParser(StringSet<> MultilineTags={})
Definition: Markup.cpp:28
llvm::None
const NoneType None
Definition: None.h:24
llvm::symbolize::MarkupParser::parseLine
void parseLine(StringRef Line)
Parses an individual Line of input.
Definition: Markup.cpp:38
Markup.h
llvm::symbolize::advanceTo
static void advanceTo(StringRef &Str, StringRef::iterator Pos)
Definition: Markup.cpp:34
llvm::StringRef::empty
constexpr LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:153
llvm::StringRef::end
iterator end() const
Definition: StringRef.h:130
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
StringExtras.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1675
llvm::symbolize::MarkupNode::Tag
StringRef Tag
If this represents an element, the tag. Otherwise, empty.
Definition: Markup.h:41
llvm::symbolize::MarkupParser::nextNode
Optional< MarkupNode > nextNode()
Returns the next node in the input sequence.
Definition: Markup.cpp:45
llvm::StringRef::drop_back
LLVM_NODISCARD StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Definition: StringRef.h:665
llvm::StringSet
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition: StringSet.h:23
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::append_range
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
Definition: STLExtras.h:1823
Node
Definition: ItaniumDemangle.h:155
llvm::StringRef::size
constexpr LLVM_NODISCARD size_t size() const
size - Get the string size.
Definition: StringRef.h:157
std
Definition: BitVector.h:851
llvm::StringRef::drop_front
LLVM_NODISCARD StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition: StringRef.h:657
llvm::symbolize::MarkupNode::Text
StringRef Text
The full text of this node in the input.
Definition: Markup.h:38
llvm::StringRef::begin
iterator begin() const
Definition: StringRef.h:128
llvm::symbolize::MarkupNode
A node of symbolizer markup.
Definition: Markup.h:36