LLVM  16.0.0git
Markup.cpp
Go to the documentation of this file.
1 //===- lib/DebugInfo/Symbolize/Markup.cpp ------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file defines the log symbolizer markup data model and parser.
11 ///
12 //===----------------------------------------------------------------------===//
13 
15 
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/StringExtras.h"
18 
19 namespace llvm {
20 namespace symbolize {
21 
// Regular expression source for the SGR (Select Graphic Rendition) control
// codes that are recognized inside text. The pattern is the ESC character
// (octal 033), a literal '[', one parameter, and a terminating 'm', where the
// parameter is one of:
//   0 or 1        e.g. "\033[0m", "\033[1m"
//   30 through 37 e.g. "\033[30m" ... "\033[37m"
// The pattern carries no anchors.
static constexpr char SGRSyntaxStr[] = "\033\\[([0-1]|3[0-7])m";

27 
29  : MultilineTags(std::move(MultilineTags)), SGRSyntax(SGRSyntaxStr) {}
30 
32  return Str.take_front(Pos - Str.begin());
33 }
34 static void advanceTo(StringRef &Str, StringRef::iterator Pos) {
35  Str = Str.drop_front(Pos - Str.begin());
36 }
37 
39  Buffer.clear();
40  NextIdx = 0;
41  FinishedMultiline.clear();
42  this->Line = Line;
43 }
44 
46  // Pull something out of the buffer if possible.
47  if (!Buffer.empty()) {
48  if (NextIdx < Buffer.size())
49  return std::move(Buffer[NextIdx++]);
50  NextIdx = 0;
51  Buffer.clear();
52  }
53 
54  // The buffer is empty, so parse the next bit of the line.
55 
56  if (Line.empty())
57  return std::nullopt;
58 
59  if (!InProgressMultiline.empty()) {
60  if (Optional<StringRef> MultilineEnd = parseMultiLineEnd(Line)) {
61  llvm::append_range(InProgressMultiline, *MultilineEnd);
62  assert(FinishedMultiline.empty() &&
63  "At most one multi-line element can be finished at a time.");
64  FinishedMultiline.swap(InProgressMultiline);
65  // Parse the multi-line element as if it were contiguous.
66  advanceTo(Line, MultilineEnd->end());
67  return *parseElement(FinishedMultiline);
68  }
69 
70  // The whole line is part of the multi-line element.
71  llvm::append_range(InProgressMultiline, Line);
72  Line = Line.drop_front(Line.size());
73  return std::nullopt;
74  }
75 
76  // Find the first valid markup element, if any.
77  if (Optional<MarkupNode> Element = parseElement(Line)) {
78  parseTextOutsideMarkup(takeTo(Line, Element->Text.begin()));
79  Buffer.push_back(std::move(*Element));
80  advanceTo(Line, Element->Text.end());
81  return nextNode();
82  }
83 
84  // Since there were no valid elements remaining, see if the line opens a
85  // multi-line element.
86  if (Optional<StringRef> MultilineBegin = parseMultiLineBegin(Line)) {
87  // Emit any text before the element.
88  parseTextOutsideMarkup(takeTo(Line, MultilineBegin->begin()));
89 
90  // Begin recording the multi-line element.
91  llvm::append_range(InProgressMultiline, *MultilineBegin);
92  Line = Line.drop_front(Line.size());
93  return nextNode();
94  }
95 
96  // The line doesn't contain any more markup elements, so emit it as text.
97  parseTextOutsideMarkup(Line);
98  Line = Line.drop_front(Line.size());
99  return nextNode();
100 }
101 
103  Buffer.clear();
104  NextIdx = 0;
105  Line = {};
106  if (InProgressMultiline.empty())
107  return;
108  FinishedMultiline.swap(InProgressMultiline);
109  parseTextOutsideMarkup(FinishedMultiline);
110 }
111 
112 // Finds and returns the next valid markup element in the given line. Returns
113 // None if the line contains no valid elements.
114 Optional<MarkupNode> MarkupParser::parseElement(StringRef Line) {
115  while (true) {
116  // Find next element using begin and end markers.
117  size_t BeginPos = Line.find("{{{");
118  if (BeginPos == StringRef::npos)
119  return std::nullopt;
120  size_t EndPos = Line.find("}}}", BeginPos + 3);
121  if (EndPos == StringRef::npos)
122  return std::nullopt;
123  EndPos += 3;
124  MarkupNode Element;
125  Element.Text = Line.slice(BeginPos, EndPos);
126  Line = Line.substr(EndPos);
127 
128  // Parse tag.
129  StringRef Content = Element.Text.drop_front(3).drop_back(3);
130  StringRef FieldsContent;
131  std::tie(Element.Tag, FieldsContent) = Content.split(':');
132  if (Element.Tag.empty())
133  continue;
134 
135  // Parse fields.
136  if (!FieldsContent.empty())
137  FieldsContent.split(Element.Fields, ":");
138  else if (Content.back() == ':')
139  Element.Fields.push_back(FieldsContent);
140 
141  return Element;
142  }
143 }
144 
147  Node.Text = Text;
148  return Node;
149 }
150 
151 // Parses a region of text known to be outside any markup elements. Such text
152 // may still contain SGR control codes, so the region is further subdivided into
153 // control codes and true text regions.
154 void MarkupParser::parseTextOutsideMarkup(StringRef Text) {
155  if (Text.empty())
156  return;
157  SmallVector<StringRef> Matches;
158  while (SGRSyntax.match(Text, &Matches)) {
159  // Emit any text before the SGR element.
160  if (Matches.begin()->begin() != Text.begin())
161  Buffer.push_back(textNode(takeTo(Text, Matches.begin()->begin())));
162 
163  Buffer.push_back(textNode(*Matches.begin()));
164  advanceTo(Text, Matches.begin()->end());
165  }
166  if (!Text.empty())
167  Buffer.push_back(textNode(Text));
168 }
169 
170 // Given that a line doesn't contain any valid markup, see if it ends with the
171 // start of a multi-line element. If so, returns the beginning.
172 Optional<StringRef> MarkupParser::parseMultiLineBegin(StringRef Line) {
173  // A multi-line begin marker must be the last one on the line.
174  size_t BeginPos = Line.rfind("{{{");
175  if (BeginPos == StringRef::npos)
176  return std::nullopt;
177  size_t BeginTagPos = BeginPos + 3;
178 
179  // If there are any end markers afterwards, the begin marker cannot belong to
180  // a multi-line element.
181  size_t EndPos = Line.find("}}}", BeginTagPos);
182  if (EndPos != StringRef::npos)
183  return std::nullopt;
184 
185  // Check whether the tag is registered multi-line.
186  size_t EndTagPos = Line.find(':', BeginTagPos);
187  if (EndTagPos == StringRef::npos)
188  return std::nullopt;
189  StringRef Tag = Line.slice(BeginTagPos, EndTagPos);
190  if (!MultilineTags.contains(Tag))
191  return std::nullopt;
192  return Line.substr(BeginPos);
193 }
194 
195 // See if the line begins with the ending of an in-progress multi-line element.
196 // If so, return the ending.
197 Optional<StringRef> MarkupParser::parseMultiLineEnd(StringRef Line) {
198  size_t EndPos = Line.find("}}}");
199  if (EndPos == StringRef::npos)
200  return std::nullopt;
201  return Line.take_front(EndPos + 3);
202 }
203 
204 } // end namespace symbolize
205 } // end namespace llvm
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::symbolize::textNode
static MarkupNode textNode(StringRef Text)
Definition: Markup.cpp:145
llvm::StringRef::npos
static constexpr size_t npos
Definition: StringRef.h:52
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
Content
T Content
Definition: ELFObjHandler.cpp:89
llvm::Optional
Definition: APInt.h:33
STLExtras.h
llvm::dwarf::Tag
Tag
Definition: Dwarf.h:105
llvm::Regex::match
bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr, std::string *Error=nullptr) const
matches - Match the regex against a given String.
Definition: Regex.cpp:86
llvm::symbolize::SGRSyntaxStr
static const char SGRSyntaxStr[]
Definition: Markup.cpp:26
llvm::symbolize::MarkupParser::flush
void flush()
Inform the parser that the input stream has ended.
Definition: Markup.cpp:102
llvm::symbolize::MarkupNode::Fields
SmallVector< StringRef > Fields
If this represents an element with fields, a list of the field contents.
Definition: Markup.h:43
llvm::StringRef::iterator
const char * iterator
Definition: StringRef.h:54
llvm::symbolize::takeTo
static StringRef takeTo(StringRef Str, StringRef::iterator Pos)
Definition: Markup.cpp:31
llvm::StringSet::contains
bool contains(StringRef key) const
Check if the set contains the given key.
Definition: StringSet.h:51
llvm::logicalview::LVSortMode::Line
@ Line
llvm::symbolize::MarkupParser::MarkupParser
MarkupParser(StringSet<> MultilineTags={})
Definition: Markup.cpp:28
llvm::symbolize::MarkupParser::parseLine
void parseLine(StringRef Line)
Parses an individual Line of input.
Definition: Markup.cpp:38
llvm::StringRef::empty
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:134
Markup.h
llvm::symbolize::advanceTo
static void advanceTo(StringRef &Str, StringRef::iterator Pos)
Definition: Markup.cpp:34
llvm::StringRef::end
iterator end() const
Definition: StringRef.h:113
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
StringExtras.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1861
llvm::symbolize::MarkupNode::Tag
StringRef Tag
If this represents an element, the tag. Otherwise, empty.
Definition: Markup.h:39
llvm::symbolize::MarkupParser::nextNode
Optional< MarkupNode > nextNode()
Returns the next node in the input sequence.
Definition: Markup.cpp:45
llvm::StringSet
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition: StringSet.h:23
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
llvm::append_range
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
Definition: STLExtras.h:2013
Node
Definition: ItaniumDemangle.h:156
llvm::StringRef::size
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
std
Definition: BitVector.h:851
llvm::symbolize::MarkupNode::Text
StringRef Text
The full text of this node in the input.
Definition: Markup.h:36
llvm::StringRef::begin
iterator begin() const
Definition: StringRef.h:111
llvm::StringRef::split
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:693
llvm::symbolize::MarkupNode
A node of symbolizer markup.
Definition: Markup.h:34
llvm::StringRef::drop_front
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition: StringRef.h:602
llvm::StringRef::drop_back
StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Definition: StringRef.h:609