LLVM 22.0.0git
Markup.h
Go to the documentation of this file.
1//===- Markup.h -------------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file declares the log symbolizer markup data model and parser.
11///
12/// See https://llvm.org/docs/SymbolizerMarkupFormat.html
13///
14//===----------------------------------------------------------------------===//
15
16#ifndef LLVM_DEBUGINFO_SYMBOLIZE_MARKUP_H
17#define LLVM_DEBUGINFO_SYMBOLIZE_MARKUP_H
18
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Regex.h"
#include <optional>
#include <string>
24
25namespace llvm {
26namespace symbolize {
27
28/// A node of symbolizer markup.
29///
30/// If only the Text field is set, this represents a region of text outside a
31/// markup element. ANSI SGR control codes are also reported this way; if
32/// detected, then the control code will be the entirety of the Text field, and
33/// any surrounding text will be reported as preceding and following nodes.
34struct MarkupNode {
35 /// The full text of this node in the input.
37
38 /// If this represents an element, the tag. Otherwise, empty.
40
41 /// If this represents an element with fields, a list of the field contents.
42 /// Otherwise, empty.
44
45 bool operator==(const MarkupNode &Other) const {
46 return Text == Other.Text && Tag == Other.Tag && Fields == Other.Fields;
47 }
48 bool operator!=(const MarkupNode &Other) const { return !(*this == Other); }
49};
50
51/// Parses a log containing symbolizer markup into a sequence of nodes.
53public:
54 LLVM_ABI MarkupParser(StringSet<> MultilineTags = {});
55
56 /// Parses an individual \p Line of input.
57 ///
58 /// Nodes from the previous parseLine() call that haven't yet been extracted
59 /// by nextNode() are discarded. The nodes returned by nextNode() may
60 /// reference the input string, so it must be retained by the caller until the
61 /// last use.
62 ///
63 /// Note that some elements may span multiple lines. If a line ends with the
64 /// start of one of these elements, then no nodes will be produced until the
65 /// either the end or something that cannot be part of an element is
66 /// encountered. This may only occur after multiple calls to parseLine(),
67 /// corresponding to the lines of the multi-line element.
68 LLVM_ABI void parseLine(StringRef Line);
69
70 /// Inform the parser of that the input stream has ended.
71 ///
72 /// This allows the parser to finish any deferred processing (e.g., an
73 /// in-progress multi-line element) and may cause nextNode() to return
74 /// additional nodes.
75 LLVM_ABI void flush();
76
77 /// Returns the next node in the input sequence.
78 ///
79 /// Calling nextNode() may invalidate the contents of the node returned by the
80 /// previous call.
81 ///
82 /// \returns the next markup node or std::nullopt if none remain.
83 LLVM_ABI std::optional<MarkupNode> nextNode();
84
85 bool isSGR(const MarkupNode &Node) const {
86 return SGRSyntax.match(Node.Text);
87 }
88
89private:
90 std::optional<MarkupNode> parseElement(StringRef Line);
91 void parseTextOutsideMarkup(StringRef Text);
92 std::optional<StringRef> parseMultiLineBegin(StringRef Line);
93 std::optional<StringRef> parseMultiLineEnd(StringRef Line);
94
95 // Tags of elements that can span multiple lines.
96 const StringSet<> MultilineTags;
97
98 // Contents of a multi-line element that has finished being parsed. Retained
99 // to keep returned StringRefs for the contents valid.
100 std::string FinishedMultiline;
101
102 // Contents of a multi-line element that is still in the process of receiving
103 // lines.
104 std::string InProgressMultiline;
105
106 // The line currently being parsed.
107 StringRef Line;
108
109 // Buffer for nodes parsed from the current line.
111
112 // Next buffer index to return.
113 size_t NextIdx;
114
115 // Regular expression matching supported ANSI SGR escape sequences.
116 const Regex SGRSyntax;
117};
118
119} // end namespace symbolize
120} // end namespace llvm
121
122#endif // LLVM_DEBUGINFO_SYMBOLIZE_MARKUP_H
#define LLVM_ABI
Definition Compiler.h:213
This file defines the SmallVector class.
StringSet - A set-like wrapper for the StringMap.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition StringSet.h:25
LLVM_ABI std::optional< MarkupNode > nextNode()
Returns the next node in the input sequence.
Definition Markup.cpp:44
LLVM_ABI void flush()
Inform the parser that the input stream has ended.
Definition Markup.cpp:101
bool isSGR(const MarkupNode &Node) const
Definition Markup.h:85
LLVM_ABI MarkupParser(StringSet<> MultilineTags={})
Definition Markup.cpp:27
LLVM_ABI void parseLine(StringRef Line)
Parses an individual Line of input.
Definition Markup.cpp:37
This is an optimization pass for GlobalISel generic memory operations.
@ Other
Any other memory.
Definition ModRef.h:68
A node of symbolizer markup.
Definition Markup.h:34
StringRef Text
The full text of this node in the input.
Definition Markup.h:36
bool operator!=(const MarkupNode &Other) const
Definition Markup.h:48
SmallVector< StringRef > Fields
If this represents an element with fields, a list of the field contents.
Definition Markup.h:43
StringRef Tag
If this represents an element, the tag. Otherwise, empty.
Definition Markup.h:39
bool operator==(const MarkupNode &Other) const
Definition Markup.h:45