LLVM 20.0.0git
Automaton.h
Go to the documentation of this file.
1//===-- Automaton.h - Support for driving TableGen-produced DFAs ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements class that drive and introspect deterministic finite-
10// state automata (DFAs) as generated by TableGen's -gen-automata backend.
11//
12// For a description of how to define an automaton, see
13// include/llvm/TableGen/Automaton.td.
14//
15// One important detail is that these deterministic automata are created from
16// (potentially) nondeterministic definitions. Therefore a unique sequence of
17// input symbols will produce one path through the DFA but multiple paths
18// through the original NFA. An automaton by default only returns "accepted" or
19// "not accepted", but frequently we want to analyze what NFA path was taken.
20// Finding a path through the NFA states that results in a DFA state can help
21// answer *what* the solution to a problem was, not just that there exists a
22// solution.
23//
24//===----------------------------------------------------------------------===//
25
26#ifndef LLVM_SUPPORT_AUTOMATON_H
27#define LLVM_SUPPORT_AUTOMATON_H
28
29#include "llvm/ADT/ArrayRef.h"
30#include "llvm/ADT/DenseMap.h"
33#include <deque>
34#include <map>
35#include <memory>
36
37namespace llvm {
38
40
41/// Forward define the pair type used by the automata transition info tables.
42///
43/// Experimental results with large tables have shown a significant (multiple
44/// orders of magnitude) parsing speedup by using a custom struct here with a
45/// trivial constructor rather than std::pair<uint64_t, uint64_t>.
48
49 bool operator<(const NfaStatePair &Other) const {
50 return std::make_tuple(FromDfaState, ToDfaState) <
51 std::make_tuple(Other.FromDfaState, Other.ToDfaState);
52 }
53};
54
55namespace internal {
56/// The internal class that maintains all possible paths through an NFA based
57/// on a path through the DFA.
59private:
60 /// Cached transition table. This is a table of NfaStatePairs that contains
61 /// zero-terminated sequences pointed to by DFA transitions.
62 ArrayRef<NfaStatePair> TransitionInfo;
63
64 /// A simple linked-list of traversed states that can have a shared tail. The
65 /// traversed path is stored in reverse order with the latest state as the
66 /// head.
67 struct PathSegment {
68 uint64_t State;
69 PathSegment *Tail;
70 };
71
72 /// We allocate segment objects frequently. Allocate them upfront and dispose
73 /// at the end of a traversal rather than hammering the system allocator.
75
76 /// Heads of each tracked path. These are not ordered.
77 std::deque<PathSegment *> Heads;
78
79 /// The returned paths. This is populated during getPaths.
81
82 /// Create a new segment and return it.
83 PathSegment *makePathSegment(uint64_t State, PathSegment *Tail) {
84 PathSegment *P = Allocator.Allocate();
85 *P = {State, Tail};
86 return P;
87 }
88
89 /// Pairs defines a sequence of possible NFA transitions for a single DFA
90 /// transition.
91 void transition(ArrayRef<NfaStatePair> Pairs) {
92 // Iterate over all existing heads. We will mutate the Heads deque during
93 // iteration.
94 unsigned NumHeads = Heads.size();
95 for (unsigned I = 0; I < NumHeads; ++I) {
96 PathSegment *Head = Heads[I];
97 // The sequence of pairs is sorted. Select the set of pairs that
98 // transition from the current head state.
99 auto PI = lower_bound(Pairs, NfaStatePair{Head->State, 0ULL});
100 auto PE = upper_bound(Pairs, NfaStatePair{Head->State, INT64_MAX});
101 // For every transition from the current head state, add a new path
102 // segment.
103 for (; PI != PE; ++PI)
104 if (PI->FromDfaState == Head->State)
105 Heads.push_back(makePathSegment(PI->ToDfaState, Head));
106 }
107 // Now we've iterated over all the initial heads and added new ones,
108 // dispose of the original heads.
109 Heads.erase(Heads.begin(), std::next(Heads.begin(), NumHeads));
110 }
111
112public:
114 : TransitionInfo(TransitionInfo) {
115 reset();
116 }
117
119 return TransitionInfo;
120 }
121
122 void reset() {
123 Paths.clear();
124 Heads.clear();
125 Allocator.DestroyAll();
126 // The initial NFA state is 0.
127 Heads.push_back(makePathSegment(0ULL, nullptr));
128 }
129
130 void transition(unsigned TransitionInfoIdx) {
131 unsigned EndIdx = TransitionInfoIdx;
132 while (TransitionInfo[EndIdx].ToDfaState != 0)
133 ++EndIdx;
134 ArrayRef<NfaStatePair> Pairs(&TransitionInfo[TransitionInfoIdx],
135 EndIdx - TransitionInfoIdx);
136 transition(Pairs);
137 }
138
140 Paths.clear();
141 for (auto *Head : Heads) {
142 NfaPath P;
143 while (Head->State != 0) {
144 P.push_back(Head->State);
145 Head = Head->Tail;
146 }
147 std::reverse(P.begin(), P.end());
148 Paths.push_back(std::move(P));
149 }
150 return Paths;
151 }
152};
153} // namespace internal
154
155/// A deterministic finite-state automaton. The automaton is defined in
156/// TableGen; this object drives an automaton defined by tblgen-emitted tables.
157///
158/// An automaton accepts a sequence of input tokens ("actions"). This class is
159/// templated on the type of these actions.
160template <typename ActionT> class Automaton {
161 /// Map from {State, Action} to {NewState, TransitionInfoIdx}.
162 /// TransitionInfoIdx is used by the DfaTranscriber to analyze the transition.
163 /// FIXME: This uses a std::map because ActionT can be a pair type including
164 /// an enum. In particular DenseMapInfo<ActionT> must be defined to use
165 /// DenseMap here.
166 /// This is a shared_ptr to allow very quick copy-construction of Automata; this
167 /// state is immutable after construction so this is safe.
168 using MapTy = std::map<std::pair<uint64_t, ActionT>, std::pair<uint64_t, unsigned>>;
169 std::shared_ptr<MapTy> M;
170 /// An optional transcription object. This uses much more state than simply
171 /// traversing the DFA for acceptance, so is heap allocated.
172 std::shared_ptr<internal::NfaTranscriber> Transcriber;
173 /// The initial DFA state is 1.
174 uint64_t State = 1;
175 /// True if we should transcribe and false if not (even if Transcriber is defined).
176 bool Transcribe;
177
178public:
179 /// Create an automaton.
180 /// \param Transitions The Transitions table as created by TableGen. Note that
181 /// because the action type differs per automaton, the
182 /// table type is templated as ArrayRef<InfoT>.
183 /// \param TranscriptionTable The TransitionInfo table as created by TableGen.
184 ///
185 /// Providing the TranscriptionTable argument as non-empty will enable the
186 /// use of transcription, which analyzes the possible paths in the original
187 /// NFA taken by the DFA. NOTE: This is substantially more work than simply
188 /// driving the DFA, so unless you require the getPaths() method leave this
189 /// empty.
190 template <typename InfoT>
192 ArrayRef<NfaStatePair> TranscriptionTable = {}) {
193 if (!TranscriptionTable.empty())
194 Transcriber =
195 std::make_shared<internal::NfaTranscriber>(TranscriptionTable);
196 Transcribe = Transcriber != nullptr;
197 M = std::make_shared<MapTy>();
198 for (const auto &I : Transitions)
199 // Greedily read and cache the transition table.
200 M->emplace(std::make_pair(I.FromDfaState, I.Action),
201 std::make_pair(I.ToDfaState, I.InfoIdx));
202 }
204 : M(Other.M), State(Other.State), Transcribe(Other.Transcribe) {
205 // Transcriber is not thread-safe, so create a new instance on copy.
206 if (Other.Transcriber)
207 Transcriber = std::make_shared<internal::NfaTranscriber>(
208 Other.Transcriber->getTransitionInfo());
209 }
210
211 /// Reset the automaton to its initial state.
212 void reset() {
213 State = 1;
214 if (Transcriber)
215 Transcriber->reset();
216 }
217
218 /// Enable or disable transcription. Transcription is only available if
219 /// TranscriptionTable was provided to the constructor.
220 void enableTranscription(bool Enable = true) {
221 assert(Transcriber &&
222 "Transcription is only available if TranscriptionTable was provided "
223 "to the Automaton constructor");
224 Transcribe = Enable;
225 }
226
227 /// Transition the automaton based on input symbol A. Return true if the
228 /// automaton transitioned to a valid state, false if the automaton
229 /// transitioned to an invalid state.
230 ///
231 /// If this function returns false, all methods are undefined until reset() is
232 /// called.
233 bool add(const ActionT &A) {
234 auto I = M->find({State, A});
235 if (I == M->end())
236 return false;
237 if (Transcriber && Transcribe)
238 Transcriber->transition(I->second.second);
239 State = I->second.first;
240 return true;
241 }
242
243 /// Return true if the automaton can be transitioned based on input symbol A.
244 bool canAdd(const ActionT &A) {
245 auto I = M->find({State, A});
246 return I != M->end();
247 }
248
249 /// Obtain a set of possible paths through the input nondeterministic
250 /// automaton that could be obtained from the sequence of input actions
251 /// presented to this deterministic automaton.
253 assert(Transcriber && Transcribe &&
254 "Can only obtain NFA paths if transcribing!");
255 return Transcriber->getPaths();
256 }
257};
258
259} // namespace llvm
260
261#endif // LLVM_SUPPORT_AUTOMATON_H
This file defines the BumpPtrAllocator interface.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
This file defines the DenseMap class.
#define I(x, y, z)
Definition: MD5.cpp:58
#define P(N)
Basic Register Allocator
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
A deterministic finite-state automaton.
Definition: Automaton.h:160
void reset()
Reset the automaton to its initial state.
Definition: Automaton.h:212
bool canAdd(const ActionT &A)
Return true if the automaton can be transitioned based on input symbol A.
Definition: Automaton.h:244
Automaton(const Automaton &Other)
Definition: Automaton.h:203
bool add(const ActionT &A)
Transition the automaton based on input symbol A.
Definition: Automaton.h:233
void enableTranscription(bool Enable=true)
Enable or disable transcription.
Definition: Automaton.h:220
ArrayRef< NfaPath > getNfaPaths()
Obtain a set of possible paths through the input nondeterministic automaton that could be obtained fr...
Definition: Automaton.h:252
Automaton(ArrayRef< InfoT > Transitions, ArrayRef< NfaStatePair > TranscriptionTable={})
Create an automaton.
Definition: Automaton.h:191
void push_back(const T &Elt)
Definition: SmallVector.h:426
A BumpPtrAllocator that allows only elements of a specific type to be allocated.
Definition: Allocator.h:389
The internal class that maintains all possible paths through an NFA based on a path through the DFA.
Definition: Automaton.h:58
NfaTranscriber(ArrayRef< NfaStatePair > TransitionInfo)
Definition: Automaton.h:113
ArrayRef< NfaPath > getPaths()
Definition: Automaton.h:139
void transition(unsigned TransitionInfoIdx)
Definition: Automaton.h:130
ArrayRef< NfaStatePair > getTransitionInfo() const
Definition: Automaton.h:118
#define INT64_MAX
Definition: DataTypes.h:71
@ Tail
Attemps to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto upper_bound(R &&Range, T &&Value)
Provide wrappers to std::upper_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1974
@ Other
Any other memory.
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1961
@ Enable
Enable colors.
Forward define the pair type used by the automata transition info tables.
Definition: Automaton.h:46
bool operator<(const NfaStatePair &Other) const
Definition: Automaton.h:49
uint64_t FromDfaState
Definition: Automaton.h:47
uint64_t ToDfaState
Definition: Automaton.h:47