LLVM 23.0.0git
SCCIterator.h
Go to the documentation of this file.
1//===- ADT/SCCIterator.h - Strongly Connected Comp. Iter. -------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9///
10/// This builds on the llvm/ADT/GraphTraits.h file to find the strongly
11/// connected components (SCCs) of a graph in O(N+E) time using Tarjan's DFS
12/// algorithm.
13///
14/// The SCC iterator has the important property that if a node in SCC S1 has an
15/// edge to a node in SCC S2, then it visits S1 *after* S2.
16///
17/// To visit S1 *before* S2, use the scc_iterator on the Inverse graph. (NOTE:
18/// This requires some simple wrappers and is not supported yet.)
19///
20//===----------------------------------------------------------------------===//
21
22#ifndef LLVM_ADT_SCCITERATOR_H
23#define LLVM_ADT_SCCITERATOR_H
24
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/iterator.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <queue>
#include <set>
#include <unordered_map>
#include <unordered_set>
#include <vector>
37
38namespace llvm {
39
40/// Enumerate the SCCs of a directed graph in reverse topological order
41/// of the SCC DAG.
42///
43/// This is implemented using Tarjan's DFS algorithm using an internal stack to
44/// build up a vector of nodes in a particular SCC. Note that it is a forward
45/// iterator and thus you cannot backtrack or re-visit nodes.
46template <class GraphT, class GT = GraphTraits<GraphT>>
47class scc_iterator : public iterator_facade_base<
48 scc_iterator<GraphT, GT>, std::forward_iterator_tag,
49 const std::vector<typename GT::NodeRef>, ptrdiff_t> {
50 using NodeRef = typename GT::NodeRef;
51 using ChildItTy = typename GT::ChildIteratorType;
52 using SccTy = std::vector<NodeRef>;
53 using reference = typename scc_iterator::reference;
54
55 /// Element of VisitStack during DFS.
56 struct StackElement {
57 NodeRef Node; ///< The current node pointer.
58 ChildItTy NextChild; ///< The next child, modified inplace during DFS.
59 unsigned MinVisited; ///< Minimum uplink value of all children of Node.
60
61 StackElement(NodeRef Node, const ChildItTy &Child, unsigned Min)
62 : Node(Node), NextChild(Child), MinVisited(Min) {}
63
64 bool operator==(const StackElement &Other) const {
65 return Node == Other.Node &&
66 NextChild == Other.NextChild &&
67 MinVisited == Other.MinVisited;
68 }
69 };
70
71 /// The visit counters used to detect when a complete SCC is on the stack.
72 /// visitNum is the global counter.
73 ///
74 /// nodeVisitNumbers are per-node visit numbers, also used as DFS flags.
75 unsigned visitNum;
76 DenseMap<NodeRef, unsigned> nodeVisitNumbers;
77
78 /// Stack holding nodes of the SCC.
79 std::vector<NodeRef> SCCNodeStack;
80
81 /// The current SCC, retrieved using operator*().
82 SccTy CurrentSCC;
83
84 /// DFS stack, Used to maintain the ordering. The top contains the current
85 /// node, the next child to visit, and the minimum uplink value of all child
86 std::vector<StackElement> VisitStack;
87
88 /// A single "visit" within the non-recursive DFS traversal.
89 void DFSVisitOne(NodeRef N);
90
91 /// The stack-based DFS traversal; defined below.
92 void DFSVisitChildren();
93
94 /// Compute the next SCC using the DFS traversal.
95 void GetNextSCC();
96
97 scc_iterator(NodeRef entryN) : visitNum(0) {
98 DFSVisitOne(entryN);
99 GetNextSCC();
100 }
101
102 /// End is when the DFS stack is empty.
103 scc_iterator() = default;
104
105public:
106 static scc_iterator begin(const GraphT &G) {
107 return scc_iterator(GT::getEntryNode(G));
108 }
109 static scc_iterator end(const GraphT &) { return scc_iterator(); }
110
111 /// Direct loop termination test which is more efficient than
112 /// comparison with \c end().
113 bool isAtEnd() const {
114 assert(!CurrentSCC.empty() || VisitStack.empty());
115 return CurrentSCC.empty();
116 }
117
118 bool operator==(const scc_iterator &x) const {
119 return VisitStack == x.VisitStack && CurrentSCC == x.CurrentSCC;
120 }
121
122 scc_iterator &operator++() {
123 GetNextSCC();
124 return *this;
125 }
126
127 reference operator*() const {
128 assert(!CurrentSCC.empty() && "Dereferencing END SCC iterator!");
129 return CurrentSCC;
130 }
131
132 /// Test if the current SCC has a cycle.
133 ///
134 /// If the SCC has more than one node, this is trivially true. If not, it may
135 /// still contain a cycle if the node has an edge back to itself.
136 bool hasCycle() const;
137
138 /// This informs the \c scc_iterator that the specified \c Old node
139 /// has been deleted, and \c New is to be used in its place.
140 void ReplaceNode(NodeRef Old, NodeRef New) {
141 assert(nodeVisitNumbers.count(Old) && "Old not in scc_iterator?");
142 // Do the assignment in two steps, in case 'New' is not yet in the map, and
143 // inserting it causes the map to grow.
144 auto tempVal = nodeVisitNumbers[Old];
145 nodeVisitNumbers[New] = tempVal;
146 nodeVisitNumbers.erase(Old);
147 }
148};
149
150template <class GraphT, class GT>
151void scc_iterator<GraphT, GT>::DFSVisitOne(NodeRef N) {
152 ++visitNum;
153 nodeVisitNumbers[N] = visitNum;
154 SCCNodeStack.push_back(N);
155 VisitStack.push_back(StackElement(N, GT::child_begin(N), visitNum));
156#if 0 // Enable if needed when debugging.
157 dbgs() << "TarjanSCC: Node " << N <<
158 " : visitNum = " << visitNum << "\n";
159#endif
160}
161
162template <class GraphT, class GT>
163void scc_iterator<GraphT, GT>::DFSVisitChildren() {
164 assert(!VisitStack.empty());
165 while (VisitStack.back().NextChild != GT::child_end(VisitStack.back().Node)) {
166 // TOS has at least one more child so continue DFS
167 NodeRef childN = *VisitStack.back().NextChild++;
168 auto Visited = nodeVisitNumbers.find(childN);
169 if (Visited == nodeVisitNumbers.end()) {
170 // this node has never been seen.
171 DFSVisitOne(childN);
172 continue;
173 }
174
175 unsigned childNum = Visited->second;
176 if (VisitStack.back().MinVisited > childNum)
177 VisitStack.back().MinVisited = childNum;
178 }
179}
180
181template <class GraphT, class GT> void scc_iterator<GraphT, GT>::GetNextSCC() {
182 CurrentSCC.clear(); // Prepare to compute the next SCC
183 while (!VisitStack.empty()) {
184 DFSVisitChildren();
185
186 // Pop the leaf on top of the VisitStack.
187 NodeRef visitingN = VisitStack.back().Node;
188 unsigned minVisitNum = VisitStack.back().MinVisited;
189 assert(VisitStack.back().NextChild == GT::child_end(visitingN));
190 VisitStack.pop_back();
191
192 // Propagate MinVisitNum to parent so we can detect the SCC starting node.
193 if (!VisitStack.empty() && VisitStack.back().MinVisited > minVisitNum)
194 VisitStack.back().MinVisited = minVisitNum;
195
196#if 0 // Enable if needed when debugging.
197 dbgs() << "TarjanSCC: Popped node " << visitingN <<
198 " : minVisitNum = " << minVisitNum << "; Node visit num = " <<
199 nodeVisitNumbers[visitingN] << "\n";
200#endif
201
202 if (minVisitNum != nodeVisitNumbers[visitingN])
203 continue;
204
205 // A full SCC is on the SCCNodeStack! It includes all nodes below
206 // visitingN on the stack. Copy those nodes to CurrentSCC,
207 // reset their minVisit values, and return (this suspends
208 // the DFS traversal till the next ++).
209 do {
210 CurrentSCC.push_back(SCCNodeStack.back());
211 SCCNodeStack.pop_back();
212 nodeVisitNumbers[CurrentSCC.back()] = ~0U;
213 } while (CurrentSCC.back() != visitingN);
214 return;
215 }
216}
217
218template <class GraphT, class GT>
220 assert(!CurrentSCC.empty() && "Dereferencing END SCC iterator!");
221 if (CurrentSCC.size() > 1)
222 return true;
223 NodeRef N = CurrentSCC.front();
224 for (ChildItTy CI = GT::child_begin(N), CE = GT::child_end(N); CI != CE;
225 ++CI)
226 if (*CI == N)
227 return true;
228 return false;
229 }
230
231/// Construct the begin iterator for a deduced graph type T.
232template <class T> scc_iterator<T> scc_begin(const T &G) {
234}
235
236/// Construct the end iterator for a deduced graph type T.
237template <class T> scc_iterator<T> scc_end(const T &G) {
238 return scc_iterator<T>::end(G);
239}
240
241/// Sort the nodes of a directed SCC in the decreasing order of the edge
242/// weights. The instantiating GraphT type should have weighted edge type
243/// declared in its graph traits in order to use this iterator.
244///
245/// This is implemented using Kruskal's minimal spanning tree algorithm followed
246/// by Kahn's algorithm to compute a topological order on the MST. First a
247/// maximum spanning tree (forest) is built based on all edges within the SCC
248/// collection. Then a topological walk is initiated on tree nodes that do not
249/// have a predecessor and then applied to all nodes of the SCC. Such order
250/// ensures that high-weighted edges are visited first during the traversal.
251template <class GraphT, class GT = GraphTraits<GraphT>>
253 using NodeType = typename GT::NodeType;
254 using EdgeType = typename GT::EdgeType;
255 using NodesType = std::vector<NodeType *>;
256
257 // Auxilary node information used during the MST calculation.
258 struct NodeInfo {
259 NodeInfo *Group = this;
260 uint32_t Rank = 0;
261 bool Visited = false;
262 DenseSet<const EdgeType *> IncomingMSTEdges;
263 };
264
265 // Find the root group of the node and compress the path from node to the
266 // root.
267 NodeInfo *find(NodeInfo *Node) {
268 if (Node->Group != Node)
269 Node->Group = find(Node->Group);
270 return Node->Group;
271 }
272
273 // Union the source and target node into the same group and return true.
274 // Returns false if they are already in the same group.
275 bool unionGroups(const EdgeType *Edge) {
276 NodeInfo *G1 = find(&NodeInfoMap[Edge->Source]);
277 NodeInfo *G2 = find(&NodeInfoMap[Edge->Target]);
278
279 // If the edge forms a cycle, do not add it to MST
280 if (G1 == G2)
281 return false;
282
283 // Make the smaller rank tree a direct child of high rank tree.
284 if (G1->Rank < G2->Rank)
285 G1->Group = G2;
286 else {
287 G2->Group = G1;
288 // If the ranks are the same, increment root of one tree by one.
289 if (G1->Rank == G2->Rank)
290 G1->Rank++;
291 }
292 return true;
293 }
294
295 std::unordered_map<NodeType *, NodeInfo> NodeInfoMap;
296 NodesType Nodes;
297
298public:
299 scc_member_iterator(const NodesType &InputNodes);
300
301 NodesType &operator*() { return Nodes; }
302};
303
304template <class GraphT, class GT>
306 const NodesType &InputNodes) {
307 if (InputNodes.size() <= 1) {
308 Nodes = InputNodes;
309 return;
310 }
311
312 // Initialize auxilary node information.
313 NodeInfoMap.clear();
314 for (auto *Node : InputNodes) {
315 // Construct a `NodeInfo` object in place. `insert()` would involve a copy
316 // construction, invalidating the initial value of the `Group` field, which
317 // should be `this`.
318 NodeInfoMap.try_emplace(Node);
319 }
320
321 // Sort edges by weights.
322 struct EdgeComparer {
323 bool operator()(const EdgeType *L, const EdgeType *R) const {
324 return L->Weight > R->Weight;
325 }
326 };
327
328 std::multiset<const EdgeType *, EdgeComparer> SortedEdges;
329 for (auto *Node : InputNodes) {
330 for (auto &Edge : Node->Edges) {
331 if (NodeInfoMap.count(Edge.Target))
332 SortedEdges.insert(&Edge);
333 }
334 }
335
336 // Traverse all the edges and compute the Maximum Weight Spanning Tree
337 // using Kruskal's algorithm.
338 std::unordered_set<const EdgeType *> MSTEdges;
339 for (auto *Edge : SortedEdges) {
340 if (unionGroups(Edge))
341 MSTEdges.insert(Edge);
342 }
343
344 // Run Kahn's algorithm on MST to compute a topological traversal order.
345 // The algorithm starts from nodes that have no incoming edge. These nodes are
346 // "roots" of the MST forest. This ensures that nodes are visited before their
347 // descendants are, thus ensures hot edges are processed before cold edges,
348 // based on how MST is computed.
349 std::queue<NodeType *> Queue;
350 for (const auto *Edge : MSTEdges)
351 NodeInfoMap[Edge->Target].IncomingMSTEdges.insert(Edge);
352
353 // Walk through SortedEdges to initialize the queue, instead of using NodeInfoMap
354 // to ensure an ordered deterministic push.
355 for (auto *Edge : SortedEdges) {
356 auto &Info = NodeInfoMap[Edge->Source];
357 if (!Info.Visited && Info.IncomingMSTEdges.empty()) {
358 Queue.push(Edge->Source);
359 Info.Visited = true;
360 }
361 }
362
363 while (!Queue.empty()) {
364 auto *Node = Queue.front();
365 Queue.pop();
366 Nodes.push_back(Node);
367 for (auto &Edge : Node->Edges) {
368 NodeInfo &Info = NodeInfoMap[Edge.Target];
369 Info.IncomingMSTEdges.erase(&Edge);
370 if (MSTEdges.count(&Edge) && Info.IncomingMSTEdges.empty()) {
371 Queue.push(Edge.Target);
372 }
373 }
374 }
375
376 assert(InputNodes.size() == Nodes.size() && "missing nodes in MST");
377 std::reverse(Nodes.begin(), Nodes.end());
378}
379} // end namespace llvm
380
381#endif // LLVM_ADT_SCCITERATOR_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
This file defines the little GraphTraits<X> template class that should be specialized by classes that...
#define G(x, y, z)
Definition MD5.cpp:55
#define T
Implements a dense probed hash-table based set.
Definition DenseSet.h:279
CRTP base class which implements the entire standard iterator facade in terms of a minimal subset of ...
Definition iterator.h:80
Enumerate the SCCs of a directed graph in reverse topological order of the SCC DAG.
Definition SCCIterator.h:49
scc_iterator & operator++()
static scc_iterator begin(const GraphT &G)
bool isAtEnd() const
Direct loop termination test which is more efficient than comparison with end().
static scc_iterator end(const GraphT &)
bool operator==(const scc_iterator &x) const
void ReplaceNode(NodeRef Old, NodeRef New)
This informs the scc_iterator that the specified Old node has been deleted, and New is to be used in ...
bool hasCycle() const
Test if the current SCC has a cycle.
reference operator*() const
scc_member_iterator(const NodesType &InputNodes)
std::pair< NodeId, LaneBitmask > NodeRef
Definition RDFLiveness.h:36
This is an optimization pass for GlobalISel generic memory operations.
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
@ Other
Any other memory.
Definition ModRef.h:68
scc_iterator< T > scc_end(const T &G)
Construct the end iterator for a deduced graph type T.
#define N