LLVM 20.0.0git
DependenceGraphBuilder.cpp
Go to the documentation of this file.
1//===- DependenceGraphBuilder.cpp ------------------------------------------==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8// This file implements common steps of the build algorithm for construction
9// of dependence graphs such as DDG and PDG.
10//===----------------------------------------------------------------------===//
11
17#include "llvm/ADT/Statistic.h"
18#include "llvm/Analysis/DDG.h"
19
20using namespace llvm;
21
22#define DEBUG_TYPE "dgb"
23
24STATISTIC(TotalGraphs, "Number of dependence graphs created.");
25STATISTIC(TotalDefUseEdges, "Number of def-use edges created.");
26STATISTIC(TotalMemoryEdges, "Number of memory dependence edges created.");
27STATISTIC(TotalFineGrainedNodes, "Number of fine-grained nodes created.");
28STATISTIC(TotalPiBlockNodes, "Number of pi-block nodes created.");
29STATISTIC(TotalConfusedEdges,
30 "Number of confused memory dependencies between two nodes.");
31STATISTIC(TotalEdgeReversals,
32 "Number of times the source and sink of dependence was reversed to "
33 "expose cycles in the graph.");
34
36
37//===--------------------------------------------------------------------===//
38// AbstractDependenceGraphBuilder implementation
39//===--------------------------------------------------------------------===//
40
41template <class G>
43 // The BBList is expected to be in program order.
44 size_t NextOrdinal = 1;
45 for (auto *BB : BBList)
46 for (auto &I : *BB)
47 InstOrdinalMap.insert(std::make_pair(&I, NextOrdinal++));
48}
49
50template <class G>
52 ++TotalGraphs;
53 assert(IMap.empty() && "Expected empty instruction map at start");
54 for (BasicBlock *BB : BBList)
55 for (Instruction &I : *BB) {
56 auto &NewNode = createFineGrainedNode(I);
57 IMap.insert(std::make_pair(&I, &NewNode));
58 NodeOrdinalMap.insert(std::make_pair(&NewNode, getOrdinal(I)));
59 ++TotalFineGrainedNodes;
60 }
61}
62
63template <class G>
65 // Create a root node that connects to every connected component of the graph.
66 // This is done to allow graph iterators to visit all the disjoint components
67 // of the graph, in a single walk.
68 //
69 // This algorithm works by going through each node of the graph and for each
70 // node N, do a DFS starting from N. A rooted edge is established between the
71 // root node and N (if N is not yet visited). All the nodes reachable from N
72 // are marked as visited and are skipped in the DFS of subsequent nodes.
73 //
74 // Note: This algorithm tries to limit the number of edges out of the root
75 // node to some extent, but there may be redundant edges created depending on
76 // the iteration order. For example for a graph {A -> B}, an edge from the
77 // root node is added to both nodes if B is visited before A. While it does
78 // not result in minimal number of edges, this approach saves compile-time
79 // while keeping the number of edges in check.
80 auto &RootNode = createRootNode();
82 for (auto *N : Graph) {
83 if (*N == RootNode)
84 continue;
85 for (auto I : depth_first_ext(N, Visited))
86 if (I == N)
87 createRootedEdge(RootNode, *N);
88 }
89}
92 if (!shouldCreatePiBlocks())
93 return;
94
95 LLVM_DEBUG(dbgs() << "==== Start of Creation of Pi-Blocks ===\n");
96
97 // The overall algorithm is as follows:
98 // 1. Identify SCCs and for each SCC create a pi-block node containing all
99 // the nodes in that SCC.
100 // 2. Identify incoming edges incident to the nodes inside of the SCC and
101 // reconnect them to the pi-block node.
102 // 3. Identify outgoing edges from the nodes inside of the SCC to nodes
103 // outside of it and reconnect them so that the edges are coming out of the
104 // SCC node instead.
105
106 // Adding nodes as we iterate through the SCCs cause the SCC
107 // iterators to get invalidated. To prevent this invalidation, we first
108 // collect a list of nodes that are part of an SCC, and then iterate over
109 // those lists to create the pi-block nodes. Each element of the list is a
110 // list of nodes in an SCC. Note: trivial SCCs containing a single node are
111 // ignored.
113 for (auto &SCC : make_range(scc_begin(&Graph), scc_end(&Graph))) {
114 if (SCC.size() > 1)
115 ListOfSCCs.emplace_back(SCC.begin(), SCC.end());
116 }
117
118 for (NodeListType &NL : ListOfSCCs) {
119 LLVM_DEBUG(dbgs() << "Creating pi-block node with " << NL.size()
120 << " nodes in it.\n");
121
122 // SCC iterator may put the nodes in an order that's different from the
123 // program order. To preserve original program order, we sort the list of
124 // nodes based on ordinal numbers computed earlier.
125 llvm::sort(NL, [&](NodeType *LHS, NodeType *RHS) {
126 return getOrdinal(*LHS) < getOrdinal(*RHS);
127 });
128
129 NodeType &PiNode = createPiBlock(NL);
130 ++TotalPiBlockNodes;
131
132 // Build a set to speed up the lookup for edges whose targets
133 // are inside the SCC.
134 SmallPtrSet<NodeType *, 4> NodesInSCC(NL.begin(), NL.end());
135
136 // We have the set of nodes in the SCC. We go through the set of nodes
137 // that are outside of the SCC and look for edges that cross the two sets.
138 for (NodeType *N : Graph) {
139
140 // Skip the SCC node and all the nodes inside of it.
141 if (*N == PiNode || NodesInSCC.count(N))
142 continue;
143
144 enum Direction {
145 Incoming, // Incoming edges to the SCC
146 Outgoing, // Edges going ot of the SCC
147 DirectionCount // To make the enum usable as an array index.
148 };
149
150 // Use these flags to help us avoid creating redundant edges. If there
151 // are more than one edges from an outside node to inside nodes, we only
152 // keep one edge from that node to the pi-block node. Similarly, if
153 // there are more than one edges from inside nodes to an outside node,
154 // we only keep one edge from the pi-block node to the outside node.
155 // There is a flag defined for each direction (incoming vs outgoing) and
156 // for each type of edge supported, using a two-dimensional boolean
157 // array.
158 using EdgeKind = typename EdgeType::EdgeKind;
159 EnumeratedArray<bool, EdgeKind> EdgeAlreadyCreated[DirectionCount]{false,
160 false};
161
162 auto createEdgeOfKind = [this](NodeType &Src, NodeType &Dst,
163 const EdgeKind K) {
164 switch (K) {
165 case EdgeKind::RegisterDefUse:
166 createDefUseEdge(Src, Dst);
167 break;
168 case EdgeKind::MemoryDependence:
169 createMemoryEdge(Src, Dst);
170 break;
171 case EdgeKind::Rooted:
172 createRootedEdge(Src, Dst);
173 break;
174 default:
175 llvm_unreachable("Unsupported type of edge.");
176 }
177 };
178
179 auto reconnectEdges = [&](NodeType *Src, NodeType *Dst, NodeType *New,
180 const Direction Dir) {
181 if (!Src->hasEdgeTo(*Dst))
182 return;
184 dbgs() << "reconnecting("
185 << (Dir == Direction::Incoming ? "incoming)" : "outgoing)")
186 << ":\nSrc:" << *Src << "\nDst:" << *Dst << "\nNew:" << *New
187 << "\n");
188 assert((Dir == Direction::Incoming || Dir == Direction::Outgoing) &&
189 "Invalid direction.");
190
192 Src->findEdgesTo(*Dst, EL);
193 for (EdgeType *OldEdge : EL) {
194 EdgeKind Kind = OldEdge->getKind();
195 if (!EdgeAlreadyCreated[Dir][Kind]) {
196 if (Dir == Direction::Incoming) {
197 createEdgeOfKind(*Src, *New, Kind);
198 LLVM_DEBUG(dbgs() << "created edge from Src to New.\n");
199 } else if (Dir == Direction::Outgoing) {
200 createEdgeOfKind(*New, *Dst, Kind);
201 LLVM_DEBUG(dbgs() << "created edge from New to Dst.\n");
202 }
203 EdgeAlreadyCreated[Dir][Kind] = true;
204 }
205 Src->removeEdge(*OldEdge);
206 destroyEdge(*OldEdge);
207 LLVM_DEBUG(dbgs() << "removed old edge between Src and Dst.\n\n");
208 }
209 };
210
211 for (NodeType *SCCNode : NL) {
212 // Process incoming edges incident to the pi-block node.
213 reconnectEdges(N, SCCNode, &PiNode, Direction::Incoming);
214
215 // Process edges that are coming out of the pi-block node.
216 reconnectEdges(SCCNode, N, &PiNode, Direction::Outgoing);
217 }
218 }
219 }
220
221 // Ordinal maps are no longer needed.
222 InstOrdinalMap.clear();
223 NodeOrdinalMap.clear();
224
225 LLVM_DEBUG(dbgs() << "==== End of Creation of Pi-Blocks ===\n");
226}
227
229 for (NodeType *N : Graph) {
230 InstructionListType SrcIList;
231 N->collectInstructions([](const Instruction *I) { return true; }, SrcIList);
232
233 // Use a set to mark the targets that we link to N, so we don't add
234 // duplicate def-use edges when more than one instruction in a target node
235 // use results of instructions that are contained in N.
236 SmallPtrSet<NodeType *, 4> VisitedTargets;
237
238 for (Instruction *II : SrcIList) {
239 for (User *U : II->users()) {
240 Instruction *UI = dyn_cast<Instruction>(U);
241 if (!UI)
242 continue;
243 NodeType *DstNode = nullptr;
244 if (IMap.find(UI) != IMap.end())
245 DstNode = IMap.find(UI)->second;
246
247 // In the case of loops, the scope of the subgraph is all the
248 // basic blocks (and instructions within them) belonging to the loop. We
249 // simply ignore all the edges coming from (or going into) instructions
250 // or basic blocks outside of this range.
251 if (!DstNode) {
253 dbgs()
254 << "skipped def-use edge since the sink" << *UI
255 << " is outside the range of instructions being considered.\n");
256 continue;
257 }
258
259 // Self dependencies are ignored because they are redundant and
260 // uninteresting.
261 if (DstNode == N) {
263 << "skipped def-use edge since the sink and the source ("
264 << N << ") are the same.\n");
265 continue;
266 }
267
268 if (VisitedTargets.insert(DstNode).second) {
269 createDefUseEdge(*N, *DstNode);
270 ++TotalDefUseEdges;
271 }
272 }
273 }
274 }
275}
276
277template <class G>
279 using DGIterator = typename G::iterator;
280 auto isMemoryAccess = [](const Instruction *I) {
281 return I->mayReadOrWriteMemory();
282 };
283 for (DGIterator SrcIt = Graph.begin(), E = Graph.end(); SrcIt != E; ++SrcIt) {
284 InstructionListType SrcIList;
285 (*SrcIt)->collectInstructions(isMemoryAccess, SrcIList);
286 if (SrcIList.empty())
287 continue;
288
289 for (DGIterator DstIt = SrcIt; DstIt != E; ++DstIt) {
290 if (**SrcIt == **DstIt)
291 continue;
292 InstructionListType DstIList;
293 (*DstIt)->collectInstructions(isMemoryAccess, DstIList);
294 if (DstIList.empty())
295 continue;
296 bool ForwardEdgeCreated = false;
297 bool BackwardEdgeCreated = false;
298 for (Instruction *ISrc : SrcIList) {
299 for (Instruction *IDst : DstIList) {
300 auto D = DI.depends(ISrc, IDst, true);
301 if (!D)
302 continue;
303
304 // If we have a dependence with its left-most non-'=' direction
305 // being '>' we need to reverse the direction of the edge, because
306 // the source of the dependence cannot occur after the sink. For
307 // confused dependencies, we will create edges in both directions to
308 // represent the possibility of a cycle.
309
310 auto createConfusedEdges = [&](NodeType &Src, NodeType &Dst) {
311 if (!ForwardEdgeCreated) {
312 createMemoryEdge(Src, Dst);
313 ++TotalMemoryEdges;
314 }
315 if (!BackwardEdgeCreated) {
316 createMemoryEdge(Dst, Src);
317 ++TotalMemoryEdges;
318 }
319 ForwardEdgeCreated = BackwardEdgeCreated = true;
320 ++TotalConfusedEdges;
321 };
322
323 auto createForwardEdge = [&](NodeType &Src, NodeType &Dst) {
324 if (!ForwardEdgeCreated) {
325 createMemoryEdge(Src, Dst);
326 ++TotalMemoryEdges;
327 }
328 ForwardEdgeCreated = true;
329 };
330
331 auto createBackwardEdge = [&](NodeType &Src, NodeType &Dst) {
332 if (!BackwardEdgeCreated) {
333 createMemoryEdge(Dst, Src);
334 ++TotalMemoryEdges;
335 }
336 BackwardEdgeCreated = true;
337 };
338
339 if (D->isConfused())
340 createConfusedEdges(**SrcIt, **DstIt);
341 else if (D->isOrdered() && !D->isLoopIndependent()) {
342 bool ReversedEdge = false;
343 for (unsigned Level = 1; Level <= D->getLevels(); ++Level) {
344 if (D->getDirection(Level) == Dependence::DVEntry::EQ)
345 continue;
346 else if (D->getDirection(Level) == Dependence::DVEntry::GT) {
347 createBackwardEdge(**SrcIt, **DstIt);
348 ReversedEdge = true;
349 ++TotalEdgeReversals;
350 break;
351 } else if (D->getDirection(Level) == Dependence::DVEntry::LT)
352 break;
353 else {
354 createConfusedEdges(**SrcIt, **DstIt);
355 break;
356 }
357 }
358 if (!ReversedEdge)
359 createForwardEdge(**SrcIt, **DstIt);
360 } else
361 createForwardEdge(**SrcIt, **DstIt);
362
363 // Avoid creating duplicate edges.
364 if (ForwardEdgeCreated && BackwardEdgeCreated)
365 break;
366 }
367
368 // If we've created edges in both directions, there is no more
369 // unique edge that we can create between these two nodes, so we
370 // can exit early.
371 if (ForwardEdgeCreated && BackwardEdgeCreated)
372 break;
373 }
374 }
375 }
376}
377
379 if (!shouldSimplify())
380 return;
381 LLVM_DEBUG(dbgs() << "==== Start of Graph Simplification ===\n");
382
383 // This algorithm works by first collecting a set of candidate nodes that have
384 // an out-degree of one (in terms of def-use edges), and then ignoring those
385 // whose targets have an in-degree more than one. Each node in the resulting
386 // set can then be merged with its corresponding target and put back into the
387 // worklist until no further merge candidates are available.
388 SmallPtrSet<NodeType *, 32> CandidateSourceNodes;
389
390 // A mapping between nodes and their in-degree. To save space, this map
391 // only contains nodes that are targets of nodes in the CandidateSourceNodes.
392 DenseMap<NodeType *, unsigned> TargetInDegreeMap;
393
394 for (NodeType *N : Graph) {
395 if (N->getEdges().size() != 1)
396 continue;
397 EdgeType &Edge = N->back();
398 if (!Edge.isDefUse())
399 continue;
400 CandidateSourceNodes.insert(N);
401
402 // Insert an element into the in-degree map and initialize to zero. The
403 // count will get updated in the next step.
404 TargetInDegreeMap.insert({&Edge.getTargetNode(), 0});
405 }
406
407 LLVM_DEBUG({
408 dbgs() << "Size of candidate src node list:" << CandidateSourceNodes.size()
409 << "\nNode with single outgoing def-use edge:\n";
410 for (NodeType *N : CandidateSourceNodes) {
411 dbgs() << N << "\n";
412 }
413 });
414
415 for (NodeType *N : Graph) {
416 for (EdgeType *E : *N) {
417 NodeType *Tgt = &E->getTargetNode();
418 auto TgtIT = TargetInDegreeMap.find(Tgt);
419 if (TgtIT != TargetInDegreeMap.end())
420 ++(TgtIT->second);
421 }
422 }
423
424 LLVM_DEBUG({
425 dbgs() << "Size of target in-degree map:" << TargetInDegreeMap.size()
426 << "\nContent of in-degree map:\n";
427 for (auto &I : TargetInDegreeMap) {
428 dbgs() << I.first << " --> " << I.second << "\n";
429 }
430 });
431
432 SmallVector<NodeType *, 32> Worklist(CandidateSourceNodes.begin(),
433 CandidateSourceNodes.end());
434 while (!Worklist.empty()) {
435 NodeType &Src = *Worklist.pop_back_val();
436 // As nodes get merged, we need to skip any node that has been removed from
437 // the candidate set (see below).
438 if (!CandidateSourceNodes.erase(&Src))
439 continue;
440
441 assert(Src.getEdges().size() == 1 &&
442 "Expected a single edge from the candidate src node.");
443 NodeType &Tgt = Src.back().getTargetNode();
444 assert(TargetInDegreeMap.find(&Tgt) != TargetInDegreeMap.end() &&
445 "Expected target to be in the in-degree map.");
446
447 if (TargetInDegreeMap[&Tgt] != 1)
448 continue;
449
450 if (!areNodesMergeable(Src, Tgt))
451 continue;
452
453 // Do not merge if there is also an edge from target to src (immediate
454 // cycle).
455 if (Tgt.hasEdgeTo(Src))
456 continue;
457
458 LLVM_DEBUG(dbgs() << "Merging:" << Src << "\nWith:" << Tgt << "\n");
459
460 mergeNodes(Src, Tgt);
461
462 // If the target node is in the candidate set itself, we need to put the
463 // src node back into the worklist again so it gives the target a chance
464 // to get merged into it. For example if we have:
465 // {(a)->(b), (b)->(c), (c)->(d), ...} and the worklist is initially {b, a},
466 // then after merging (a) and (b) together, we need to put (a,b) back in
467 // the worklist so that (c) can get merged in as well resulting in
468 // {(a,b,c) -> d}
469 // We also need to remove the old target (b), from the worklist. We first
470 // remove it from the candidate set here, and skip any item from the
471 // worklist that is not in the set.
472 if (CandidateSourceNodes.erase(&Tgt)) {
473 Worklist.push_back(&Src);
474 CandidateSourceNodes.insert(&Src);
475 LLVM_DEBUG(dbgs() << "Putting " << &Src << " back in the worklist.\n");
476 }
477 }
478 LLVM_DEBUG(dbgs() << "=== End of Graph Simplification ===\n");
479}
480
481template <class G>
483
484 // If we don't create pi-blocks, then we may not have a DAG.
485 if (!shouldCreatePiBlocks())
486 return;
487
489 using NodeKind = typename NodeType::NodeKind;
490 for (NodeType *N : post_order(&Graph)) {
491 if (N->getKind() == NodeKind::PiBlock) {
492 // Put members of the pi-block right after the pi-block itself, for
493 // convenience.
494 const NodeListType &PiBlockMembers = getNodesInPiBlock(*N);
495 llvm::append_range(NodesInPO, PiBlockMembers);
496 }
497 NodesInPO.push_back(N);
498 }
499
500 size_t OldSize = Graph.Nodes.size();
501 Graph.Nodes.clear();
502 append_range(Graph.Nodes, reverse(NodesInPO));
503 if (Graph.Nodes.size() != OldSize)
504 assert(false &&
505 "Expected the number of nodes to stay the same after the sort");
506}
507
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
#define NL
This file defines an array type that can be indexed using scoped enum values.
#define I(x, y, z)
Definition: MD5.cpp:58
uint64_t IntrinsicInst * II
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
This builds on the llvm/ADT/GraphTraits.h file to find the strongly connected components (SCCs) of a ...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
This abstract builder class defines a set of high-level steps for creating DDG-like graphs.
void createAndConnectRootNode()
Create a root node and add edges such that each node in the graph is reachable from the root.
void computeInstructionOrdinals()
Compute ordinal numbers for each instruction and store them in a map for future look up.
void sortNodesTopologically()
Topologically sort the graph nodes.
void createFineGrainedNodes()
Create fine grained nodes.
void createMemoryDependencyEdges()
Analyze data dependencies that exist between memory loads or stores, in the graph nodes and create ed...
void simplify()
Go through all the nodes in the graph and collapse any two nodes 'a' and 'b' if all of the following ...
void createDefUseEdges()
Analyze the def-use chains and create edges from the nodes containing definitions to the nodes contai...
void createPiBlocks()
Apply graph abstraction to groups of nodes that belong to a strongly connected component of the graph...
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
unsigned size() const
Definition: DenseMap.h:99
iterator end()
Definition: DenseMap.h:84
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
Encapsulate some common data and functionality needed for different variations of data dependence gra...
Definition: DDG.h:255
size_type size() const
Definition: SmallPtrSet.h:95
bool erase(PtrType Ptr)
Remove pointer from the set.
Definition: SmallPtrSet.h:384
iterator end() const
Definition: SmallPtrSet.h:460
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:367
iterator begin() const
Definition: SmallPtrSet.h:455
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:502
bool empty() const
Definition: SmallVector.h:94
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
iterator_range< df_ext_iterator< T, SetTy > > depth_first_ext(const T &G, SetTy &S)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2098
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
Definition: SCCIterator.h:233
iterator_range< po_iterator< T > > post_order(const T &G)
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1647
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
scc_iterator< T > scc_end(const T &G)
Construct the end iterator for a deduced graph type T.
Definition: SCCIterator.h:238
#define N
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...
Direction
An enum for the direction of the loop.
Definition: LoopInfo.h:215