LLVM 20.0.0git
SampleProfileInference.cpp
Go to the documentation of this file.
1//===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a profile inference algorithm. Given an incomplete and
10// possibly imprecise block counts, the algorithm reconstructs realistic block
// and edge counts that satisfy flow conservation rules, while minimally modifying
12// input block counts.
13//
14//===----------------------------------------------------------------------===//
15
#include "llvm/Transforms/Utils/SampleProfileInference.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include <queue>
#include <set>
#include <stack>
#include <unordered_set>
24
25using namespace llvm;
26#define DEBUG_TYPE "sample-profile-inference"
27
28namespace {
29
/// Command-line knobs tuning the profile-inference ("profi") algorithm.
static cl::opt<bool> SampleProfileEvenFlowDistribution(
    "sample-profile-even-flow-distribution", cl::init(true), cl::Hidden,
    cl::desc("Try to evenly distribute flow when there are multiple equally "
             "likely options."));

static cl::opt<bool> SampleProfileRebalanceUnknown(
    "sample-profile-rebalance-unknown", cl::init(true), cl::Hidden,
    cl::desc("Evenly re-distribute flow among unknown subgraphs."));

static cl::opt<bool> SampleProfileJoinIslands(
    "sample-profile-join-islands", cl::init(true), cl::Hidden,
    cl::desc("Join isolated components having positive flow."));

/// Per-unit costs used by the min-cost flow formulation; larger values make
/// the corresponding adjustment less desirable.
static cl::opt<unsigned> SampleProfileProfiCostBlockInc(
    "sample-profile-profi-cost-block-inc", cl::init(10), cl::Hidden,
    cl::desc("The cost of increasing a block's count by one."));

static cl::opt<unsigned> SampleProfileProfiCostBlockDec(
    "sample-profile-profi-cost-block-dec", cl::init(20), cl::Hidden,
    cl::desc("The cost of decreasing a block's count by one."));

static cl::opt<unsigned> SampleProfileProfiCostBlockEntryInc(
    "sample-profile-profi-cost-block-entry-inc", cl::init(40), cl::Hidden,
    cl::desc("The cost of increasing the entry block's count by one."));

static cl::opt<unsigned> SampleProfileProfiCostBlockEntryDec(
    "sample-profile-profi-cost-block-entry-dec", cl::init(10), cl::Hidden,
    cl::desc("The cost of decreasing the entry block's count by one."));

static cl::opt<unsigned> SampleProfileProfiCostBlockZeroInc(
    "sample-profile-profi-cost-block-zero-inc", cl::init(11), cl::Hidden,
    cl::desc("The cost of increasing a count of zero-weight block by one."));

static cl::opt<unsigned> SampleProfileProfiCostBlockUnknownInc(
    "sample-profile-profi-cost-block-unknown-inc", cl::init(0), cl::Hidden,
    cl::desc("The cost of increasing an unknown block's count by one."));

/// A value indicating an infinite flow/capacity/weight of a block/edge.
/// Not using numeric_limits<int64_t>::max(), as the values can be summed up
/// during the execution.
static constexpr int64_t INF = ((int64_t)1) << 50;
71
72/// The minimum-cost maximum flow algorithm.
73///
74/// The algorithm finds the maximum flow of minimum cost on a given (directed)
75/// network using a modified version of the classical Moore-Bellman-Ford
76/// approach. The algorithm applies a number of augmentation iterations in which
77/// flow is sent along paths of positive capacity from the source to the sink.
78/// The worst-case time complexity of the implementation is O(v(f)*m*n), where
79/// where m is the number of edges, n is the number of vertices, and v(f) is the
80/// value of the maximum flow. However, the observed running time on typical
81/// instances is sub-quadratic, that is, o(n^2).
82///
83/// The input is a set of edges with specified costs and capacities, and a pair
84/// of nodes (source and sink). The output is the flow along each edge of the
85/// minimum total cost respecting the given edge capacities.
86class MinCostMaxFlow {
87public:
88 MinCostMaxFlow(const ProfiParams &Params) : Params(Params) {}
89
90 // Initialize algorithm's data structures for a network of a given size.
91 void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) {
92 Source = SourceNode;
93 Target = SinkNode;
94
95 Nodes = std::vector<Node>(NodeCount);
96 Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>());
97 if (Params.EvenFlowDistribution)
98 AugmentingEdges =
99 std::vector<std::vector<Edge *>>(NodeCount, std::vector<Edge *>());
100 }
101
102 // Run the algorithm.
103 int64_t run() {
104 LLVM_DEBUG(dbgs() << "Starting profi for " << Nodes.size() << " nodes\n");
105
106 // Iteratively find an augmentation path/dag in the network and send the
107 // flow along its edges
108 size_t AugmentationIters = applyFlowAugmentation();
109
110 // Compute the total flow and its cost
111 int64_t TotalCost = 0;
112 int64_t TotalFlow = 0;
113 for (uint64_t Src = 0; Src < Nodes.size(); Src++) {
114 for (auto &Edge : Edges[Src]) {
115 if (Edge.Flow > 0) {
116 TotalCost += Edge.Cost * Edge.Flow;
117 if (Src == Source)
118 TotalFlow += Edge.Flow;
119 }
120 }
121 }
122 LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters
123 << " iterations with " << TotalFlow << " total flow"
124 << " of " << TotalCost << " cost\n");
125 (void)TotalFlow;
126 (void)AugmentationIters;
127 return TotalCost;
128 }
129
130 /// Adding an edge to the network with a specified capacity and a cost.
131 /// Multiple edges between a pair of nodes are allowed but self-edges
132 /// are not supported.
133 void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) {
134 assert(Capacity > 0 && "adding an edge of zero capacity");
135 assert(Src != Dst && "loop edge are not supported");
136
137 Edge SrcEdge;
138 SrcEdge.Dst = Dst;
139 SrcEdge.Cost = Cost;
140 SrcEdge.Capacity = Capacity;
141 SrcEdge.Flow = 0;
142 SrcEdge.RevEdgeIndex = Edges[Dst].size();
143
144 Edge DstEdge;
145 DstEdge.Dst = Src;
146 DstEdge.Cost = -Cost;
147 DstEdge.Capacity = 0;
148 DstEdge.Flow = 0;
149 DstEdge.RevEdgeIndex = Edges[Src].size();
150
151 Edges[Src].push_back(SrcEdge);
152 Edges[Dst].push_back(DstEdge);
153 }
154
155 /// Adding an edge to the network of infinite capacity and a given cost.
156 void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) {
157 addEdge(Src, Dst, INF, Cost);
158 }
159
160 /// Get the total flow from a given source node.
161 /// Returns a list of pairs (target node, amount of flow to the target).
162 std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const {
163 std::vector<std::pair<uint64_t, int64_t>> Flow;
164 for (const auto &Edge : Edges[Src]) {
165 if (Edge.Flow > 0)
166 Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow));
167 }
168 return Flow;
169 }
170
171 /// Get the total flow between a pair of nodes.
172 int64_t getFlow(uint64_t Src, uint64_t Dst) const {
173 int64_t Flow = 0;
174 for (const auto &Edge : Edges[Src]) {
175 if (Edge.Dst == Dst) {
176 Flow += Edge.Flow;
177 }
178 }
179 return Flow;
180 }
181
182private:
183 /// Iteratively find an augmentation path/dag in the network and send the
184 /// flow along its edges. The method returns the number of applied iterations.
185 size_t applyFlowAugmentation() {
186 size_t AugmentationIters = 0;
187 while (findAugmentingPath()) {
188 uint64_t PathCapacity = computeAugmentingPathCapacity();
189 while (PathCapacity > 0) {
190 bool Progress = false;
191 if (Params.EvenFlowDistribution) {
192 // Identify node/edge candidates for augmentation
193 identifyShortestEdges(PathCapacity);
194
195 // Find an augmenting DAG
196 auto AugmentingOrder = findAugmentingDAG();
197
198 // Apply the DAG augmentation
199 Progress = augmentFlowAlongDAG(AugmentingOrder);
200 PathCapacity = computeAugmentingPathCapacity();
201 }
202
203 if (!Progress) {
204 augmentFlowAlongPath(PathCapacity);
205 PathCapacity = 0;
206 }
207
208 AugmentationIters++;
209 }
210 }
211 return AugmentationIters;
212 }
213
214 /// Compute the capacity of the cannonical augmenting path. If the path is
215 /// saturated (that is, no flow can be sent along the path), then return 0.
216 uint64_t computeAugmentingPathCapacity() {
217 uint64_t PathCapacity = INF;
218 uint64_t Now = Target;
219 while (Now != Source) {
220 uint64_t Pred = Nodes[Now].ParentNode;
221 auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
222
223 assert(Edge.Capacity >= Edge.Flow && "incorrect edge flow");
224 uint64_t EdgeCapacity = uint64_t(Edge.Capacity - Edge.Flow);
225 PathCapacity = std::min(PathCapacity, EdgeCapacity);
226
227 Now = Pred;
228 }
229 return PathCapacity;
230 }
231
232 /// Check for existence of an augmenting path with a positive capacity.
233 bool findAugmentingPath() {
234 // Initialize data structures
235 for (auto &Node : Nodes) {
236 Node.Distance = INF;
237 Node.ParentNode = uint64_t(-1);
238 Node.ParentEdgeIndex = uint64_t(-1);
239 Node.Taken = false;
240 }
241
242 std::queue<uint64_t> Queue;
243 Queue.push(Source);
244 Nodes[Source].Distance = 0;
245 Nodes[Source].Taken = true;
246 while (!Queue.empty()) {
247 uint64_t Src = Queue.front();
248 Queue.pop();
249 Nodes[Src].Taken = false;
250 // Although the residual network contains edges with negative costs
251 // (in particular, backward edges), it can be shown that there are no
252 // negative-weight cycles and the following two invariants are maintained:
253 // (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V,
254 // where Dist is the length of the shortest path between two nodes. This
255 // allows to prune the search-space of the path-finding algorithm using
256 // the following early-stop criteria:
257 // -- If we find a path with zero-distance from Source to Target, stop the
258 // search, as the path is the shortest since Dist[Source, Target] >= 0;
259 // -- If we have Dist[Source, V] > Dist[Source, Target], then do not
260 // process node V, as it is guaranteed _not_ to be on a shortest path
261 // from Source to Target; it follows from inequalities
262 // Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target]
263 // >= Dist[Source, V]
264 if (!Params.EvenFlowDistribution && Nodes[Target].Distance == 0)
265 break;
266 if (Nodes[Src].Distance > Nodes[Target].Distance)
267 continue;
268
269 // Process adjacent edges
270 for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) {
271 auto &Edge = Edges[Src][EdgeIdx];
272 if (Edge.Flow < Edge.Capacity) {
273 uint64_t Dst = Edge.Dst;
274 int64_t NewDistance = Nodes[Src].Distance + Edge.Cost;
275 if (Nodes[Dst].Distance > NewDistance) {
276 // Update the distance and the parent node/edge
277 Nodes[Dst].Distance = NewDistance;
278 Nodes[Dst].ParentNode = Src;
279 Nodes[Dst].ParentEdgeIndex = EdgeIdx;
280 // Add the node to the queue, if it is not there yet
281 if (!Nodes[Dst].Taken) {
282 Queue.push(Dst);
283 Nodes[Dst].Taken = true;
284 }
285 }
286 }
287 }
288 }
289
290 return Nodes[Target].Distance != INF;
291 }
292
293 /// Update the current flow along the augmenting path.
294 void augmentFlowAlongPath(uint64_t PathCapacity) {
295 assert(PathCapacity > 0 && "found an incorrect augmenting path");
296 uint64_t Now = Target;
297 while (Now != Source) {
298 uint64_t Pred = Nodes[Now].ParentNode;
299 auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
300 auto &RevEdge = Edges[Now][Edge.RevEdgeIndex];
301
302 Edge.Flow += PathCapacity;
303 RevEdge.Flow -= PathCapacity;
304
305 Now = Pred;
306 }
307 }
308
309 /// Find an Augmenting DAG order using a modified version of DFS in which we
310 /// can visit a node multiple times. In the DFS search, when scanning each
311 /// edge out of a node, continue search at Edge.Dst endpoint if it has not
312 /// been discovered yet and its NumCalls < MaxDfsCalls. The algorithm
313 /// runs in O(MaxDfsCalls * |Edges| + |Nodes|) time.
314 /// It returns an Augmenting Order (Taken nodes in decreasing Finish time)
315 /// that starts with Source and ends with Target.
316 std::vector<uint64_t> findAugmentingDAG() {
317 // We use a stack based implemenation of DFS to avoid recursion.
318 // Defining DFS data structures:
319 // A pair (NodeIdx, EdgeIdx) at the top of the Stack denotes that
320 // - we are currently visiting Nodes[NodeIdx] and
321 // - the next edge to scan is Edges[NodeIdx][EdgeIdx]
322 typedef std::pair<uint64_t, uint64_t> StackItemType;
323 std::stack<StackItemType> Stack;
324 std::vector<uint64_t> AugmentingOrder;
325
326 // Phase 0: Initialize Node attributes and Time for DFS run
327 for (auto &Node : Nodes) {
328 Node.Discovery = 0;
329 Node.Finish = 0;
330 Node.NumCalls = 0;
331 Node.Taken = false;
332 }
333 uint64_t Time = 0;
334 // Mark Target as Taken
335 // Taken attribute will be propagated backwards from Target towards Source
336 Nodes[Target].Taken = true;
337
338 // Phase 1: Start DFS traversal from Source
339 Stack.emplace(Source, 0);
340 Nodes[Source].Discovery = ++Time;
341 while (!Stack.empty()) {
342 auto NodeIdx = Stack.top().first;
343 auto EdgeIdx = Stack.top().second;
344
345 // If we haven't scanned all edges out of NodeIdx, continue scanning
346 if (EdgeIdx < Edges[NodeIdx].size()) {
347 auto &Edge = Edges[NodeIdx][EdgeIdx];
348 auto &Dst = Nodes[Edge.Dst];
349 Stack.top().second++;
350
351 if (Edge.OnShortestPath) {
352 // If we haven't seen Edge.Dst so far, continue DFS search there
353 if (Dst.Discovery == 0 && Dst.NumCalls < MaxDfsCalls) {
354 Dst.Discovery = ++Time;
355 Stack.emplace(Edge.Dst, 0);
356 Dst.NumCalls++;
357 } else if (Dst.Taken && Dst.Finish != 0) {
358 // Else, if Edge.Dst already have a path to Target, so that NodeIdx
359 Nodes[NodeIdx].Taken = true;
360 }
361 }
362 } else {
363 // If we are done scanning all edge out of NodeIdx
364 Stack.pop();
365 // If we haven't found a path from NodeIdx to Target, forget about it
366 if (!Nodes[NodeIdx].Taken) {
367 Nodes[NodeIdx].Discovery = 0;
368 } else {
369 // If we have found a path from NodeIdx to Target, then finish NodeIdx
370 // and propagate Taken flag to DFS parent unless at the Source
371 Nodes[NodeIdx].Finish = ++Time;
372 // NodeIdx == Source if and only if the stack is empty
373 if (NodeIdx != Source) {
374 assert(!Stack.empty() && "empty stack while running dfs");
375 Nodes[Stack.top().first].Taken = true;
376 }
377 AugmentingOrder.push_back(NodeIdx);
378 }
379 }
380 }
381 // Nodes are collected decreasing Finish time, so the order is reversed
382 std::reverse(AugmentingOrder.begin(), AugmentingOrder.end());
383
384 // Phase 2: Extract all forward (DAG) edges and fill in AugmentingEdges
385 for (size_t Src : AugmentingOrder) {
386 AugmentingEdges[Src].clear();
387 for (auto &Edge : Edges[Src]) {
388 uint64_t Dst = Edge.Dst;
389 if (Edge.OnShortestPath && Nodes[Src].Taken && Nodes[Dst].Taken &&
390 Nodes[Dst].Finish < Nodes[Src].Finish) {
391 AugmentingEdges[Src].push_back(&Edge);
392 }
393 }
394 assert((Src == Target || !AugmentingEdges[Src].empty()) &&
395 "incorrectly constructed augmenting edges");
396 }
397
398 return AugmentingOrder;
399 }
400
401 /// Update the current flow along the given (acyclic) subgraph specified by
402 /// the vertex order, AugmentingOrder. The objective is to send as much flow
403 /// as possible while evenly distributing flow among successors of each node.
404 /// After the update at least one edge is saturated.
405 bool augmentFlowAlongDAG(const std::vector<uint64_t> &AugmentingOrder) {
406 // Phase 0: Initialization
407 for (uint64_t Src : AugmentingOrder) {
408 Nodes[Src].FracFlow = 0;
409 Nodes[Src].IntFlow = 0;
410 for (auto &Edge : AugmentingEdges[Src]) {
411 Edge->AugmentedFlow = 0;
412 }
413 }
414
415 // Phase 1: Send a unit of fractional flow along the DAG
416 uint64_t MaxFlowAmount = INF;
417 Nodes[Source].FracFlow = 1.0;
418 for (uint64_t Src : AugmentingOrder) {
419 assert((Src == Target || Nodes[Src].FracFlow > 0.0) &&
420 "incorrectly computed fractional flow");
421 // Distribute flow evenly among successors of Src
422 uint64_t Degree = AugmentingEdges[Src].size();
423 for (auto &Edge : AugmentingEdges[Src]) {
424 double EdgeFlow = Nodes[Src].FracFlow / Degree;
425 Nodes[Edge->Dst].FracFlow += EdgeFlow;
426 if (Edge->Capacity == INF)
427 continue;
428 uint64_t MaxIntFlow = double(Edge->Capacity - Edge->Flow) / EdgeFlow;
429 MaxFlowAmount = std::min(MaxFlowAmount, MaxIntFlow);
430 }
431 }
432 // Stop early if we cannot send any (integral) flow from Source to Target
433 if (MaxFlowAmount == 0)
434 return false;
435
436 // Phase 2: Send an integral flow of MaxFlowAmount
437 Nodes[Source].IntFlow = MaxFlowAmount;
438 for (uint64_t Src : AugmentingOrder) {
439 if (Src == Target)
440 break;
441 // Distribute flow evenly among successors of Src, rounding up to make
442 // sure all flow is sent
443 uint64_t Degree = AugmentingEdges[Src].size();
444 // We are guaranteeed that Node[Src].IntFlow <= SuccFlow * Degree
445 uint64_t SuccFlow = (Nodes[Src].IntFlow + Degree - 1) / Degree;
446 for (auto &Edge : AugmentingEdges[Src]) {
447 uint64_t Dst = Edge->Dst;
448 uint64_t EdgeFlow = std::min(Nodes[Src].IntFlow, SuccFlow);
449 EdgeFlow = std::min(EdgeFlow, uint64_t(Edge->Capacity - Edge->Flow));
450 Nodes[Dst].IntFlow += EdgeFlow;
451 Nodes[Src].IntFlow -= EdgeFlow;
452 Edge->AugmentedFlow += EdgeFlow;
453 }
454 }
455 assert(Nodes[Target].IntFlow <= MaxFlowAmount);
456 Nodes[Target].IntFlow = 0;
457
458 // Phase 3: Send excess flow back traversing the nodes backwards.
459 // Because of rounding, not all flow can be sent along the edges of Src.
460 // Hence, sending the remaining flow back to maintain flow conservation
461 for (size_t Idx = AugmentingOrder.size() - 1; Idx > 0; Idx--) {
462 uint64_t Src = AugmentingOrder[Idx - 1];
463 // Try to send excess flow back along each edge.
464 // Make sure we only send back flow we just augmented (AugmentedFlow).
465 for (auto &Edge : AugmentingEdges[Src]) {
466 uint64_t Dst = Edge->Dst;
467 if (Nodes[Dst].IntFlow == 0)
468 continue;
469 uint64_t EdgeFlow = std::min(Nodes[Dst].IntFlow, Edge->AugmentedFlow);
470 Nodes[Dst].IntFlow -= EdgeFlow;
471 Nodes[Src].IntFlow += EdgeFlow;
472 Edge->AugmentedFlow -= EdgeFlow;
473 }
474 }
475
476 // Phase 4: Update flow values along all edges
477 bool HasSaturatedEdges = false;
478 for (uint64_t Src : AugmentingOrder) {
479 // Verify that we have sent all the excess flow from the node
480 assert(Src == Source || Nodes[Src].IntFlow == 0);
481 for (auto &Edge : AugmentingEdges[Src]) {
482 assert(uint64_t(Edge->Capacity - Edge->Flow) >= Edge->AugmentedFlow);
483 // Update flow values along the edge and its reverse copy
484 auto &RevEdge = Edges[Edge->Dst][Edge->RevEdgeIndex];
485 Edge->Flow += Edge->AugmentedFlow;
486 RevEdge.Flow -= Edge->AugmentedFlow;
487 if (Edge->Capacity == Edge->Flow && Edge->AugmentedFlow > 0)
488 HasSaturatedEdges = true;
489 }
490 }
491
492 // The augmentation is successful iff at least one edge becomes saturated
493 return HasSaturatedEdges;
494 }
495
496 /// Identify candidate (shortest) edges for augmentation.
497 void identifyShortestEdges(uint64_t PathCapacity) {
498 assert(PathCapacity > 0 && "found an incorrect augmenting DAG");
499 // To make sure the augmentation DAG contains only edges with large residual
500 // capacity, we prune all edges whose capacity is below a fraction of
501 // the capacity of the augmented path.
502 // (All edges of the path itself are always in the DAG)
503 uint64_t MinCapacity = std::max(PathCapacity / 2, uint64_t(1));
504
505 // Decide which edges are on a shortest path from Source to Target
506 for (size_t Src = 0; Src < Nodes.size(); Src++) {
507 // An edge cannot be augmenting if the endpoint has large distance
508 if (Nodes[Src].Distance > Nodes[Target].Distance)
509 continue;
510
511 for (auto &Edge : Edges[Src]) {
512 uint64_t Dst = Edge.Dst;
513 Edge.OnShortestPath =
514 Src != Target && Dst != Source &&
515 Nodes[Dst].Distance <= Nodes[Target].Distance &&
516 Nodes[Dst].Distance == Nodes[Src].Distance + Edge.Cost &&
517 Edge.Capacity > Edge.Flow &&
518 uint64_t(Edge.Capacity - Edge.Flow) >= MinCapacity;
519 }
520 }
521 }
522
523 /// Maximum number of DFS iterations for DAG finding.
524 static constexpr uint64_t MaxDfsCalls = 10;
525
526 /// A node in a flow network.
527 struct Node {
528 /// The cost of the cheapest path from the source to the current node.
529 int64_t Distance;
530 /// The node preceding the current one in the path.
531 uint64_t ParentNode;
532 /// The index of the edge between ParentNode and the current node.
533 uint64_t ParentEdgeIndex;
534 /// An indicator of whether the current node is in a queue.
535 bool Taken;
536
537 /// Data fields utilized in DAG-augmentation:
538 /// Fractional flow.
539 double FracFlow;
540 /// Integral flow.
541 uint64_t IntFlow;
542 /// Discovery time.
543 uint64_t Discovery;
544 /// Finish time.
545 uint64_t Finish;
546 /// NumCalls.
547 uint64_t NumCalls;
548 };
549
550 /// An edge in a flow network.
551 struct Edge {
552 /// The cost of the edge.
553 int64_t Cost;
554 /// The capacity of the edge.
555 int64_t Capacity;
556 /// The current flow on the edge.
557 int64_t Flow;
558 /// The destination node of the edge.
559 uint64_t Dst;
560 /// The index of the reverse edge between Dst and the current node.
561 uint64_t RevEdgeIndex;
562
563 /// Data fields utilized in DAG-augmentation:
564 /// Whether the edge is currently on a shortest path from Source to Target.
565 bool OnShortestPath;
566 /// Extra flow along the edge.
567 uint64_t AugmentedFlow;
568 };
569
570 /// The set of network nodes.
571 std::vector<Node> Nodes;
572 /// The set of network edges.
573 std::vector<std::vector<Edge>> Edges;
574 /// Source node of the flow.
575 uint64_t Source;
576 /// Target (sink) node of the flow.
578 /// Augmenting edges.
579 std::vector<std::vector<Edge *>> AugmentingEdges;
580 /// Params for flow computation.
581 const ProfiParams &Params;
582};
583
584/// A post-processing adjustment of the control flow. It applies two steps by
585/// rerouting some flow and making it more realistic:
586///
587/// - First, it removes all isolated components ("islands") with a positive flow
588/// that are unreachable from the entry block. For every such component, we
589/// find the shortest from the entry to an exit passing through the component,
590/// and increase the flow by one unit along the path.
591///
592/// - Second, it identifies all "unknown subgraphs" consisting of basic blocks
593/// with no sampled counts. Then it rebalnces the flow that goes through such
594/// a subgraph so that each branch is taken with probability 50%.
595/// An unknown subgraph is such that for every two nodes u and v:
596/// - u dominates v and u is not unknown;
597/// - v post-dominates u; and
598/// - all inner-nodes of all (u,v)-paths are unknown.
599///
600class FlowAdjuster {
601public:
  /// Construct an adjuster over \p Func using the tuning knobs in \p Params.
  /// Both references must outlive this object; run() mutates Func in place.
  FlowAdjuster(const ProfiParams &Params, FlowFunction &Func)
      : Params(Params), Func(Func) {}
604
605 /// Apply the post-processing.
606 void run() {
607 if (Params.JoinIslands) {
608 // Adjust the flow to get rid of isolated components
609 joinIsolatedComponents();
610 }
611
612 if (Params.RebalanceUnknown) {
613 // Rebalance the flow inside unknown subgraphs
614 rebalanceUnknownSubgraphs();
615 }
616 }
617
618private:
  /// Make every block with positive flow reachable from the entry by pushing
  /// one extra unit of flow along a shortest entry-to-exit path through each
  /// isolated block.
  void joinIsolatedComponents() {
    // Find blocks that are reachable from the source
    auto Visited = BitVector(NumBlocks(), false);
    findReachable(Func.Entry, Visited);

    // Iterate over all non-reachable blocks and adjust their weights
    for (uint64_t I = 0; I < NumBlocks(); I++) {
      auto &Block = Func.Blocks[I];
      if (Block.Flow > 0 && !Visited[I]) {
        // Find a path from the entry to an exit passing through the block I
        auto Path = findShortestPath(I);
        // Increase the flow along the path
        assert(Path.size() > 0 && Path[0]->Source == Func.Entry &&
               "incorrectly computed path adjusting control flow");
        Func.Blocks[Func.Entry].Flow += 1;
        for (auto &Jump : Path) {
          // Bump both the jump and its target block by one unit of flow
          Jump->Flow += 1;
          Func.Blocks[Jump->Target].Flow += 1;
          // Update reachability so later blocks on this path are not
          // re-processed as isolated
          findReachable(Jump->Target, Visited);
        }
      }
    }
  }
643
644 /// Run BFS from a given block along the jumps with a positive flow and mark
645 /// all reachable blocks.
646 void findReachable(uint64_t Src, BitVector &Visited) {
647 if (Visited[Src])
648 return;
649 std::queue<uint64_t> Queue;
650 Queue.push(Src);
651 Visited[Src] = true;
652 while (!Queue.empty()) {
653 Src = Queue.front();
654 Queue.pop();
655 for (auto *Jump : Func.Blocks[Src].SuccJumps) {
656 uint64_t Dst = Jump->Target;
657 if (Jump->Flow > 0 && !Visited[Dst]) {
658 Queue.push(Dst);
659 Visited[Dst] = true;
660 }
661 }
662 }
663 }
664
665 /// Find the shortest path from the entry block to an exit block passing
666 /// through a given block.
667 std::vector<FlowJump *> findShortestPath(uint64_t BlockIdx) {
668 // A path from the entry block to BlockIdx
669 auto ForwardPath = findShortestPath(Func.Entry, BlockIdx);
670 // A path from BlockIdx to an exit block
671 auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock);
672
673 // Concatenate the two paths
674 std::vector<FlowJump *> Result;
675 Result.insert(Result.end(), ForwardPath.begin(), ForwardPath.end());
676 Result.insert(Result.end(), BackwardPath.begin(), BackwardPath.end());
677 return Result;
678 }
679
  /// Apply the Dijkstra algorithm to find the shortest path from a given
  /// Source to a given Target block.
  /// If Target == AnyExitBlock, then the path ends at an exit block.
  std::vector<FlowJump *> findShortestPath(uint64_t Source, uint64_t Target) {
    // Quit early, if possible
    if (Source == Target)
      return std::vector<FlowJump *>();
    if (Func.Blocks[Source].isExit() && Target == AnyExitBlock)
      return std::vector<FlowJump *>();

    // Initialize data structures
    auto Distance = std::vector<int64_t>(NumBlocks(), INF);
    auto Parent = std::vector<FlowJump *>(NumBlocks(), nullptr);
    Distance[Source] = 0;
    // The set is ordered by (distance, block), so begin() is the closest
    // unprocessed block; erase+insert below implements "decrease-key"
    std::set<std::pair<uint64_t, uint64_t>> Queue;
    Queue.insert(std::make_pair(Distance[Source], Source));

    // Run the Dijkstra algorithm
    while (!Queue.empty()) {
      uint64_t Src = Queue.begin()->second;
      Queue.erase(Queue.begin());
      // If we found a solution, quit early
      if (Src == Target ||
          (Func.Blocks[Src].isExit() && Target == AnyExitBlock))
        break;

      // Relax all outgoing jumps of Src
      for (auto *Jump : Func.Blocks[Src].SuccJumps) {
        uint64_t Dst = Jump->Target;
        int64_t JumpDist = jumpDistance(Jump);
        if (Distance[Dst] > Distance[Src] + JumpDist) {
          // Remove the stale queue entry (no-op if Dst was never queued)
          Queue.erase(std::make_pair(Distance[Dst], Dst));

          Distance[Dst] = Distance[Src] + JumpDist;
          Parent[Dst] = Jump;

          Queue.insert(std::make_pair(Distance[Dst], Dst));
        }
      }
    }
    // If Target is not provided, find the closest exit block
    if (Target == AnyExitBlock) {
      for (uint64_t I = 0; I < NumBlocks(); I++) {
        if (Func.Blocks[I].isExit() && Parent[I] != nullptr) {
          if (Target == AnyExitBlock || Distance[Target] > Distance[I]) {
            Target = I;
          }
        }
      }
    }
    assert(Parent[Target] != nullptr && "a path does not exist");

    // Extract the constructed path by walking parent jumps backwards
    std::vector<FlowJump *> Result;
    uint64_t Now = Target;
    while (Now != Source) {
      assert(Now == Parent[Now]->Target && "incorrect parent jump");
      Result.push_back(Parent[Now]);
      Now = Parent[Now]->Source;
    }
    // Reverse the path, since it is extracted from Target to Source
    std::reverse(Result.begin(), Result.end());
    return Result;
  }
743
  /// A distance of a path for a given jump.
  /// In order to incite the path to use blocks/jumps with large positive flow,
  /// and avoid changing branch probability of outgoing edges drastically,
  /// set the jump distance so as:
  /// - to minimize the number of unlikely jumps used and subject to that,
  /// - to minimize the number of Flow == 0 jumps used and subject to that,
  /// - minimizes total multiplicative Flow increase for the remaining edges.
  /// To capture this objective with integer distances, we round off fractional
  /// parts to a multiple of 1 / BaseDistance.
  int64_t jumpDistance(FlowJump *Jump) const {
    // Unlikely jumps get the maximal (dominating) cost
    if (Jump->IsUnlikely)
      return Params.CostUnlikely;
    // BaseDistance is capped so that even NumBlocks() zero-flow jumps (each
    // costing 2 * BaseDistance * (NumBlocks() + 1)) stay cheaper than a
    // single unlikely jump
    uint64_t BaseDistance =
        std::max(FlowAdjuster::MinBaseDistance,
                 std::min(Func.Blocks[Func.Entry].Flow,
                          Params.CostUnlikely / (2 * (NumBlocks() + 1))));
    if (Jump->Flow > 0)
      return BaseDistance + BaseDistance / Jump->Flow;
    // Zero-flow jumps: more expensive than any number of positive-flow jumps
    return 2 * BaseDistance * (NumBlocks() + 1);
  };
764
  /// The number of basic blocks in the function being adjusted.
  uint64_t NumBlocks() const { return Func.Blocks.size(); }
766
  /// Rebalance unknown subgraphs so that the flow is split evenly across the
  /// outgoing branches of every block of the subgraph. The method iterates over
  /// blocks with known weight and identifies unknown subgraphs rooted at the
  /// blocks. Then it verifies if flow rebalancing is feasible and applies it.
  void rebalanceUnknownSubgraphs() {
    // Try to find unknown subgraphs from each block
    for (const FlowBlock &SrcBlock : Func.Blocks) {
      // Verify if rebalancing rooted at SrcBlock is feasible
      if (!canRebalanceAtRoot(&SrcBlock))
        continue;

      // Find an unknown subgraph starting at SrcBlock. Along the way,
      // fill in known destinations and intermediate unknown blocks.
      std::vector<FlowBlock *> UnknownBlocks;
      std::vector<FlowBlock *> KnownDstBlocks;
      findUnknownSubgraph(&SrcBlock, KnownDstBlocks, UnknownBlocks);

      // Verify if rebalancing of the subgraph is feasible. If the search is
      // successful, find the unique destination block (which can be null)
      FlowBlock *DstBlock = nullptr;
      if (!canRebalanceSubgraph(&SrcBlock, KnownDstBlocks, UnknownBlocks,
                                DstBlock))
        continue;

      // We cannot rebalance subgraphs containing cycles among unknown blocks
      if (!isAcyclicSubgraph(&SrcBlock, DstBlock, UnknownBlocks))
        continue;

      // Rebalance the flow
      rebalanceUnknownSubgraph(&SrcBlock, DstBlock, UnknownBlocks);
    }
  }
799
800 /// Verify if rebalancing rooted at a given block is possible.
801 bool canRebalanceAtRoot(const FlowBlock *SrcBlock) {
802 // Do not attempt to find unknown subgraphs from an unknown or a
803 // zero-flow block
804 if (SrcBlock->HasUnknownWeight || SrcBlock->Flow == 0)
805 return false;
806
807 // Do not attempt to process subgraphs from a block w/o unknown sucessors
808 bool HasUnknownSuccs = false;
809 for (auto *Jump : SrcBlock->SuccJumps) {
810 if (Func.Blocks[Jump->Target].HasUnknownWeight) {
811 HasUnknownSuccs = true;
812 break;
813 }
814 }
815 if (!HasUnknownSuccs)
816 return false;
817
818 return true;
819 }
820
  /// Find an unknown subgraph starting at block SrcBlock. The method sets
  /// identified destinations, KnownDstBlocks, and intermediate UnknownBlocks.
  void findUnknownSubgraph(const FlowBlock *SrcBlock,
                           std::vector<FlowBlock *> &KnownDstBlocks,
                           std::vector<FlowBlock *> &UnknownBlocks) {
    // Run BFS from SrcBlock and make sure all paths are going through unknown
    // blocks and end at a known DstBlock
    auto Visited = BitVector(NumBlocks(), false);
    std::queue<uint64_t> Queue;

    Queue.push(SrcBlock->Index);
    Visited[SrcBlock->Index] = true;
    while (!Queue.empty()) {
      auto &Block = Func.Blocks[Queue.front()];
      Queue.pop();
      // Process blocks reachable from Block
      for (auto *Jump : Block.SuccJumps) {
        // If Jump can be ignored, skip it
        if (ignoreJump(SrcBlock, nullptr, Jump))
          continue;

        uint64_t Dst = Jump->Target;
        // If Dst has been visited, skip Jump
        if (Visited[Dst])
          continue;
        // Process block Dst
        Visited[Dst] = true;
        if (!Func.Blocks[Dst].HasUnknownWeight) {
          // A known-weight block terminates the search along this path
          KnownDstBlocks.push_back(&Func.Blocks[Dst]);
        } else {
          // Unknown-weight blocks belong to the subgraph; keep expanding
          Queue.push(Dst);
          UnknownBlocks.push_back(&Func.Blocks[Dst]);
        }
      }
    }
  }
857
858 /// Verify if rebalancing of the subgraph is feasible. If the checks are
859 /// successful, set the unique destination block, DstBlock (can be null).
860 bool canRebalanceSubgraph(const FlowBlock *SrcBlock,
861 const std::vector<FlowBlock *> &KnownDstBlocks,
862 const std::vector<FlowBlock *> &UnknownBlocks,
863 FlowBlock *&DstBlock) {
864 // If the list of unknown blocks is empty, we don't need rebalancing
865 if (UnknownBlocks.empty())
866 return false;
867
868 // If there are multiple known sinks, we can't rebalance
869 if (KnownDstBlocks.size() > 1)
870 return false;
871 DstBlock = KnownDstBlocks.empty() ? nullptr : KnownDstBlocks.front();
872
873 // Verify sinks of the subgraph
874 for (auto *Block : UnknownBlocks) {
875 if (Block->SuccJumps.empty()) {
876 // If there are multiple (known and unknown) sinks, we can't rebalance
877 if (DstBlock != nullptr)
878 return false;
879 continue;
880 }
881 size_t NumIgnoredJumps = 0;
882 for (auto *Jump : Block->SuccJumps) {
883 if (ignoreJump(SrcBlock, DstBlock, Jump))
884 NumIgnoredJumps++;
885 }
886 // If there is a non-sink block in UnknownBlocks with all jumps ignored,
887 // then we can't rebalance
888 if (NumIgnoredJumps == Block->SuccJumps.size())
889 return false;
890 }
891
892 return true;
893 }
894
895 /// Decide whether the Jump is ignored while processing an unknown subgraphs
896 /// rooted at basic block SrcBlock with the destination block, DstBlock.
897 bool ignoreJump(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
898 const FlowJump *Jump) {
899 // Ignore unlikely jumps with zero flow
900 if (Jump->IsUnlikely && Jump->Flow == 0)
901 return true;
902
903 auto JumpSource = &Func.Blocks[Jump->Source];
904 auto JumpTarget = &Func.Blocks[Jump->Target];
905
906 // Do not ignore jumps coming into DstBlock
907 if (DstBlock != nullptr && JumpTarget == DstBlock)
908 return false;
909
910 // Ignore jumps out of SrcBlock to known blocks
911 if (!JumpTarget->HasUnknownWeight && JumpSource == SrcBlock)
912 return true;
913
914 // Ignore jumps to known blocks with zero flow
915 if (!JumpTarget->HasUnknownWeight && JumpTarget->Flow == 0)
916 return true;
917
918 return false;
919 }
920
921 /// Verify if the given unknown subgraph is acyclic, and if yes, reorder
922 /// UnknownBlocks in the topological order (so that all jumps are "forward").
923 bool isAcyclicSubgraph(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
924 std::vector<FlowBlock *> &UnknownBlocks) {
925 // Extract local in-degrees in the considered subgraph
926 auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0);
927 auto fillInDegree = [&](const FlowBlock *Block) {
928 for (auto *Jump : Block->SuccJumps) {
929 if (ignoreJump(SrcBlock, DstBlock, Jump))
930 continue;
931 LocalInDegree[Jump->Target]++;
932 }
933 };
934 fillInDegree(SrcBlock);
935 for (auto *Block : UnknownBlocks) {
936 fillInDegree(Block);
937 }
938 // A loop containing SrcBlock
939 if (LocalInDegree[SrcBlock->Index] > 0)
940 return false;
941
942 std::vector<FlowBlock *> AcyclicOrder;
943 std::queue<uint64_t> Queue;
944 Queue.push(SrcBlock->Index);
945 while (!Queue.empty()) {
946 FlowBlock *Block = &Func.Blocks[Queue.front()];
947 Queue.pop();
948 // Stop propagation once we reach DstBlock, if any
949 if (DstBlock != nullptr && Block == DstBlock)
950 break;
951
952 // Keep an acyclic order of unknown blocks
953 if (Block->HasUnknownWeight && Block != SrcBlock)
954 AcyclicOrder.push_back(Block);
955
956 // Add to the queue all successors with zero local in-degree
957 for (auto *Jump : Block->SuccJumps) {
958 if (ignoreJump(SrcBlock, DstBlock, Jump))
959 continue;
960 uint64_t Dst = Jump->Target;
961 LocalInDegree[Dst]--;
962 if (LocalInDegree[Dst] == 0) {
963 Queue.push(Dst);
964 }
965 }
966 }
967
968 // If there is a cycle in the subgraph, AcyclicOrder contains only a subset
969 // of all blocks
970 if (UnknownBlocks.size() != AcyclicOrder.size())
971 return false;
972 UnknownBlocks = AcyclicOrder;
973 return true;
974 }
975
976 /// Rebalance a given subgraph rooted at SrcBlock, ending at DstBlock and
977 /// having UnknownBlocks intermediate blocks.
978 void rebalanceUnknownSubgraph(const FlowBlock *SrcBlock,
979 const FlowBlock *DstBlock,
980 const std::vector<FlowBlock *> &UnknownBlocks) {
981 assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph");
982
983 // Ditribute flow from the source block
984 uint64_t BlockFlow = 0;
985 // SrcBlock's flow is the sum of outgoing flows along non-ignored jumps
986 for (auto *Jump : SrcBlock->SuccJumps) {
987 if (ignoreJump(SrcBlock, DstBlock, Jump))
988 continue;
989 BlockFlow += Jump->Flow;
990 }
991 rebalanceBlock(SrcBlock, DstBlock, SrcBlock, BlockFlow);
992
993 // Ditribute flow from the remaining blocks
994 for (auto *Block : UnknownBlocks) {
995 assert(Block->HasUnknownWeight && "incorrect unknown subgraph");
996 uint64_t BlockFlow = 0;
997 // Block's flow is the sum of incoming flows
998 for (auto *Jump : Block->PredJumps) {
999 BlockFlow += Jump->Flow;
1000 }
1001 Block->Flow = BlockFlow;
1002 rebalanceBlock(SrcBlock, DstBlock, Block, BlockFlow);
1003 }
1004 }
1005
1006 /// Redistribute flow for a block in a subgraph rooted at SrcBlock,
1007 /// and ending at DstBlock.
1008 void rebalanceBlock(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
1009 const FlowBlock *Block, uint64_t BlockFlow) {
1010 // Process all successor jumps and update corresponding flow values
1011 size_t BlockDegree = 0;
1012 for (auto *Jump : Block->SuccJumps) {
1013 if (ignoreJump(SrcBlock, DstBlock, Jump))
1014 continue;
1015 BlockDegree++;
1016 }
1017 // If all successor jumps of the block are ignored, skip it
1018 if (DstBlock == nullptr && BlockDegree == 0)
1019 return;
1020 assert(BlockDegree > 0 && "all outgoing jumps are ignored");
1021
1022 // Each of the Block's successors gets the following amount of flow.
1023 // Rounding the value up so that all flow is propagated
1024 uint64_t SuccFlow = (BlockFlow + BlockDegree - 1) / BlockDegree;
1025 for (auto *Jump : Block->SuccJumps) {
1026 if (ignoreJump(SrcBlock, DstBlock, Jump))
1027 continue;
1028 uint64_t Flow = std::min(SuccFlow, BlockFlow);
1029 Jump->Flow = Flow;
1030 BlockFlow -= Flow;
1031 }
1032 assert(BlockFlow == 0 && "not all flow is propagated");
1033 }
1034
  /// A sentinel block index representing an arbitrary exit block of a
  /// function (used when any exit may serve as a flow destination).
  static constexpr uint64_t AnyExitBlock = uint64_t(-1);
  /// Minimum BaseDistance for the jump distance values in island joining.
  static constexpr uint64_t MinBaseDistance = 10000;

  /// Params controlling the flow computation (costs and feature toggles).
  const ProfiParams &Params;
  /// The function whose block and jump flow values are being adjusted.
  FlowFunction &Func;
};
1045
// Forward declarations of the cost-assignment helpers used below by
// initializeNetwork; each returns an (increase, decrease) cost pair.
std::pair<int64_t, int64_t> assignBlockCosts(const ProfiParams &Params,
                                             const FlowBlock &Block);
std::pair<int64_t, int64_t> assignJumpCosts(const ProfiParams &Params,
                                            const FlowJump &Jump);
1050
1051/// Initializing flow network for a given function.
1052///
1053/// Every block is split into two nodes that are responsible for (i) an
1054/// incoming flow, (ii) an outgoing flow; they penalize an increase or a
1055/// reduction of the block weight.
1056void initializeNetwork(const ProfiParams &Params, MinCostMaxFlow &Network,
1057 FlowFunction &Func) {
1058 uint64_t NumBlocks = Func.Blocks.size();
1059 assert(NumBlocks > 1 && "Too few blocks in a function");
1060 uint64_t NumJumps = Func.Jumps.size();
1061 assert(NumJumps > 0 && "Too few jumps in a function");
1062
1063 // Introducing dummy source/sink pairs to allow flow circulation.
1064 // The nodes corresponding to blocks of the function have indices in
1065 // the range [0 .. 2 * NumBlocks); the dummy sources/sinks are indexed by the
1066 // next four values.
1067 uint64_t S = 2 * NumBlocks;
1068 uint64_t T = S + 1;
1069 uint64_t S1 = S + 2;
1070 uint64_t T1 = S + 3;
1071
1072 Network.initialize(2 * NumBlocks + 4, S1, T1);
1073
1074 // Initialize nodes of the flow network
1075 for (uint64_t B = 0; B < NumBlocks; B++) {
1076 auto &Block = Func.Blocks[B];
1077
1078 // Split every block into two auxiliary nodes to allow
1079 // increase/reduction of the block count.
1080 uint64_t Bin = 2 * B;
1081 uint64_t Bout = 2 * B + 1;
1082
1083 // Edges from S and to T
1084 if (Block.isEntry()) {
1085 Network.addEdge(S, Bin, 0);
1086 } else if (Block.isExit()) {
1087 Network.addEdge(Bout, T, 0);
1088 }
1089
1090 // Assign costs for increasing/decreasing the block counts
1091 auto [AuxCostInc, AuxCostDec] = assignBlockCosts(Params, Block);
1092
1093 // Add the corresponding edges to the network
1094 Network.addEdge(Bin, Bout, AuxCostInc);
1095 if (Block.Weight > 0) {
1096 Network.addEdge(Bout, Bin, Block.Weight, AuxCostDec);
1097 Network.addEdge(S1, Bout, Block.Weight, 0);
1098 Network.addEdge(Bin, T1, Block.Weight, 0);
1099 }
1100 }
1101
1102 // Initialize edges of the flow network
1103 for (uint64_t J = 0; J < NumJumps; J++) {
1104 auto &Jump = Func.Jumps[J];
1105
1106 // Get the endpoints corresponding to the jump
1107 uint64_t Jin = 2 * Jump.Source + 1;
1108 uint64_t Jout = 2 * Jump.Target;
1109
1110 // Assign costs for increasing/decreasing the jump counts
1111 auto [AuxCostInc, AuxCostDec] = assignJumpCosts(Params, Jump);
1112
1113 // Add the corresponding edges to the network
1114 Network.addEdge(Jin, Jout, AuxCostInc);
1115 if (Jump.Weight > 0) {
1116 Network.addEdge(Jout, Jin, Jump.Weight, AuxCostDec);
1117 Network.addEdge(S1, Jout, Jump.Weight, 0);
1118 Network.addEdge(Jin, T1, Jump.Weight, 0);
1119 }
1120 }
1121
1122 // Make sure we have a valid flow circulation
1123 Network.addEdge(T, S, 0);
1124}
1125
1126/// Assign costs for increasing/decreasing the block counts.
1127std::pair<int64_t, int64_t> assignBlockCosts(const ProfiParams &Params,
1128 const FlowBlock &Block) {
1129 // Modifying the weight of an unlikely block is expensive
1130 if (Block.IsUnlikely)
1131 return std::make_pair(Params.CostUnlikely, Params.CostUnlikely);
1132
1133 // Assign default values for the costs
1134 int64_t CostInc = Params.CostBlockInc;
1135 int64_t CostDec = Params.CostBlockDec;
1136 // Update the costs depending on the block metadata
1137 if (Block.HasUnknownWeight) {
1138 CostInc = Params.CostBlockUnknownInc;
1139 CostDec = 0;
1140 } else {
1141 // Increasing the count for "cold" blocks with zero initial count is more
1142 // expensive than for "hot" ones
1143 if (Block.Weight == 0)
1144 CostInc = Params.CostBlockZeroInc;
1145 // Modifying the count of the entry block is expensive
1146 if (Block.isEntry()) {
1147 CostInc = Params.CostBlockEntryInc;
1148 CostDec = Params.CostBlockEntryDec;
1149 }
1150 }
1151 return std::make_pair(CostInc, CostDec);
1152}
1153
1154/// Assign costs for increasing/decreasing the jump counts.
1155std::pair<int64_t, int64_t> assignJumpCosts(const ProfiParams &Params,
1156 const FlowJump &Jump) {
1157 // Modifying the weight of an unlikely jump is expensive
1158 if (Jump.IsUnlikely)
1159 return std::make_pair(Params.CostUnlikely, Params.CostUnlikely);
1160
1161 // Assign default values for the costs
1162 int64_t CostInc = Params.CostJumpInc;
1163 int64_t CostDec = Params.CostJumpDec;
1164 // Update the costs depending on the block metadata
1165 if (Jump.Source + 1 == Jump.Target) {
1166 // Adjusting the fall-through branch
1167 CostInc = Params.CostJumpFTInc;
1168 CostDec = Params.CostJumpFTDec;
1169 }
1170 if (Jump.HasUnknownWeight) {
1171 // The cost is different for fall-through and non-fall-through branches
1172 if (Jump.Source + 1 == Jump.Target)
1173 CostInc = Params.CostJumpUnknownFTInc;
1174 else
1175 CostInc = Params.CostJumpUnknownInc;
1176 CostDec = 0;
1177 } else {
1178 assert(Jump.Weight > 0 && "found zero-weight jump with a positive weight");
1179 }
1180 return std::make_pair(CostInc, CostDec);
1181}
1182
1183/// Extract resulting block and edge counts from the flow network.
1184void extractWeights(const ProfiParams &Params, MinCostMaxFlow &Network,
1185 FlowFunction &Func) {
1186 uint64_t NumBlocks = Func.Blocks.size();
1187 uint64_t NumJumps = Func.Jumps.size();
1188
1189 // Extract resulting jump counts
1190 for (uint64_t J = 0; J < NumJumps; J++) {
1191 auto &Jump = Func.Jumps[J];
1192 uint64_t SrcOut = 2 * Jump.Source + 1;
1193 uint64_t DstIn = 2 * Jump.Target;
1194
1195 int64_t Flow = 0;
1196 int64_t AuxFlow = Network.getFlow(SrcOut, DstIn);
1197 if (Jump.Source != Jump.Target)
1198 Flow = int64_t(Jump.Weight) + AuxFlow;
1199 else
1200 Flow = int64_t(Jump.Weight) + (AuxFlow > 0 ? AuxFlow : 0);
1201
1202 Jump.Flow = Flow;
1203 assert(Flow >= 0 && "negative jump flow");
1204 }
1205
1206 // Extract resulting block counts
1207 auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
1208 auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
1209 for (auto &Jump : Func.Jumps) {
1210 InFlow[Jump.Target] += Jump.Flow;
1211 OutFlow[Jump.Source] += Jump.Flow;
1212 }
1213 for (uint64_t B = 0; B < NumBlocks; B++) {
1214 auto &Block = Func.Blocks[B];
1215 Block.Flow = std::max(OutFlow[B], InFlow[B]);
1216 }
1217}
1218
1219#ifndef NDEBUG
1220/// Verify that the provided block/jump weights are as expected.
1221void verifyInput(const FlowFunction &Func) {
1222 // Verify entry and exit blocks
1223 assert(Func.Entry == 0 && Func.Blocks[0].isEntry());
1224 size_t NumExitBlocks = 0;
1225 for (size_t I = 1; I < Func.Blocks.size(); I++) {
1226 assert(!Func.Blocks[I].isEntry() && "multiple entry blocks");
1227 if (Func.Blocks[I].isExit())
1228 NumExitBlocks++;
1229 }
1230 assert(NumExitBlocks > 0 && "cannot find exit blocks");
1231
1232 // Verify that there are no parallel edges
1233 for (auto &Block : Func.Blocks) {
1234 std::unordered_set<uint64_t> UniqueSuccs;
1235 for (auto &Jump : Block.SuccJumps) {
1236 auto It = UniqueSuccs.insert(Jump->Target);
1237 assert(It.second && "input CFG contains parallel edges");
1238 }
1239 }
1240 // Verify CFG jumps
1241 for (auto &Block : Func.Blocks) {
1242 assert((!Block.isEntry() || !Block.isExit()) &&
1243 "a block cannot be an entry and an exit");
1244 }
1245 // Verify input block weights
1246 for (auto &Block : Func.Blocks) {
1247 assert((!Block.HasUnknownWeight || Block.Weight == 0 || Block.isEntry()) &&
1248 "non-zero weight of a block w/o weight except for an entry");
1249 }
1250 // Verify input jump weights
1251 for (auto &Jump : Func.Jumps) {
1252 assert((!Jump.HasUnknownWeight || Jump.Weight == 0) &&
1253 "non-zero weight of a jump w/o weight");
1254 }
1255}
1256
1257/// Verify that the computed flow values satisfy flow conservation rules.
1258void verifyOutput(const FlowFunction &Func) {
1259 const uint64_t NumBlocks = Func.Blocks.size();
1260 auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
1261 auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
1262 for (const auto &Jump : Func.Jumps) {
1263 InFlow[Jump.Target] += Jump.Flow;
1264 OutFlow[Jump.Source] += Jump.Flow;
1265 }
1266
1267 uint64_t TotalInFlow = 0;
1268 uint64_t TotalOutFlow = 0;
1269 for (uint64_t I = 0; I < NumBlocks; I++) {
1270 auto &Block = Func.Blocks[I];
1271 if (Block.isEntry()) {
1272 TotalInFlow += Block.Flow;
1273 assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
1274 } else if (Block.isExit()) {
1275 TotalOutFlow += Block.Flow;
1276 assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
1277 } else {
1278 assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
1279 assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
1280 }
1281 }
1282 assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow");
1283
1284 // Verify that there are no isolated flow components
1285 // One could modify FlowFunction to hold edges indexed by the sources, which
1286 // will avoid a creation of the object
1287 auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks);
1288 for (const auto &Jump : Func.Jumps) {
1289 if (Jump.Flow > 0) {
1290 PositiveFlowEdges[Jump.Source].push_back(Jump.Target);
1291 }
1292 }
1293
1294 // Run BFS from the source along edges with positive flow
1295 std::queue<uint64_t> Queue;
1296 auto Visited = BitVector(NumBlocks, false);
1297 Queue.push(Func.Entry);
1298 Visited[Func.Entry] = true;
1299 while (!Queue.empty()) {
1300 uint64_t Src = Queue.front();
1301 Queue.pop();
1302 for (uint64_t Dst : PositiveFlowEdges[Src]) {
1303 if (!Visited[Dst]) {
1304 Queue.push(Dst);
1305 Visited[Dst] = true;
1306 }
1307 }
1308 }
1309
1310 // Verify that every block that has a positive flow is reached from the source
1311 // along edges with a positive flow
1312 for (uint64_t I = 0; I < NumBlocks; I++) {
1313 auto &Block = Func.Blocks[I];
1314 assert((Visited[I] || Block.Flow == 0) && "an isolated flow component");
1315 }
1316}
1317#endif
1318
1319} // end of anonymous namespace
1320
1321/// Apply the profile inference algorithm for a given function and provided
1322/// profi options
1324 // Check if the function has samples and assign initial flow values
1325 bool HasSamples = false;
1326 for (FlowBlock &Block : Func.Blocks) {
1327 if (Block.Weight > 0)
1328 HasSamples = true;
1329 Block.Flow = Block.Weight;
1330 }
1331 for (FlowJump &Jump : Func.Jumps) {
1332 if (Jump.Weight > 0)
1333 HasSamples = true;
1334 Jump.Flow = Jump.Weight;
1335 }
1336
1337 // Quit early for functions with a single block or ones w/o samples
1338 if (Func.Blocks.size() <= 1 || !HasSamples)
1339 return;
1340
1341#ifndef NDEBUG
1342 // Verify the input data
1343 verifyInput(Func);
1344#endif
1345
1346 // Create and apply an inference network model
1347 auto InferenceNetwork = MinCostMaxFlow(Params);
1348 initializeNetwork(Params, InferenceNetwork, Func);
1349 InferenceNetwork.run();
1350
1351 // Extract flow values for every block and every edge
1352 extractWeights(Params, InferenceNetwork, Func);
1353
1354 // Post-processing adjustments to the flow
1355 auto Adjuster = FlowAdjuster(Params, Func);
1356 Adjuster.run();
1357
1358#ifndef NDEBUG
1359 // Verify the result
1360 verifyOutput(Func);
1361#endif
1362}
1363
1364/// Apply the profile inference algorithm for a given flow function
1366 ProfiParams Params;
1367 // Set the params from the command-line flags.
1368 Params.EvenFlowDistribution = SampleProfileEvenFlowDistribution;
1369 Params.RebalanceUnknown = SampleProfileRebalanceUnknown;
1370 Params.JoinIslands = SampleProfileJoinIslands;
1371 Params.CostBlockInc = SampleProfileProfiCostBlockInc;
1372 Params.CostBlockDec = SampleProfileProfiCostBlockDec;
1373 Params.CostBlockEntryInc = SampleProfileProfiCostBlockEntryInc;
1374 Params.CostBlockEntryDec = SampleProfileProfiCostBlockEntryDec;
1375 Params.CostBlockZeroInc = SampleProfileProfiCostBlockZeroInc;
1376 Params.CostBlockUnknownInc = SampleProfileProfiCostBlockUnknownInc;
1377
1378 applyFlowInference(Params, Func);
1379}
static const LLT S1
This file implements the BitVector class.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
static void addEdge(SmallVectorImpl< LazyCallGraph::Edge > &Edges, DenseMap< LazyCallGraph::Node *, int > &EdgeIndexMap, LazyCallGraph::Node &N, LazyCallGraph::Edge::Kind EK)
#define I(x, y, z)
Definition: MD5.cpp:58
#define T1
Annotate SI Control Flow
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file provides the interface for the profile inference algorithm, profi.
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
void push_back(bool Val)
Definition: BitVector.h:466
Target - Wrapper for Target specific information.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1680
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void applyFlowInference(const ProfiParams &Params, FlowFunction &Func)
Apply the profile inference algorithm for a given function and provided profi options.
#define INF
A wrapper of a binary basic block.
std::vector< FlowJump * > SuccJumps
A wrapper of binary function with basic blocks and jumps.
A wrapper of a jump between two basic blocks.
Various thresholds and options controlling the behavior of the profile inference algorithm.
unsigned CostJumpUnknownFTInc
The cost of increasing an unknown fall-through jump's count by one.
unsigned CostBlockInc
The cost of increasing a block's count by one.
unsigned CostJumpFTInc
The cost of increasing a fall-through jump's count by one.
bool RebalanceUnknown
Evenly re-distribute flow among unknown subgraphs.
const int64_t CostUnlikely
The cost of taking an unlikely block/jump.
unsigned CostJumpDec
The cost of decreasing a jump's count by one.
bool JoinIslands
Join isolated components having positive flow.
unsigned CostBlockZeroInc
The cost of increasing a count of zero-weight block by one.
unsigned CostBlockEntryDec
The cost of decreasing the entry block's count by one.
unsigned CostJumpInc
The cost of increasing a jump's count by one.
unsigned CostJumpUnknownInc
The cost of increasing an unknown jump's count by one.
unsigned CostBlockUnknownInc
The cost of increasing an unknown block's count by one.
unsigned CostJumpFTDec
The cost of decreasing a fall-through jump's count by one.
unsigned CostBlockDec
The cost of decreasing a block's count by one.
unsigned CostBlockEntryInc
The cost of increasing the entry block's count by one.
bool EvenFlowDistribution
Evenly distribute flow when there are multiple equally likely options.