#include <unordered_set>

#define DEBUG_TYPE "sample-profile-inference"
// Command-line options controlling the inference algorithm; only the option
// descriptions are present in this excerpt, the cl::opt declarations
// themselves are elided.
    cl::desc("Try to evenly distribute flow when there are multiple equally "
             "likely options."));

    cl::desc("Evenly re-distribute flow among unknown subgraphs."));

    cl::desc("Join isolated components having positive flow."));

    cl::desc("The cost of increasing a block's count by one."));

    cl::desc("The cost of decreasing a block's count by one."));

    cl::desc("The cost of increasing the entry block's count by one."));

    cl::desc("The cost of decreasing the entry block's count by one."));

    cl::desc("The cost of increasing the count of a zero-weight block by one."));

    cl::desc("The cost of increasing an unknown block's count by one."));
static constexpr int64_t INF = ((int64_t)1) << 50;
/// The minimum-cost maximum-flow solver used for the inference.
class MinCostMaxFlow {
public:
  MinCostMaxFlow(const ProfiParams &Params) : Params(Params) {}

  // Initialize the network data structures for NodeCount nodes.
    Nodes = std::vector<Node>(NodeCount);
    Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>());
      AugmentingEdges =
          std::vector<std::vector<Edge *>>(NodeCount, std::vector<Edge *>());
  // Run the augmentation and report the total flow and its cost.
    LLVM_DEBUG(dbgs() << "Starting profi for " << Nodes.size() << " nodes\n");

    size_t AugmentationIters = applyFlowAugmentation();

    // Compute the total flow and its cost.
    int64_t TotalCost = 0;
    int64_t TotalFlow = 0;
    for (uint64_t Src = 0; Src < Nodes.size(); Src++) {
      for (auto &Edge : Edges[Src]) {
        if (Edge.Flow > 0) {
          TotalCost += Edge.Cost * Edge.Flow;
          if (Src == Source)
            TotalFlow += Edge.Flow;
        }
      }
    }

    LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters
                      << " iterations with " << TotalFlow << " total flow"
                      << " of " << TotalCost << " cost\n");
    (void)AugmentationIters;
  // Add a network edge Src->Dst with the given capacity and cost. A paired
  // reverse edge of zero capacity and negated cost is created so that flow
  // can later be canceled; RevEdgeIndex cross-links the two copies.
    assert(Capacity > 0 && "adding an edge of zero capacity");
    assert(Src != Dst && "loop edges are not supported");

    SrcEdge.Capacity = Capacity;
    SrcEdge.RevEdgeIndex = Edges[Dst].size();

    DstEdge.Cost = -Cost;
    DstEdge.Capacity = 0;
    DstEdge.RevEdgeIndex = Edges[Src].size();

    Edges[Src].push_back(SrcEdge);
    Edges[Dst].push_back(DstEdge);
  // Get the flow leaving a given source node as (target, amount) pairs.
  std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const {
    std::vector<std::pair<uint64_t, int64_t>> Flow;
    for (const auto &Edge : Edges[Src]) {
      if (Edge.Flow > 0)
        Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow));

  // Get the total flow between a pair of nodes.
  int64_t getFlow(uint64_t Src, uint64_t Dst) const {
    for (const auto &Edge : Edges[Src]) {
      if (Edge.Dst == Dst) {
  // Iteratively find an augmenting path/DAG in the network and send flow
  // along its edges; return the number of applied iterations.
  size_t applyFlowAugmentation() {
    size_t AugmentationIters = 0;
    while (findAugmentingPath()) {
      uint64_t PathCapacity = computeAugmentingPathCapacity();
      while (PathCapacity > 0) {
        bool Progress = false;
          // Identify node/edge candidates for augmentation.
          identifyShortestEdges(PathCapacity);

          // Find an augmenting DAG and apply the augmentation along it.
          auto AugmentingOrder = findAugmentingDAG();
          Progress = augmentFlowAlongDAG(AugmentingOrder);
          PathCapacity = computeAugmentingPathCapacity();

        // If no DAG augmentation was made, augment along the single path.
          augmentFlowAlongPath(PathCapacity);

    return AugmentationIters;
  // Compute the residual capacity of the canonical augmenting path; zero
  // means the path is saturated.
  uint64_t computeAugmentingPathCapacity() {
    uint64_t PathCapacity = INF;
    uint64_t Now = Target;
    while (Now != Source) {
      uint64_t Pred = Nodes[Now].ParentNode;
      auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];

      assert(Edge.Capacity >= Edge.Flow && "incorrect edge flow");
      uint64_t EdgeCapacity = uint64_t(Edge.Capacity - Edge.Flow);
      PathCapacity = std::min(PathCapacity, EdgeCapacity);

      Now = Pred;
    }
    return PathCapacity;
  }
  // Check for the existence of an augmenting path with positive capacity,
  // using a queue-based shortest-path search over the residual network.
  bool findAugmentingPath() {
    // Initialize the node attributes.
    for (auto &Node : Nodes) {
      Node.Distance = INF;
      Node.Taken = false;
    }

    std::queue<uint64_t> Queue;
    Queue.push(Source);
    Nodes[Source].Distance = 0;
    Nodes[Source].Taken = true;
    while (!Queue.empty()) {
      uint64_t Src = Queue.front();
      Queue.pop();
      Nodes[Src].Taken = false;

      // Skip nodes that cannot lie on a shortest Source->Target path.
      if (Nodes[Src].Distance > Nodes[Target].Distance)
        continue;

      // Process the adjacent edges.
      for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) {
        auto &Edge = Edges[Src][EdgeIdx];
        if (Edge.Flow < Edge.Capacity) {
          uint64_t Dst = Edge.Dst;
          int64_t NewDistance = Nodes[Src].Distance + Edge.Cost;
          if (Nodes[Dst].Distance > NewDistance) {
            // Update the distance and the parent node/edge.
            Nodes[Dst].Distance = NewDistance;
            Nodes[Dst].ParentNode = Src;
            Nodes[Dst].ParentEdgeIndex = EdgeIdx;
            // Add the node to the queue, if it is not there yet.
            if (!Nodes[Dst].Taken) {
              Queue.push(Dst);
              Nodes[Dst].Taken = true;
  // Update the current flow along the canonical augmenting path.
  void augmentFlowAlongPath(uint64_t PathCapacity) {
    assert(PathCapacity > 0 && "found an incorrect augmenting path");
    uint64_t Now = Target;
    while (Now != Source) {
      uint64_t Pred = Nodes[Now].ParentNode;
      auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
      auto &RevEdge = Edges[Now][Edge.RevEdgeIndex];

      Edge.Flow += PathCapacity;
      RevEdge.Flow -= PathCapacity;

      Now = Pred;
    }
  }
  // Find an augmenting DAG: run a DFS from Source over edges marked as lying
  // on a shortest path, allowing each node to be re-entered a bounded number
  // of times, and return the Taken nodes in an order that starts with Source
  // and ends with Target.
  std::vector<uint64_t> findAugmentingDAG() {
    // A pair (NodeIdx, EdgeIdx) on the stack denotes that we are visiting
    // Nodes[NodeIdx] and the next edge to scan is Edges[NodeIdx][EdgeIdx].
    typedef std::pair<uint64_t, uint64_t> StackItemType;
    std::stack<StackItemType> Stack;
    std::vector<uint64_t> AugmentingOrder;

    // Initialize the node attributes and the DFS timer.
    for (auto &Node : Nodes) {
      Node.Discovery = 0;
      Node.Finish = 0;
      Node.NumCalls = 0;
      Node.Taken = false;
    }
    uint64_t Time = 0;
    // The Taken attribute is propagated backwards from Target towards Source.
    Nodes[Target].Taken = true;

    // Start the DFS traversal from Source.
    Stack.emplace(Source, 0);
    Nodes[Source].Discovery = ++Time;
    while (!Stack.empty()) {
      auto NodeIdx = Stack.top().first;
      auto EdgeIdx = Stack.top().second;

      // If we haven't scanned all edges out of NodeIdx, continue scanning.
      if (EdgeIdx < Edges[NodeIdx].size()) {
        auto &Edge = Edges[NodeIdx][EdgeIdx];
        auto &Dst = Nodes[Edge.Dst];
        Stack.top().second++;

        if (Edge.OnShortestPath) {
          // If we haven't seen Edge.Dst so far, continue the DFS there.
          if (Dst.Discovery == 0 && Dst.NumCalls < MaxDfsCalls) {
            Dst.Discovery = ++Time;
            Stack.emplace(Edge.Dst, 0);
          } else if (Dst.Taken && Dst.Finish != 0) {
            // Edge.Dst already has a path to Target, so NodeIdx does too.
            Nodes[NodeIdx].Taken = true;

      // Otherwise we are done scanning the edges out of NodeIdx.
        // If no path from NodeIdx to Target was found, forget the node.
        if (!Nodes[NodeIdx].Taken) {
          Nodes[NodeIdx].Discovery = 0;
        } else {
          // Finish NodeIdx and propagate the Taken flag to its DFS parent.
          Nodes[NodeIdx].Finish = ++Time;
          if (NodeIdx != Source) {
            assert(!Stack.empty() && "empty stack while running dfs");
            Nodes[Stack.top().first].Taken = true;
          }
          AugmentingOrder.push_back(NodeIdx);

    // Reverse to obtain the order from Source to Target.
    std::reverse(AugmentingOrder.begin(), AugmentingOrder.end());

    // Extract all forward (DAG) edges and fill in AugmentingEdges.
    for (size_t Src : AugmentingOrder) {
      AugmentingEdges[Src].clear();
      for (auto &Edge : Edges[Src]) {
        uint64_t Dst = Edge.Dst;
        if (Edge.OnShortestPath && Nodes[Src].Taken && Nodes[Dst].Taken &&
            Nodes[Dst].Finish < Nodes[Src].Finish) {
          AugmentingEdges[Src].push_back(&Edge);
        }
      }
      assert((Src == Target || !AugmentingEdges[Src].empty()) &&
             "incorrectly constructed augmenting edges");
    }

    return AugmentingOrder;
  // Update the current flow along the augmenting DAG specified by the
  // ordering of its nodes (as provided by findAugmentingDAG). Return true
  // iff the flow changed, that is, at least one edge became saturated.
  bool augmentFlowAlongDAG(const std::vector<uint64_t> &AugmentingOrder) {
    // Phase 0: initialization.
    for (uint64_t Src : AugmentingOrder) {
      Nodes[Src].FracFlow = 0;
      Nodes[Src].IntFlow = 0;
      for (auto &Edge : AugmentingEdges[Src]) {
        Edge->AugmentedFlow = 0;

    // Phase 1: send a unit of fractional flow from Source to Target to find
    // the maximum integral amount that can be pushed along the DAG.
    uint64_t MaxFlowAmount = INF;
    Nodes[Source].FracFlow = 1.0;
    for (uint64_t Src : AugmentingOrder) {
      assert((Src == Target || Nodes[Src].FracFlow > 0.0) &&
             "incorrectly computed fractional flow");
      // Distribute flow evenly among the successors of Src.
      uint64_t Degree = AugmentingEdges[Src].size();
      for (auto &Edge : AugmentingEdges[Src]) {
        double EdgeFlow = Nodes[Src].FracFlow / Degree;
        Nodes[Edge->Dst].FracFlow += EdgeFlow;
        if (Edge->Capacity == INF)
          continue;
        uint64_t MaxIntFlow = double(Edge->Capacity - Edge->Flow) / EdgeFlow;
        MaxFlowAmount = std::min(MaxFlowAmount, MaxIntFlow);

    if (MaxFlowAmount == 0)
      return false;

    // Phase 2: propagate an integral flow of MaxFlowAmount from Source to
    // Target, rounding the per-edge amounts up so that all flow is pushed.
    Nodes[Source].IntFlow = MaxFlowAmount;
    for (uint64_t Src : AugmentingOrder) {
      uint64_t Degree = AugmentingEdges[Src].size();
      uint64_t SuccFlow = (Nodes[Src].IntFlow + Degree - 1) / Degree;
      for (auto &Edge : AugmentingEdges[Src]) {
        uint64_t Dst = Edge->Dst;
        uint64_t EdgeFlow = std::min(Nodes[Src].IntFlow, SuccFlow);
        EdgeFlow = std::min(EdgeFlow, uint64_t(Edge->Capacity - Edge->Flow));
        Nodes[Dst].IntFlow += EdgeFlow;
        Nodes[Src].IntFlow -= EdgeFlow;
        Edge->AugmentedFlow += EdgeFlow;

    Nodes[Target].IntFlow = 0;

    // Phase 3: because of the rounding, some excess flow may remain; send it
    // back by traversing the nodes in reverse order, only canceling flow that
    // was just augmented (AugmentedFlow).
    for (size_t Idx = AugmentingOrder.size() - 1; Idx > 0; Idx--) {
      uint64_t Src = AugmentingOrder[Idx - 1];
      for (auto &Edge : AugmentingEdges[Src]) {
        uint64_t Dst = Edge->Dst;
        if (Nodes[Dst].IntFlow == 0)
          continue;
        uint64_t EdgeFlow = std::min(Nodes[Dst].IntFlow, Edge->AugmentedFlow);
        Nodes[Dst].IntFlow -= EdgeFlow;
        Nodes[Src].IntFlow += EdgeFlow;
        Edge->AugmentedFlow -= EdgeFlow;

    // Phase 4: update the flow values along all edges and their reverse
    // copies; the augmentation succeeded iff some edge became saturated.
    bool HasSaturatedEdges = false;
    for (uint64_t Src : AugmentingOrder) {
      // Verify that all excess flow was sent back from the node.
      assert(Src == Source || Nodes[Src].IntFlow == 0);
      for (auto &Edge : AugmentingEdges[Src]) {
        assert(uint64_t(Edge->Capacity - Edge->Flow) >= Edge->AugmentedFlow);
        auto &RevEdge = Edges[Edge->Dst][Edge->RevEdgeIndex];
        Edge->Flow += Edge->AugmentedFlow;
        RevEdge.Flow -= Edge->AugmentedFlow;
        if (Edge->Capacity == Edge->Flow && Edge->AugmentedFlow > 0)
          HasSaturatedEdges = true;

    return HasSaturatedEdges;
  // Identify candidate (shortest-path) edges for the DAG augmentation.
  void identifyShortestEdges(uint64_t PathCapacity) {
    assert(PathCapacity > 0 && "found an incorrect augmenting DAG");
    // Prune edges whose residual capacity is below a fraction of the
    // capacity of the augmenting path.
    uint64_t MinCapacity = std::max(PathCapacity / 2, uint64_t(1));

    // Decide which edges are on a shortest path from Source to Target.
    for (size_t Src = 0; Src < Nodes.size(); Src++) {
      // An edge cannot be augmenting if its endpoint has a large distance.
      if (Nodes[Src].Distance > Nodes[Target].Distance)
        continue;

      for (auto &Edge : Edges[Src]) {
        uint64_t Dst = Edge.Dst;
        Edge.OnShortestPath =
            Src != Target && Dst != Source &&
            Nodes[Dst].Distance <= Nodes[Target].Distance &&
            Nodes[Dst].Distance == Nodes[Src].Distance + Edge.Cost &&
            Edge.Capacity > Edge.Flow &&
            uint64_t(Edge.Capacity - Edge.Flow) >= MinCapacity;
  /// Maximum number of DFS visits per node while constructing the DAG.
  static constexpr uint64_t MaxDfsCalls = 10;

  /// The nodes of the network.
  std::vector<Node> Nodes;
  /// The adjacency lists of network edges.
  std::vector<std::vector<Edge>> Edges;
  /// The candidate edges for augmentation, per node.
  std::vector<std::vector<Edge *>> AugmentingEdges;
};
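// Illustrative usage sketch (not from the original source): how a client of
// MinCostMaxFlow might drive the solver. It assumes the four-argument
// addEdge(Src, Dst, Capacity, Cost) overload shown above and a
// default-constructed ProfiParams; the tiny 4-node chain is only an example.
//
//   ProfiParams Params;
//   MinCostMaxFlow Network(Params);
//   Network.initialize(/*NodeCount=*/4, /*Source=*/0, /*Target=*/3);
//   Network.addEdge(0, 1, /*Capacity=*/5, /*Cost=*/1);
//   Network.addEdge(1, 2, /*Capacity=*/5, /*Cost=*/1);
//   Network.addEdge(2, 3, /*Capacity=*/5, /*Cost=*/1);
//   Network.run();
//   int64_t Sent = Network.getFlow(/*Src=*/0, /*Dst=*/1); // expect 5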
/// Post-processing adjustments of the computed control flow.
class FlowAdjuster {
public:
  FlowAdjuster(const ProfiParams &Params, FlowFunction &Func)
      : Params(Params), Func(Func) {}

  // Apply the post-processing: join isolated components carrying positive
  // flow and rebalance unknown subgraphs, as enabled by the parameters.
  void run() {
      joinIsolatedComponents();

      rebalanceUnknownSubgraphs();
  // Connect isolated components of the graph that carry positive flow.
  void joinIsolatedComponents() {
    // Find blocks that are reachable from the entry.
    auto Visited = BitVector(NumBlocks(), false);
    findReachable(Func.Entry, Visited);

    // Iterate over all non-reachable blocks and adjust their weights.
    for (uint64_t I = 0; I < NumBlocks(); I++) {
      auto &Block = Func.Blocks[I];
      if (Block.Flow > 0 && !Visited[I]) {
        // Find a path from the entry through block I and increase the flow
        // along it by one unit.
        auto Path = findShortestPath(I);
        assert(Path.size() > 0 && Path[0]->Source == Func.Entry &&
               "incorrectly computed path adjusting control flow");
        Func.Blocks[Func.Entry].Flow += 1;
        for (auto &Jump : Path) {
          Jump->Flow += 1;
          Func.Blocks[Jump->Target].Flow += 1;
          // Update reachability.
          findReachable(Jump->Target, Visited);
  // Breadth-first search from Src over jumps carrying positive flow, marking
  // the reached blocks in Visited (findReachable).
    std::queue<uint64_t> Queue;
    while (!Queue.empty()) {
      for (auto *Jump : Func.Blocks[Src].SuccJumps) {
        uint64_t Dst = Jump->Target;
        if (Jump->Flow > 0 && !Visited[Dst]) {
  // Find a path from the entry to an exit block passing through a given
  // block.
  std::vector<FlowJump *> findShortestPath(uint64_t BlockIdx) {
    // A path from the entry block to BlockIdx.
    auto ForwardPath = findShortestPath(Func.Entry, BlockIdx);
    // A path from BlockIdx to an exit block.
    auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock);

    // Concatenate the two paths.
    std::vector<FlowJump *> Result;
    Result.insert(Result.end(), ForwardPath.begin(), ForwardPath.end());
    Result.insert(Result.end(), BackwardPath.begin(), BackwardPath.end());
    return Result;
  // Apply Dijkstra's algorithm to find the shortest path between a pair of
  // blocks; if Target == AnyExitBlock, the path ends at any exit block.
  std::vector<FlowJump *> findShortestPath(uint64_t Source, uint64_t Target) {
    // Quit early, if possible.
    if (Source == Target)
      return std::vector<FlowJump *>();
    if (Func.Blocks[Source].isExit() && Target == AnyExitBlock)
      return std::vector<FlowJump *>();

    // Initialize the data structures.
    auto Distance = std::vector<int64_t>(NumBlocks(), INF);
    auto Parent = std::vector<FlowJump *>(NumBlocks(), nullptr);
    Distance[Source] = 0;
    std::set<std::pair<uint64_t, uint64_t>> Queue;
    Queue.insert(std::make_pair(Distance[Source], Source));

    // Run the Dijkstra algorithm.
    while (!Queue.empty()) {
      uint64_t Src = Queue.begin()->second;
      Queue.erase(Queue.begin());
      // If a solution has been found, quit early.
      if (Src == Target ||
          (Func.Blocks[Src].isExit() && Target == AnyExitBlock))
        break;

      for (auto *Jump : Func.Blocks[Src].SuccJumps) {
        uint64_t Dst = Jump->Target;
        int64_t JumpDist = jumpDistance(Jump);
        if (Distance[Dst] > Distance[Src] + JumpDist) {
          Queue.erase(std::make_pair(Distance[Dst], Dst));

          Distance[Dst] = Distance[Src] + JumpDist;
          Parent[Dst] = Jump;

          Queue.insert(std::make_pair(Distance[Dst], Dst));
        }
      }
    }

    // If no explicit Target is given, pick the closest exit block.
    if (Target == AnyExitBlock) {
      for (uint64_t I = 0; I < NumBlocks(); I++) {
        if (Func.Blocks[I].isExit() && Parent[I] != nullptr) {
          if (Target == AnyExitBlock || Distance[Target] > Distance[I]) {
            Target = I;
          }
        }
      }
    }
    assert(Parent[Target] != nullptr && "a path does not exist");

    // Extract the constructed path.
    std::vector<FlowJump *> Result;
    uint64_t Now = Target;
    while (Now != Source) {
      assert(Now == Parent[Now]->Target && "incorrect parent jump");
      Result.push_back(Parent[Now]);
      Now = Parent[Now]->Source;
    }
    // Return the path in the forward direction.
    std::reverse(Result.begin(), Result.end());
    return Result;
  // Compute the distance of a jump for the shortest-path search; zero-flow
  // jumps are penalized so that paths through already-hot jumps are preferred.
  int64_t jumpDistance(FlowJump *Jump) const {
    uint64_t BaseDistance =
        std::max(FlowAdjuster::MinBaseDistance,
                 std::min(Func.Blocks[Func.Entry].Flow,
                          /* ... */));
    if (Jump->Flow > 0)
      return BaseDistance + BaseDistance / Jump->Flow;
    return 2 * BaseDistance * (NumBlocks() + 1);
  }
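  // Worked example (illustrative, not from the original source): with the
  // minimum base distance of 10000 in effect, a jump carrying Flow = 4 gets
  // distance 10000 + 10000 / 4 = 12500, while a zero-flow jump in a function
  // with NumBlocks() = 9 gets 2 * 10000 * (9 + 1) = 200000, so the search
  // strongly prefers routing the extra unit of flow along jumps that already
  // carry flow.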
  uint64_t NumBlocks() const { return Func.Blocks.size(); }
  // Iterate over candidate roots and rebalance their unknown subgraphs.
  void rebalanceUnknownSubgraphs() {
    for (const FlowBlock &SrcBlock : Func.Blocks) {
      // Verify if rebalancing rooted at SrcBlock is feasible.
      if (!canRebalanceAtRoot(&SrcBlock))
        continue;

      // Find an unknown subgraph starting at SrcBlock, filling in the known
      // destinations and the intermediate unknown blocks.
      std::vector<FlowBlock *> UnknownBlocks;
      std::vector<FlowBlock *> KnownDstBlocks;
      findUnknownSubgraph(&SrcBlock, KnownDstBlocks, UnknownBlocks);

      // Find the unique destination block of the subgraph (may be null);
      // subgraphs with cycles among unknown blocks cannot be rebalanced.
      FlowBlock *DstBlock = nullptr;
      if (!canRebalanceSubgraph(&SrcBlock, KnownDstBlocks, UnknownBlocks,
                                DstBlock))
        continue;
      if (!isAcyclicSubgraph(&SrcBlock, DstBlock, UnknownBlocks))
        continue;

      rebalanceUnknownSubgraph(&SrcBlock, DstBlock, UnknownBlocks);
  // Verify whether rebalancing rooted at a given block is feasible: the root
  // must have at least one successor with an unknown weight.
  bool canRebalanceAtRoot(const FlowBlock *SrcBlock) {
    bool HasUnknownSuccs = false;
    for (auto *Jump : SrcBlock->SuccJumps) {
      if (Func.Blocks[Jump->Target].HasUnknownWeight) {
        HasUnknownSuccs = true;
        break;
      }
    }
    if (!HasUnknownSuccs)
      return false;
  // Find an unknown subgraph starting at SrcBlock; along the way, fill in
  // the known destination blocks and the intermediate unknown blocks.
  void findUnknownSubgraph(const FlowBlock *SrcBlock,
                           std::vector<FlowBlock *> &KnownDstBlocks,
                           std::vector<FlowBlock *> &UnknownBlocks) {
    // Run BFS from SrcBlock, visiting every block at most once.
    auto Visited = BitVector(NumBlocks(), false);
    std::queue<uint64_t> Queue;

    Queue.push(SrcBlock->Index);
    Visited[SrcBlock->Index] = true;
    while (!Queue.empty()) {
      auto &Block = Func.Blocks[Queue.front()];
      Queue.pop();
      // Process blocks reachable from Block.
      for (auto *Jump : Block.SuccJumps) {
        // If the jump can be ignored, skip it.
        if (ignoreJump(SrcBlock, nullptr, Jump))
          continue;

        uint64_t Dst = Jump->Target;
        if (Visited[Dst])
          continue;
        Visited[Dst] = true;

        // Stop the traversal at known blocks; continue through unknown ones.
        if (!Func.Blocks[Dst].HasUnknownWeight) {
          KnownDstBlocks.push_back(&Func.Blocks[Dst]);
        } else {
          Queue.push(Dst);
          UnknownBlocks.push_back(&Func.Blocks[Dst]);
  // Verify whether the found subgraph can be rebalanced.
  bool canRebalanceSubgraph(const FlowBlock *SrcBlock,
                            const std::vector<FlowBlock *> &KnownDstBlocks,
                            const std::vector<FlowBlock *> &UnknownBlocks,
                            FlowBlock *&DstBlock) {
    // If there are no unknown blocks, there is nothing to rebalance.
    if (UnknownBlocks.empty())
      return false;

    // The subgraph must have a unique known destination (or none at all).
    if (KnownDstBlocks.size() > 1)
      return false;
    DstBlock = KnownDstBlocks.empty() ? nullptr : KnownDstBlocks.front();

    // Verify the sinks of the subgraph.
    for (auto *Block : UnknownBlocks) {
      if (Block->SuccJumps.empty()) {
        // Multiple (known and unknown) sinks cannot be rebalanced.
        if (DstBlock != nullptr)
          return false;
        continue;
      }
      size_t NumIgnoredJumps = 0;
      for (auto *Jump : Block->SuccJumps) {
        if (ignoreJump(SrcBlock, DstBlock, Jump))
          NumIgnoredJumps++;
      }
      // If all successor jumps of the block are ignored, it acts as a sink.
      if (NumIgnoredJumps == Block->SuccJumps.size())
  // ignoreJump(SrcBlock, DstBlock, Jump): decide whether the jump is ignored
  // while processing the unknown subgraph rooted at SrcBlock that ends at
  // DstBlock.
    auto JumpSource = &Func.Blocks[Jump->Source];
    auto JumpTarget = &Func.Blocks[Jump->Target];

    // Do not ignore jumps coming into the destination block.
    if (DstBlock != nullptr && JumpTarget == DstBlock)
      return false;

    // Ignore jumps out of SrcBlock to known blocks.
    if (!JumpTarget->HasUnknownWeight && JumpSource == SrcBlock)
      return true;

    // Ignore jumps to known blocks with zero flow.
    if (!JumpTarget->HasUnknownWeight && JumpTarget->Flow == 0)
      return true;
  // Verify whether the given unknown subgraph is acyclic and, if so, reorder
  // UnknownBlocks topologically so that all jumps are "forward".
                         std::vector<FlowBlock *> &UnknownBlocks) {
    // Extract the local in-degrees within the considered subgraph.
    auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0);
    auto fillInDegree = [&](const FlowBlock *Block) {
      for (auto *Jump : Block->SuccJumps) {
        if (ignoreJump(SrcBlock, DstBlock, Jump))
          continue;
        LocalInDegree[Jump->Target]++;
      }
    };
    fillInDegree(SrcBlock);
    for (auto *Block : UnknownBlocks) {
      fillInDegree(Block);
    }
    // A cycle through SrcBlock makes the subgraph non-rebalanceable.
    if (LocalInDegree[SrcBlock->Index] > 0)
      return false;

    // Run a topological-order traversal from SrcBlock.
    std::vector<FlowBlock *> AcyclicOrder;
    std::queue<uint64_t> Queue;
    Queue.push(SrcBlock->Index);
    while (!Queue.empty()) {
      FlowBlock *Block = &Func.Blocks[Queue.front()];
      Queue.pop();
      // Stop the propagation once we reach DstBlock, if any.
      if (DstBlock != nullptr && Block == DstBlock)
        break;

      // Keep an acyclic order of the unknown blocks.
      if (Block->HasUnknownWeight && Block != SrcBlock)
        AcyclicOrder.push_back(Block);

      // Add all successors with zero remaining local in-degree to the queue.
      for (auto *Jump : Block->SuccJumps) {
        if (ignoreJump(SrcBlock, DstBlock, Jump))
          continue;
        uint64_t Dst = Jump->Target;
        LocalInDegree[Dst]--;
        if (LocalInDegree[Dst] == 0) {
          Queue.push(Dst);

    // If the subgraph contains a cycle, AcyclicOrder covers only a subset
    // of its blocks.
    if (UnknownBlocks.size() != AcyclicOrder.size())
      return false;
    UnknownBlocks = AcyclicOrder;
    return true;
  // Rebalance a subgraph rooted at SrcBlock, ending at DstBlock and having
  // UnknownBlocks as its intermediate blocks.
  void rebalanceUnknownSubgraph(const FlowBlock *SrcBlock,
                                const FlowBlock *DstBlock,
                                const std::vector<FlowBlock *> &UnknownBlocks) {
    assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph");

    // Distribute flow from the source block: its outgoing flow is the sum of
    // the flows along its non-ignored jumps.
    uint64_t BlockFlow = 0;
    for (auto *Jump : SrcBlock->SuccJumps) {
      if (ignoreJump(SrcBlock, DstBlock, Jump))
        continue;
      BlockFlow += Jump->Flow;
    }
    rebalanceBlock(SrcBlock, DstBlock, SrcBlock, BlockFlow);

    // Distribute flow from the remaining blocks.
    for (auto *Block : UnknownBlocks) {
      assert(Block->HasUnknownWeight && "incorrect unknown subgraph");
      // The block's flow is the sum of its incoming flows.
      uint64_t BlockFlow = 0;
      for (auto *Jump : Block->PredJumps) {
        BlockFlow += Jump->Flow;
      }
      Block->Flow = BlockFlow;
      rebalanceBlock(SrcBlock, DstBlock, Block, BlockFlow);
    }
  }

  // Redistribute the flow of Block evenly among its non-ignored successor
  // jumps (rebalanceBlock).
    size_t BlockDegree = 0;
    for (auto *Jump : Block->SuccJumps) {
      if (ignoreJump(SrcBlock, DstBlock, Jump))
        continue;
      BlockDegree++;
    }
    // If all successor jumps of the block are ignored, skip it.
    if (DstBlock == nullptr && BlockDegree == 0)
      return;
    assert(BlockDegree > 0 && "all outgoing jumps are ignored");

    // Each successor gets an equal share, rounded up so that all of the
    // block's flow is propagated.
    uint64_t SuccFlow = (BlockFlow + BlockDegree - 1) / BlockDegree;
    for (auto *Jump : Block->SuccJumps) {
      if (ignoreJump(SrcBlock, DstBlock, Jump))
        continue;
      uint64_t Flow = std::min(SuccFlow, BlockFlow);
      Jump->Flow = Flow;
      BlockFlow -= Flow;
    }
    assert(BlockFlow == 0 && "not all flow is propagated");
  /// The minimum base distance used by jumpDistance.
  static constexpr uint64_t MinBaseDistance = 10000;
};
// Forward declarations: the cost-assignment helpers return a pair of
// (auxiliary cost of increasing, auxiliary cost of decreasing) a count.
std::pair<int64_t, int64_t> assignBlockCosts(const ProfiParams &Params,
                                             const FlowBlock &Block);
std::pair<int64_t, int64_t> assignJumpCosts(const ProfiParams &Params,
                                            const FlowJump &Jump);

// Initialize the minimum-cost flow network from the given flow function.
void initializeNetwork(const ProfiParams &Params, MinCostMaxFlow &Network,
                       FlowFunction &Func) {
  uint64_t NumBlocks = Func.Blocks.size();
  assert(NumBlocks > 1 && "Too few blocks in a function");
  uint64_t NumJumps = Func.Jumps.size();
  assert(NumJumps > 0 && "Too few jumps in a function");
  // Create a network with 2 * NumBlocks + 4 nodes: two nodes per block plus
  // the source/sink pairs (S, T) and (S1, T1).
  Network.initialize(2 * NumBlocks + 4, S1, T1);

  // Create the edges for every block.
  for (uint64_t B = 0; B < NumBlocks; B++) {
    auto &Block = Func.Blocks[B];

    // Connect the entry block to the source and exit blocks to the sink.
    if (Block.isEntry()) {
      Network.addEdge(S, Bin, 0);
    } else if (Block.isExit()) {
      Network.addEdge(Bout, T, 0);
    }

    // Assign the auxiliary costs of increasing/decreasing the block's count.
    auto [AuxCostInc, AuxCostDec] = assignBlockCosts(Params, Block);

    // Connect the block's input and output nodes.
    Network.addEdge(Bin, Bout, AuxCostInc);
    if (Block.Weight > 0) {
      Network.addEdge(Bout, Bin, Block.Weight, AuxCostDec);
      Network.addEdge(S1, Bout, Block.Weight, 0);
    }
  }

  // Create the edges for every jump.
  for (uint64_t J = 0; J < NumJumps; J++) {
    auto &Jump = Func.Jumps[J];

    // Assign the auxiliary costs of increasing/decreasing the jump's count.
    auto [AuxCostInc, AuxCostDec] = assignJumpCosts(Params, Jump);

    // Connect the jump's input and output nodes.
    Network.addEdge(Jin, Jout, AuxCostInc);
    if (Jump.Weight > 0) {
      Network.addEdge(Jout, Jin, Jump.Weight, AuxCostDec);
      Network.addEdge(S1, Jout, Jump.Weight, 0);
      Network.addEdge(Jin, T1, Jump.Weight, 0);
    }
  }

  // Make the flow circulate by connecting the sink back to the source.
  Network.addEdge(T, S, 0);
}
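// For example (illustrative, not from the original source): a function with
// 4 blocks and 5 jumps yields a network of 2 * 4 + 4 = 12 nodes; each block
// contributes its Bin/Bout pair, and the four extra nodes are the two
// source/sink pairs S/T and S1/T1 wired above.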
// Assign the costs of increasing/decreasing a block's count.
std::pair<int64_t, int64_t> assignBlockCosts(const ProfiParams &Params,
                                             const FlowBlock &Block) {
  // Modifying the weight of an unlikely block is expensive.
  if (Block.IsUnlikely)
    return std::make_pair(Params.CostUnlikely, Params.CostUnlikely);

  // Assign the default costs and adjust them based on the block metadata.
  int64_t CostInc = Params.CostBlockInc;
  int64_t CostDec = Params.CostBlockDec;
  if (Block.HasUnknownWeight) {
    CostInc = Params.CostBlockUnknownInc;
    CostDec = 0;
  } else {
    // Increasing the count of a zero-weight block is more expensive.
    if (Block.Weight == 0)
      CostInc = Params.CostBlockZeroInc;
    // Modifying the count of the entry block is the most expensive.
    if (Block.isEntry()) {
      CostInc = Params.CostBlockEntryInc;
      CostDec = Params.CostBlockEntryDec;
    }
  }
  return std::make_pair(CostInc, CostDec);
}

// Assign the costs of increasing/decreasing a jump's count.
std::pair<int64_t, int64_t> assignJumpCosts(const ProfiParams &Params,
                                            const FlowJump &Jump) {

  assert(Jump.Weight > 0 && "found zero-weight jump with a positive weight");
  return std::make_pair(CostInc, CostDec);
}
// Extract the resulting block and jump counts from the network.
void extractWeights(const ProfiParams &Params, MinCostMaxFlow &Network,
                    FlowFunction &Func) {
  uint64_t NumBlocks = Func.Blocks.size();
  uint64_t NumJumps = Func.Jumps.size();

  // Extract the flow value for every jump from its network edge.
  for (uint64_t J = 0; J < NumJumps; J++) {
    auto &Jump = Func.Jumps[J];

    int64_t AuxFlow = Network.getFlow(SrcOut, DstIn);

      Flow = int64_t(Jump.Weight) + (AuxFlow > 0 ? AuxFlow : 0);

  // Extract the flow value for every block: a block's count is the maximum
  // of its incoming and outgoing jump flows.
  auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
  auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
  for (auto &Jump : Func.Jumps) {
    InFlow[Jump.Target] += Jump.Flow;
    OutFlow[Jump.Source] += Jump.Flow;
  }
  for (uint64_t B = 0; B < NumBlocks; B++) {
    auto &Block = Func.Blocks[B];
    Block.Flow = std::max(OutFlow[B], InFlow[B]);
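  // For example (illustrative, not from the original source): if the jumps
  // into a block carry flows 3 and 4 while its outgoing jumps carry 5 and 4,
  // then InFlow = 7, OutFlow = 9, and the block's count is set to
  // max(9, 7) = 9.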
// Verify that the input flow function is well-formed.
  // The function has a single entry block at index 0 and at least one exit.
  assert(Func.Entry == 0 && Func.Blocks[0].isEntry());
  size_t NumExitBlocks = 0;
  for (size_t I = 1; I < Func.Blocks.size(); I++) {
    assert(!Func.Blocks[I].isEntry() && "multiple entry blocks");
    if (Func.Blocks[I].isExit())
      NumExitBlocks++;
  }
  assert(NumExitBlocks > 0 && "cannot find exit blocks");

  // Verify that there are no parallel edges.
  for (auto &Block : Func.Blocks) {
    std::unordered_set<uint64_t> UniqueSuccs;
    for (auto &Jump : Block.SuccJumps) {
      auto It = UniqueSuccs.insert(Jump->Target);
      assert(It.second && "input CFG contains parallel edges");
    }
  }
  // Verify that no block is both an entry and an exit.
  for (auto &Block : Func.Blocks) {
    assert((!Block.isEntry() || !Block.isExit()) &&
           "a block cannot be an entry and an exit");
  }
  // Verify that blocks and jumps without a known weight carry zero weight
  // (except for the entry block).
  for (auto &Block : Func.Blocks) {
    assert((!Block.HasUnknownWeight || Block.Weight == 0 || Block.isEntry()) &&
           "non-zero weight of a block w/o weight except for an entry");
  }
  for (auto &Jump : Func.Jumps) {
    assert((!Jump.HasUnknownWeight || Jump.Weight == 0) &&
           "non-zero weight of a jump w/o weight");
  }
// Verify that the computed flow values satisfy flow conservation and that
// every positive-flow block is reachable from the entry.
  const uint64_t NumBlocks = Func.Blocks.size();
  auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
  auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
  for (const auto &Jump : Func.Jumps) {
    InFlow[Jump.Target] += Jump.Flow;
    OutFlow[Jump.Source] += Jump.Flow;
  }

  uint64_t TotalInFlow = 0;
  uint64_t TotalOutFlow = 0;
  for (uint64_t I = 0; I < NumBlocks; I++) {
    auto &Block = Func.Blocks[I];
    if (Block.isEntry()) {
      TotalInFlow += Block.Flow;
      assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
    } else if (Block.isExit()) {
      TotalOutFlow += Block.Flow;
      assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
    } else {
      assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
      assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
    }
  }
  assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow");

  // Collect the adjacency lists of jumps with positive flow.
  auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks);
  for (const auto &Jump : Func.Jumps) {
    if (Jump.Flow > 0) {
      PositiveFlowEdges[Jump.Source].push_back(Jump.Target);
    }
  }

  // Run BFS from the entry along positive-flow jumps.
  std::queue<uint64_t> Queue;
  auto Visited = BitVector(NumBlocks, false);
  Queue.push(Func.Entry);
  Visited[Func.Entry] = true;
  while (!Queue.empty()) {
    uint64_t Src = Queue.front();
    Queue.pop();
    for (uint64_t Dst : PositiveFlowEdges[Src]) {
      if (!Visited[Dst]) {
        Queue.push(Dst);
        Visited[Dst] = true;
      }
    }
  }

  // Every block carrying positive flow must have been reached.
  for (uint64_t I = 0; I < NumBlocks; I++) {
    auto &Block = Func.Blocks[I];
    assert((Visited[I] || Block.Flow == 0) && "an isolated flow component");
  }
// Apply the profile inference algorithm to a given flow function.
void applyFlowInference(const ProfiParams &Params, FlowFunction &Func) {
  // Check whether the function has any samples attached to its blocks/jumps.
  bool HasSamples = false;
  for (FlowBlock &Block : Func.Blocks) {
    if (Block.Weight > 0)
      HasSamples = true;
  }
  for (FlowJump &Jump : Func.Jumps) {
    if (Jump.Weight > 0)
      HasSamples = true;
  }
  // Quit early for functions with a single block or without any samples.
  if (Func.Blocks.size() <= 1 || !HasSamples)
    return;

  // Create and apply the inference network model.
  auto InferenceNetwork = MinCostMaxFlow(Params);
  initializeNetwork(Params, InferenceNetwork, Func);
  InferenceNetwork.run();

  // Extract the flow values for every block and every edge.
  extractWeights(Params, InferenceNetwork, Func);

  // Post-processing adjustments to the flow.
  auto Adjuster = FlowAdjuster(Params, Func);
  Adjuster.run();
}