14 #ifndef LLVM_ANALYSIS_BLOCKFREQUENCYINFOIMPL_H
15 #define LLVM_ANALYSIS_BLOCKFREQUENCYINFOIMPL_H
51 #define DEBUG_TYPE "block-freq"
60 class BranchProbabilityInfo;
64 class MachineBasicBlock;
65 class MachineBranchProbabilityInfo;
66 class MachineFunction;
68 class MachineLoopInfo;
70 namespace bfi_detail {
126 Mass = Diff > Mass ? 0 : Diff;
131 Mass =
P.scale(Mass);
240 template <
class It1,
class It2>
297 return Loop->Parent->Parent;
315 return L ? L->getHeader() :
Node;
322 while (L->Parent && L->Parent->IsPackaged)
337 return Loop->Parent->Mass;
467 std::list<LoopData>::iterator Insert);
525 std::optional<uint64_t>
527 bool AllowSynthetic =
false)
const;
528 std::optional<uint64_t>
530 bool AllowSynthetic =
false)
const;
541 return Freqs[0].Integer;
545 namespace bfi_detail {
565 template <
class BlockT,
class BFIImplT>
577 auto MachineName =
"BB" +
Twine(
BB->getNumber());
578 if (
BB->getBasicBlock())
579 return (MachineName +
"[" +
BB->getName() +
"]").str();
580 return MachineName.str();
585 return BB->getName().str();
615 using iterator = std::deque<const IrrNode *>::const_iterator;
636 template <
class BlockEdgesAdder>
642 template <
class BlockEdgesAdder>
643 void initialize(
const BFIBase::LoopData *OuterLoop,
654 template <
class BlockEdgesAdder>
658 const BFIBase::LoopData *OuterLoop);
661 template <
class BlockEdgesAdder>
666 for (
auto N : OuterLoop->
Nodes)
676 template <
class BlockEdgesAdder>
686 if (Working.isAPackage())
687 for (
const auto &
I : Working.Loop->Exits)
690 addBlockEdges(*
this, Irr, OuterLoop);
855 using BranchProbabilityInfoT =
864 const BranchProbabilityInfoT *BPI =
nullptr;
865 const LoopInfoT *LI =
nullptr;
866 const FunctionT *
F =
nullptr;
869 std::vector<const BlockT *> RPOT;
872 using rpot_iterator =
typename std::vector<const BlockT *>::const_iterator;
874 rpot_iterator rpot_begin()
const {
return RPOT.
begin(); }
875 rpot_iterator rpot_end()
const {
return RPOT.end(); }
877 size_t getIndex(
const rpot_iterator &
I)
const {
return I - rpot_begin(); }
879 BlockNode getNode(
const rpot_iterator &
I)
const {
880 return BlockNode(getIndex(
I));
883 BlockNode getNode(
const BlockT *
BB)
const {
return Nodes.
lookup(
BB).first; }
885 const BlockT *getBlock(
const BlockNode &
Node)
const {
887 return RPOT[
Node.Index];
893 void initializeRPOT();
902 void initializeLoops();
910 bool propagateMassToSuccessors(LoopData *OuterLoop,
const BlockNode &
Node);
920 bool computeMassInLoop(LoopData &Loop);
930 bool tryToComputeMassInFunction();
944 void computeIrreducibleMass(LoopData *OuterLoop,
945 std::list<LoopData>::iterator
Insert);
956 void computeMassInLoops();
964 void computeMassInFunction();
980 bool needIterativeInference()
const;
983 void applyIterativeInference();
985 using ProbMatrixType = std::vector<std::vector<std::pair<size_t, Scaled64>>>;
988 void iterativeInference(
const ProbMatrixType &ProbMatrix,
989 std::vector<Scaled64> &Freq)
const;
993 void findReachableBlocks(std::vector<const BlockT *> &Blocks)
const;
997 void initTransitionProbabilities(
998 const std::vector<const BlockT *> &Blocks,
999 const DenseMap<const BlockT *, size_t> &BlockIndex,
1000 ProbMatrixType &ProbMatrix)
const;
1005 Scaled64 discrepancy(
const ProbMatrixType &ProbMatrix,
1006 const std::vector<Scaled64> &Freq)
const;
1014 void calculate(
const FunctionT &
F,
const BranchProbabilityInfoT &BPI,
1015 const LoopInfoT &LI);
1023 std::optional<uint64_t>
1025 bool AllowSynthetic =
false)
const {
1030 std::optional<uint64_t>
1032 bool AllowSynthetic =
false)
const {
1054 const BranchProbabilityInfoT &
getBPI()
const {
return *BPI; }
1079 namespace bfi_detail {
1081 template <
class BFIImplT>
1094 BFIImpl->forgetBlock(cast<BasicBlock>(getValPtr()));
1100 template <
class BFIImplT>
1111 const BranchProbabilityInfoT &BPI,
1112 const LoopInfoT &LI) {
1125 <<
"\n================="
1126 << std::string(
F.getName().size(),
'=') <<
"\n");
1132 computeMassInLoops();
1133 computeMassInFunction();
1137 if (needIterativeInference())
1138 applyIterativeInference();
1145 for (
const BlockT &
BB :
F)
1147 setBlockFreq(&
BB, 0);
1159 BlockNode NewNode(Freqs.size());
1161 Freqs.emplace_back();
1167 const BlockT *Entry = &
F->front();
1168 RPOT.reserve(
F->size());
1172 assert(RPOT.size() - 1 <= BlockNode::getMaxIndex() &&
1173 "More nodes in function than Block Frequency Info supports");
1176 for (rpot_iterator
I = rpot_begin(),
E = rpot_end();
I !=
E; ++
I) {
1177 BlockNode
Node = getNode(
I);
1180 Nodes[*
I] = {
Node, BFICallbackVH(*
I,
this)};
1183 Working.reserve(RPOT.size());
1185 Working.emplace_back(
Index);
1186 Freqs.resize(RPOT.size());
1189 template <
class BT>
void BlockFrequencyInfoImpl<BT>::initializeLoops() {
1195 std::deque<std::pair<const LoopT *, LoopData *>> Q;
1196 for (
const LoopT *L : *LI)
1197 Q.emplace_back(L,
nullptr);
1198 while (!Q.empty()) {
1199 const LoopT *Loop = Q.front().first;
1200 LoopData *Parent = Q.front().second;
1203 BlockNode Header = getNode(Loop->getHeader());
1204 assert(Header.isValid());
1206 Loops.emplace_back(Parent, Header);
1207 Working[Header.Index].Loop = &
Loops.back();
1210 for (
const LoopT *L : *Loop)
1211 Q.emplace_back(L, &
Loops.back());
1218 if (Working[
Index].isLoopHeader()) {
1219 LoopData *ContainingLoop = Working[
Index].getContainingLoop();
1221 ContainingLoop->Nodes.push_back(
Index);
1225 const LoopT *Loop = LI->getLoopFor(RPOT[
Index]);
1230 BlockNode Header = getNode(Loop->getHeader());
1231 assert(Header.isValid());
1232 const auto &HeaderData = Working[Header.Index];
1233 assert(HeaderData.isLoopHeader());
1235 Working[
Index].Loop = HeaderData.Loop;
1236 HeaderData.Loop->Nodes.push_back(
Index);
1242 template <
class BT>
void BlockFrequencyInfoImpl<BT>::computeMassInLoops() {
1244 for (
auto L =
Loops.rbegin(),
E =
Loops.rend(); L !=
E; ++L) {
1245 if (computeMassInLoop(*L))
1247 auto Next = std::next(L);
1248 computeIrreducibleMass(&*L, L.base());
1249 L = std::prev(Next);
1250 if (computeMassInLoop(*L))
1257 bool BlockFrequencyInfoImpl<BT>::computeMassInLoop(LoopData &Loop) {
1259 LLVM_DEBUG(
dbgs() <<
"compute-mass-in-loop: " << getLoopName(Loop) <<
"\n");
1261 if (Loop.isIrreducible()) {
1264 unsigned NumHeadersWithWeight = 0;
1265 std::optional<uint64_t> MinHeaderWeight;
1266 DenseSet<uint32_t> HeadersWithoutWeight;
1267 HeadersWithoutWeight.reserve(Loop.NumHeaders);
1269 auto &HeaderNode = Loop.Nodes[
H];
1270 const BlockT *
Block = getBlock(HeaderNode);
1271 IsIrrLoopHeader.set(Loop.Nodes[
H].Index);
1272 std::optional<uint64_t> HeaderWeight =
Block->getIrrLoopHeaderWeight();
1273 if (!HeaderWeight) {
1276 HeadersWithoutWeight.insert(
H);
1280 <<
" has irr loop header weight " << *HeaderWeight
1282 NumHeadersWithWeight++;
1283 uint64_t HeaderWeightValue = *HeaderWeight;
1284 if (!MinHeaderWeight || HeaderWeightValue < MinHeaderWeight)
1285 MinHeaderWeight = HeaderWeightValue;
1286 if (HeaderWeightValue) {
1287 Dist.addLocal(HeaderNode, HeaderWeightValue);
1296 if (!MinHeaderWeight)
1297 MinHeaderWeight = 1;
1298 for (
uint32_t H : HeadersWithoutWeight) {
1299 auto &HeaderNode = Loop.Nodes[
H];
1300 assert(!getBlock(HeaderNode)->getIrrLoopHeaderWeight() &&
1301 "Shouldn't have a weight metadata");
1302 uint64_t MinWeight = *MinHeaderWeight;
1306 Dist.addLocal(HeaderNode, MinWeight);
1308 distributeIrrLoopHeaderMass(Dist);
1309 for (
const BlockNode &M : Loop.Nodes)
1310 if (!propagateMassToSuccessors(&Loop, M))
1312 if (NumHeadersWithWeight == 0)
1314 adjustLoopHeaderMass(Loop);
1316 Working[Loop.getHeader().Index].getMass() = BlockMass::getFull();
1317 if (!propagateMassToSuccessors(&Loop, Loop.getHeader()))
1319 for (
const BlockNode &M : Loop.members())
1320 if (!propagateMassToSuccessors(&Loop, M))
1325 computeLoopScale(Loop);
1331 bool BlockFrequencyInfoImpl<BT>::tryToComputeMassInFunction() {
1334 assert(!Working.empty() &&
"no blocks in function");
1335 assert(!Working[0].isLoopHeader() &&
"entry block is a loop header");
1337 Working[0].getMass() = BlockMass::getFull();
1338 for (rpot_iterator
I = rpot_begin(),
IE = rpot_end();
I !=
IE; ++
I) {
1340 BlockNode
Node = getNode(
I);
1341 if (Working[
Node.Index].isPackaged())
1344 if (!propagateMassToSuccessors(
nullptr,
Node))
1350 template <
class BT>
void BlockFrequencyInfoImpl<BT>::computeMassInFunction() {
1351 if (tryToComputeMassInFunction())
1353 computeIrreducibleMass(
nullptr,
Loops.begin());
1354 if (tryToComputeMassInFunction())
1360 bool BlockFrequencyInfoImpl<BT>::needIterativeInference()
const {
1363 if (!
F->getFunction().hasProfileData())
1367 for (
auto L =
Loops.rbegin(),
E =
Loops.rend(); L !=
E; ++L) {
1368 if (L->isIrreducible())
1374 template <
class BT>
void BlockFrequencyInfoImpl<BT>::applyIterativeInference() {
1379 std::vector<const BlockT *> ReachableBlocks;
1380 findReachableBlocks(ReachableBlocks);
1381 if (ReachableBlocks.empty())
1386 DenseMap<const BlockT *, size_t> BlockIndex;
1388 auto Freq = std::vector<Scaled64>(ReachableBlocks.size());
1390 for (
size_t I = 0;
I < ReachableBlocks.size();
I++) {
1391 const BlockT *
BB = ReachableBlocks[
I];
1393 Freq[
I] = getFloatingBlockFreq(
BB);
1396 assert(!SumFreq.isZero() &&
"empty initial block frequencies");
1398 LLVM_DEBUG(
dbgs() <<
"Applying iterative inference for " <<
F->getName()
1399 <<
" with " << ReachableBlocks.size() <<
" blocks\n");
1402 for (
auto &
Value : Freq) {
1408 ProbMatrixType ProbMatrix;
1409 initTransitionProbabilities(ReachableBlocks, BlockIndex, ProbMatrix);
1412 iterativeInference(ProbMatrix, Freq);
1415 for (
const BlockT &
BB : *
F) {
1416 auto Node = getNode(&
BB);
1417 if (!
Node.isValid())
1419 if (BlockIndex.count(&
BB)) {
1420 Freqs[
Node.Index].Scaled = Freq[BlockIndex[&
BB]];
1422 Freqs[
Node.Index].Scaled = Scaled64::getZero();
1428 void BlockFrequencyInfoImpl<BT>::iterativeInference(
1429 const ProbMatrixType &ProbMatrix, std::vector<Scaled64> &Freq)
const {
1431 "incorrectly specified precision");
1433 const auto Precision =
1439 << discrepancy(ProbMatrix, Freq).
toString() <<
"\n");
1443 auto Successors = std::vector<std::vector<size_t>>(Freq.size());
1444 for (
size_t I = 0;
I < Freq.size();
I++) {
1445 for (
const auto &Jump : ProbMatrix[
I]) {
1446 Successors[Jump.first].push_back(
I);
1454 auto IsActive = BitVector(Freq.size(),
false);
1455 std::queue<size_t> ActiveSet;
1456 for (
size_t I = 0;
I < Freq.size();
I++) {
1465 while (It++ < MaxIterations && !ActiveSet.empty()) {
1466 size_t I = ActiveSet.front();
1468 IsActive[
I] =
false;
1474 Scaled64 OneMinusSelfProb = Scaled64::getOne();
1475 for (
const auto &Jump : ProbMatrix[
I]) {
1476 if (Jump.first ==
I) {
1477 OneMinusSelfProb -= Jump.second;
1479 NewFreq += Freq[Jump.first] * Jump.second;
1482 if (OneMinusSelfProb != Scaled64::getOne())
1483 NewFreq /= OneMinusSelfProb;
1487 auto Change = Freq[
I] >= NewFreq ? Freq[
I] - NewFreq : NewFreq - Freq[
I];
1488 if (Change > Precision) {
1491 for (
size_t Succ : Successors[
I]) {
1492 if (!IsActive[Succ]) {
1493 ActiveSet.push(Succ);
1494 IsActive[Succ] =
true;
1503 LLVM_DEBUG(
dbgs() <<
" Completed " << It <<
" inference iterations"
1504 <<
format(
" (%0.0f per block)",
double(It) / Freq.size())
1508 << discrepancy(ProbMatrix, Freq).
toString() <<
"\n");
1513 void BlockFrequencyInfoImpl<BT>::findReachableBlocks(
1514 std::vector<const BlockT *> &Blocks)
const {
1517 std::queue<const BlockT *>
Queue;
1518 SmallPtrSet<const BlockT *, 8> Reachable;
1519 const BlockT *Entry = &
F->front();
1521 Reachable.insert(Entry);
1522 while (!
Queue.empty()) {
1523 const BlockT *SrcBB =
Queue.front();
1525 for (
const BlockT *DstBB : children<const BlockT *>(SrcBB)) {
1526 auto EP = BPI->getEdgeProbability(SrcBB, DstBB);
1529 if (Reachable.insert(DstBB).second)
1536 SmallPtrSet<const BlockT *, 8> InverseReachable;
1537 for (
const BlockT &
BB : *
F) {
1539 bool HasSucc = GraphTraits<const BlockT *>::child_begin(&
BB) !=
1540 GraphTraits<const BlockT *>::child_end(&
BB);
1541 if (!HasSucc && Reachable.count(&
BB)) {
1543 InverseReachable.insert(&
BB);
1546 while (!
Queue.empty()) {
1547 const BlockT *SrcBB =
Queue.front();
1549 for (
const BlockT *DstBB :
children<Inverse<const BlockT *>>(SrcBB)) {
1550 auto EP = BPI->getEdgeProbability(DstBB, SrcBB);
1553 if (InverseReachable.insert(DstBB).second)
1559 Blocks.reserve(
F->size());
1560 for (
const BlockT &
BB : *
F) {
1561 if (Reachable.count(&
BB) && InverseReachable.count(&
BB)) {
1562 Blocks.push_back(&
BB);
1568 void BlockFrequencyInfoImpl<BT>::initTransitionProbabilities(
1569 const std::vector<const BlockT *> &Blocks,
1570 const DenseMap<const BlockT *, size_t> &BlockIndex,
1571 ProbMatrixType &ProbMatrix)
const {
1572 const size_t NumBlocks = Blocks.size();
1573 auto Succs = std::vector<std::vector<std::pair<size_t, Scaled64>>>(NumBlocks);
1574 auto SumProb = std::vector<Scaled64>(NumBlocks);
1577 for (
size_t Src = 0; Src < NumBlocks; Src++) {
1578 const BlockT *
BB = Blocks[Src];
1579 SmallPtrSet<const BlockT *, 2> UniqueSuccs;
1580 for (
const auto SI : children<const BlockT *>(
BB)) {
1582 if (BlockIndex.find(
SI) == BlockIndex.end())
1585 if (!UniqueSuccs.insert(
SI).second)
1588 auto EP = BPI->getEdgeProbability(
BB,
SI);
1593 Scaled64::getFraction(EP.getNumerator(), EP.getDenominator());
1594 size_t Dst = BlockIndex.find(
SI)->second;
1595 Succs[Src].push_back(std::make_pair(Dst, EdgeProb));
1596 SumProb[Src] += EdgeProb;
1601 ProbMatrix = ProbMatrixType(NumBlocks);
1602 for (
size_t Src = 0; Src < NumBlocks; Src++) {
1604 if (Succs[Src].empty())
1607 assert(!SumProb[Src].
isZero() &&
"Zero sum probability of non-exit block");
1608 for (
auto &Jump : Succs[Src]) {
1609 size_t Dst = Jump.first;
1611 ProbMatrix[Dst].push_back(std::make_pair(Src, Prob / SumProb[Src]));
1616 size_t EntryIdx = BlockIndex.find(&
F->front())->second;
1617 for (
size_t Src = 0; Src < NumBlocks; Src++) {
1618 if (Succs[Src].empty()) {
1619 ProbMatrix[EntryIdx].push_back(std::make_pair(Src, Scaled64::getOne()));
1627 const ProbMatrixType &ProbMatrix,
const std::vector<Scaled64> &Freq)
const {
1628 assert(Freq[0] > 0 &&
"Incorrectly computed frequency of the entry block");
1630 for (
size_t I = 0;
I < ProbMatrix.size();
I++) {
1632 for (
const auto &Jump : ProbMatrix[
I]) {
1633 Sum += Freq[Jump.first] * Jump.second;
1635 Discrepancy += Freq[
I] >= Sum ? Freq[
I] - Sum : Sum - Freq[
I];
1638 return Discrepancy / Freq[0];
1643 namespace bfi_detail {
1645 template <
class BT>
struct BlockEdgesAdder {
1658 for (
const auto Succ : children<const BlockT *>(
BB))
1659 G.addEdge(Irr,
BFI.getNode(Succ), OuterLoop);
1666 void BlockFrequencyInfoImpl<BT>::computeIrreducibleMass(
1667 LoopData *OuterLoop, std::list<LoopData>::iterator Insert) {
1669 if (OuterLoop)
dbgs()
1670 <<
"loop: " << getLoopName(*OuterLoop) <<
"\n";
1671 else dbgs() <<
"function\n");
1673 using namespace bfi_detail;
1677 BlockEdgesAdder<BT> addBlockEdges(*
this);
1678 IrreducibleGraph
G(*
this, OuterLoop, addBlockEdges);
1680 for (
auto &L : analyzeIrreducible(
G, OuterLoop, Insert))
1681 computeMassInLoop(L);
1685 updateLoopWithIrreducible(*OuterLoop);
1695 BlockFrequencyInfoImpl<BT>::propagateMassToSuccessors(LoopData *OuterLoop,
1696 const BlockNode &
Node) {
1700 if (
auto *Loop = Working[
Node.Index].getPackagedLoop()) {
1701 assert(Loop != OuterLoop &&
"Cannot propagate mass in a packaged loop");
1702 if (!addLoopSuccessorsToDist(OuterLoop, *Loop, Dist))
1706 const BlockT *
BB = getBlock(
Node);
1707 for (
auto SI = GraphTraits<const BlockT *>::child_begin(
BB),
1708 SE = GraphTraits<const BlockT *>::child_end(
BB);
1711 Dist, OuterLoop,
Node, getNode(*
SI),
1719 distributeMass(
Node, OuterLoop, Dist);
1727 OS <<
"block-frequency-info: " <<
F->getName() <<
"\n";
1728 for (
const BlockT &
BB : *
F) {
1730 getFloatingBlockFreq(&
BB).print(OS, 5)
1731 <<
", int = " << getBlockFreq(&
BB).getFrequency();
1734 F->getFunction(), getNode(&
BB)))
1736 if (std::optional<uint64_t> IrrLoopHeaderWeight =
1737 BB.getIrrLoopHeaderWeight())
1738 OS <<
", irr_loop_header_weight = " << *IrrLoopHeaderWeight;
1753 for (
auto &Entry : Nodes) {
1754 const BlockT *
BB = Entry.first;
1756 ValidNodes[
BB] = Entry.second.first;
1759 for (
auto &Entry :
Other.Nodes) {
1760 const BlockT *
BB = Entry.first;
1762 OtherValidNodes[
BB] = Entry.second.first;
1765 unsigned NumValidNodes = ValidNodes.
size();
1766 unsigned NumOtherValidNodes = OtherValidNodes.
size();
1767 if (NumValidNodes != NumOtherValidNodes) {
1769 dbgs() <<
"Number of blocks mismatch: " << NumValidNodes <<
" vs "
1770 << NumOtherValidNodes <<
"\n";
1772 for (
auto &Entry : ValidNodes) {
1773 const BlockT *
BB = Entry.first;
1774 BlockNode
Node = Entry.second;
1775 if (OtherValidNodes.
count(
BB)) {
1776 BlockNode OtherNode = OtherValidNodes[
BB];
1777 const auto &Freq = Freqs[
Node.Index];
1778 const auto &OtherFreq =
Other.Freqs[OtherNode.Index];
1779 if (Freq.Integer != OtherFreq.Integer) {
1782 << Freq.Integer <<
" vs " << OtherFreq.Integer <<
"\n";
1787 <<
Node.Index <<
" does not exist in Other.\n";
1796 dbgs() <<
"Other\n";
1807 template <
class BlockFrequencyInfoT,
class BranchProbabilityInfoT>
1820 return G->getFunction()->getName();
1824 unsigned HotPercentThreshold = 0) {
1826 if (!HotPercentThreshold)
1831 for (
NodeIter I = GTraits::nodes_begin(Graph),
1832 E = GTraits::nodes_end(Graph);
1848 OS <<
"color=\"red\"";
1854 GVDAGType GType,
int layout_order = -1) {
1858 if (layout_order != -1)
1859 OS <<
Node->getName() <<
"[" << layout_order <<
"] : ";
1861 OS <<
Node->getName() <<
" : ";
1864 Graph->printBlockFreq(OS,
Node);
1867 OS << Graph->getBlockFreq(
Node).getFrequency();
1870 auto Count = Graph->getBlockProfileCount(
Node);
1879 "never reach this point.");
1885 const BlockFrequencyInfoT *
BFI,
1886 const BranchProbabilityInfoT *BPI,
1887 unsigned HotPercentThreshold = 0) {
1899 if (HotPercentThreshold) {
1904 if (EFreq >= HotFreq) {
1905 OS <<
",color=\"red\"";
1918 #endif // LLVM_ANALYSIS_BLOCKFREQUENCYINFOIMPL_H