54 #define DEBUG_TYPE "block-placement"
56 STATISTIC(NumCondBranches,
"Number of conditional branches");
57 STATISTIC(NumUncondBranches,
"Number of unconditional branches");
59 "Potential frequency of taking conditional branches");
61 "Potential frequency of taking unconditional branches");
64 cl::desc(
"Force the alignment of all "
65 "blocks in the function."),
69 "align-all-nofallthru-blocks",
70 cl::desc(
"Force the alignment of all "
71 "blocks that have no fall-through predecessors (i.e. don't add "
72 "nops that are executed)."),
77 "block-placement-exit-block-bias",
78 cl::desc(
"Block frequency percentage a loop exit block needs "
79 "over the original exit to be considered the new exit."),
86 "outline-optional-branches",
87 cl::desc(
"Outlining optional branches will place blocks that are optional "
88 "branches, i.e. branches with a common post dominator, outside "
89 "the hot path or chain"),
93 "outline-optional-threshold",
94 cl::desc(
"Don't outline optional branches that are a single block with an "
95 "instruction count below this threshold"),
99 "loop-to-cold-block-ratio",
100 cl::desc(
"Outline loop blocks from loop chain if (frequency of loop) / "
101 "(frequency of block) is greater than this ratio"),
106 cl::desc(
"Model the cost of loop rotation more "
107 "precisely by using profile data."),
111 cl::desc(
"Force the use of precise cost "
112 "loop rotation strategy."),
117 cl::desc(
"Cost that models the probabilistic risk of an instruction "
118 "misfetch due to a jump comparing to falling through, whose cost "
123 cl::desc(
"Cost of jump instructions."),
127 cl::desc(
"Perform tail duplication during placement. "
128 "Creates more fallthrough opportunites in "
129 "outline branches."),
134 cl::desc(
"Perform branch folding during placement. "
135 "Reduces code size."),
140 "tail-dup-placement-threshold",
141 cl::desc(
"Instruction cutoff for tail duplication during layout. "
142 "Tail merging during layout is forced to have a threshold "
143 "that won't conflict."),
cl::init(2),
180 BlockToChainMapType &BlockToChain;
189 : Blocks(1, BB), BlockToChain(BlockToChain), UnscheduledPredecessors(0) {
190 assert(BB &&
"Cannot create a chain with a null basic block");
191 BlockToChain[BB] =
this;
201 iterator
end() {
return Blocks.
end(); }
225 assert(!BlockToChain[BB]);
226 Blocks.push_back(BB);
227 BlockToChain[BB] =
this;
231 assert(BB == *Chain->begin());
232 assert(Chain->begin() != Chain->end());
237 Blocks.push_back(ChainBB);
238 assert(BlockToChain[ChainBB] == Chain &&
"Incoming blocks not in chain");
239 BlockToChain[ChainBB] =
this;
260 unsigned UnscheduledPredecessors;
280 std::unique_ptr<BranchFolder::MBFIWrapper> MBFI;
338 const BlockFilterSet *BlockFilter =
nullptr);
342 void markBlockSuccessors(
344 const BlockFilterSet *BlockFilter =
nullptr);
349 const BlockFilterSet *BlockFilter,
353 const BlockFilterSet *BlockFilter,
356 bool repeatedlyTailDuplicateBlock(
359 BlockChain &Chain, BlockFilterSet *BlockFilter,
362 const BlockChain &Chain,
363 BlockFilterSet *BlockFilter,
365 bool &DuplicatedToPred);
370 const BlockFilterSet *BlockFilter);
373 const BlockFilterSet *BlockFilter);
375 selectBestCandidateBlock(BlockChain &Chain,
378 getFirstUnplacedBlock(
const BlockChain &PlacedChain,
380 const BlockFilterSet *BlockFilter);
389 const BlockFilterSet *BlockFilter);
391 BlockFilterSet *BlockFilter =
nullptr);
393 const BlockFilterSet &LoopBlockSet);
395 const BlockFilterSet &LoopBlockSet);
399 const BlockFilterSet &LoopBlockSet);
400 void rotateLoopWithProfile(BlockChain &LoopChain,
MachineLoop &
L,
401 const BlockFilterSet &LoopBlockSet);
402 void collectMustExecuteBBs();
403 void buildCFGChains();
404 void optimizeBranches();
429 "Branch Probability Basic Block Placement",
false,
false)
445 OS <<
" ('" << BB->
getName() <<
"')";
457 void MachineBlockPlacement::markChainSuccessors(
459 const BlockFilterSet *BlockFilter) {
463 markBlockSuccessors(Chain,
MBB, LoopHeaderBB, BlockFilter);
473 void MachineBlockPlacement::markBlockSuccessors(
475 const BlockFilterSet *BlockFilter) {
481 if (BlockFilter && !BlockFilter->count(Succ))
483 BlockChain &SuccChain = *BlockToChain[Succ];
485 if (&Chain == &SuccChain || Succ == LoopHeaderBB)
490 if (SuccChain.UnscheduledPredecessors == 0 ||
491 --SuccChain.UnscheduledPredecessors > 0)
494 auto *NewBB = *SuccChain.begin();
495 if (NewBB->isEHPad())
496 EHPadWorkList.push_back(NewBB);
498 BlockWorkList.push_back(NewBB);
527 bool SkipSucc =
false;
528 if (Succ->isEHPad() || (BlockFilter && !BlockFilter->count(Succ))) {
531 BlockChain *SuccChain = BlockToChain[Succ];
532 if (SuccChain == &Chain) {
534 }
else if (Succ != *SuccChain->begin()) {
540 AdjustedSumProb -= MBPI->getEdgeProbability(BB, Succ);
545 return AdjustedSumProb;
556 if (SuccProbN >= SuccProbD)
573 bool MachineBlockPlacement::shouldPredBlockBeOutlined(
582 if (SuccProb > HotProb.
getCompl() && UnavoidableBlocks.count(Succ) > 0) {
585 if (Pred == Succ || (BlockFilter && !BlockFilter->count(Pred)) ||
586 BlockToChain[Pred] == &Chain)
589 if (Pred->pred_size() > 1 || *Pred->pred_begin() != BB)
628 bool MachineBlockPlacement::hasBetterLayoutPredecessor(
631 BlockChain &Chain,
const BlockFilterSet *BlockFilter) {
634 if (SuccChain.UnscheduledPredecessors == 0)
753 BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb;
754 bool BadCFGConflict =
false;
757 if (Pred == Succ || BlockToChain[Pred] == &SuccChain ||
758 (BlockFilter && !BlockFilter->count(Pred)) ||
759 BlockToChain[Pred] == &Chain)
777 MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, Succ);
778 if (PredEdgeFreq * HotProb >= CandidateEdgeFreq * HotProb.
getCompl()) {
779 BadCFGConflict =
true;
784 if (BadCFGConflict) {
786 <<
" (prob) (non-cold CFG conflict)\n");
805 const BlockFilterSet *BlockFilter) {
812 auto AdjustedSumProb =
813 collectViableSuccessors(BB, Chain, BlockFilter, Successors);
817 auto RealSuccProb = MBPI->getEdgeProbability(BB, Succ);
822 if (shouldPredBlockBeOutlined(BB, Succ, Chain, BlockFilter, SuccProb,
826 BlockChain &SuccChain = *BlockToChain[Succ];
829 if (hasBetterLayoutPredecessor(BB, Succ, SuccChain, SuccProb, RealSuccProb,
836 << (SuccChain.UnscheduledPredecessors != 0 ?
" (CFG break)" :
"")
839 if (BestSucc && BestProb >= SuccProb) {
840 DEBUG(
dbgs() <<
" Not the best candidate, continuing\n");
844 DEBUG(
dbgs() <<
" Setting it as best candidate\n");
872 return BlockToChain.lookup(BB) == &Chain;
876 if (WorkList.
empty())
879 bool IsEHPad = WorkList[0]->isEHPad();
886 BlockChain &SuccChain = *BlockToChain[
MBB];
887 if (&SuccChain == &Chain)
890 assert(SuccChain.UnscheduledPredecessors == 0 &&
"Found CFG-violating block");
894 MBFI->printBlockFreq(
dbgs(), CandidateFreq) <<
" (freq)\n");
914 if (BestBlock && (IsEHPad ^ (BestFreq >= CandidateFreq)))
918 BestFreq = CandidateFreq;
932 const BlockChain &PlacedChain,
934 const BlockFilterSet *BlockFilter) {
937 if (BlockFilter && !BlockFilter->count(&*
I))
939 if (BlockToChain[&*
I] != &PlacedChain) {
940 PrevUnplacedBlockIt =
I;
944 return *BlockToChain[&*
I]->begin();
950 void MachineBlockPlacement::fillWorkLists(
953 const BlockFilterSet *BlockFilter =
nullptr) {
954 BlockChain &Chain = *BlockToChain[
MBB];
955 if (!UpdatedPreds.
insert(&Chain).second)
958 assert(Chain.UnscheduledPredecessors == 0);
960 assert(BlockToChain[ChainBB] == &Chain);
962 if (BlockFilter && !BlockFilter->count(Pred))
964 if (BlockToChain[Pred] == &Chain)
966 ++Chain.UnscheduledPredecessors;
970 if (Chain.UnscheduledPredecessors != 0)
973 MBB = *Chain.
begin();
975 EHPadWorkList.push_back(MBB);
977 BlockWorkList.push_back(MBB);
980 void MachineBlockPlacement::buildChain(
982 BlockFilterSet *BlockFilter) {
983 assert(BB &&
"BB must not be null.\n");
984 assert(BlockToChain[BB] == &Chain &&
"BlockToChainMap mis-match.\n");
988 markChainSuccessors(Chain, LoopHeaderBB, BlockFilter);
989 BB = *std::prev(Chain.end());
991 assert(BB &&
"null block found at end of chain in loop.");
992 assert(BlockToChain[BB] == &Chain &&
"BlockToChainMap mis-match in loop.");
993 assert(*std::prev(Chain.end()) == BB &&
"BB Not found at end of chain.");
1004 BestSucc = selectBestCandidateBlock(Chain, BlockWorkList);
1006 BestSucc = selectBestCandidateBlock(Chain, EHPadWorkList);
1009 BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockIt, BlockFilter);
1013 DEBUG(
dbgs() <<
"Unnatural loop CFG detected, forcibly merging the "
1014 "layout successor until the CFG reduces\n");
1023 if (repeatedlyTailDuplicateBlock(BestSucc, BB, LoopHeaderBB, Chain,
1024 BlockFilter, PrevUnplacedBlockIt))
1029 BlockChain &SuccChain = *BlockToChain[BestSucc];
1032 SuccChain.UnscheduledPredecessors = 0;
1035 markChainSuccessors(SuccChain, LoopHeaderBB, BlockFilter);
1036 Chain.merge(BestSucc, &SuccChain);
1037 BB = *std::prev(Chain.end());
1040 DEBUG(
dbgs() <<
"Finished forming chain for header block "
1056 const BlockFilterSet &LoopBlockSet) {
1064 if (
F->getFunction()->optForSize())
1070 BlockChain &HeaderChain = *BlockToChain[L.
getHeader()];
1071 if (!LoopBlockSet.count(*HeaderChain.begin()))
1080 if (!LoopBlockSet.count(Pred))
1083 << Pred->succ_size() <<
" successors, ";
1084 MBFI->printBlockFreq(
dbgs(), Pred) <<
" freq\n");
1085 if (Pred->succ_size() > 1)
1089 if (!BestPred || PredFreq > BestPredFreq ||
1090 (!(PredFreq < BestPredFreq) &&
1091 Pred->isLayoutSuccessor(L.
getHeader()))) {
1093 BestPredFreq = PredFreq;
1099 DEBUG(
dbgs() <<
" final top unchanged\n");
1105 (*BestPred->
pred_begin())->succ_size() == 1 &&
1119 MachineBlockPlacement::findBestLoopExit(
MachineLoop &L,
1120 const BlockFilterSet &LoopBlockSet) {
1129 BlockChain &HeaderChain = *BlockToChain[L.
getHeader()];
1130 if (!LoopBlockSet.count(*HeaderChain.begin()))
1134 unsigned BestExitLoopDepth = 0;
1144 BlockChain &Chain = *BlockToChain[
MBB];
1147 if (MBB != *std::prev(Chain.end()))
1156 bool HasLoopingSucc =
false;
1162 BlockChain &SuccChain = *BlockToChain[Succ];
1164 if (&Chain == &SuccChain) {
1170 auto SuccProb = MBPI->getEdgeProbability(MBB, Succ);
1171 if (LoopBlockSet.count(Succ)) {
1174 HasLoopingSucc =
true;
1178 unsigned SuccLoopDepth = 0;
1179 if (
MachineLoop *ExitLoop = MLI->getLoopFor(Succ)) {
1180 SuccLoopDepth = ExitLoop->getLoopDepth();
1181 if (ExitLoop->contains(&L))
1182 BlocksExitingToOuterLoop.
insert(MBB);
1185 BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(MBB) * SuccProb;
1187 <<
getBlockName(Succ) <<
" [L:" << SuccLoopDepth <<
"] (";
1188 MBFI->printBlockFreq(
dbgs(), ExitEdgeFreq) <<
")\n");
1194 if (!ExitingBB || SuccLoopDepth > BestExitLoopDepth ||
1195 ExitEdgeFreq > BestExitEdgeFreq ||
1197 !(ExitEdgeFreq < BestExitEdgeFreq * Bias))) {
1198 BestExitEdgeFreq = ExitEdgeFreq;
1203 if (!HasLoopingSucc) {
1205 ExitingBB = OldExitingBB;
1206 BestExitEdgeFreq = OldBestExitEdgeFreq;
1212 DEBUG(
dbgs() <<
" No other candidate exit blocks, using loop header\n");
1216 DEBUG(
dbgs() <<
" Loop has 1 block, using loop header as exit\n");
1223 if (!BlocksExitingToOuterLoop.
empty() &&
1224 !BlocksExitingToOuterLoop.
count(ExitingBB))
1237 void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
1239 const BlockFilterSet &LoopBlockSet) {
1244 bool ViableTopFallthrough =
false;
1246 BlockChain *PredChain = BlockToChain[Pred];
1247 if (!LoopBlockSet.count(Pred) &&
1248 (!PredChain || Pred == *std::prev(PredChain->end()))) {
1249 ViableTopFallthrough =
true;
1257 if (ViableTopFallthrough) {
1260 BlockChain *SuccChain = BlockToChain[Succ];
1261 if (!LoopBlockSet.count(Succ) &&
1262 (!SuccChain || Succ == *SuccChain->begin()))
1268 if (ExitIt == LoopChain.end())
1271 std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end());
1287 void MachineBlockPlacement::rotateLoopWithProfile(
1288 BlockChain &LoopChain,
MachineLoop &L,
const BlockFilterSet &LoopBlockSet) {
1290 auto HeaderIter =
find(LoopChain, HeaderBB);
1291 auto RotationPos = LoopChain.end();
1310 for (
auto *Pred : HeaderBB->predecessors()) {
1311 BlockChain *PredChain = BlockToChain[Pred];
1312 if (!LoopBlockSet.count(Pred) &&
1313 (!PredChain || Pred == *std::prev(PredChain->end()))) {
1315 MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, HeaderBB);
1316 auto FallThruCost = ScaleBlockFrequency(EdgeFreq,
MisfetchCost);
1319 if (Pred->succ_size() == 1)
1320 FallThruCost += ScaleBlockFrequency(EdgeFreq,
JumpInstCost);
1321 HeaderFallThroughCost = std::max(HeaderFallThroughCost, FallThruCost);
1330 for (
auto BB : LoopChain) {
1333 BlockChain *SuccChain = BlockToChain[Succ];
1334 if (!LoopBlockSet.count(Succ) &&
1335 (!SuccChain || Succ == *SuccChain->begin())) {
1336 auto SuccProb = MBPI->getEdgeProbability(BB, Succ);
1337 LargestExitEdgeProb = std::max(LargestExitEdgeProb, SuccProb);
1341 auto ExitFreq = MBFI->getBlockFreq(BB) * LargestExitEdgeProb;
1349 for (
auto Iter = LoopChain.begin(), TailIter = std::prev(LoopChain.end()),
1350 EndIter = LoopChain.end();
1351 Iter != EndIter; Iter++, TailIter++) {
1354 if (TailIter == LoopChain.end())
1355 TailIter = LoopChain.begin();
1357 auto TailBB = *TailIter;
1365 if (Iter != HeaderIter)
1366 Cost += HeaderFallThroughCost;
1370 for (
auto &ExitWithFreq : ExitsWithFreq)
1371 if (TailBB != ExitWithFreq.first)
1372 Cost += ExitWithFreq.second;
1388 if (TailBB->isSuccessor(*Iter)) {
1389 auto TailBBFreq = MBFI->getBlockFreq(TailBB);
1390 if (TailBB->succ_size() == 1)
1391 Cost += ScaleBlockFrequency(TailBBFreq.getFrequency(),
1393 else if (TailBB->succ_size() == 2) {
1394 auto TailToHeadProb = MBPI->getEdgeProbability(TailBB, *Iter);
1395 auto TailToHeadFreq = TailBBFreq * TailToHeadProb;
1397 ? TailBBFreq * TailToHeadProb.
getCompl()
1399 Cost += ScaleBlockFrequency(TailToHeadFreq,
MisfetchCost) +
1407 if (Cost < SmallestRotationCost) {
1408 SmallestRotationCost = Cost;
1413 if (RotationPos != LoopChain.end()) {
1415 <<
" to the top\n");
1416 std::rotate(LoopChain.begin(), RotationPos, LoopChain.end());
1425 MachineBlockPlacement::collectLoopBlockSet(
MachineLoop &L) {
1426 BlockFilterSet LoopBlockSet;
1437 if (
F->getFunction()->getEntryCount()) {
1439 for (
auto LoopPred : L.
getHeader()->predecessors())
1441 LoopFreq += MBFI->getBlockFreq(LoopPred) *
1442 MBPI->getEdgeProbability(LoopPred, L.
getHeader());
1448 LoopBlockSet.insert(LoopBB);
1453 return LoopBlockSet;
1462 void MachineBlockPlacement::buildLoopChains(
MachineLoop &L) {
1466 buildLoopChains(*InnerLoop);
1468 assert(BlockWorkList.empty());
1469 assert(EHPadWorkList.empty());
1470 BlockFilterSet LoopBlockSet = collectLoopBlockSet(L);
1475 bool RotateLoopWithProfile =
1485 RotateLoopWithProfile ? L.getHeader() : findBestLoopTop(L, LoopBlockSet);
1490 if (!RotateLoopWithProfile && LoopTop == L.getHeader())
1491 PreferredLoopExit = findBestLoopExit(L, LoopBlockSet);
1493 BlockChain &LoopChain = *BlockToChain[LoopTop];
1499 assert(LoopChain.UnscheduledPredecessors == 0);
1500 UpdatedPreds.
insert(&LoopChain);
1503 fillWorkLists(LoopBB, UpdatedPreds, &LoopBlockSet);
1505 buildChain(LoopTop, LoopChain, &LoopBlockSet);
1507 if (RotateLoopWithProfile)
1508 rotateLoopWithProfile(LoopChain, L, LoopBlockSet);
1510 rotateLoop(LoopChain, PreferredLoopExit, LoopBlockSet);
1514 bool BadLoop =
false;
1515 if (LoopChain.UnscheduledPredecessors) {
1517 dbgs() <<
"Loop chain contains a block without its preds placed!\n"
1518 <<
" Loop header: " <<
getBlockName(*L.block_begin()) <<
"\n"
1519 <<
" Chain header: " <<
getBlockName(*LoopChain.begin()) <<
"\n";
1523 if (!LoopBlockSet.remove(ChainBB)) {
1527 dbgs() <<
"Loop chain contains a block not contained by the loop!\n"
1528 <<
" Loop header: " <<
getBlockName(*L.block_begin()) <<
"\n"
1529 <<
" Chain header: " <<
getBlockName(*LoopChain.begin()) <<
"\n"
1534 if (!LoopBlockSet.empty()) {
1537 dbgs() <<
"Loop contains blocks never placed into a chain!\n"
1538 <<
" Loop header: " <<
getBlockName(*L.block_begin()) <<
"\n"
1539 <<
" Chain header: " <<
getBlockName(*LoopChain.begin()) <<
"\n"
1542 assert(!BadLoop &&
"Detected problems with the placement of this loop.");
1545 BlockWorkList.clear();
1546 EHPadWorkList.clear();
1551 void MachineBlockPlacement::collectMustExecuteBBs() {
1557 if (Terminator ==
nullptr)
1560 Terminator = MDT->findNearestCommonDominator(Terminator, &MBB);
1565 UnavoidableBlocks.
clear();
1567 if (MDT->dominates(&MBB, Terminator)) {
1568 UnavoidableBlocks.insert(&MBB);
1574 void MachineBlockPlacement::buildCFGChains() {
1582 new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB);
1595 assert(NextFI != FE &&
"Can't fallthrough past the last block.");
1596 DEBUG(
dbgs() <<
"Pre-merging due to unanalyzable fallthrough: "
1599 Chain->merge(NextBB,
nullptr);
1601 BlocksWithUnanalyzableExits.insert(&*BB);
1609 collectMustExecuteBBs();
1612 PreferredLoopExit =
nullptr;
1614 buildLoopChains(*L);
1616 assert(BlockWorkList.empty());
1617 assert(EHPadWorkList.empty());
1621 fillWorkLists(&MBB, UpdatedPreds);
1623 BlockChain &FunctionChain = *BlockToChain[&F->front()];
1624 buildChain(&F->front(), FunctionChain);
1631 bool BadFunc =
false;
1632 FunctionBlockSetType FunctionBlockSet;
1634 FunctionBlockSet.insert(&MBB);
1637 if (!FunctionBlockSet.erase(ChainBB)) {
1639 dbgs() <<
"Function chain contains a block not in the function!\n"
1643 if (!FunctionBlockSet.empty()) {
1646 dbgs() <<
"Function contains blocks never placed into a chain!\n"
1647 <<
" Bad block: " <<
getBlockName(RemainingBB) <<
"\n";
1649 assert(!BadFunc &&
"Detected problems with the block placement.");
1654 DEBUG(
dbgs() <<
"[MBP] Function: "<< F->getName() <<
"\n");
1656 DEBUG(
dbgs() << (ChainBB == *FunctionChain.begin() ?
"Placing chain "
1660 F->splice(InsertPos, ChainBB);
1665 if (ChainBB == *FunctionChain.begin())
1676 if (!BlocksWithUnanalyzableExits.count(PrevBB)) {
1682 "Unexpected block with un-analyzable fallthrough!");
1684 TBB = FBB =
nullptr;
1717 F->
back().updateTerminator();
1719 BlockWorkList.clear();
1720 EHPadWorkList.clear();
1723 void MachineBlockPlacement::optimizeBranches() {
1724 BlockChain &FunctionChain = *BlockToChain[&F->front()];
1739 if (TBB && !Cond.
empty() && FBB &&
1740 MBPI->getEdgeProbability(ChainBB, FBB) >
1741 MBPI->getEdgeProbability(ChainBB, TBB) &&
1743 DEBUG(
dbgs() <<
"Reverse order of the two branches: "
1746 << MBPI->getEdgeProbability(ChainBB, FBB) <<
" vs "
1747 << MBPI->getEdgeProbability(ChainBB, TBB) <<
"\n");
1751 ChainBB->updateTerminator();
1757 void MachineBlockPlacement::alignBlocks() {
1763 if (F->getFunction()->optForSize())
1765 BlockChain &FunctionChain = *BlockToChain[&F->front()];
1766 if (FunctionChain.begin() == FunctionChain.end())
1773 if (ChainBB == *FunctionChain.begin())
1784 unsigned Align = TLI->getPrefLoopAlignment(L);
1791 if (Freq < WeightedEntryFreq)
1798 if (Freq < (LoopHeaderFreq * ColdProb))
1809 ChainBB->setAlignment(Align);
1818 MBPI->getEdgeProbability(LayoutPred, ChainBB);
1819 BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb;
1820 if (LayoutEdgeFreq <= (Freq * ColdProb))
1821 ChainBB->setAlignment(Align);
1840 bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
1843 BlockChain &Chain, BlockFilterSet *BlockFilter,
1845 bool Removed, DuplicatedToLPred;
1846 bool DuplicatedToOriginalLPred;
1847 Removed = maybeTailDuplicateBlock(BB, LPred, Chain, BlockFilter,
1848 PrevUnplacedBlockIt,
1852 DuplicatedToOriginalLPred = DuplicatedToLPred;
1858 while (DuplicatedToLPred) {
1859 assert (Removed &&
"Block must have been removed to be duplicated into its "
1860 "layout predecessor.");
1868 DupBB = *(--ChainEnd);
1870 if (ChainEnd == Chain.begin())
1872 DupPred = *std::prev(ChainEnd);
1873 Removed = maybeTailDuplicateBlock(DupBB, DupPred, Chain, BlockFilter,
1874 PrevUnplacedBlockIt,
1882 LPred = *std::prev(Chain.end());
1883 if (DuplicatedToOriginalLPred)
1884 markBlockSuccessors(Chain, LPred, LoopHeaderBB, BlockFilter);
1902 bool MachineBlockPlacement::maybeTailDuplicateBlock(
1904 const BlockChain &Chain, BlockFilterSet *BlockFilter,
1906 bool &DuplicatedToLPred) {
1908 DuplicatedToLPred =
false;
1909 DEBUG(
dbgs() <<
"Redoing tail duplication for Succ#"
1911 bool IsSimple = TailDup.isSimpleBB(BB);
1917 if (!TailDup.shouldTailDuplicate(IsSimple, *BB))
1921 bool Removed =
false;
1922 auto RemovalCallback =
1928 bool InWorkList =
true;
1930 if (BlockToChain.count(RemBB)) {
1931 BlockChain *Chain = BlockToChain[RemBB];
1932 InWorkList = Chain->UnscheduledPredecessors == 0;
1933 Chain->remove(RemBB);
1934 BlockToChain.erase(RemBB);
1938 if (&(*PrevUnplacedBlockIt) == RemBB) {
1939 PrevUnplacedBlockIt++;
1945 if (RemBB->isEHPad())
1946 RemoveList = EHPadWorkList;
1955 BlockFilter->remove(RemBB);
1959 MLI->removeBlock(RemBB);
1960 if (RemBB == PreferredLoopExit)
1961 PreferredLoopExit =
nullptr;
1963 DEBUG(
dbgs() <<
"TailDuplicator deleted block: "
1966 auto RemovalCallbackRef =
1970 TailDup.tailDuplicateAndUpdate(IsSimple, BB, LPred,
1971 &DuplicatedPreds, &RemovalCallbackRef);
1974 DuplicatedToLPred =
false;
1977 BlockChain* PredChain = BlockToChain[Pred];
1979 DuplicatedToLPred =
true;
1980 if (Pred == LPred || (BlockFilter && !BlockFilter->count(Pred))
1981 || PredChain == &Chain)
1984 if (BlockFilter && !BlockFilter->count(NewSucc))
1986 BlockChain *NewChain = BlockToChain[NewSucc];
1987 if (NewChain != &Chain && NewChain != PredChain)
1988 NewChain->UnscheduledPredecessors++;
1994 bool MachineBlockPlacement::runOnMachineFunction(
MachineFunction &MF) {
1999 if (std::next(MF.
begin()) == MF.
end())
2003 MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
2004 MBFI = llvm::make_unique<BranchFolder::MBFIWrapper>(
2005 getAnalysis<MachineBlockFrequencyInfo>());
2006 MLI = &getAnalysis<MachineLoopInfo>();
2009 MDT = &getAnalysis<MachineDominatorTree>();
2013 PreferredLoopExit =
nullptr;
2019 TailDup.initMF(MF, MBPI,
true, TailDupSize);
2022 assert(BlockToChain.empty());
2034 if (MF.
size() > 3 && EnableTailMerge) {
2037 *MBPI, TailMergeSize);
2040 getAnalysisIfAvailable<MachineModuleInfo>(), MLI,
2043 BlockToChain.clear();
2045 MDT->runOnMachineFunction(MF);
2046 ChainAllocator.DestroyAll();
2054 BlockToChain.clear();
2055 ChainAllocator.DestroyAll();
2064 for (
auto MBI = std::next(MF.begin()), MBE = MF.end(); MBI != MBE; ++MBI) {
2065 auto LayoutPred = std::prev(MBI);
2110 "Basic Block Placement Stats",
false,
false)
2116 bool MachineBlockPlacementStats::runOnMachineFunction(
MachineFunction &F) {
2118 if (std::next(F.begin()) == F.end())
2121 MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
2122 MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
2127 (MBB.
succ_size() > 1) ? NumCondBranches : NumUncondBranches;
2129 (MBB.
succ_size() > 1) ? CondBranchTakenFreq : UncondBranchTakenFreq;
2136 BlockFreq * MBPI->getEdgeProbability(&MBB, Succ);
unsigned succ_size() const
static cl::opt< unsigned > AlignAllBlock("align-all-blocks", cl::desc("Force the alignment of all ""blocks in the function."), cl::init(0), cl::Hidden)
void push_back(const T &Elt)
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
bool isEHPad() const
Returns true if the block is a landing pad.
const_iterator end(StringRef path)
Get end iterator over path.
static cl::opt< bool > OutlineOptionalBranches("outline-optional-branches", cl::desc("Outlining optional branches will place blocks that are optional ""branches, i.e. branches with a common post dominator, outside ""the hot path or chain"), cl::init(false), cl::Hidden)
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
STATISTIC(NumFunctions,"Total number of functions")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds...
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
auto remove_if(R &&Range, UnaryPredicate P) -> decltype(std::begin(Range))
Provide wrappers to std::remove_if which take ranges instead of having to pass begin/end explicitly...
static BranchProbability getAdjustedProbability(BranchProbability OrigProb, BranchProbability AdjustedSumProb)
The helper function returns the branch probability that is adjusted or normalized over the new total ...
static cl::opt< bool > BranchFoldPlacement("branch-fold-placement", cl::desc("Perform branch folding during placement. ""Reduces code size."), cl::init(true), cl::Hidden)
size_type count(PtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void initializeMachineBlockPlacementStatsPass(PassRegistry &)
An efficient, type-erasing, non-owning reference to a callable.
uint32_t getNumerator() const
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
Reverses the branch condition of the specified condition list, returning false on success and true if...
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
const_iterator begin(StringRef path)
Get begin iterator over path.
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
INITIALIZE_PASS_BEGIN(MachineBlockPlacement,"block-placement","Branch Probability Basic Block Placement", false, false) INITIALIZE_PASS_END(MachineBlockPlacement
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
static BranchProbability getOne()
const std::vector< BlockT * > & getBlocks() const
Get a list of the basic blocks which make up this loop.
char & MachineBlockPlacementStatsID
MachineBlockPlacementStats - This pass collects statistics about the basic block placement using bran...
bool optForSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
void setAlignment(unsigned Align)
Set alignment of the basic block.
BlockT * getHeader() const
static cl::opt< unsigned > TailDuplicatePlacementThreshold("tail-dup-placement-threshold", cl::desc("Instruction cutoff for tail duplication during layout. ""Tail merging during layout is forced to have a threshold ""that won't conflict."), cl::init(2), cl::Hidden)
This file defines the MallocAllocator and BumpPtrAllocator interfaces.
iterator_range< succ_iterator > successors()
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
static cl::opt< unsigned > AlignAllNonFallThruBlocks("align-all-nofallthru-blocks", cl::desc("Force the alignment of all ""blocks that have no fall-through predecessors (i.e. don't add ""nops that are executed)."), cl::init(0), cl::Hidden)
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e...
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
block placement Basic Block Placement Stats
Target-Independent Code Generator Pass Configuration Options.
LLVM_NODISCARD bool empty() const
bool canFallThrough()
Return true if the block can implicitly transfer control to the block after it by falling off the end...
Optional< uint64_t > getEntryCount() const
Get the entry count for this function.
static GCRegistry::Add< CoreCLRGC > E("coreclr","CoreCLR-compatible GC")
TargetInstrInfo - Interface to description of machine instruction set.
static cl::opt< bool > ForcePreciseRotationCost("force-precise-rotation-cost", cl::desc("Force the use of precise cost ""loop rotation strategy."), cl::init(false), cl::Hidden)
cl::opt< unsigned > ProfileLikelyProb
initializer< Ty > init(const Ty &Val)
friend const_iterator end(StringRef path)
Get end iterator over path.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
static cl::opt< unsigned > OutlineOptionalThreshold("outline-optional-threshold", cl::desc("Don't outline optional branches that are a single block with an ""instruction count below this threshold"), cl::init(4), cl::Hidden)
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Represent the analysis usage information of a pass.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE,"Assign register bank of generic virtual registers", false, false) RegBankSelect
bool getEnableTailMerge() const
static cl::opt< unsigned > LoopToColdBlockRatio("loop-to-cold-block-ratio", cl::desc("Outline loop blocks from loop chain if (frequency of loop) / ""(frequency of block) is greater than this ratio"), cl::init(5), cl::Hidden)
iterator_range< pred_iterator > predecessors()
LLVM_NODISCARD bool empty() const
succ_iterator succ_begin()
iterator erase(const_iterator CI)
pred_iterator pred_begin()
void initializeMachineBlockPlacementPass(PassRegistry &)
static cl::opt< unsigned > TailMergeSize("tail-merge-size", cl::desc("Min number of instructions to consider tail merging"), cl::init(3), cl::Hidden)
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
A SetVector that performs no allocations if smaller than a certain size.
Iterator for intrusive lists based on ilist_node.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
block Branch Probability Basic Block Placement
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
auto find(R &&Range, const T &Val) -> decltype(std::begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
friend const_iterator begin(StringRef path)
Get begin iterator over path.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
virtual const TargetLowering * getTargetLowering() const
void updateTerminator()
Update the terminator instructions in block to account for changes to the layout. ...
static cl::opt< unsigned > MisfetchCost("misfetch-cost", cl::desc("Cost that models the probabilistic risk of an instruction ""misfetch due to a jump comparing to falling through, whose cost ""is zero."), cl::init(1), cl::Hidden)
static cl::opt< unsigned > JumpInstCost("jump-inst-cost", cl::desc("Cost of jump instructions."), cl::init(1), cl::Hidden)
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
block Branch Probability Basic Block static false std::string getBlockName(MachineBasicBlock *BB)
Helper to print the name of a MBB.
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
Insert branch code into the end of the specified MachineBasicBlock.
A BumpPtrAllocator that allows only elements of a specific type to be allocated.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
StringRef getName() const
Return the name of the corresponding LLVM basic block, or "(null)".
void setPreservesAll()
Set by analyses that do not transform their input at all.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
Remove the branching code at the end of the specific MBB.
cl::opt< unsigned > StaticLikelyProb
block_iterator block_end() const
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
static uint64_t getMaxFrequency()
Returns the maximum possible frequency, the saturation value.
void emplace_back(ArgTypes &&...Args)
static cl::opt< bool > PreciseRotationCost("precise-rotation-cost", cl::desc("Model the cost of loop rotation more ""precisely by using profile data."), cl::init(false), cl::Hidden)
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
char & MachineBlockPlacementID
MachineBlockPlacement - This pass places basic blocks based on branch probabilities.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
static cl::opt< unsigned > ExitBlockBias("block-placement-exit-block-bias", cl::desc("Block frequency percentage a loop exit block needs ""over the original exit to be considered the new exit."), cl::init(0), cl::Hidden)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
A raw_ostream that writes to an std::string.
virtual const TargetInstrInfo * getInstrInfo() const
block Branch Probability Basic Block false
static BranchProbability getLayoutSuccessorProbThreshold(MachineBasicBlock *BB)
block_iterator block_begin() const
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
static BranchProbability getZero()
Utility class to perform tail duplication.
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
static cl::opt< bool > TailDupPlacement("tail-dup-placement", cl::desc("Perform tail duplication during placement. ""Creates more fallthrough opportunites in ""outline branches."), cl::init(true), cl::Hidden)
bool requiresStructuredCFG() const
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
unsigned pred_size() const
This file describes how to lower LLVM code to machine code.
BranchProbability getCompl() const