73 #define DEBUG_TYPE "block-placement" 75 STATISTIC(NumCondBranches,
"Number of conditional branches");
76 STATISTIC(NumUncondBranches,
"Number of unconditional branches");
78 "Potential frequency of taking conditional branches");
80 "Potential frequency of taking unconditional branches");
84 cl::desc(
"Force the alignment of all blocks in the function in log2 format " 85 "(e.g 4 means align on 16B boundaries)."),
89 "align-all-nofallthru-blocks",
90 cl::desc(
"Force the alignment of all blocks that have no fall-through " 91 "predecessors (i.e. don't add nops that are executed). In log2 " 92 "format (e.g 4 means align on 16B boundaries)."),
97 "block-placement-exit-block-bias",
98 cl::desc(
"Block frequency percentage a loop exit block needs " 99 "over the original exit to be considered the new exit."),
106 "loop-to-cold-block-ratio",
107 cl::desc(
"Outline loop blocks from loop chain if (frequency of loop) / " 108 "(frequency of block) is greater than this ratio"),
112 "force-loop-cold-block",
113 cl::desc(
"Force outlining cold blocks from loops."),
118 cl::desc(
"Model the cost of loop rotation more " 119 "precisely by using profile data."),
124 cl::desc(
"Force the use of precise cost " 125 "loop rotation strategy."),
130 cl::desc(
"Cost that models the probabilistic risk of an instruction " 131 "misfetch due to a jump comparing to falling through, whose cost " 136 cl::desc(
"Cost of jump instructions."),
140 cl::desc(
"Perform tail duplication during placement. " 141 "Creates more fallthrough opportunites in " 142 "outline branches."),
147 cl::desc(
"Perform branch folding during placement. " 148 "Reduces code size."),
153 "tail-dup-placement-threshold",
154 cl::desc(
"Instruction cutoff for tail duplication during layout. " 155 "Tail merging during layout is forced to have a threshold " 156 "that won't conflict."),
cl::init(2),
161 "tail-dup-placement-aggressive-threshold",
162 cl::desc(
"Instruction cutoff for aggressive tail duplication during " 163 "layout. Used at -O3. Tail merging during layout is forced to " 164 "have a threshold that won't conflict."),
cl::init(4),
169 "tail-dup-placement-penalty",
170 cl::desc(
"Cost penalty for blocks that can avoid breaking CFG by copying. " 171 "Copying can increase fallthrough, but it also increases icache " 172 "pressure. This parameter controls the penalty to account for that. " 173 "Percent as integer."),
179 "triangle-chain-count",
180 cl::desc(
"Number of triangle-shaped-CFG's that need to be in a row for the " 181 "triangle tail duplication heuristic to kick in. 0 to disable."),
228 BlockToChainMapType &BlockToChain;
237 : Blocks(1, BB), BlockToChain(BlockToChain) {
238 assert(BB &&
"Cannot create a chain with a null basic block");
239 BlockToChain[BB] =
this;
248 const_iterator
begin()
const {
return Blocks.
begin(); }
251 iterator
end() {
return Blocks.
end(); }
252 const_iterator
end()
const {
return Blocks.
end(); }
255 for(iterator i =
begin(); i !=
end(); ++i) {
271 assert(BB &&
"Can't merge a null block.");
272 assert(!Blocks.
empty() &&
"Can't merge into an empty chain.");
276 assert(!BlockToChain[BB] &&
277 "Passed chain is null, but BB has entry in BlockToChain.");
279 BlockToChain[BB] =
this;
283 assert(BB == *Chain->begin() &&
"Passed BB is not head of Chain.");
284 assert(Chain->begin() != Chain->end());
290 assert(BlockToChain[ChainBB] == Chain &&
"Incoming blocks not in chain.");
291 BlockToChain[ChainBB] =
this;
312 unsigned UnscheduledPredecessors = 0;
320 struct BlockAndTailDupResult {
326 struct WeightedEdge {
346 std::unique_ptr<BranchFolder::MBFIWrapper> MBFI;
399 void markChainSuccessors(
401 const BlockFilterSet *BlockFilter =
nullptr);
405 void markBlockSuccessors(
408 const BlockFilterSet *BlockFilter =
nullptr);
411 collectViableSuccessors(
413 const BlockFilterSet *BlockFilter,
415 bool shouldPredBlockBeOutlined(
417 const BlockChain &Chain,
const BlockFilterSet *BlockFilter,
419 bool repeatedlyTailDuplicateBlock(
422 BlockChain &Chain, BlockFilterSet *BlockFilter,
424 bool maybeTailDuplicateBlock(
426 BlockChain &Chain, BlockFilterSet *BlockFilter,
428 bool &DuplicatedToLPred);
429 bool hasBetterLayoutPredecessor(
433 const BlockFilterSet *BlockFilter);
434 BlockAndTailDupResult selectBestSuccessor(
436 const BlockFilterSet *BlockFilter);
440 const BlockChain &PlacedChain,
442 const BlockFilterSet *BlockFilter);
451 const BlockFilterSet *BlockFilter);
454 BlockFilterSet *BlockFilter =
nullptr);
458 const BlockFilterSet &LoopBlockSet);
460 const BlockFilterSet &LoopBlockSet);
464 const BlockFilterSet &LoopBlockSet);
466 const MachineLoop &L,
const BlockFilterSet &LoopBlockSet);
468 const MachineLoop &L,
const BlockFilterSet &LoopBlockSet);
470 const MachineLoop &L,
const BlockFilterSet &LoopBlockSet,
472 BlockFilterSet collectLoopBlockSet(
const MachineLoop &L);
477 void rotateLoopWithProfile(
479 const BlockFilterSet &LoopBlockSet);
480 void buildCFGChains();
481 void optimizeBranches();
488 bool isProfitableToTailDup(
491 const BlockChain &Chain,
const BlockFilterSet *BlockFilter);
496 const BlockChain &Chain,
const BlockFilterSet *BlockFilter);
499 BlockAndTailDupResult getBestTrellisSuccessor(
503 const BlockFilterSet *BlockFilter);
506 static std::pair<WeightedEdge, WeightedEdge> getBestNonConflictingEdges(
512 bool canTailDuplicateUnplacedPreds(
514 const BlockChain &Chain,
const BlockFilterSet *BlockFilter);
518 void precomputeTriangleChains();
529 bool allowTailDupPlacement()
const {
552 "Branch Probability Basic Block Placement",
false,
false)
568 OS <<
" ('" << BB->
getName() <<
"')";
580 void MachineBlockPlacement::markChainSuccessors(
582 const BlockFilterSet *BlockFilter) {
586 markBlockSuccessors(Chain, MBB, LoopHeaderBB, BlockFilter);
596 void MachineBlockPlacement::markBlockSuccessors(
604 if (BlockFilter && !BlockFilter->count(Succ))
606 BlockChain &SuccChain = *BlockToChain[Succ];
608 if (&Chain == &SuccChain || Succ == LoopHeaderBB)
613 if (SuccChain.UnscheduledPredecessors == 0 ||
614 --SuccChain.UnscheduledPredecessors > 0)
617 auto *NewBB = *SuccChain.begin();
618 if (NewBB->isEHPad())
631 const BlockFilterSet *BlockFilter,
651 bool SkipSucc =
false;
652 if (Succ->isEHPad() || (BlockFilter && !BlockFilter->count(Succ))) {
655 BlockChain *SuccChain = BlockToChain[Succ];
656 if (SuccChain == &Chain) {
658 }
else if (Succ != *SuccChain->begin()) {
660 <<
" -> Mid chain!\n");
670 return AdjustedSumProb;
681 if (SuccProbN >= SuccProbD)
696 if (Successors.
count(&BB))
699 if (!Successors.
count(Succ))
725 uint64_t EntryFreq) {
728 return (Gain / ThresholdProb).getFrequency() >= EntryFreq;
736 bool MachineBlockPlacement::isProfitableToTailDup(
739 const BlockChain &Chain,
const BlockFilterSet *BlockFilter) {
766 auto AdjustedSuccSumProb =
767 collectViableSuccessors(Succ, Chain, BlockFilter, SuccSuccs);
769 auto BBFreq = MBFI->getBlockFreq(BB);
770 auto SuccFreq = MBFI->getBlockFreq(Succ);
773 uint64_t EntryFreq = MBFI->getEntryFreq();
776 if (SuccSuccs.
size() == 0)
783 if (Prob > BestSuccSucc)
795 if (SuccPred == Succ || SuccPred == BB
796 || BlockToChain[SuccPred] == &Chain
797 || (BlockFilter && !BlockFilter->count(SuccPred)))
799 auto Freq = MBFI->getBlockFreq(SuccPred)
801 if (Freq > SuccBestPred)
869 if (UProb > AdjustedSuccSumProb / 2 &&
870 !hasBetterLayoutPredecessor(Succ, PDom, *BlockToChain[PDom], UProb, UProb,
874 (P + V), (Qout +
std::max(Qin, F) * VProb + std::min(Qin, F) * UProb),
878 (Qout + std::min(Qin, F) * AdjustedSuccSumProb +
890 bool MachineBlockPlacement::isTrellis(
893 const BlockChain &Chain,
const BlockFilterSet *BlockFilter) {
908 if (Successors.count(SuccPred)) {
911 if (!Successors.count(CheckSucc))
915 const BlockChain *PredChain = BlockToChain[SuccPred];
916 if (SuccPred == BB || (BlockFilter && !BlockFilter->count(SuccPred)) ||
917 PredChain == &Chain || PredChain == BlockToChain[Succ])
921 if (!SeenPreds.
insert(SuccPred).second)
939 std::pair<MachineBlockPlacement::WeightedEdge,
940 MachineBlockPlacement::WeightedEdge>
941 MachineBlockPlacement::getBestNonConflictingEdges(
952 auto Cmp = [](WeightedEdge A, WeightedEdge
B) {
return A.Weight >
B.Weight; };
956 auto BestA = Edges[0].begin();
957 auto BestB = Edges[1].begin();
960 if (BestA->Src == BestB->Src) {
962 auto SecondBestA = std::next(BestA);
963 auto SecondBestB = std::next(BestB);
966 if (BestAScore < BestBScore)
972 if (BestB->Src == BB)
974 return std::make_pair(*BestA, *BestB);
984 MachineBlockPlacement::BlockAndTailDupResult
985 MachineBlockPlacement::getBestTrellisSuccessor(
989 const BlockFilterSet *BlockFilter) {
991 BlockAndTailDupResult Result = {
nullptr,
false};
998 if (Successors.
size() != 2 || ViableSuccs.
size() != 2)
1004 for (
auto Succ : ViableSuccs) {
1008 if ((BlockFilter && !BlockFilter->count(SuccPred)) ||
1009 BlockToChain[SuccPred] == &Chain ||
1010 BlockToChain[SuccPred] == BlockToChain[Succ])
1014 Edges[SuccIndex].
push_back({EdgeFreq, SuccPred, Succ});
1020 WeightedEdge BestA, BestB;
1021 std::tie(BestA, BestB) = getBestNonConflictingEdges(BB, Edges);
1023 if (BestA.Src != BB) {
1027 LLVM_DEBUG(
dbgs() <<
"Trellis, but not one of the chosen edges.\n");
1034 if (BestA.Dest == BestB.Src) {
1040 if (allowTailDupPlacement() && shouldTailDuplicate(Succ2) &&
1041 canTailDuplicateUnplacedPreds(BB, Succ2, Chain, BlockFilter) &&
1043 Chain, BlockFilter)) {
1047 <<
", probability: " << Succ2Prob
1048 <<
" (Tail Duplicate)\n");
1050 Result.ShouldTailDup =
true;
1056 ComputedEdges[BestB.Src] = { BestB.Dest,
false };
1058 auto TrellisSucc = BestA.Dest;
1062 <<
", probability: " << SuccProb <<
" (Trellis)\n");
1063 Result.BB = TrellisSucc;
1070 bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
1072 const BlockChain &Chain,
const BlockFilterSet *BlockFilter) {
1073 if (!shouldTailDuplicate(Succ))
1083 if (Pred == BB || (BlockFilter && !BlockFilter->count(Pred))
1084 || BlockToChain[Pred] == &Chain)
1145 void MachineBlockPlacement::precomputeTriangleChains() {
1146 struct TriangleChain {
1147 std::vector<MachineBasicBlock *> Edges;
1150 : Edges({src, dst}) {}
1153 assert(getKey()->isSuccessor(dst) &&
1154 "Attempting to append a block that is not a successor.");
1155 Edges.push_back(dst);
1158 unsigned count()
const {
return Edges.size() - 1; }
1161 return Edges.
back();
1185 if (PDom ==
nullptr)
1192 if (!shouldTailDuplicate(PDom))
1194 bool CanTailDuplicate =
true;
1201 CanTailDuplicate =
false;
1207 if (!CanTailDuplicate)
1214 auto Found = TriangleChainMap.
find(&BB);
1217 if (Found != TriangleChainMap.
end()) {
1218 TriangleChain Chain = std::move(Found->second);
1219 TriangleChainMap.
erase(Found);
1221 TriangleChainMap.
insert(std::make_pair(Chain.getKey(), std::move(Chain)));
1223 auto InsertResult = TriangleChainMap.
try_emplace(PDom, &BB, PDom);
1224 assert(InsertResult.second &&
"Block seen twice.");
1232 for (
auto &ChainPair : TriangleChainMap) {
1233 TriangleChain &Chain = ChainPair.second;
1240 Chain.Edges.pop_back();
1244 <<
" as pre-computed based on triangles.\n");
1246 auto InsertResult = ComputedEdges.
insert({src, {dst,
true}});
1247 assert(InsertResult.second &&
"Block seen twice.");
1289 bool MachineBlockPlacement::hasBetterLayoutPredecessor(
1293 const BlockFilterSet *BlockFilter) {
1296 if (SuccChain.UnscheduledPredecessors == 0)
1416 BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb;
1417 bool BadCFGConflict =
false;
1420 if (Pred == Succ || BlockToChain[Pred] == &SuccChain ||
1421 (BlockFilter && !BlockFilter->count(Pred)) ||
1422 BlockToChain[Pred] == &Chain ||
1443 if (PredEdgeFreq * HotProb >= CandidateEdgeFreq * HotProb.
getCompl()) {
1444 BadCFGConflict =
true;
1449 if (BadCFGConflict) {
1451 << SuccProb <<
" (prob) (non-cold CFG conflict)\n");
1468 MachineBlockPlacement::BlockAndTailDupResult
1469 MachineBlockPlacement::selectBestSuccessor(
1471 const BlockFilterSet *BlockFilter) {
1474 BlockAndTailDupResult BestSucc = {
nullptr,
false };
1478 auto AdjustedSumProb =
1479 collectViableSuccessors(BB, Chain, BlockFilter, Successors);
1486 auto FoundEdge = ComputedEdges.
find(BB);
1487 if (FoundEdge != ComputedEdges.
end()) {
1489 ComputedEdges.
erase(FoundEdge);
1490 BlockChain *SuccChain = BlockToChain[Succ];
1491 if (BB->
isSuccessor(Succ) && (!BlockFilter || BlockFilter->count(Succ)) &&
1492 SuccChain != &Chain && Succ == *SuccChain->begin())
1493 return FoundEdge->second;
1498 if (isTrellis(BB, Successors, Chain, BlockFilter))
1499 return getBestTrellisSuccessor(BB, Successors, AdjustedSumProb, Chain,
1512 BlockChain &SuccChain = *BlockToChain[Succ];
1515 if (hasBetterLayoutPredecessor(BB, Succ, SuccChain, SuccProb, RealSuccProb,
1516 Chain, BlockFilter)) {
1518 if (allowTailDupPlacement() && shouldTailDuplicate(Succ))
1519 DupCandidates.
push_back(std::make_tuple(SuccProb, Succ));
1525 <<
", probability: " << SuccProb
1526 << (SuccChain.UnscheduledPredecessors != 0 ?
" (CFG break)" :
"")
1529 if (BestSucc.BB && BestProb >= SuccProb) {
1536 BestProb = SuccProb;
1544 [](std::tuple<BranchProbability, MachineBasicBlock *> L,
1545 std::tuple<BranchProbability, MachineBasicBlock *> R) {
1546 return std::get<0>(L) > std::get<0>(R);
1548 for (
auto &Tup : DupCandidates) {
1551 std::tie(DupProb, Succ) = Tup;
1552 if (DupProb < BestProb)
1554 if (canTailDuplicateUnplacedPreds(BB, Succ, Chain, BlockFilter)
1555 && (isProfitableToTailDup(BB, Succ, BestProb, Chain, BlockFilter))) {
1557 <<
", probability: " << DupProb
1558 <<
" (Tail Duplicate)\n");
1560 BestSucc.ShouldTailDup =
true;
1589 return BlockToChain.
lookup(BB) == &Chain;
1593 if (WorkList.
empty())
1596 bool IsEHPad = WorkList[0]->isEHPad();
1602 "EHPad mismatch between block and work list.");
1604 BlockChain &SuccChain = *BlockToChain[MBB];
1605 if (&SuccChain == &Chain)
1608 assert(SuccChain.UnscheduledPredecessors == 0 &&
1609 "Found CFG-violating block");
1613 MBFI->printBlockFreq(
dbgs(), CandidateFreq) <<
" (freq)\n");
1633 if (BestBlock && (IsEHPad ^ (BestFreq >= CandidateFreq)))
1637 BestFreq = CandidateFreq;
1651 const BlockChain &PlacedChain,
1653 const BlockFilterSet *BlockFilter) {
1656 if (BlockFilter && !BlockFilter->count(&*
I))
1658 if (BlockToChain[&*
I] != &PlacedChain) {
1659 PrevUnplacedBlockIt =
I;
1663 return *BlockToChain[&*
I]->
begin();
1669 void MachineBlockPlacement::fillWorkLists(
1672 const BlockFilterSet *BlockFilter =
nullptr) {
1673 BlockChain &Chain = *BlockToChain[MBB];
1674 if (!UpdatedPreds.
insert(&Chain).second)
1678 Chain.UnscheduledPredecessors == 0 &&
1679 "Attempting to place block with unscheduled predecessors in worklist.");
1681 assert(BlockToChain[ChainBB] == &Chain &&
1682 "Block in chain doesn't match BlockToChain map.");
1684 if (BlockFilter && !BlockFilter->count(Pred))
1686 if (BlockToChain[Pred] == &Chain)
1688 ++Chain.UnscheduledPredecessors;
1692 if (Chain.UnscheduledPredecessors != 0)
1702 void MachineBlockPlacement::buildChain(
1704 BlockFilterSet *BlockFilter) {
1705 assert(HeadBB &&
"BB must not be null.\n");
1706 assert(BlockToChain[HeadBB] == &Chain &&
"BlockToChainMap mis-match.\n");
1710 markChainSuccessors(Chain, LoopHeaderBB, BlockFilter);
1713 assert(BB &&
"null block found at end of chain in loop.");
1714 assert(BlockToChain[BB] == &Chain &&
"BlockToChainMap mis-match in loop.");
1715 assert(*std::prev(Chain.end()) == BB &&
"BB Not found at end of chain.");
1720 auto Result = selectBestSuccessor(BB, Chain, BlockFilter);
1722 bool ShouldTailDup = Result.ShouldTailDup;
1723 if (allowTailDupPlacement())
1724 ShouldTailDup |= (BestSucc && shouldTailDuplicate(BestSucc));
1730 BestSucc = selectBestCandidateBlock(Chain, BlockWorkList);
1732 BestSucc = selectBestCandidateBlock(Chain, EHPadWorkList);
1735 BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockIt, BlockFilter);
1739 LLVM_DEBUG(
dbgs() <<
"Unnatural loop CFG detected, forcibly merging the " 1740 "layout successor until the CFG reduces\n");
1745 if (allowTailDupPlacement() && BestSucc && ShouldTailDup) {
1749 if (repeatedlyTailDuplicateBlock(BestSucc, BB, LoopHeaderBB, Chain,
1750 BlockFilter, PrevUnplacedBlockIt))
1755 BlockChain &SuccChain = *BlockToChain[BestSucc];
1758 SuccChain.UnscheduledPredecessors = 0;
1761 markChainSuccessors(SuccChain, LoopHeaderBB, BlockFilter);
1762 Chain.merge(BestSucc, &SuccChain);
1763 BB = *std::prev(Chain.end());
1785 MachineBlockPlacement::canMoveBottomBlockToTop(
1795 if (OtherBB == BottomBlock)
1797 if (OtherBB == OldTop)
1805 MachineBlockPlacement::TopFallThroughFreq(
1807 const BlockFilterSet &LoopBlockSet) {
1810 BlockChain *PredChain = BlockToChain[Pred];
1811 if (!LoopBlockSet.count(Pred) &&
1812 (!PredChain || Pred == *std::prev(PredChain->end()))) {
1819 BlockChain *SuccChain = BlockToChain[Succ];
1822 if (!LoopBlockSet.count(Succ) && (SuccProb > TopProb) &&
1823 (!SuccChain || Succ == *SuccChain->begin())) {
1831 if (EdgeFreq > MaxFreq)
1861 MachineBlockPlacement::FallThroughGains(
1865 const BlockFilterSet &LoopBlockSet) {
1866 BlockFrequency FallThrough2Top = TopFallThroughFreq(OldTop, LoopBlockSet);
1869 FallThrough2Exit = MBFI->getBlockFreq(NewTop) *
1878 if (!LoopBlockSet.count(Pred))
1880 BlockChain *PredChain = BlockToChain[Pred];
1881 if (!PredChain || Pred == *std::prev(PredChain->end())) {
1884 if (EdgeFreq > FallThroughFromPred) {
1885 FallThroughFromPred = EdgeFreq;
1896 if ((Succ == NewTop) || (Succ == BestPred) || !LoopBlockSet.count(Succ))
1898 if (ComputedEdges.
find(Succ) != ComputedEdges.
end())
1900 BlockChain *SuccChain = BlockToChain[Succ];
1901 if ((SuccChain && (Succ != *SuccChain->begin())) ||
1902 (SuccChain == BlockToChain[BestPred]))
1906 if (EdgeFreq > NewFreq)
1911 if (NewFreq > OrigEdgeFreq) {
1916 FallThroughFromPred = 0;
1923 FallThroughFromPred;
1925 Result = Gains - Lost;
1952 MachineBlockPlacement::findBestLoopTopHelper(
1955 const BlockFilterSet &LoopBlockSet) {
1959 BlockChain &HeaderChain = *BlockToChain[OldTop];
1960 if (!LoopBlockSet.count(*HeaderChain.begin()))
1969 if (!LoopBlockSet.count(Pred))
1974 << Pred->succ_size() <<
" successors, ";
1975 MBFI->printBlockFreq(
dbgs(), Pred) <<
" freq\n");
1976 if (Pred->succ_size() > 2)
1980 if (Pred->succ_size() == 2) {
1982 if (OtherBB == OldTop)
1986 if (!canMoveBottomBlockToTop(Pred, OldTop))
1991 if ((Gains > 0) && (Gains > BestGains ||
1992 ((Gains == BestGains) && Pred->isLayoutSuccessor(OldTop)))) {
2019 MachineBlockPlacement::findBestLoopTop(
const MachineLoop &L,
2020 const BlockFilterSet &LoopBlockSet) {
2033 while (NewTop != OldTop) {
2035 NewTop = findBestLoopTopHelper(OldTop, L, LoopBlockSet);
2036 if (NewTop != OldTop)
2037 ComputedEdges[NewTop] = { OldTop,
false };
2048 MachineBlockPlacement::findBestLoopExit(
const MachineLoop &L,
2049 const BlockFilterSet &LoopBlockSet,
2059 BlockChain &HeaderChain = *BlockToChain[L.
getHeader()];
2060 if (!LoopBlockSet.count(*HeaderChain.begin()))
2064 unsigned BestExitLoopDepth = 0;
2074 BlockChain &Chain = *BlockToChain[MBB];
2077 if (MBB != *std::prev(Chain.end()))
2086 bool HasLoopingSucc =
false;
2092 BlockChain &SuccChain = *BlockToChain[Succ];
2094 if (&Chain == &SuccChain) {
2101 if (LoopBlockSet.count(Succ)) {
2104 HasLoopingSucc =
true;
2108 unsigned SuccLoopDepth = 0;
2110 SuccLoopDepth = ExitLoop->getLoopDepth();
2111 if (ExitLoop->contains(&L))
2112 BlocksExitingToOuterLoop.
insert(MBB);
2115 BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(MBB) * SuccProb;
2119 MBFI->printBlockFreq(
dbgs(), ExitEdgeFreq) <<
")\n");
2125 if (!ExitingBB || SuccLoopDepth > BestExitLoopDepth ||
2126 ExitEdgeFreq > BestExitEdgeFreq ||
2128 !(ExitEdgeFreq < BestExitEdgeFreq * Bias))) {
2129 BestExitEdgeFreq = ExitEdgeFreq;
2134 if (!HasLoopingSucc) {
2136 ExitingBB = OldExitingBB;
2137 BestExitEdgeFreq = OldBestExitEdgeFreq;
2144 dbgs() <<
" No other candidate exit blocks, using loop header\n");
2148 LLVM_DEBUG(
dbgs() <<
" Loop has 1 block, using loop header as exit\n");
2155 if (!BlocksExitingToOuterLoop.
empty() &&
2156 !BlocksExitingToOuterLoop.
count(ExitingBB))
2161 ExitFreq = BestExitEdgeFreq;
2170 MachineBlockPlacement::hasViableTopFallthrough(
2172 const BlockFilterSet &LoopBlockSet) {
2174 BlockChain *PredChain = BlockToChain[Pred];
2175 if (!LoopBlockSet.count(Pred) &&
2176 (!PredChain || Pred == *std::prev(PredChain->end()))) {
2183 BlockChain *SuccChain = BlockToChain[Succ];
2186 if ((!SuccChain || Succ == *SuccChain->begin()) && SuccProb > TopProb) {
2204 void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
2207 const BlockFilterSet &LoopBlockSet) {
2215 if (Bottom == ExitingBB)
2218 bool ViableTopFallthrough = hasViableTopFallthrough(Top, LoopBlockSet);
2223 if (ViableTopFallthrough) {
2225 BlockChain *SuccChain = BlockToChain[Succ];
2226 if (!LoopBlockSet.count(Succ) &&
2227 (!SuccChain || Succ == *SuccChain->begin()))
2233 BlockFrequency FallThrough2Top = TopFallThroughFreq(Top, LoopBlockSet);
2234 if (FallThrough2Top >= ExitFreq)
2238 BlockChain::iterator ExitIt =
llvm::find(LoopChain, ExitingBB);
2239 if (ExitIt == LoopChain.end())
2261 if (ViableTopFallthrough) {
2262 assert(std::next(ExitIt) != LoopChain.end() &&
2263 "Exit should not be last BB");
2272 std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end());
2288 void MachineBlockPlacement::rotateLoopWithProfile(
2290 const BlockFilterSet &LoopBlockSet) {
2291 auto RotationPos = LoopChain.end();
2312 BlockChain *PredChain = BlockToChain[Pred];
2313 if (!LoopBlockSet.count(Pred) &&
2314 (!PredChain || Pred == *std::prev(PredChain->end()))) {
2315 auto EdgeFreq = MBFI->getBlockFreq(Pred) *
2317 auto FallThruCost = ScaleBlockFrequency(EdgeFreq,
MisfetchCost);
2320 if (Pred->succ_size() == 1)
2321 FallThruCost += ScaleBlockFrequency(EdgeFreq,
JumpInstCost);
2322 HeaderFallThroughCost =
std::max(HeaderFallThroughCost, FallThruCost);
2331 for (
auto BB : LoopChain) {
2334 BlockChain *SuccChain = BlockToChain[Succ];
2335 if (!LoopBlockSet.count(Succ) &&
2336 (!SuccChain || Succ == *SuccChain->begin())) {
2338 LargestExitEdgeProb =
std::max(LargestExitEdgeProb, SuccProb);
2342 auto ExitFreq = MBFI->getBlockFreq(BB) * LargestExitEdgeProb;
2350 for (
auto Iter = LoopChain.begin(), TailIter = std::prev(LoopChain.end()),
2351 EndIter = LoopChain.end();
2352 Iter != EndIter; Iter++, TailIter++) {
2355 if (TailIter == LoopChain.end())
2356 TailIter = LoopChain.begin();
2358 auto TailBB = *TailIter;
2366 if (Iter != LoopChain.begin())
2367 Cost += HeaderFallThroughCost;
2371 for (
auto &ExitWithFreq : ExitsWithFreq)
2372 if (TailBB != ExitWithFreq.first)
2373 Cost += ExitWithFreq.second;
2389 if (TailBB->isSuccessor(*Iter)) {
2390 auto TailBBFreq = MBFI->getBlockFreq(TailBB);
2391 if (TailBB->succ_size() == 1)
2392 Cost += ScaleBlockFrequency(TailBBFreq.getFrequency(),
2394 else if (TailBB->succ_size() == 2) {
2396 auto TailToHeadFreq = TailBBFreq * TailToHeadProb;
2398 ? TailBBFreq * TailToHeadProb.
getCompl()
2400 Cost += ScaleBlockFrequency(TailToHeadFreq,
MisfetchCost) +
2409 if (Cost < SmallestRotationCost) {
2410 SmallestRotationCost = Cost;
2415 if (RotationPos != LoopChain.end()) {
2417 <<
" to the top\n");
2418 std::rotate(LoopChain.begin(), RotationPos, LoopChain.end());
2427 MachineBlockPlacement::collectLoopBlockSet(
const MachineLoop &L) {
2428 BlockFilterSet LoopBlockSet;
2443 LoopFreq += MBFI->getBlockFreq(LoopPred) *
2447 auto Freq = MBFI->getBlockFreq(LoopBB).getFrequency();
2450 LoopBlockSet.insert(LoopBB);
2455 return LoopBlockSet;
2464 void MachineBlockPlacement::buildLoopChains(
const MachineLoop &L) {
2468 buildLoopChains(*InnerLoop);
2471 "BlockWorkList not empty when starting to build loop chains.");
2473 "EHPadWorkList not empty when starting to build loop chains.");
2474 BlockFilterSet LoopBlockSet = collectLoopBlockSet(L);
2479 bool RotateLoopWithProfile =
2495 PreferredLoopExit =
nullptr;
2497 if (!RotateLoopWithProfile && LoopTop == L.getHeader())
2498 PreferredLoopExit = findBestLoopExit(L, LoopBlockSet, ExitFreq);
2500 BlockChain &LoopChain = *BlockToChain[LoopTop];
2506 assert(LoopChain.UnscheduledPredecessors == 0 &&
2507 "LoopChain should not have unscheduled predecessors.");
2508 UpdatedPreds.
insert(&LoopChain);
2511 fillWorkLists(LoopBB, UpdatedPreds, &LoopBlockSet);
2513 buildChain(LoopTop, LoopChain, &LoopBlockSet);
2515 if (RotateLoopWithProfile)
2516 rotateLoopWithProfile(LoopChain, L, LoopBlockSet);
2518 rotateLoop(LoopChain, PreferredLoopExit, ExitFreq, LoopBlockSet);
2522 bool BadLoop =
false;
2523 if (LoopChain.UnscheduledPredecessors) {
2525 dbgs() <<
"Loop chain contains a block without its preds placed!\n" 2526 <<
" Loop header: " << getBlockName(*L.block_begin()) <<
"\n" 2527 <<
" Chain header: " << getBlockName(*LoopChain.begin()) <<
"\n";
2531 if (!LoopBlockSet.remove(ChainBB)) {
2535 dbgs() <<
"Loop chain contains a block not contained by the loop!\n" 2536 <<
" Loop header: " <<
getBlockName(*L.block_begin()) <<
"\n" 2537 <<
" Chain header: " <<
getBlockName(*LoopChain.begin()) <<
"\n" 2542 if (!LoopBlockSet.empty()) {
2545 dbgs() <<
"Loop contains blocks never placed into a chain!\n" 2546 <<
" Loop header: " <<
getBlockName(*L.block_begin()) <<
"\n" 2547 <<
" Chain header: " <<
getBlockName(*LoopChain.begin()) <<
"\n" 2550 assert(!BadLoop &&
"Detected problems with the placement of this loop.");
2553 BlockWorkList.
clear();
2554 EHPadWorkList.
clear();
2557 void MachineBlockPlacement::buildCFGChains() {
2565 new (ChainAllocator.
Allocate()) BlockChain(BlockToChain, BB);
2571 if (!TII->
analyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough())
2578 assert(NextFI != FE &&
"Can't fallthrough past the last block.");
2579 LLVM_DEBUG(
dbgs() <<
"Pre-merging due to unanalyzable fallthrough: " 2582 Chain->merge(NextBB,
nullptr);
2584 BlocksWithUnanalyzableExits.
insert(&*BB);
2592 PreferredLoopExit =
nullptr;
2594 buildLoopChains(*L);
2597 "BlockWorkList should be empty before building final chain.");
2599 "EHPadWorkList should be empty before building final chain.");
2603 fillWorkLists(&MBB, UpdatedPreds);
2605 BlockChain &FunctionChain = *BlockToChain[&F->front()];
2606 buildChain(&F->front(), FunctionChain);
2613 bool BadFunc =
false;
2614 FunctionBlockSetType FunctionBlockSet;
2616 FunctionBlockSet.insert(&MBB);
2619 if (!FunctionBlockSet.erase(ChainBB)) {
2621 dbgs() <<
"Function chain contains a block not in the function!\n" 2625 if (!FunctionBlockSet.empty()) {
2628 dbgs() <<
"Function contains blocks never placed into a chain!\n" 2629 <<
" Bad block: " <<
getBlockName(RemainingBB) <<
"\n";
2631 assert(!BadFunc &&
"Detected problems with the block placement.");
2636 LLVM_DEBUG(
dbgs() <<
"[MBP] Function: " << F->getName() <<
"\n");
2638 LLVM_DEBUG(
dbgs() << (ChainBB == *FunctionChain.begin() ?
"Placing chain " 2642 F->splice(InsertPos, ChainBB);
2647 if (ChainBB == *FunctionChain.begin())
2658 if (!BlocksWithUnanalyzableExits.
count(PrevBB)) {
2664 "Unexpected block with un-analyzable fallthrough!");
2666 TBB = FBB =
nullptr;
2699 F->
back().updateTerminator();
2701 BlockWorkList.
clear();
2702 EHPadWorkList.
clear();
2705 void MachineBlockPlacement::optimizeBranches() {
2706 BlockChain &FunctionChain = *BlockToChain[&F->
front()];
2721 if (TBB && !Cond.
empty() && FBB &&
2733 ChainBB->updateTerminator();
2739 void MachineBlockPlacement::alignBlocks() {
2748 BlockChain &FunctionChain = *BlockToChain[&F->
front()];
2749 if (FunctionChain.begin() == FunctionChain.end())
2756 if (ChainBB == *FunctionChain.begin())
2774 if (Freq < WeightedEntryFreq)
2781 if (Freq < (LoopHeaderFreq * ColdProb))
2792 ChainBB->setAlignment(Align);
2802 BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb;
2803 if (LayoutEdgeFreq <= (Freq * ColdProb))
2804 ChainBB->setAlignment(Align);
2823 bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
2826 BlockChain &Chain, BlockFilterSet *BlockFilter,
2828 bool Removed, DuplicatedToLPred;
2829 bool DuplicatedToOriginalLPred;
2830 Removed = maybeTailDuplicateBlock(BB, LPred, Chain, BlockFilter,
2831 PrevUnplacedBlockIt,
2835 DuplicatedToOriginalLPred = DuplicatedToLPred;
2841 while (DuplicatedToLPred) {
2842 assert(Removed &&
"Block must have been removed to be duplicated into its " 2843 "layout predecessor.");
2850 BlockChain::iterator ChainEnd = Chain.end();
2851 DupBB = *(--ChainEnd);
2853 if (ChainEnd == Chain.begin())
2855 DupPred = *std::prev(ChainEnd);
2856 Removed = maybeTailDuplicateBlock(DupBB, DupPred, Chain, BlockFilter,
2857 PrevUnplacedBlockIt,
2865 LPred = *std::prev(Chain.end());
2866 if (DuplicatedToOriginalLPred)
2867 markBlockSuccessors(Chain, LPred, LoopHeaderBB, BlockFilter);
2885 bool MachineBlockPlacement::maybeTailDuplicateBlock(
2887 BlockChain &Chain, BlockFilterSet *BlockFilter,
2889 bool &DuplicatedToLPred) {
2890 DuplicatedToLPred =
false;
2891 if (!shouldTailDuplicate(BB))
2899 bool Removed =
false;
2900 auto RemovalCallback =
2906 bool InWorkList =
true;
2908 if (BlockToChain.
count(RemBB)) {
2909 BlockChain *Chain = BlockToChain[RemBB];
2910 InWorkList = Chain->UnscheduledPredecessors == 0;
2911 Chain->remove(RemBB);
2912 BlockToChain.
erase(RemBB);
2916 if (&(*PrevUnplacedBlockIt) == RemBB) {
2917 PrevUnplacedBlockIt++;
2923 if (RemBB->isEHPad())
2924 RemoveList = EHPadWorkList;
2935 BlockFilter->remove(RemBB);
2940 if (RemBB == PreferredLoopExit)
2941 PreferredLoopExit =
nullptr;
2946 auto RemovalCallbackRef =
2952 &DuplicatedPreds, &RemovalCallbackRef);
2955 DuplicatedToLPred =
false;
2958 BlockChain* PredChain = BlockToChain[Pred];
2960 DuplicatedToLPred =
true;
2961 if (Pred == LPred || (BlockFilter && !BlockFilter->count(Pred))
2962 || PredChain == &Chain)
2965 if (BlockFilter && !BlockFilter->count(NewSucc))
2967 BlockChain *NewChain = BlockToChain[NewSucc];
2968 if (NewChain != &Chain && NewChain != PredChain)
2969 NewChain->UnscheduledPredecessors++;
2975 bool MachineBlockPlacement::runOnMachineFunction(
MachineFunction &MF) {
2980 if (std::next(MF.
begin()) == MF.
end())
2984 MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
2985 MBFI = std::make_unique<BranchFolder::MBFIWrapper>(
2986 getAnalysis<MachineBlockFrequencyInfo>());
2987 MLI = &getAnalysis<MachineLoopInfo>();
2994 PreferredLoopExit =
nullptr;
2997 "BlockToChain map should be empty before starting placement.");
2999 "Computed Edge map should be empty before starting placement.");
3019 if (allowTailDupPlacement()) {
3020 MPDT = &getAnalysis<MachinePostDominatorTree>();
3023 bool PreRegAlloc =
false;
3024 TailDup.
initMF(MF, PreRegAlloc, MBPI,
true, TailDupSize);
3025 precomputeTriangleChains();
3037 if (MF.
size() > 3 && EnableTailMerge) {
3040 *MBPI, TailMergeSize);
3042 auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
3044 MMIWP ? &MMIWP->getMMI() :
nullptr, MLI,
3047 BlockToChain.
clear();
3048 ComputedEdges.
clear();
3060 BlockToChain.
clear();
3061 ComputedEdges.
clear();
3071 for (
auto MBI = std::next(MF.begin()), MBE = MF.end(); MBI != MBE; ++MBI) {
3072 auto LayoutPred = std::prev(MBI);
3073 if (!LayoutPred->isSuccessor(&*MBI))
3077 if (ViewBlockLayoutWithBFI !=
GVDT_None &&
3078 (ViewBlockFreqFuncName.empty() ||
3080 MBFI->view(
"MBP." + MF.getName(),
false);
3128 "Basic Block Placement Stats",
false,
false)
3132 "Basic Block Placement
Stats", false, false)
3134 bool MachineBlockPlacementStats::runOnMachineFunction(
MachineFunction &F) {
3136 if (std::next(F.begin()) == F.end())
3139 MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
3140 MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
3145 (MBB.
succ_size() > 1) ? NumCondBranches : NumUncondBranches;
3147 (MBB.
succ_size() > 1) ? CondBranchTakenFreq : UncondBranchTakenFreq;
bool shouldTailDuplicate(bool IsSimple, MachineBasicBlock &TailBB)
Determine if it is profitable to duplicate this block.
static cl::opt< unsigned > TailDupPlacementThreshold("tail-dup-placement-threshold", cl::desc("Instruction cutoff for tail duplication during layout. " "Tail merging during layout is forced to have a threshold " "that won't conflict."), cl::init(2), cl::Hidden)
const_iterator end(StringRef path)
Get end iterator over path.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
reference emplace_back(ArgTypes &&... Args)
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
BranchProbability getCompl() const
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
typename SuperClass::const_iterator const_iterator
This class represents lattice values for constants.
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds...
static cl::opt< unsigned > MisfetchCost("misfetch-cost", cl::desc("Cost that models the probabilistic risk of an instruction " "misfetch due to a jump comparing to falling through, whose cost " "is zero."), cl::init(1), cl::Hidden)
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
static BranchProbability getAdjustedProbability(BranchProbability OrigProb, BranchProbability AdjustedSumProb)
The helper function returns the branch probability that is adjusted or normalized over the new total ...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
void push_back(const T &Elt)
bool requiresStructuredCFG() const
virtual const TargetLowering * getTargetLowering() const
void initializeMachineBlockPlacementStatsPass(PassRegistry &)
virtual unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const
Insert branch code into the end of the specified MachineBasicBlock.
An efficient, type-erasing, non-owning reference to a callable.
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
static cl::opt< unsigned > AlignAllNonFallThruBlocks("align-all-nofallthru-blocks", cl::desc("Force the alignment of all blocks that have no fall-through " "predecessors (i.e. don't add nops that are executed). In log2 " "format (e.g 4 means align on 16B boundaries)."), cl::init(0), cl::Hidden)
STATISTIC(NumFunctions, "Total number of functions")
static BranchProbability getLayoutSuccessorProbThreshold(const MachineBasicBlock *BB)
static BranchProbability getOne()
virtual unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const
Remove the branching code at the end of the specific MBB.
char & MachineBlockPlacementStatsID
MachineBlockPlacementStats - This pass collects statistics about the basic block placement using bran...
CodeGenOpt::Level getOptLevel() const
This file defines the MallocAllocator and BumpPtrAllocator interfaces.
iterator_range< succ_iterator > successors()
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
void append(SmallVectorImpl< char > &path, const Twine &a, const Twine &b="", const Twine &c="", const Twine &d="")
Append to path.
static cl::opt< bool > TailDupPlacement("tail-dup-placement", cl::desc("Perform tail duplication during placement. " "Creates more fallthrough opportunites in " "outline branches."), cl::init(true), cl::Hidden)
bool OptimizeFunction(MachineFunction &MF, const TargetInstrInfo *tii, const TargetRegisterInfo *tri, MachineModuleInfo *mmi, MachineLoopInfo *mli=nullptr, bool AfterPlacement=false)
Perhaps branch folding, tail merging and other CFG optimizations on the given function.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void removeBlock(MachineBasicBlock *BB)
This method completely removes BB from all data structures, including all of the Loop objects it is n...
block placement Basic Block Placement Stats
Target-Independent Code Generator Pass Configuration Options.
BlockT * getHeader() const
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
static cl::opt< unsigned > AlignAllBlock("align-all-blocks", cl::desc("Force the alignment of all blocks in the function in log2 format " "(e.g 4 means align on 16B boundaries)."), cl::init(0), cl::Hidden)
bool canFallThrough()
Return true if the block can implicitly transfer control to the block after it by falling off the end...
void DestroyAll()
Call the destructor of each allocated object and deallocate all but the current slab and reset the cu...
void initMF(MachineFunction &MF, bool PreRegAlloc, const MachineBranchProbabilityInfo *MBPI, bool LayoutMode, unsigned TailDupSize=0)
Prepare to run on a specific machine function.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
virtual const TargetInstrInfo * getInstrInfo() const
cl::opt< std::string > ViewBlockFreqFuncName
auto count(R &&Range, const E &Element) -> typename std::iterator_traits< decltype(adl_begin(Range))>::difference_type
Wrapper function around std::count to count the number of times an element Element occurs in the give...
TargetInstrInfo - Interface to description of machine instruction set.
iterator find(const_arg_type_t< KeyT > Val)
static bool isSimpleBB(MachineBasicBlock *TailBB)
True if this BB has only one unconditional jump.
cl::opt< unsigned > ProfileLikelyProb
bool getEnableTailMerge() const
virtual bool alignLoopsWithOptSize() const
Should loops be aligned even when the function is marked OptSize (but not MinSize).
bool tailDuplicateAndUpdate(bool IsSimple, MachineBasicBlock *MBB, MachineBasicBlock *ForcedLayoutPred, SmallVectorImpl< MachineBasicBlock *> *DuplicatedPreds=nullptr, function_ref< void(MachineBasicBlock *)> *RemovalCallback=nullptr)
Tail duplicate a single basic block into its predecessors, and then clean up.
initializer< Ty > init(const Ty &Val)
bool erase(const KeyT &Val)
cl::opt< GVDAGType > ViewBlockLayoutWithBFI
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
succ_reverse_iterator succ_rbegin()
static cl::opt< bool > BranchFoldPlacement("branch-fold-placement", cl::desc("Perform branch folding during placement. " "Reduces code size."), cl::init(true), cl::Hidden)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
LLVM_NODISCARD bool empty() const
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool canTailDuplicate(MachineBasicBlock *TailBB, MachineBasicBlock *PredBB)
Returns true if TailBB can successfully be duplicated into PredBB.
static cl::opt< unsigned > TriangleChainCount("triangle-chain-count", cl::desc("Number of triangle-shaped-CFG's that need to be in a row for the " "triangle tail duplication heuristic to kick in. 0 to disable."), cl::init(2), cl::Hidden)
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
Represent the analysis usage information of a pass.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
static bool greaterWithBias(BlockFrequency A, BlockFrequency B, uint64_t EntryFreq)
Compare 2 BlockFrequency's with a small penalty for A.
iterator_range< pred_iterator > predecessors()
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
T * Allocate(size_t num=1)
Allocate space for an array of objects without constructing them.
auto remove_if(R &&Range, UnaryPredicate P) -> decltype(adl_begin(Range))
Provide wrappers to std::remove_if which take ranges instead of having to pass begin/end explicitly...
succ_iterator succ_begin()
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e...
iterator erase(const_iterator CI)
const MachineBasicBlock & front() const
pred_iterator pred_begin()
void initializeMachineBlockPlacementPass(PassRegistry &)
static cl::opt< bool > ForcePreciseRotationCost("force-precise-rotation-cost", cl::desc("Force the use of precise cost " "loop rotation strategy."), cl::init(false), cl::Hidden)
auto find(R &&Range, const T &Val) -> decltype(adl_begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
static cl::opt< unsigned > TailMergeSize("tail-merge-size", cl::desc("Min number of instructions to consider tail merging"), cl::init(3), cl::Hidden)
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Branch Probability Basic Block Placement
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
A SetVector that performs no allocations if smaller than a certain size.
Iterator for intrusive lists based on ilist_node.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
void setAlignment(Align A)
Set alignment of the basic block.
Align max(MaybeAlign Lhs, Align Rhs)
static cl::opt< bool > ForceLoopColdBlock("force-loop-cold-block", cl::desc("Force outlining cold blocks from loops."), cl::init(false), cl::Hidden)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
void updateTerminator()
Update the terminator instructions in block to account for changes to the layout. ...
static cl::opt< unsigned > JumpInstCost("jump-inst-cost", cl::desc("Cost of jump instructions."), cl::init(1), cl::Hidden)
static cl::opt< unsigned > TailDupPlacementAggressiveThreshold("tail-dup-placement-aggressive-threshold", cl::desc("Instruction cutoff for aggressive tail duplication during " "layout. Used at -O3. Tail merging during layout is forced to " "have a threshold that won't conflict."), cl::init(4), cl::Hidden)
INITIALIZE_PASS_BEGIN(MachineBlockPlacement, DEBUG_TYPE, "Branch Probability Basic Block Placement", false, false) INITIALIZE_PASS_END(MachineBlockPlacement
A BumpPtrAllocator that allows only elements of a specific type to be allocated.
unsigned pred_size() const
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
static cl::opt< unsigned > TailDupPlacementPenalty("tail-dup-placement-penalty", cl::desc("Cost penalty for blocks that can avoid breaking CFG by copying. " "Copying can increase fallthrough, but it also increases icache " "pressure. This parameter controls the penalty to account for that. " "Percent as integer."), cl::init(2), cl::Hidden)
void setPreservesAll()
Set by analyses that do not transform their input at all.
typename SuperClass::iterator iterator
unsigned succ_size() const
Branch Probability Basic Block static false std::string getBlockName(const MachineBasicBlock *BB)
Helper to print the name of a MBB.
cl::opt< unsigned > StaticLikelyProb
LLVM_NODISCARD bool equals(StringRef RHS) const
equals - Check for string equality, this is more efficient than compare() when the relative ordering ...
BranchProbability getEdgeProbability(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
unsigned succ_size(const Instruction *I)
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
static uint64_t getMaxFrequency()
Returns the maximum possible frequency, the saturation value.
bool isEHPad() const
Returns true if the block is a landing pad.
LLVM_NODISCARD bool empty() const
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
StringRef getName() const
Return a constant reference to the value's name.
static bool hasSameSuccessors(MachineBasicBlock &BB, SmallPtrSetImpl< const MachineBasicBlock *> &Successors)
Check if BB has exactly the successors in Successors.
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
char & MachineBlockPlacementID
MachineBlockPlacement - This pass places basic blocks based on branch probabilities.
block_iterator block_end() const
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
static cl::opt< unsigned > LoopToColdBlockRatio("loop-to-cold-block-ratio", cl::desc("Outline loop blocks from loop chain if (frequency of loop) / " "(frequency of block) is greater than this ratio"), cl::init(5), cl::Hidden)
LLVM_NODISCARD bool empty() const
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
virtual bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const
Reverses the branch condition of the specified condition list, returning false on success and true if...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void stable_sort(R &&Range)
A raw_ostream that writes to an std::string.
MachinePostDominatorTree - an analysis pass wrapper for DominatorTree used to compute the post-domina...
MachineLoop * getLoopFor(const MachineBasicBlock *BB) const
Return the innermost loop that BB lives in.
static BranchProbability getZero()
Utility class to perform tail duplication.
static cl::opt< bool > PreciseRotationCost("precise-rotation-cost", cl::desc("Model the cost of loop rotation more " "precisely by using profile data."), cl::init(false), cl::Hidden)
for(unsigned i=Desc.getNumOperands(), e=OldMI.getNumOperands();i !=e;++i)
block_iterator block_begin() const
uint32_t getNumerator() const
static cl::opt< unsigned > ExitBlockBias("block-placement-exit-block-bias", cl::desc("Block frequency percentage a loop exit block needs " "over the original exit to be considered the new exit."), cl::init(0), cl::Hidden)
This file describes how to lower LLVM code to machine code.