78#define DEBUG_TYPE "block-placement"
80STATISTIC(NumCondBranches,
"Number of conditional branches");
81STATISTIC(NumUncondBranches,
"Number of unconditional branches");
83 "Potential frequency of taking conditional branches");
85 "Potential frequency of taking unconditional branches");
89 cl::desc(
"Force the alignment of all blocks in the function in log2 format "
90 "(e.g 4 means align on 16B boundaries)."),
94 "align-all-nofallthru-blocks",
95 cl::desc(
"Force the alignment of all blocks that have no fall-through "
96 "predecessors (i.e. don't add nops that are executed). In log2 "
97 "format (e.g 4 means align on 16B boundaries)."),
101 "max-bytes-for-alignment",
102 cl::desc(
"Forces the maximum bytes allowed to be emitted when padding for "
108 "block-placement-exit-block-bias",
109 cl::desc(
"Block frequency percentage a loop exit block needs "
110 "over the original exit to be considered the new exit."),
117 "loop-to-cold-block-ratio",
118 cl::desc(
"Outline loop blocks from loop chain if (frequency of loop) / "
119 "(frequency of block) is greater than this ratio"),
123 "force-loop-cold-block",
124 cl::desc(
"Force outlining cold blocks from loops."),
129 cl::desc(
"Model the cost of loop rotation more "
130 "precisely by using profile data."),
135 cl::desc(
"Force the use of precise cost "
136 "loop rotation strategy."),
141 cl::desc(
"Cost that models the probabilistic risk of an instruction "
142 "misfetch due to a jump comparing to falling through, whose cost "
147 cl::desc(
"Cost of jump instructions."),
151 cl::desc(
"Perform tail duplication during placement. "
152 "Creates more fallthrough opportunites in "
153 "outline branches."),
158 cl::desc(
"Perform branch folding during placement. "
159 "Reduces code size."),
164 "tail-dup-placement-threshold",
165 cl::desc(
"Instruction cutoff for tail duplication during layout. "
166 "Tail merging during layout is forced to have a threshold "
167 "that won't conflict."),
cl::init(2),
172 "tail-dup-placement-aggressive-threshold",
173 cl::desc(
"Instruction cutoff for aggressive tail duplication during "
174 "layout. Used at -O3. Tail merging during layout is forced to "
175 "have a threshold that won't conflict."),
cl::init(4),
180 "tail-dup-placement-penalty",
181 cl::desc(
"Cost penalty for blocks that can avoid breaking CFG by copying. "
182 "Copying can increase fallthrough, but it also increases icache "
183 "pressure. This parameter controls the penalty to account for that. "
184 "Percent as integer."),
190 "tail-dup-profile-percent-threshold",
191 cl::desc(
"If profile count information is used in tail duplication cost "
192 "model, the gained fall through number from tail duplication "
193 "should be at least this percent of hot count."),
198 "triangle-chain-count",
199 cl::desc(
"Number of triangle-shaped-CFG's that need to be in a row for the "
200 "triangle tail duplication heuristic to kick in. 0 to disable."),
210 "renumber-blocks-before-view",
212 "If true, basic blocks are re-numbered before MBP layout is printed "
213 "into a dot graph. Only used when a function is being printed."),
263 BlockToChainMapType &BlockToChain;
272 :
Blocks(1, BB), BlockToChain(BlockToChain) {
273 assert(BB &&
"Cannot create a chain with a null basic block");
274 BlockToChain[BB] =
this;
282 iterator begin() {
return Blocks.begin(); }
286 iterator end() {
return Blocks.end(); }
290 for(iterator i = begin(); i != end(); ++i) {
306 assert(BB &&
"Can't merge a null block.");
307 assert(!
Blocks.empty() &&
"Can't merge into an empty chain.");
311 assert(!BlockToChain[BB] &&
312 "Passed chain is null, but BB has entry in BlockToChain.");
314 BlockToChain[BB] =
this;
318 assert(BB == *Chain->
begin() &&
"Passed BB is not head of Chain.");
319 assert(Chain->begin() != Chain->end());
324 Blocks.push_back(ChainBB);
325 assert(BlockToChain[ChainBB] == Chain &&
"Incoming blocks not in chain.");
326 BlockToChain[ChainBB] =
this;
347 unsigned UnscheduledPredecessors = 0;
355 struct BlockAndTailDupResult {
361 struct WeightedEdge {
381 std::unique_ptr<MBFIWrapper> MBFI;
414 bool UseProfileCount =
false;
444 if (UseProfileCount) {
445 auto Count = MBFI->getBlockProfileCount(BB);
451 return MBFI->getBlockFreq(BB);
456 void initDupThreshold();
460 void markChainSuccessors(
462 const BlockFilterSet *BlockFilter =
nullptr);
466 void markBlockSuccessors(
469 const BlockFilterSet *BlockFilter =
nullptr);
472 collectViableSuccessors(
474 const BlockFilterSet *BlockFilter,
477 BlockFilterSet *BlockFilter);
480 BlockFilterSet *BlockFilter);
481 bool repeatedlyTailDuplicateBlock(
484 BlockFilterSet *BlockFilter,
489 BlockChain &Chain, BlockFilterSet *BlockFilter,
492 bool &DuplicatedToLPred);
493 bool hasBetterLayoutPredecessor(
497 const BlockFilterSet *BlockFilter);
498 BlockAndTailDupResult selectBestSuccessor(
500 const BlockFilterSet *BlockFilter);
504 getFirstUnplacedBlock(
const BlockChain &PlacedChain,
507 getFirstUnplacedBlock(
const BlockChain &PlacedChain,
509 const BlockFilterSet *BlockFilter);
518 const BlockFilterSet *BlockFilter);
521 BlockFilterSet *BlockFilter =
nullptr);
525 const BlockFilterSet &LoopBlockSet);
527 const BlockFilterSet &LoopBlockSet);
531 const BlockFilterSet &LoopBlockSet);
533 const MachineLoop &L,
const BlockFilterSet &LoopBlockSet);
535 const MachineLoop &L,
const BlockFilterSet &LoopBlockSet);
537 const MachineLoop &L,
const BlockFilterSet &LoopBlockSet,
539 BlockFilterSet collectLoopBlockSet(
const MachineLoop &L);
544 void rotateLoopWithProfile(
546 const BlockFilterSet &LoopBlockSet);
547 void buildCFGChains();
548 void optimizeBranches();
555 bool isProfitableToTailDup(
558 const BlockChain &Chain,
const BlockFilterSet *BlockFilter);
563 const BlockChain &Chain,
const BlockFilterSet *BlockFilter);
566 BlockAndTailDupResult getBestTrellisSuccessor(
570 const BlockFilterSet *BlockFilter);
573 static std::pair<WeightedEdge, WeightedEdge> getBestNonConflictingEdges(
579 bool canTailDuplicateUnplacedPreds(
581 const BlockChain &Chain,
const BlockFilterSet *BlockFilter);
585 void precomputeTriangleChains();
591 void assignBlockOrder(
const std::vector<const MachineBasicBlock *> &NewOrder);
594 void createCFGChainExtTsp();
605 bool allowTailDupPlacement()
const {
624char MachineBlockPlacement::ID = 0;
629 "Branch Probability Basic Block Placement",
false,
false)
658void MachineBlockPlacement::markChainSuccessors(
660 const BlockFilterSet *BlockFilter) {
664 markBlockSuccessors(Chain,
MBB, LoopHeaderBB, BlockFilter);
674void MachineBlockPlacement::markBlockSuccessors(
682 if (BlockFilter && !BlockFilter->count(Succ))
684 BlockChain &SuccChain = *BlockToChain[Succ];
686 if (&Chain == &SuccChain || Succ == LoopHeaderBB)
691 if (SuccChain.UnscheduledPredecessors == 0 ||
692 --SuccChain.UnscheduledPredecessors > 0)
695 auto *NewBB = *SuccChain.
begin();
696 if (NewBB->isEHPad())
709 const BlockFilterSet *BlockFilter,
729 bool SkipSucc =
false;
730 if (Succ->isEHPad() || (BlockFilter && !BlockFilter->count(Succ))) {
733 BlockChain *SuccChain = BlockToChain[Succ];
734 if (SuccChain == &Chain) {
736 }
else if (Succ != *SuccChain->begin()) {
738 <<
" -> Mid chain!\n");
748 return AdjustedSumProb;
759 if (SuccProbN >= SuccProbD)
774 if (Successors.
count(&BB))
777 if (!Successors.
count(Succ))
806 return (Gain / ThresholdProb) >= EntryFreq;
814bool MachineBlockPlacement::isProfitableToTailDup(
817 const BlockChain &Chain,
const BlockFilterSet *BlockFilter) {
844 auto AdjustedSuccSumProb =
845 collectViableSuccessors(Succ, Chain, BlockFilter, SuccSuccs);
847 auto BBFreq = MBFI->getBlockFreq(BB);
848 auto SuccFreq = MBFI->getBlockFreq(Succ);
854 if (SuccSuccs.
size() == 0)
861 if (Prob > BestSuccSucc)
873 if (SuccPred == Succ || SuccPred == BB
874 || BlockToChain[SuccPred] == &Chain
875 || (BlockFilter && !BlockFilter->count(SuccPred)))
877 auto Freq = MBFI->getBlockFreq(SuccPred)
879 if (Freq > SuccBestPred)
947 if (UProb > AdjustedSuccSumProb / 2 &&
948 !hasBetterLayoutPredecessor(Succ, PDom, *BlockToChain[PDom], UProb, UProb,
952 (
P + V), (Qout + std::max(Qin,
F) * VProb + std::min(Qin,
F) * UProb),
956 (Qout + std::min(Qin,
F) * AdjustedSuccSumProb +
957 std::max(Qin,
F) * UProb),
968bool MachineBlockPlacement::isTrellis(
971 const BlockChain &Chain,
const BlockFilterSet *BlockFilter) {
986 if (Successors.count(SuccPred)) {
989 if (!Successors.count(CheckSucc))
993 const BlockChain *PredChain = BlockToChain[SuccPred];
994 if (SuccPred == BB || (BlockFilter && !BlockFilter->count(SuccPred)) ||
995 PredChain == &Chain || PredChain == BlockToChain[Succ])
999 if (!SeenPreds.
insert(SuccPred).second)
1017std::pair<MachineBlockPlacement::WeightedEdge,
1018 MachineBlockPlacement::WeightedEdge>
1019MachineBlockPlacement::getBestNonConflictingEdges(
1030 auto Cmp = [](WeightedEdge
A, WeightedEdge
B) {
return A.Weight >
B.Weight; };
1034 auto BestA = Edges[0].begin();
1035 auto BestB = Edges[1].begin();
1038 if (BestA->Src == BestB->Src) {
1040 auto SecondBestA = std::next(BestA);
1041 auto SecondBestB = std::next(BestB);
1044 if (BestAScore < BestBScore)
1045 BestA = SecondBestA;
1047 BestB = SecondBestB;
1050 if (BestB->Src == BB)
1052 return std::make_pair(*BestA, *BestB);
1062MachineBlockPlacement::BlockAndTailDupResult
1063MachineBlockPlacement::getBestTrellisSuccessor(
1067 const BlockFilterSet *BlockFilter) {
1069 BlockAndTailDupResult
Result = {
nullptr,
false};
1076 if (Successors.
size() != 2 || ViableSuccs.
size() != 2)
1082 for (
auto *Succ : ViableSuccs) {
1086 if ((BlockFilter && !BlockFilter->count(SuccPred)) ||
1087 BlockToChain[SuccPred] == &Chain ||
1088 BlockToChain[SuccPred] == BlockToChain[Succ])
1092 Edges[SuccIndex].
push_back({EdgeFreq, SuccPred, Succ});
1098 WeightedEdge BestA, BestB;
1099 std::tie(BestA, BestB) = getBestNonConflictingEdges(BB, Edges);
1101 if (BestA.Src != BB) {
1105 LLVM_DEBUG(
dbgs() <<
"Trellis, but not one of the chosen edges.\n");
1112 if (BestA.Dest == BestB.Src) {
1118 if (allowTailDupPlacement() && shouldTailDuplicate(Succ2) &&
1119 canTailDuplicateUnplacedPreds(BB, Succ2, Chain, BlockFilter) &&
1121 Chain, BlockFilter)) {
1125 <<
", probability: " << Succ2Prob
1126 <<
" (Tail Duplicate)\n");
1128 Result.ShouldTailDup =
true;
1134 ComputedEdges[BestB.Src] = { BestB.Dest,
false };
1136 auto TrellisSucc = BestA.Dest;
1140 <<
", probability: " << SuccProb <<
" (Trellis)\n");
1148bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
1150 const BlockChain &Chain,
const BlockFilterSet *BlockFilter) {
1151 if (!shouldTailDuplicate(Succ))
1155 bool Duplicate =
true;
1157 unsigned int NumDup = 0;
1166 if (Pred == BB || (BlockFilter && !BlockFilter->count(Pred))
1167 || (BlockToChain[Pred] == &Chain && !Succ->
succ_empty()))
1217 if (
F->getFunction().hasProfileData())
1248 if ((NumDup > Succ->
succ_size()) || !Duplicate)
1271void MachineBlockPlacement::precomputeTriangleChains() {
1272 struct TriangleChain {
1273 std::vector<MachineBasicBlock *> Edges;
1276 : Edges({src, dst}) {}
1279 assert(getKey()->isSuccessor(dst) &&
1280 "Attempting to append a block that is not a successor.");
1281 Edges.push_back(dst);
1284 unsigned count()
const {
return Edges.size() - 1; }
1287 return Edges.
back();
1311 if (PDom ==
nullptr)
1318 if (!shouldTailDuplicate(PDom))
1320 bool CanTailDuplicate =
true;
1327 CanTailDuplicate =
false;
1333 if (!CanTailDuplicate)
1340 auto Found = TriangleChainMap.
find(&BB);
1343 if (Found != TriangleChainMap.
end()) {
1344 TriangleChain Chain = std::move(Found->second);
1345 TriangleChainMap.
erase(Found);
1347 TriangleChainMap.
insert(std::make_pair(Chain.getKey(), std::move(Chain)));
1349 auto InsertResult = TriangleChainMap.
try_emplace(PDom, &BB, PDom);
1350 assert(InsertResult.second &&
"Block seen twice.");
1358 for (
auto &ChainPair : TriangleChainMap) {
1359 TriangleChain &Chain = ChainPair.second;
1366 Chain.Edges.pop_back();
1370 <<
" as pre-computed based on triangles.\n");
1372 auto InsertResult = ComputedEdges.
insert({src, {dst,
true}});
1373 assert(InsertResult.second &&
"Block seen twice.");
1415bool MachineBlockPlacement::hasBetterLayoutPredecessor(
1419 const BlockFilterSet *BlockFilter) {
1422 if (SuccChain.UnscheduledPredecessors == 0)
1542 BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb;
1543 bool BadCFGConflict =
false;
1546 BlockChain *PredChain = BlockToChain[Pred];
1547 if (Pred == Succ || PredChain == &SuccChain ||
1548 (BlockFilter && !BlockFilter->count(Pred)) ||
1549 PredChain == &Chain || Pred != *std::prev(PredChain->end()) ||
1570 if (PredEdgeFreq * HotProb >= CandidateEdgeFreq * HotProb.
getCompl()) {
1571 BadCFGConflict =
true;
1576 if (BadCFGConflict) {
1578 << SuccProb <<
" (prob) (non-cold CFG conflict)\n");
1595MachineBlockPlacement::BlockAndTailDupResult
1596MachineBlockPlacement::selectBestSuccessor(
1598 const BlockFilterSet *BlockFilter) {
1601 BlockAndTailDupResult BestSucc = {
nullptr,
false };
1605 auto AdjustedSumProb =
1606 collectViableSuccessors(BB, Chain, BlockFilter, Successors);
1613 auto FoundEdge = ComputedEdges.
find(BB);
1614 if (FoundEdge != ComputedEdges.
end()) {
1616 ComputedEdges.
erase(FoundEdge);
1617 BlockChain *SuccChain = BlockToChain[Succ];
1618 if (BB->
isSuccessor(Succ) && (!BlockFilter || BlockFilter->count(Succ)) &&
1619 SuccChain != &Chain && Succ == *SuccChain->begin())
1620 return FoundEdge->second;
1625 if (isTrellis(BB, Successors, Chain, BlockFilter))
1626 return getBestTrellisSuccessor(BB, Successors, AdjustedSumProb, Chain,
1639 BlockChain &SuccChain = *BlockToChain[Succ];
1642 if (hasBetterLayoutPredecessor(BB, Succ, SuccChain, SuccProb, RealSuccProb,
1643 Chain, BlockFilter)) {
1645 if (allowTailDupPlacement() && shouldTailDuplicate(Succ))
1652 <<
", probability: " << SuccProb
1653 << (SuccChain.UnscheduledPredecessors != 0 ?
" (CFG break)" :
"")
1656 if (BestSucc.BB && BestProb >= SuccProb) {
1663 BestProb = SuccProb;
1671 [](std::tuple<BranchProbability, MachineBasicBlock *> L,
1672 std::tuple<BranchProbability, MachineBasicBlock *> R) {
1673 return std::get<0>(L) > std::get<0>(R);
1675 for (
auto &Tup : DupCandidates) {
1678 std::tie(DupProb, Succ) = Tup;
1679 if (DupProb < BestProb)
1681 if (canTailDuplicateUnplacedPreds(BB, Succ, Chain, BlockFilter)
1682 && (isProfitableToTailDup(BB, Succ, BestProb, Chain, BlockFilter))) {
1684 <<
", probability: " << DupProb
1685 <<
" (Tail Duplicate)\n");
1687 BestSucc.ShouldTailDup =
true;
1715 return BlockToChain.
lookup(BB) == &Chain;
1718 if (WorkList.
empty())
1721 bool IsEHPad = WorkList[0]->isEHPad();
1727 "EHPad mismatch between block and work list.");
1729 BlockChain &SuccChain = *BlockToChain[
MBB];
1730 if (&SuccChain == &Chain)
1733 assert(SuccChain.UnscheduledPredecessors == 0 &&
1734 "Found CFG-violating block");
1759 if (BestBlock && (IsEHPad ^ (BestFreq >= CandidateFreq)))
1763 BestFreq = CandidateFreq;
1777 const BlockChain &PlacedChain,
1782 if (BlockToChain[&*
I] != &PlacedChain) {
1783 PrevUnplacedBlockIt =
I;
1787 return *BlockToChain[&*
I]->
begin();
1804 const BlockChain &PlacedChain,
1805 BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt,
1806 const BlockFilterSet *BlockFilter) {
1808 for (; PrevUnplacedBlockInFilterIt != BlockFilter->end();
1809 ++PrevUnplacedBlockInFilterIt) {
1810 BlockChain *
C = BlockToChain[*PrevUnplacedBlockInFilterIt];
1811 if (
C != &PlacedChain) {
1818void MachineBlockPlacement::fillWorkLists(
1821 const BlockFilterSet *BlockFilter =
nullptr) {
1822 BlockChain &Chain = *BlockToChain[
MBB];
1823 if (!UpdatedPreds.
insert(&Chain).second)
1827 Chain.UnscheduledPredecessors == 0 &&
1828 "Attempting to place block with unscheduled predecessors in worklist.");
1830 assert(BlockToChain[ChainBB] == &Chain &&
1831 "Block in chain doesn't match BlockToChain map.");
1833 if (BlockFilter && !BlockFilter->count(Pred))
1835 if (BlockToChain[Pred] == &Chain)
1837 ++Chain.UnscheduledPredecessors;
1841 if (Chain.UnscheduledPredecessors != 0)
1851void MachineBlockPlacement::buildChain(
1853 BlockFilterSet *BlockFilter) {
1854 assert(HeadBB &&
"BB must not be null.\n");
1855 assert(BlockToChain[HeadBB] == &Chain &&
"BlockToChainMap mis-match.\n");
1857 BlockFilterSet::iterator PrevUnplacedBlockInFilterIt;
1859 PrevUnplacedBlockInFilterIt = BlockFilter->begin();
1862 markChainSuccessors(Chain, LoopHeaderBB, BlockFilter);
1865 assert(BB &&
"null block found at end of chain in loop.");
1866 assert(BlockToChain[BB] == &Chain &&
"BlockToChainMap mis-match in loop.");
1867 assert(*std::prev(Chain.end()) == BB &&
"BB Not found at end of chain.");
1872 auto Result = selectBestSuccessor(BB, Chain, BlockFilter);
1874 bool ShouldTailDup =
Result.ShouldTailDup;
1875 if (allowTailDupPlacement())
1876 ShouldTailDup |= (BestSucc && canTailDuplicateUnplacedPreds(BB, BestSucc,
1884 BestSucc = selectBestCandidateBlock(Chain, BlockWorkList);
1886 BestSucc = selectBestCandidateBlock(Chain, EHPadWorkList);
1890 BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockInFilterIt,
1893 BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockIt);
1897 LLVM_DEBUG(
dbgs() <<
"Unnatural loop CFG detected, forcibly merging the "
1898 "layout successor until the CFG reduces\n");
1903 if (allowTailDupPlacement() && BestSucc && ShouldTailDup) {
1904 repeatedlyTailDuplicateBlock(BestSucc, BB, LoopHeaderBB, Chain,
1905 BlockFilter, PrevUnplacedBlockIt,
1906 PrevUnplacedBlockInFilterIt);
1914 BlockChain &SuccChain = *BlockToChain[BestSucc];
1917 SuccChain.UnscheduledPredecessors = 0;
1920 markChainSuccessors(SuccChain, LoopHeaderBB, BlockFilter);
1921 Chain.merge(BestSucc, &SuccChain);
1922 BB = *std::prev(Chain.end());
1944MachineBlockPlacement::canMoveBottomBlockToTop(
1954 if (OtherBB == BottomBlock)
1956 if (OtherBB == OldTop)
1964MachineBlockPlacement::TopFallThroughFreq(
1966 const BlockFilterSet &LoopBlockSet) {
1969 BlockChain *PredChain = BlockToChain[Pred];
1970 if (!LoopBlockSet.count(Pred) &&
1971 (!PredChain || Pred == *std::prev(PredChain->end()))) {
1978 BlockChain *SuccChain = BlockToChain[Succ];
1981 if (!LoopBlockSet.count(Succ) && (SuccProb > TopProb) &&
1982 (!SuccChain || Succ == *SuccChain->begin())) {
1990 if (EdgeFreq > MaxFreq)
2020MachineBlockPlacement::FallThroughGains(
2024 const BlockFilterSet &LoopBlockSet) {
2025 BlockFrequency FallThrough2Top = TopFallThroughFreq(OldTop, LoopBlockSet);
2028 FallThrough2Exit = MBFI->getBlockFreq(NewTop) *
2037 if (!LoopBlockSet.count(Pred))
2039 BlockChain *PredChain = BlockToChain[Pred];
2040 if (!PredChain || Pred == *std::prev(PredChain->end())) {
2043 if (EdgeFreq > FallThroughFromPred) {
2044 FallThroughFromPred = EdgeFreq;
2055 if ((Succ == NewTop) || (Succ == BestPred) || !LoopBlockSet.count(Succ))
2059 BlockChain *SuccChain = BlockToChain[Succ];
2060 if ((SuccChain && (Succ != *SuccChain->begin())) ||
2061 (SuccChain == BlockToChain[BestPred]))
2065 if (EdgeFreq > NewFreq)
2070 if (NewFreq > OrigEdgeFreq) {
2082 FallThrough2Top + FallThrough2Exit + FallThroughFromPred;
2111MachineBlockPlacement::findBestLoopTopHelper(
2114 const BlockFilterSet &LoopBlockSet) {
2118 BlockChain &HeaderChain = *BlockToChain[OldTop];
2119 if (!LoopBlockSet.count(*HeaderChain.begin()))
2121 if (OldTop != *HeaderChain.begin())
2130 if (!LoopBlockSet.count(Pred))
2132 if (Pred ==
L.getHeader())
2143 if (OtherBB == OldTop)
2147 if (!canMoveBottomBlockToTop(Pred, OldTop))
2153 (Gains > BestGains ||
2168 (*BestPred->
pred_begin())->succ_size() == 1 &&
2181MachineBlockPlacement::findBestLoopTop(
const MachineLoop &L,
2182 const BlockFilterSet &LoopBlockSet) {
2190 bool OptForSize =
F->getFunction().hasOptSize() ||
2193 return L.getHeader();
2197 while (NewTop != OldTop) {
2199 NewTop = findBestLoopTopHelper(OldTop, L, LoopBlockSet);
2200 if (NewTop != OldTop)
2201 ComputedEdges[NewTop] = { OldTop,
false };
2212MachineBlockPlacement::findBestLoopExit(
const MachineLoop &L,
2213 const BlockFilterSet &LoopBlockSet,
2223 BlockChain &HeaderChain = *BlockToChain[
L.getHeader()];
2224 if (!LoopBlockSet.count(*HeaderChain.begin()))
2228 unsigned BestExitLoopDepth = 0;
2238 BlockChain &Chain = *BlockToChain[
MBB];
2241 if (
MBB != *std::prev(Chain.end()))
2250 bool HasLoopingSucc =
false;
2256 BlockChain &SuccChain = *BlockToChain[Succ];
2258 if (&Chain == &SuccChain) {
2265 if (LoopBlockSet.count(Succ)) {
2268 HasLoopingSucc =
true;
2272 unsigned SuccLoopDepth = 0;
2274 SuccLoopDepth = ExitLoop->getLoopDepth();
2275 if (ExitLoop->contains(&L))
2282 <<
getBlockName(Succ) <<
" [L:" << SuccLoopDepth <<
"] ("
2289 if (!ExitingBB || SuccLoopDepth > BestExitLoopDepth ||
2290 ExitEdgeFreq > BestExitEdgeFreq ||
2292 !(ExitEdgeFreq < BestExitEdgeFreq * Bias))) {
2293 BestExitEdgeFreq = ExitEdgeFreq;
2298 if (!HasLoopingSucc) {
2300 ExitingBB = OldExitingBB;
2301 BestExitEdgeFreq = OldBestExitEdgeFreq;
2308 dbgs() <<
" No other candidate exit blocks, using loop header\n");
2311 if (
L.getNumBlocks() == 1) {
2312 LLVM_DEBUG(
dbgs() <<
" Loop has 1 block, using loop header as exit\n");
2319 if (!BlocksExitingToOuterLoop.
empty() &&
2320 !BlocksExitingToOuterLoop.
count(ExitingBB))
2325 ExitFreq = BestExitEdgeFreq;
2334MachineBlockPlacement::hasViableTopFallthrough(
2336 const BlockFilterSet &LoopBlockSet) {
2338 BlockChain *PredChain = BlockToChain[Pred];
2339 if (!LoopBlockSet.count(Pred) &&
2340 (!PredChain || Pred == *std::prev(PredChain->end()))) {
2347 BlockChain *SuccChain = BlockToChain[Succ];
2350 if ((!SuccChain || Succ == *SuccChain->begin()) && SuccProb > TopProb) {
2368void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
2371 const BlockFilterSet &LoopBlockSet) {
2379 if (Bottom == ExitingBB)
2386 bool ViableTopFallthrough = hasViableTopFallthrough(Top, LoopBlockSet);
2391 if (ViableTopFallthrough) {
2393 BlockChain *SuccChain = BlockToChain[Succ];
2394 if (!LoopBlockSet.count(Succ) &&
2395 (!SuccChain || Succ == *SuccChain->begin()))
2401 BlockFrequency FallThrough2Top = TopFallThroughFreq(Top, LoopBlockSet);
2402 if (FallThrough2Top >= ExitFreq)
2406 BlockChain::iterator ExitIt =
llvm::find(LoopChain, ExitingBB);
2407 if (ExitIt == LoopChain.end())
2429 if (ViableTopFallthrough) {
2430 assert(std::next(ExitIt) != LoopChain.end() &&
2431 "Exit should not be last BB");
2440 std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end());
2456void MachineBlockPlacement::rotateLoopWithProfile(
2458 const BlockFilterSet &LoopBlockSet) {
2459 auto RotationPos = LoopChain.end();
2484 BlockChain *PredChain = BlockToChain[Pred];
2485 if (!LoopBlockSet.count(Pred) &&
2486 (!PredChain || Pred == *std::prev(PredChain->end()))) {
2487 auto EdgeFreq = MBFI->getBlockFreq(Pred) *
2489 auto FallThruCost = ScaleBlockFrequency(EdgeFreq,
MisfetchCost);
2493 FallThruCost += ScaleBlockFrequency(EdgeFreq,
JumpInstCost);
2494 HeaderFallThroughCost = std::max(HeaderFallThroughCost, FallThruCost);
2503 for (
auto *BB : LoopChain) {
2506 BlockChain *SuccChain = BlockToChain[Succ];
2507 if (!LoopBlockSet.count(Succ) &&
2508 (!SuccChain || Succ == *SuccChain->begin())) {
2510 LargestExitEdgeProb = std::max(LargestExitEdgeProb, SuccProb);
2514 auto ExitFreq = MBFI->getBlockFreq(BB) * LargestExitEdgeProb;
2522 for (
auto Iter = LoopChain.begin(), TailIter = std::prev(LoopChain.end()),
2523 EndIter = LoopChain.end();
2524 Iter != EndIter; Iter++, TailIter++) {
2527 if (TailIter == LoopChain.end())
2528 TailIter = LoopChain.begin();
2530 auto TailBB = *TailIter;
2538 if (Iter != LoopChain.begin())
2539 Cost += HeaderFallThroughCost;
2543 for (
auto &ExitWithFreq : ExitsWithFreq)
2544 if (TailBB != ExitWithFreq.first)
2545 Cost += ExitWithFreq.second;
2561 if (TailBB->isSuccessor(*Iter)) {
2562 auto TailBBFreq = MBFI->getBlockFreq(TailBB);
2563 if (TailBB->succ_size() == 1)
2565 else if (TailBB->succ_size() == 2) {
2567 auto TailToHeadFreq = TailBBFreq * TailToHeadProb;
2569 ? TailBBFreq * TailToHeadProb.
getCompl()
2580 if (
Cost < SmallestRotationCost) {
2581 SmallestRotationCost =
Cost;
2586 if (RotationPos != LoopChain.end()) {
2588 <<
" to the top\n");
2589 std::rotate(LoopChain.begin(), RotationPos, LoopChain.end());
2598MachineBlockPlacement::collectLoopBlockSet(
const MachineLoop &L) {
2599 BlockFilterSet LoopBlockSet;
2612 for (
auto *LoopPred :
L.getHeader()->predecessors())
2613 if (!
L.contains(LoopPred))
2614 LoopFreq += MBFI->getBlockFreq(LoopPred) *
2618 if (LoopBlockSet.count(LoopBB))
2623 BlockChain *Chain = BlockToChain[LoopBB];
2625 LoopBlockSet.
insert(ChainBB);
2628 LoopBlockSet.insert(
L.block_begin(),
L.block_end());
2630 return LoopBlockSet;
2639void MachineBlockPlacement::buildLoopChains(
const MachineLoop &L) {
2643 buildLoopChains(*InnerLoop);
2646 "BlockWorkList not empty when starting to build loop chains.");
2648 "EHPadWorkList not empty when starting to build loop chains.");
2649 BlockFilterSet LoopBlockSet = collectLoopBlockSet(L);
2654 bool RotateLoopWithProfile =
2670 PreferredLoopExit =
nullptr;
2672 if (!RotateLoopWithProfile && LoopTop ==
L.getHeader())
2673 PreferredLoopExit = findBestLoopExit(L, LoopBlockSet, ExitFreq);
2675 BlockChain &LoopChain = *BlockToChain[LoopTop];
2681 assert(LoopChain.UnscheduledPredecessors == 0 &&
2682 "LoopChain should not have unscheduled predecessors.");
2683 UpdatedPreds.
insert(&LoopChain);
2686 fillWorkLists(LoopBB, UpdatedPreds, &LoopBlockSet);
2688 buildChain(LoopTop, LoopChain, &LoopBlockSet);
2690 if (RotateLoopWithProfile)
2691 rotateLoopWithProfile(LoopChain, L, LoopBlockSet);
2693 rotateLoop(LoopChain, PreferredLoopExit, ExitFreq, LoopBlockSet);
2697 bool BadLoop =
false;
2698 if (LoopChain.UnscheduledPredecessors) {
2700 dbgs() <<
"Loop chain contains a block without its preds placed!\n"
2701 <<
" Loop header: " << getBlockName(*L.block_begin()) <<
"\n"
2702 <<
" Chain header: " << getBlockName(*LoopChain.begin()) <<
"\n";
2706 if (!LoopBlockSet.remove(ChainBB)) {
2710 dbgs() <<
"Loop chain contains a block not contained by the loop!\n"
2711 <<
" Loop header: " <<
getBlockName(*
L.block_begin()) <<
"\n"
2712 <<
" Chain header: " <<
getBlockName(*LoopChain.begin()) <<
"\n"
2717 if (!LoopBlockSet.empty()) {
2720 dbgs() <<
"Loop contains blocks never placed into a chain!\n"
2721 <<
" Loop header: " <<
getBlockName(*
L.block_begin()) <<
"\n"
2722 <<
" Chain header: " <<
getBlockName(*LoopChain.begin()) <<
"\n"
2725 assert(!BadLoop &&
"Detected problems with the placement of this loop.");
2728 BlockWorkList.
clear();
2729 EHPadWorkList.
clear();
2732void MachineBlockPlacement::buildCFGChains() {
2740 new (ChainAllocator.
Allocate()) BlockChain(BlockToChain, BB);
2753 assert(NextFI != FE &&
"Can't fallthrough past the last block.");
2754 LLVM_DEBUG(
dbgs() <<
"Pre-merging due to unanalyzable fallthrough: "
2757 Chain->merge(NextBB,
nullptr);
2759 BlocksWithUnanalyzableExits.
insert(&*BB);
2767 PreferredLoopExit =
nullptr;
2769 buildLoopChains(*L);
2772 "BlockWorkList should be empty before building final chain.");
2774 "EHPadWorkList should be empty before building final chain.");
2778 fillWorkLists(&
MBB, UpdatedPreds);
2780 BlockChain &FunctionChain = *BlockToChain[&
F->front()];
2781 buildChain(&
F->front(), FunctionChain);
2788 bool BadFunc =
false;
2789 FunctionBlockSetType FunctionBlockSet;
2791 FunctionBlockSet.insert(&
MBB);
2794 if (!FunctionBlockSet.erase(ChainBB)) {
2796 dbgs() <<
"Function chain contains a block not in the function!\n"
2797 <<
" Bad block: " << getBlockName(ChainBB) <<
"\n";
2800 if (!FunctionBlockSet.empty()) {
2802 for (MachineBasicBlock *RemainingBB : FunctionBlockSet)
2803 dbgs() <<
"Function contains blocks never placed into a chain!\n"
2804 <<
" Bad block: " << getBlockName(RemainingBB) <<
"\n";
2806 assert(!BadFunc &&
"Detected problems with the block placement.");
2812 F->getNumBlockIDs());
2815 for (
auto &
MBB : *
F) {
2816 if (LastMBB !=
nullptr)
2820 OriginalLayoutSuccessors[
F->back().getNumber()] =
nullptr;
2827 LLVM_DEBUG(
dbgs() << (ChainBB == *FunctionChain.begin() ?
"Placing chain "
2831 F->splice(InsertPos, ChainBB);
2836 if (ChainBB == *FunctionChain.begin())
2847 if (!BlocksWithUnanalyzableExits.
count(PrevBB)) {
2853 "Unexpected block with un-analyzable fallthrough!");
2855 TBB = FBB =
nullptr;
2893 BlockWorkList.
clear();
2894 EHPadWorkList.
clear();
2897void MachineBlockPlacement::optimizeBranches() {
2898 BlockChain &FunctionChain = *BlockToChain[&
F->front()];
2913 if (
TBB && !
Cond.empty() && FBB &&
2930void MachineBlockPlacement::alignBlocks() {
2936 if (
F->getFunction().hasMinSize() ||
2939 BlockChain &FunctionChain = *BlockToChain[&
F->front()];
2940 if (FunctionChain.begin() == FunctionChain.end())
2947 if (ChainBB == *FunctionChain.begin())
2959 unsigned MDAlign = 1;
2960 MDNode *LoopID =
L->getLoopID();
2963 MDNode *MD = dyn_cast<MDNode>(MDO);
2969 if (S->
getString() ==
"llvm.loop.align") {
2971 "per-loop align metadata should have two operands.");
2973 mdconst::extract<ConstantInt>(MD->
getOperand(1))->getZExtValue();
2974 assert(MDAlign >= 1 &&
"per-loop align value must be positive.");
2980 const Align LoopAlign = std::max(TLIAlign,
Align(MDAlign));
2987 if (Freq < WeightedEntryFreq)
2994 if (Freq < (LoopHeaderFreq * ColdProb))
3007 auto DetermineMaxAlignmentPadding = [&]() {
3014 ChainBB->setMaxBytesForAlignment(MaxBytes);
3020 ChainBB->setAlignment(LoopAlign);
3021 DetermineMaxAlignmentPadding();
3031 BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb;
3032 if (LayoutEdgeFreq <= (Freq * ColdProb)) {
3033 ChainBB->setAlignment(LoopAlign);
3034 DetermineMaxAlignmentPadding();
3054bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
3058 BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt) {
3059 bool Removed, DuplicatedToLPred;
3060 bool DuplicatedToOriginalLPred;
3061 Removed = maybeTailDuplicateBlock(
3062 BB, LPred, Chain, BlockFilter, PrevUnplacedBlockIt,
3063 PrevUnplacedBlockInFilterIt, DuplicatedToLPred);
3066 DuplicatedToOriginalLPred = DuplicatedToLPred;
3071 while (DuplicatedToLPred && Removed) {
3078 BlockChain::iterator ChainEnd = Chain.
end();
3079 DupBB = *(--ChainEnd);
3081 if (ChainEnd == Chain.begin())
3083 DupPred = *std::prev(ChainEnd);
3084 Removed = maybeTailDuplicateBlock(
3085 DupBB, DupPred, Chain, BlockFilter, PrevUnplacedBlockIt,
3086 PrevUnplacedBlockInFilterIt, DuplicatedToLPred);
3093 LPred = *std::prev(Chain.end());
3094 if (DuplicatedToOriginalLPred)
3095 markBlockSuccessors(Chain, LPred, LoopHeaderBB, BlockFilter);
3112bool MachineBlockPlacement::maybeTailDuplicateBlock(
3115 BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt,
3116 bool &DuplicatedToLPred) {
3117 DuplicatedToLPred =
false;
3118 if (!shouldTailDuplicate(BB))
3126 bool Removed =
false;
3127 auto RemovalCallback =
3133 bool InWorkList =
true;
3135 if (BlockToChain.
count(RemBB)) {
3136 BlockChain *Chain = BlockToChain[RemBB];
3137 InWorkList = Chain->UnscheduledPredecessors == 0;
3138 Chain->remove(RemBB);
3139 BlockToChain.
erase(RemBB);
3143 if (&(*PrevUnplacedBlockIt) == RemBB) {
3144 PrevUnplacedBlockIt++;
3150 if (RemBB->isEHPad())
3151 RemoveList = EHPadWorkList;
3160 if (It != BlockFilter->end()) {
3161 if (It < PrevUnplacedBlockInFilterIt) {
3165 auto Distance = PrevUnplacedBlockInFilterIt - It - 1;
3166 PrevUnplacedBlockInFilterIt = BlockFilter->
erase(It) + Distance;
3167 assert(*PrevUnplacedBlockInFilterIt == PrevBB);
3169 }
else if (It == PrevUnplacedBlockInFilterIt)
3172 PrevUnplacedBlockInFilterIt = BlockFilter->erase(It);
3174 BlockFilter->erase(It);
3179 MLI->removeBlock(RemBB);
3180 if (RemBB == PreferredLoopExit)
3181 PreferredLoopExit =
nullptr;
3186 auto RemovalCallbackRef =
3193 if (
F->getFunction().hasProfileData()) {
3195 findDuplicateCandidates(CandidatePreds, BB, BlockFilter);
3196 if (CandidatePreds.
size() == 0)
3199 CandidatePtr = &CandidatePreds;
3202 &RemovalCallbackRef, CandidatePtr);
3205 DuplicatedToLPred =
false;
3208 BlockChain* PredChain = BlockToChain[Pred];
3210 DuplicatedToLPred =
true;
3211 if (Pred == LPred || (BlockFilter && !BlockFilter->count(Pred))
3212 || PredChain == &Chain)
3215 if (BlockFilter && !BlockFilter->count(NewSucc))
3217 BlockChain *NewChain = BlockToChain[NewSucc];
3218 if (NewChain != &Chain && NewChain != PredChain)
3219 NewChain->UnscheduledPredecessors++;
3229 if (!
MI.isPHI() && !
MI.isMetaInstruction())
3245 BlockFilterSet *BlockFilter) {
3248 if (BlockFilter && !BlockFilter->count(Pred))
3250 BlockChain *PredChain = BlockToChain[Pred];
3251 if (PredChain && (Pred != *std::prev(PredChain->end())))
3258 if (BlockFilter && !BlockFilter->count(Succ))
3260 BlockChain *SuccChain = BlockToChain[Succ];
3261 if (SuccChain && (Succ != *SuccChain->begin()))
3264 if (SuccProb > BestProb)
3265 BestProb = SuccProb;
3269 if (BBProb <= BestProb)
3276 return Gain > scaleThreshold(BB);
3281void MachineBlockPlacement::findDuplicateCandidates(
3284 BlockFilterSet *BlockFilter) {
3296 return MBFI->getBlockFreq(
A) > MBFI->getBlockFreq(
B);
3301 auto SuccIt = Succs.begin();
3302 if (SuccIt != Succs.end()) {
3353 if (!Fallthrough && isBestSuccessor(BB, Pred, BlockFilter)) {
3355 if (SuccIt != Succs.end())
3361 BlockFrequency OrigCost = PredFreq + PredFreq * DefaultBranchProb;
3363 if (SuccIt == Succs.end()) {
3365 if (Succs.size() > 0)
3366 DupCost += PredFreq;
3369 DupCost += PredFreq;
3373 assert(OrigCost >= DupCost);
3374 OrigCost -= DupCost;
3375 if (OrigCost > BBDupThreshold) {
3377 if (SuccIt != Succs.end())
3385 if ((Candidates.
size() < Preds.size()) && (Candidates.
size() > 0)) {
3386 Candidates[0] = Candidates.
back();
3392void MachineBlockPlacement::initDupThreshold() {
3394 if (!
F->getFunction().hasProfileData())
3400 UseProfileCount =
true;
3416 UseProfileCount =
false;
3419bool MachineBlockPlacement::runOnMachineFunction(
MachineFunction &MF) {
3424 if (std::next(MF.
begin()) == MF.
end())
3428 MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
3429 MBFI = std::make_unique<MBFIWrapper>(
3430 getAnalysis<MachineBlockFrequencyInfo>());
3431 MLI = &getAnalysis<MachineLoopInfo>();
3435 PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
3441 PreferredLoopExit =
nullptr;
3444 "BlockToChain map should be empty before starting placement.");
3446 "Computed Edge map should be empty before starting placement.");
3457 if (PassConfig->
getOptLevel() >= CodeGenOptLevel::Aggressive) {
3469 (PassConfig->
getOptLevel() < CodeGenOptLevel::Aggressive ||
3471 TailDupSize =
TII->getTailDuplicateSize(PassConfig->
getOptLevel());
3473 if (allowTailDupPlacement()) {
3474 MPDT = &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
3479 bool PreRegAlloc =
false;
3480 TailDup.
initMF(MF, PreRegAlloc, MBPI, MBFI.get(), PSI,
3482 precomputeTriangleChains();
3494 if (MF.
size() > 3 && EnableTailMerge) {
3502 BlockToChain.
clear();
3503 ComputedEdges.
clear();
3519 createCFGChainExtTsp();
3525 BlockToChain.
clear();
3526 ComputedEdges.
clear();
3529 bool HasMaxBytesOverride =
3535 if (HasMaxBytesOverride)
3544 for (
auto MBI = std::next(MF.begin()), MBE = MF.end(); MBI != MBE; ++MBI) {
3545 auto LayoutPred = std::prev(MBI);
3547 if (HasMaxBytesOverride)
3559 MF.RenumberBlocks();
3560 MBFI->view(
"MBP." + MF.getName(),
false);
3568void MachineBlockPlacement::applyExtTsp() {
3572 std::vector<const MachineBasicBlock *> CurrentBlockOrder;
3573 CurrentBlockOrder.reserve(
F->size());
3574 size_t NumBlocks = 0;
3576 BlockIndex[&
MBB] = NumBlocks++;
3577 CurrentBlockOrder.push_back(&
MBB);
3580 auto BlockSizes = std::vector<uint64_t>(
F->size());
3581 auto BlockCounts = std::vector<uint64_t>(
F->size());
3582 std::vector<codelayout::EdgeCount> JumpCounts;
3595 int NumInsts = std::distance(NonDbgInsts.begin(), NonDbgInsts.end());
3596 BlockSizes[BlockIndex[&
MBB]] = 4 * NumInsts;
3601 JumpCounts.push_back(
3606 LLVM_DEBUG(
dbgs() <<
"Applying ext-tsp layout for |V| = " <<
F->size()
3607 <<
" with profile = " <<
F->getFunction().hasProfileData()
3608 <<
" (" <<
F->getName().str() <<
")"
3611 dbgs() <<
format(
" original layout score: %0.2f\n",
3616 std::vector<const MachineBasicBlock *> NewBlockOrder;
3617 NewBlockOrder.reserve(
F->size());
3619 NewBlockOrder.push_back(CurrentBlockOrder[
Node]);
3626 assignBlockOrder(NewBlockOrder);
3629void MachineBlockPlacement::assignBlockOrder(
3630 const std::vector<const MachineBasicBlock *> &NewBlockOrder) {
3631 assert(
F->size() == NewBlockOrder.size() &&
"Incorrect size of block order");
3632 F->RenumberBlocks();
3634 bool HasChanges =
false;
3635 for (
size_t I = 0;
I < NewBlockOrder.size();
I++) {
3636 if (NewBlockOrder[
I] !=
F->getBlockNumbered(
I)) {
3646 for (
auto &
MBB : *
F) {
3653 NewIndex[
MBB] = NewIndex.
size();
3656 return NewIndex[&
L] < NewIndex[&
R];
3663 for (
auto &
MBB : *
F) {
3670 if (FTMBB && (NextMBB == EndIt || &*NextMBB != FTMBB)) {
3684 F->verify(
this,
"After optimized block reordering");
3688void MachineBlockPlacement::createCFGChainExtTsp() {
3689 BlockToChain.
clear();
3690 ComputedEdges.
clear();
3694 BlockChain *FunctionChain =
3695 new (ChainAllocator.
Allocate()) BlockChain(BlockToChain, HeadBB);
3700 FunctionChain->merge(&
MBB,
nullptr);
3738char MachineBlockPlacementStats::ID = 0;
3743 "Basic Block Placement Stats",
false,
false)
3751 if (std::next(
F.begin()) ==
F.end())
3757 MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
3758 MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
3763 (
MBB.
succ_size() > 1) ? NumCondBranches : NumUncondBranches;
3765 (
MBB.
succ_size() > 1) ? CondBranchTakenFreq : UncondBranchTakenFreq;
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
This file defines the BumpPtrAllocator interface.
static cl::opt< unsigned > TailMergeSize("tail-merge-size", cl::desc("Min number of instructions to consider tail merging"), cl::init(3), cl::Hidden)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Declares methods and data structures for code layout algorithms.
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
static unsigned InstrCount
This file defines the DenseMap class.
DenseMap< Block *, BlockRelaxAux > Blocks
const HexagonInstrInfo * TII
static LoopDeletionResult merge(LoopDeletionResult A, LoopDeletionResult B)
block placement Basic Block Placement Stats
static BranchProbability getAdjustedProbability(BranchProbability OrigProb, BranchProbability AdjustedSumProb)
The helper function returns the branch probability that is adjusted or normalized over the new total ...
static cl::opt< bool > PreciseRotationCost("precise-rotation-cost", cl::desc("Model the cost of loop rotation more " "precisely by using profile data."), cl::init(false), cl::Hidden)
Branch Probability Basic Block Placement
static cl::opt< unsigned > AlignAllBlock("align-all-blocks", cl::desc("Force the alignment of all blocks in the function in log2 format " "(e.g 4 means align on 16B boundaries)."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > TailDupPlacementPenalty("tail-dup-placement-penalty", cl::desc("Cost penalty for blocks that can avoid breaking CFG by copying. " "Copying can increase fallthrough, but it also increases icache " "pressure. This parameter controls the penalty to account for that. " "Percent as integer."), cl::init(2), cl::Hidden)
static BranchProbability getLayoutSuccessorProbThreshold(const MachineBasicBlock *BB)
static cl::opt< bool > ForceLoopColdBlock("force-loop-cold-block", cl::desc("Force outlining cold blocks from loops."), cl::init(false), cl::Hidden)
static cl::opt< unsigned > ExitBlockBias("block-placement-exit-block-bias", cl::desc("Block frequency percentage a loop exit block needs " "over the original exit to be considered the new exit."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > AlignAllNonFallThruBlocks("align-all-nofallthru-blocks", cl::desc("Force the alignment of all blocks that have no fall-through " "predecessors (i.e. don't add nops that are executed). In log2 " "format (e.g 4 means align on 16B boundaries)."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > TailDupPlacementThreshold("tail-dup-placement-threshold", cl::desc("Instruction cutoff for tail duplication during layout. " "Tail merging during layout is forced to have a threshold " "that won't conflict."), cl::init(2), cl::Hidden)
static cl::opt< unsigned > JumpInstCost("jump-inst-cost", cl::desc("Cost of jump instructions."), cl::init(1), cl::Hidden)
static bool greaterWithBias(BlockFrequency A, BlockFrequency B, BlockFrequency EntryFreq)
Compare 2 BlockFrequency's with a small penalty for A.
static cl::opt< unsigned > MisfetchCost("misfetch-cost", cl::desc("Cost that models the probabilistic risk of an instruction " "misfetch due to a jump comparing to falling through, whose cost " "is zero."), cl::init(1), cl::Hidden)
static cl::opt< unsigned > MaxBytesForAlignmentOverride("max-bytes-for-alignment", cl::desc("Forces the maximum bytes allowed to be emitted when padding for " "alignment"), cl::init(0), cl::Hidden)
static cl::opt< bool > BranchFoldPlacement("branch-fold-placement", cl::desc("Perform branch folding during placement. " "Reduces code size."), cl::init(true), cl::Hidden)
static cl::opt< unsigned > TailDupProfilePercentThreshold("tail-dup-profile-percent-threshold", cl::desc("If profile count information is used in tail duplication cost " "model, the gained fall through number from tail duplication " "should be at least this percent of hot count."), cl::init(50), cl::Hidden)
static cl::opt< unsigned > TriangleChainCount("triangle-chain-count", cl::desc("Number of triangle-shaped-CFG's that need to be in a row for the " "triangle tail duplication heuristic to kick in. 0 to disable."), cl::init(2), cl::Hidden)
Branch Probability Basic Block static false std::string getBlockName(const MachineBasicBlock *BB)
Helper to print the name of a MBB.
static bool hasSameSuccessors(MachineBasicBlock &BB, SmallPtrSetImpl< const MachineBasicBlock * > &Successors)
Check if BB has exactly the successors in Successors.
static cl::opt< bool > TailDupPlacement("tail-dup-placement", cl::desc("Perform tail duplication during placement. " "Creates more fallthrough opportunites in " "outline branches."), cl::init(true), cl::Hidden)
static uint64_t countMBBInstruction(MachineBasicBlock *MBB)
static cl::opt< unsigned > LoopToColdBlockRatio("loop-to-cold-block-ratio", cl::desc("Outline loop blocks from loop chain if (frequency of loop) / " "(frequency of block) is greater than this ratio"), cl::init(5), cl::Hidden)
static cl::opt< bool > RenumberBlocksBeforeView("renumber-blocks-before-view", cl::desc("If true, basic blocks are re-numbered before MBP layout is printed " "into a dot graph. Only used when a function is being printed."), cl::init(false), cl::Hidden)
static cl::opt< unsigned > TailDupPlacementAggressiveThreshold("tail-dup-placement-aggressive-threshold", cl::desc("Instruction cutoff for aggressive tail duplication during " "layout. Used at -O3. Tail merging during layout is forced to " "have a threshold that won't conflict."), cl::init(4), cl::Hidden)
static cl::opt< bool > ForcePreciseRotationCost("force-precise-rotation-cost", cl::desc("Force the use of precise cost " "loop rotation strategy."), cl::init(false), cl::Hidden)
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
unify loop Fixup each natural loop to have a single exit block
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
static BlockFrequency max()
Returns the maximum possible frequency, the saturation value.
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
static BranchProbability getOne()
uint32_t getNumerator() const
BranchProbability getCompl() const
static BranchProbability getZero()
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
bool erase(const KeyT &Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
Remove the branching code at the end of the specific MBB.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
Reverses the branch condition of the specified condition list, returning false on success and true if...
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
Insert branch code into the end of the specified MachineBasicBlock.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
unsigned getNumOperands() const
Return number of MDNode operands.
Tracking metadata reference owned by Metadata.
StringRef getString() const
unsigned pred_size() const
bool isEHPad() const
Returns true if the block is a landing pad.
instr_iterator instr_begin()
MachineBasicBlock * getFallThrough(bool JumpToFallThrough=true)
Return the fallthrough block if the block can implicitly transfer control to the block after it by fa...
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
void updateTerminator(MachineBasicBlock *PreviousLayoutSuccessor)
Update the terminator instructions in block to account for changes to block layout which may have bee...
bool canFallThrough()
Return true if the block can implicitly transfer control to the block after it by falling off the end...
succ_iterator succ_begin()
unsigned succ_size() const
void setAlignment(Align A)
Set alignment of the basic block.
bool isEntryBlock() const
Returns true if this is the entry block of the function.
pred_iterator pred_begin()
succ_reverse_iterator succ_rbegin()
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
DebugLoc findBranchDebugLoc()
Find and return the merged DebugLoc of the branch instructions of the block.
iterator_range< succ_iterator > successors()
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
iterator_range< pred_iterator > predecessors()
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const
getblockFreq - Return block frequency.
BranchProbability getEdgeProbability(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Representation of each machine instruction.
MachineLoop * getLoopFor(const MachineBasicBlock *BB) const
Return the innermost loop that BB lives in.
MachinePostDominatorTree - an analysis pass wrapper for DominatorTree used to compute the post-domina...
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
uint64_t getOrCompHotCountThreshold() const
Returns HotCountThreshold if set.
typename vector_type::const_iterator iterator
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
typename SuperClass::const_iterator const_iterator
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A BumpPtrAllocator that allows only elements of a specific type to be allocated.
T * Allocate(size_t num=1)
Allocate space for an array of objects without constructing them.
void DestroyAll()
Call the destructor of each allocated object and deallocate all but the current slab and reset the cu...
Utility class to perform tail duplication.
void initMF(MachineFunction &MF, bool PreRegAlloc, const MachineBranchProbabilityInfo *MBPI, MBFIWrapper *MBFI, ProfileSummaryInfo *PSI, bool LayoutMode, unsigned TailDupSize=0)
Prepare to run on a specific machine function.
bool tailDuplicateAndUpdate(bool IsSimple, MachineBasicBlock *MBB, MachineBasicBlock *ForcedLayoutPred, SmallVectorImpl< MachineBasicBlock * > *DuplicatedPreds=nullptr, function_ref< void(MachineBasicBlock *)> *RemovalCallback=nullptr, SmallVectorImpl< MachineBasicBlock * > *CandidatePtr=nullptr)
Tail duplicate a single basic block into its predecessors, and then clean up.
static bool isSimpleBB(MachineBasicBlock *TailBB)
True if this BB has only one unconditional jump.
bool canTailDuplicate(MachineBasicBlock *TailBB, MachineBasicBlock *PredBB)
Returns true if TailBB can successfully be duplicated into PredBB.
bool shouldTailDuplicate(bool IsSimple, MachineBasicBlock &TailBB)
Determine if it is profitable to duplicate this block.
TargetInstrInfo - Interface to description of machine instruction set.
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
virtual unsigned getMaxPermittedBytesForAlignment(MachineBasicBlock *MBB) const
Return the maximum amount of bytes allowed to be emitted when padding for alignment.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
virtual bool alignLoopsWithOptSize() const
Should loops be aligned even when the function is marked OptSize (but not MinSize).
bool requiresStructuredCFG() const
Target-Independent Code Generator Pass Configuration Options.
bool getEnableTailMerge() const
CodeGenOptLevel getOptLevel() const
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetLowering * getTargetLowering() const
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
A raw_ostream that writes to an std::string.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
initializer< Ty > init(const Ty &Val)
double calcExtTspScore(ArrayRef< uint64_t > Order, ArrayRef< uint64_t > NodeSizes, ArrayRef< uint64_t > NodeCounts, ArrayRef< EdgeCount > EdgeCounts)
Estimate the "quality" of a given node order in CFG.
std::vector< uint64_t > computeExtTspLayout(ArrayRef< uint64_t > NodeSizes, ArrayRef< uint64_t > NodeCounts, ArrayRef< EdgeCount > EdgeCounts)
Find a layout of nodes (basic blocks) of a given CFG optimizing jump locality and thus processor I-ca...
void append(SmallVectorImpl< char > &path, const Twine &a, const Twine &b="", const Twine &c="", const Twine &d="")
Append to path.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
void stable_sort(R &&Range)
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
cl::opt< bool > ApplyExtTspWithoutProfile
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
void initializeMachineBlockPlacementStatsPass(PassRegistry &)
cl::opt< unsigned > ProfileLikelyProb
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
cl::opt< std::string > ViewBlockFreqFuncName("view-bfi-func-name", cl::Hidden, cl::desc("The option to specify " "the name of the function " "whose CFG will be displayed."))
auto reverse(ContainerTy &&C)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
cl::opt< GVDAGType > ViewBlockLayoutWithBFI("view-block-layout-with-bfi", cl::Hidden, cl::desc("Pop up a window to show a dag displaying MBP layout and associated " "block frequencies of the CFG."), cl::values(clEnumValN(GVDT_None, "none", "do not display graphs."), clEnumValN(GVDT_Fraction, "fraction", "display a graph using the " "fractional block frequency representation."), clEnumValN(GVDT_Integer, "integer", "display a graph using the raw " "integer fractional block frequency representation."), clEnumValN(GVDT_Count, "count", "display a graph using the real " "profile count if available.")))
bool isFunctionInPrintList(StringRef FunctionName)
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
cl::opt< unsigned > StaticLikelyProb
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Printable printBlockFreq(const BlockFrequencyInfo &BFI, BlockFrequency Freq)
Print the block frequency Freq relative to the current functions entry frequency.
char & MachineBlockPlacementID
MachineBlockPlacement - This pass places basic blocks based on branch probabilities.
cl::opt< bool > EnableExtTspBlockPlacement
char & MachineBlockPlacementStatsID
MachineBlockPlacementStats - This pass collects statistics about the basic block placement using bran...
void initializeMachineBlockPlacementPass(PassRegistry &)
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.