78#define DEBUG_TYPE "block-placement"
80STATISTIC(NumCondBranches,
"Number of conditional branches");
81STATISTIC(NumUncondBranches,
"Number of unconditional branches");
83 "Potential frequency of taking conditional branches");
85 "Potential frequency of taking unconditional branches");
89 cl::desc(
"Force the alignment of all blocks in the function in log2 format "
90 "(e.g 4 means align on 16B boundaries)."),
94 "align-all-nofallthru-blocks",
95 cl::desc(
"Force the alignment of all blocks that have no fall-through "
96 "predecessors (i.e. don't add nops that are executed). In log2 "
97 "format (e.g 4 means align on 16B boundaries)."),
101 "max-bytes-for-alignment",
102 cl::desc(
"Forces the maximum bytes allowed to be emitted when padding for "
108 "block-placement-exit-block-bias",
109 cl::desc(
"Block frequency percentage a loop exit block needs "
110 "over the original exit to be considered the new exit."),
117 "loop-to-cold-block-ratio",
118 cl::desc(
"Outline loop blocks from loop chain if (frequency of loop) / "
119 "(frequency of block) is greater than this ratio"),
124 cl::desc(
"Force outlining cold blocks from loops."),
129 cl::desc(
"Model the cost of loop rotation more "
130 "precisely by using profile data."),
135 cl::desc(
"Force the use of precise cost "
136 "loop rotation strategy."),
141 cl::desc(
"Cost that models the probabilistic risk of an instruction "
142 "misfetch due to a jump comparing to falling through, whose cost "
147 cl::desc(
"Cost of jump instructions."),
151 cl::desc(
"Perform tail duplication during placement. "
152 "Creates more fallthrough opportunites in "
153 "outline branches."),
158 cl::desc(
"Perform branch folding during placement. "
159 "Reduces code size."),
164 "tail-dup-placement-threshold",
165 cl::desc(
"Instruction cutoff for tail duplication during layout. "
166 "Tail merging during layout is forced to have a threshold "
167 "that won't conflict."),
172 "tail-dup-placement-aggressive-threshold",
173 cl::desc(
"Instruction cutoff for aggressive tail duplication during "
174 "layout. Used at -O3. Tail merging during layout is forced to "
175 "have a threshold that won't conflict."),
180 "tail-dup-placement-penalty",
182 "Cost penalty for blocks that can avoid breaking CFG by copying. "
183 "Copying can increase fallthrough, but it also increases icache "
184 "pressure. This parameter controls the penalty to account for that. "
185 "Percent as integer."),
190 "tail-dup-profile-percent-threshold",
191 cl::desc(
"If profile count information is used in tail duplication cost "
192 "model, the gained fall through number from tail duplication "
193 "should be at least this percent of hot count."),
198 "triangle-chain-count",
199 cl::desc(
"Number of triangle-shaped-CFG's that need to be in a row for the "
200 "triangle tail duplication heuristic to kick in. 0 to disable."),
209 "renumber-blocks-before-view",
211 "If true, basic blocks are re-numbered before MBP layout is printed "
212 "into a dot graph. Only used when a function is being printed."),
216 "ext-tsp-block-placement-max-blocks",
217 cl::desc(
"Maximum number of basic blocks in a function to run ext-TSP "
224 cl::desc(
"Use ext-tsp for size-aware block placement."));
273 BlockToChainMapType &BlockToChain;
282 :
Blocks(1, BB), BlockToChain(BlockToChain) {
283 assert(BB &&
"Cannot create a chain with a null basic block");
284 BlockToChain[BB] =
this;
292 iterator begin() {
return Blocks.begin(); }
296 iterator end() {
return Blocks.end(); }
300 for (iterator i = begin(); i != end(); ++i) {
316 assert(BB &&
"Can't merge a null block.");
317 assert(!
Blocks.empty() &&
"Can't merge into an empty chain.");
321 assert(!BlockToChain[BB] &&
322 "Passed chain is null, but BB has entry in BlockToChain.");
324 BlockToChain[BB] =
this;
328 assert(BB == *Chain->
begin() &&
"Passed BB is not head of Chain.");
329 assert(Chain->begin() != Chain->end());
334 Blocks.push_back(ChainBB);
335 assert(BlockToChain[ChainBB] == Chain &&
"Incoming blocks not in chain.");
336 BlockToChain[ChainBB] =
this;
357 unsigned UnscheduledPredecessors = 0;
365 struct BlockAndTailDupResult {
371 struct WeightedEdge {
391 std::unique_ptr<MBFIWrapper> MBFI;
424 unsigned TailDupSize;
428 bool UseProfileCount =
false;
458 if (UseProfileCount) {
459 auto Count = MBFI->getBlockProfileCount(BB);
465 return MBFI->getBlockFreq(BB);
470 void initTailDupThreshold();
474 void markChainSuccessors(
const BlockChain &Chain,
476 const BlockFilterSet *BlockFilter =
nullptr);
482 const BlockFilterSet *BlockFilter =
nullptr);
486 const BlockFilterSet *BlockFilter,
489 BlockFilterSet *BlockFilter);
492 BlockFilterSet *BlockFilter);
493 bool repeatedlyTailDuplicateBlock(
496 BlockFilterSet *BlockFilter,
501 BlockChain &Chain, BlockFilterSet *BlockFilter,
504 bool &DuplicatedToLPred);
507 const BlockChain &SuccChain,
510 const BlockChain &Chain,
511 const BlockFilterSet *BlockFilter);
513 const BlockChain &Chain,
514 const BlockFilterSet *BlockFilter);
516 selectBestCandidateBlock(
const BlockChain &Chain,
519 getFirstUnplacedBlock(
const BlockChain &PlacedChain,
522 getFirstUnplacedBlock(
const BlockChain &PlacedChain,
524 const BlockFilterSet *BlockFilter);
533 const BlockFilterSet *BlockFilter);
536 BlockFilterSet *BlockFilter =
nullptr);
540 const BlockFilterSet &LoopBlockSet);
542 const BlockFilterSet &LoopBlockSet);
546 const BlockFilterSet &LoopBlockSet);
549 const BlockFilterSet &LoopBlockSet);
551 const BlockFilterSet &LoopBlockSet);
553 const BlockFilterSet &LoopBlockSet,
555 BlockFilterSet collectLoopBlockSet(
const MachineLoop &L);
559 void rotateLoopWithProfile(BlockChain &LoopChain,
const MachineLoop &L,
560 const BlockFilterSet &LoopBlockSet);
561 void buildCFGChains();
562 void optimizeBranches();
572 const BlockFilterSet *BlockFilter);
577 const BlockChain &Chain,
const BlockFilterSet *BlockFilter);
580 BlockAndTailDupResult getBestTrellisSuccessor(
584 const BlockFilterSet *BlockFilter);
587 static std::pair<WeightedEdge, WeightedEdge> getBestNonConflictingEdges(
595 const BlockChain &Chain,
596 const BlockFilterSet *BlockFilter);
600 void precomputeTriangleChains();
603 void applyExtTsp(
bool OptForSize);
606 void assignBlockOrder(
const std::vector<const MachineBasicBlock *> &NewOrder);
609 void createCFGChainExtTsp();
620 bool allowTailDupPlacement()
const {
639char MachineBlockPlacement::ID = 0;
644 "Branch Probability Basic Block Placement",
false,
false)
673void MachineBlockPlacement::markChainSuccessors(
675 const BlockFilterSet *BlockFilter) {
679 markBlockSuccessors(Chain,
MBB, LoopHeaderBB, BlockFilter);
689void MachineBlockPlacement::markBlockSuccessors(
697 if (BlockFilter && !BlockFilter->count(Succ))
699 BlockChain &SuccChain = *BlockToChain[Succ];
701 if (&Chain == &SuccChain || Succ == LoopHeaderBB)
706 if (SuccChain.UnscheduledPredecessors == 0 ||
707 --SuccChain.UnscheduledPredecessors > 0)
710 auto *NewBB = *SuccChain.
begin();
711 if (NewBB->isEHPad())
724 const BlockFilterSet *BlockFilter,
744 bool SkipSucc =
false;
745 if (Succ->isEHPad() || (BlockFilter && !BlockFilter->count(Succ))) {
748 BlockChain *SuccChain = BlockToChain[Succ];
749 if (SuccChain == &Chain) {
751 }
else if (Succ != *SuccChain->begin()) {
753 <<
" -> Mid chain!\n");
763 return AdjustedSumProb;
774 if (SuccProbN >= SuccProbD)
789 if (Successors.
count(&BB))
792 if (!Successors.
count(Succ))
821 return (Gain / ThresholdProb) >= EntryFreq;
829bool MachineBlockPlacement::isProfitableToTailDup(
832 const BlockFilterSet *BlockFilter) {
859 auto AdjustedSuccSumProb =
860 collectViableSuccessors(Succ, Chain, BlockFilter, SuccSuccs);
862 auto BBFreq = MBFI->getBlockFreq(BB);
863 auto SuccFreq = MBFI->getBlockFreq(Succ);
869 if (SuccSuccs.
size() == 0)
876 if (Prob > BestSuccSucc)
888 if (SuccPred == Succ || SuccPred == BB ||
889 BlockToChain[SuccPred] == &Chain ||
890 (BlockFilter && !BlockFilter->count(SuccPred)))
894 if (Freq > SuccBestPred)
962 if (UProb > AdjustedSuccSumProb / 2 &&
963 !hasBetterLayoutPredecessor(Succ, PDom, *BlockToChain[PDom], UProb, UProb,
967 (
P + V), (Qout + std::max(Qin,
F) * VProb + std::min(Qin,
F) * UProb),
971 (Qout + std::min(Qin,
F) * AdjustedSuccSumProb +
972 std::max(Qin,
F) * UProb),
983bool MachineBlockPlacement::isTrellis(
986 const BlockChain &Chain,
const BlockFilterSet *BlockFilter) {
1001 if (Successors.count(SuccPred)) {
1004 if (!Successors.count(CheckSucc))
1008 const BlockChain *PredChain = BlockToChain[SuccPred];
1009 if (SuccPred == BB || (BlockFilter && !BlockFilter->count(SuccPred)) ||
1010 PredChain == &Chain || PredChain == BlockToChain[Succ])
1014 if (!SeenPreds.
insert(SuccPred).second)
1032std::pair<MachineBlockPlacement::WeightedEdge,
1033 MachineBlockPlacement::WeightedEdge>
1034MachineBlockPlacement::getBestNonConflictingEdges(
1045 auto Cmp = [](WeightedEdge
A, WeightedEdge
B) {
return A.Weight >
B.Weight; };
1049 auto BestA = Edges[0].begin();
1050 auto BestB = Edges[1].begin();
1053 if (BestA->Src == BestB->Src) {
1055 auto SecondBestA = std::next(BestA);
1056 auto SecondBestB = std::next(BestB);
1059 if (BestAScore < BestBScore)
1060 BestA = SecondBestA;
1062 BestB = SecondBestB;
1065 if (BestB->Src == BB)
1067 return std::make_pair(*BestA, *BestB);
1077MachineBlockPlacement::BlockAndTailDupResult
1078MachineBlockPlacement::getBestTrellisSuccessor(
1082 const BlockFilterSet *BlockFilter) {
1084 BlockAndTailDupResult
Result = {
nullptr,
false};
1091 if (Successors.
size() != 2 || ViableSuccs.
size() != 2)
1097 for (
auto *Succ : ViableSuccs) {
1101 if ((BlockFilter && !BlockFilter->count(SuccPred)) ||
1102 BlockToChain[SuccPred] == &Chain ||
1103 BlockToChain[SuccPred] == BlockToChain[Succ])
1107 Edges[SuccIndex].
push_back({EdgeFreq, SuccPred, Succ});
1113 WeightedEdge BestA, BestB;
1114 std::tie(BestA, BestB) = getBestNonConflictingEdges(BB, Edges);
1116 if (BestA.Src != BB) {
1120 LLVM_DEBUG(
dbgs() <<
"Trellis, but not one of the chosen edges.\n");
1127 if (BestA.Dest == BestB.Src) {
1133 if (allowTailDupPlacement() && shouldTailDuplicate(Succ2) &&
1134 canTailDuplicateUnplacedPreds(BB, Succ2, Chain, BlockFilter) &&
1136 Chain, BlockFilter)) {
1140 <<
", probability: " << Succ2Prob
1141 <<
" (Tail Duplicate)\n");
1143 Result.ShouldTailDup =
true;
1149 ComputedEdges[BestB.Src] = {BestB.Dest,
false};
1151 auto TrellisSucc = BestA.Dest;
1155 <<
", probability: " << SuccProb <<
" (Trellis)\n");
1163bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
1165 const BlockChain &Chain,
const BlockFilterSet *BlockFilter) {
1166 if (!shouldTailDuplicate(Succ))
1170 bool Duplicate =
true;
1172 unsigned int NumDup = 0;
1181 if (Pred == BB || (BlockFilter && !BlockFilter->count(Pred)) ||
1182 (BlockToChain[Pred] == &Chain && !Succ->
succ_empty()))
1232 if (
F->getFunction().hasProfileData())
1263 if ((NumDup > Succ->
succ_size()) || !Duplicate)
1286void MachineBlockPlacement::precomputeTriangleChains() {
1287 struct TriangleChain {
1288 std::vector<MachineBasicBlock *> Edges;
1291 : Edges({src, dst}) {}
1294 assert(getKey()->isSuccessor(dst) &&
1295 "Attempting to append a block that is not a successor.");
1296 Edges.push_back(dst);
1299 unsigned count()
const {
return Edges.size() - 1; }
1324 if (PDom ==
nullptr)
1331 if (!shouldTailDuplicate(PDom))
1333 bool CanTailDuplicate =
true;
1340 CanTailDuplicate =
false;
1346 if (!CanTailDuplicate)
1353 auto Found = TriangleChainMap.
find(&BB);
1356 if (Found != TriangleChainMap.
end()) {
1357 TriangleChain Chain = std::move(Found->second);
1358 TriangleChainMap.
erase(Found);
1360 TriangleChainMap.
insert(std::make_pair(Chain.getKey(), std::move(Chain)));
1362 auto InsertResult = TriangleChainMap.
try_emplace(PDom, &BB, PDom);
1363 assert(InsertResult.second &&
"Block seen twice.");
1371 for (
auto &ChainPair : TriangleChainMap) {
1372 TriangleChain &Chain = ChainPair.second;
1379 Chain.Edges.pop_back();
1383 <<
" as pre-computed based on triangles.\n");
1385 auto InsertResult = ComputedEdges.
insert({src, {dst,
true}});
1386 assert(InsertResult.second &&
"Block seen twice.");
1428bool MachineBlockPlacement::hasBetterLayoutPredecessor(
1432 const BlockFilterSet *BlockFilter) {
1435 if (SuccChain.UnscheduledPredecessors == 0)
1555 BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb;
1556 bool BadCFGConflict =
false;
1559 BlockChain *PredChain = BlockToChain[Pred];
1560 if (Pred == Succ || PredChain == &SuccChain ||
1561 (BlockFilter && !BlockFilter->count(Pred)) || PredChain == &Chain ||
1562 Pred != *std::prev(PredChain->end()) ||
1583 if (PredEdgeFreq * HotProb >= CandidateEdgeFreq * HotProb.
getCompl()) {
1584 BadCFGConflict =
true;
1589 if (BadCFGConflict) {
1591 << SuccProb <<
" (prob) (non-cold CFG conflict)\n");
1608MachineBlockPlacement::BlockAndTailDupResult
1610 const BlockChain &Chain,
1611 const BlockFilterSet *BlockFilter) {
1614 BlockAndTailDupResult BestSucc = {
nullptr,
false};
1618 auto AdjustedSumProb =
1619 collectViableSuccessors(BB, Chain, BlockFilter, Successors);
1626 auto FoundEdge = ComputedEdges.
find(BB);
1627 if (FoundEdge != ComputedEdges.
end()) {
1629 ComputedEdges.
erase(FoundEdge);
1630 BlockChain *SuccChain = BlockToChain[Succ];
1631 if (BB->
isSuccessor(Succ) && (!BlockFilter || BlockFilter->count(Succ)) &&
1632 SuccChain != &Chain && Succ == *SuccChain->begin())
1633 return FoundEdge->second;
1638 if (isTrellis(BB, Successors, Chain, BlockFilter))
1639 return getBestTrellisSuccessor(BB, Successors, AdjustedSumProb, Chain,
1652 BlockChain &SuccChain = *BlockToChain[Succ];
1655 if (hasBetterLayoutPredecessor(BB, Succ, SuccChain, SuccProb, RealSuccProb,
1656 Chain, BlockFilter)) {
1658 if (allowTailDupPlacement() && shouldTailDuplicate(Succ))
1665 <<
", probability: " << SuccProb
1666 << (SuccChain.UnscheduledPredecessors != 0 ?
" (CFG break)" :
"")
1669 if (BestSucc.BB && BestProb >= SuccProb) {
1676 BestProb = SuccProb;
1684 [](std::tuple<BranchProbability, MachineBasicBlock *> L,
1685 std::tuple<BranchProbability, MachineBasicBlock *> R) {
1686 return std::get<0>(L) > std::get<0>(R);
1688 for (
auto &Tup : DupCandidates) {
1691 std::tie(DupProb, Succ) = Tup;
1692 if (DupProb < BestProb)
1694 if (canTailDuplicateUnplacedPreds(BB, Succ, Chain, BlockFilter) &&
1695 (isProfitableToTailDup(BB, Succ, BestProb, Chain, BlockFilter))) {
1697 <<
", probability: " << DupProb
1698 <<
" (Tail Duplicate)\n");
1700 BestSucc.ShouldTailDup =
true;
1728 return BlockToChain.
lookup(BB) == &Chain;
1731 if (WorkList.
empty())
1734 bool IsEHPad = WorkList[0]->isEHPad();
1740 "EHPad mismatch between block and work list.");
1742 BlockChain &SuccChain = *BlockToChain[
MBB];
1743 if (&SuccChain == &Chain)
1746 assert(SuccChain.UnscheduledPredecessors == 0 &&
1747 "Found CFG-violating block");
1772 if (BestBlock && (IsEHPad ^ (BestFreq >= CandidateFreq)))
1776 BestFreq = CandidateFreq;
1790 const BlockChain &PlacedChain,
1795 if (BlockToChain[&*
I] != &PlacedChain) {
1796 PrevUnplacedBlockIt =
I;
1800 return *BlockToChain[&*
I]->
begin();
1817 const BlockChain &PlacedChain,
1818 BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt,
1819 const BlockFilterSet *BlockFilter) {
1821 for (; PrevUnplacedBlockInFilterIt != BlockFilter->end();
1822 ++PrevUnplacedBlockInFilterIt) {
1823 BlockChain *
C = BlockToChain[*PrevUnplacedBlockInFilterIt];
1824 if (
C != &PlacedChain) {
1831void MachineBlockPlacement::fillWorkLists(
1833 const BlockFilterSet *BlockFilter =
nullptr) {
1834 BlockChain &Chain = *BlockToChain[
MBB];
1835 if (!UpdatedPreds.
insert(&Chain).second)
1839 Chain.UnscheduledPredecessors == 0 &&
1840 "Attempting to place block with unscheduled predecessors in worklist.");
1842 assert(BlockToChain[ChainBB] == &Chain &&
1843 "Block in chain doesn't match BlockToChain map.");
1845 if (BlockFilter && !BlockFilter->count(Pred))
1847 if (BlockToChain[Pred] == &Chain)
1849 ++Chain.UnscheduledPredecessors;
1853 if (Chain.UnscheduledPredecessors != 0)
1865 BlockFilterSet *BlockFilter) {
1866 assert(HeadBB &&
"BB must not be null.\n");
1867 assert(BlockToChain[HeadBB] == &Chain &&
"BlockToChainMap mis-match.\n");
1869 BlockFilterSet::iterator PrevUnplacedBlockInFilterIt;
1871 PrevUnplacedBlockInFilterIt = BlockFilter->begin();
1874 markChainSuccessors(Chain, LoopHeaderBB, BlockFilter);
1877 assert(BB &&
"null block found at end of chain in loop.");
1878 assert(BlockToChain[BB] == &Chain &&
"BlockToChainMap mis-match in loop.");
1879 assert(*std::prev(Chain.end()) == BB &&
"BB Not found at end of chain.");
1883 auto Result = selectBestSuccessor(BB, Chain, BlockFilter);
1885 bool ShouldTailDup =
Result.ShouldTailDup;
1886 if (allowTailDupPlacement())
1887 ShouldTailDup |= (BestSucc && canTailDuplicateUnplacedPreds(
1888 BB, BestSucc, Chain, BlockFilter));
1894 BestSucc = selectBestCandidateBlock(Chain, BlockWorkList);
1896 BestSucc = selectBestCandidateBlock(Chain, EHPadWorkList);
1900 BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockInFilterIt,
1903 BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockIt);
1907 LLVM_DEBUG(
dbgs() <<
"Unnatural loop CFG detected, forcibly merging the "
1908 "layout successor until the CFG reduces\n");
1913 if (allowTailDupPlacement() && BestSucc && ShouldTailDup) {
1914 repeatedlyTailDuplicateBlock(BestSucc, BB, LoopHeaderBB, Chain,
1915 BlockFilter, PrevUnplacedBlockIt,
1916 PrevUnplacedBlockInFilterIt);
1924 BlockChain &SuccChain = *BlockToChain[BestSucc];
1927 SuccChain.UnscheduledPredecessors = 0;
1930 markChainSuccessors(SuccChain, LoopHeaderBB, BlockFilter);
1931 Chain.merge(BestSucc, &SuccChain);
1932 BB = *std::prev(Chain.end());
1953bool MachineBlockPlacement::canMoveBottomBlockToTop(
1962 if (OtherBB == BottomBlock)
1964 if (OtherBB == OldTop)
1973 const BlockFilterSet &LoopBlockSet) {
1976 BlockChain *PredChain = BlockToChain[Pred];
1977 if (!LoopBlockSet.count(Pred) &&
1978 (!PredChain || Pred == *std::prev(PredChain->end()))) {
1985 BlockChain *SuccChain = BlockToChain[Succ];
1988 if (!LoopBlockSet.count(Succ) && (SuccProb > TopProb) &&
1989 (!SuccChain || Succ == *SuccChain->begin())) {
1997 if (EdgeFreq > MaxFreq)
2029 BlockFrequency FallThrough2Top = TopFallThroughFreq(OldTop, LoopBlockSet);
2041 if (!LoopBlockSet.count(Pred))
2043 BlockChain *PredChain = BlockToChain[Pred];
2044 if (!PredChain || Pred == *std::prev(PredChain->end())) {
2047 if (EdgeFreq > FallThroughFromPred) {
2048 FallThroughFromPred = EdgeFreq;
2059 if ((Succ == NewTop) || (Succ == BestPred) || !LoopBlockSet.count(Succ))
2063 BlockChain *SuccChain = BlockToChain[Succ];
2064 if ((SuccChain && (Succ != *SuccChain->begin())) ||
2065 (SuccChain == BlockToChain[BestPred]))
2069 if (EdgeFreq > NewFreq)
2074 if (NewFreq > OrigEdgeFreq) {
2086 FallThrough2Top + FallThrough2Exit + FallThroughFromPred;
2116 const BlockFilterSet &LoopBlockSet) {
2120 BlockChain &HeaderChain = *BlockToChain[OldTop];
2121 if (!LoopBlockSet.count(*HeaderChain.begin()))
2123 if (OldTop != *HeaderChain.begin())
2132 if (!LoopBlockSet.count(Pred))
2134 if (Pred ==
L.getHeader())
2145 if (OtherBB == OldTop)
2149 if (!canMoveBottomBlockToTop(Pred, OldTop))
2153 FallThroughGains(Pred, OldTop, OtherBB, LoopBlockSet);
2155 (Gains > BestGains ||
2170 (*BestPred->
pred_begin())->succ_size() == 1 &&
2183MachineBlockPlacement::findBestLoopTop(
const MachineLoop &L,
2184 const BlockFilterSet &LoopBlockSet) {
2193 return L.getHeader();
2197 while (NewTop != OldTop) {
2199 NewTop = findBestLoopTopHelper(OldTop, L, LoopBlockSet);
2200 if (NewTop != OldTop)
2201 ComputedEdges[NewTop] = {OldTop,
false};
2212MachineBlockPlacement::findBestLoopExit(
const MachineLoop &L,
2213 const BlockFilterSet &LoopBlockSet,
2223 BlockChain &HeaderChain = *BlockToChain[
L.getHeader()];
2224 if (!LoopBlockSet.count(*HeaderChain.begin()))
2228 unsigned BestExitLoopDepth = 0;
2238 BlockChain &Chain = *BlockToChain[
MBB];
2241 if (
MBB != *std::prev(Chain.end()))
2250 bool HasLoopingSucc =
false;
2256 BlockChain &SuccChain = *BlockToChain[Succ];
2258 if (&Chain == &SuccChain) {
2265 if (LoopBlockSet.count(Succ)) {
2268 HasLoopingSucc =
true;
2272 unsigned SuccLoopDepth = 0;
2274 SuccLoopDepth = ExitLoop->getLoopDepth();
2275 if (ExitLoop->contains(&L))
2282 <<
getBlockName(Succ) <<
" [L:" << SuccLoopDepth <<
"] ("
2289 if (!ExitingBB || SuccLoopDepth > BestExitLoopDepth ||
2290 ExitEdgeFreq > BestExitEdgeFreq ||
2292 !(ExitEdgeFreq < BestExitEdgeFreq * Bias))) {
2293 BestExitEdgeFreq = ExitEdgeFreq;
2298 if (!HasLoopingSucc) {
2300 ExitingBB = OldExitingBB;
2301 BestExitEdgeFreq = OldBestExitEdgeFreq;
2308 dbgs() <<
" No other candidate exit blocks, using loop header\n");
2311 if (
L.getNumBlocks() == 1) {
2312 LLVM_DEBUG(
dbgs() <<
" Loop has 1 block, using loop header as exit\n");
2319 if (!BlocksExitingToOuterLoop.
empty() &&
2320 !BlocksExitingToOuterLoop.
count(ExitingBB))
2325 ExitFreq = BestExitEdgeFreq;
2333bool MachineBlockPlacement::hasViableTopFallthrough(
2336 BlockChain *PredChain = BlockToChain[Pred];
2337 if (!LoopBlockSet.count(Pred) &&
2338 (!PredChain || Pred == *std::prev(PredChain->end()))) {
2345 BlockChain *SuccChain = BlockToChain[Succ];
2348 if ((!SuccChain || Succ == *SuccChain->begin()) && SuccProb > TopProb) {
2366void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
2369 const BlockFilterSet &LoopBlockSet) {
2377 if (Bottom == ExitingBB)
2384 bool ViableTopFallthrough = hasViableTopFallthrough(Top, LoopBlockSet);
2389 if (ViableTopFallthrough) {
2391 BlockChain *SuccChain = BlockToChain[Succ];
2392 if (!LoopBlockSet.count(Succ) &&
2393 (!SuccChain || Succ == *SuccChain->begin()))
2399 BlockFrequency FallThrough2Top = TopFallThroughFreq(Top, LoopBlockSet);
2400 if (FallThrough2Top >= ExitFreq)
2404 BlockChain::iterator ExitIt =
llvm::find(LoopChain, ExitingBB);
2405 if (ExitIt == LoopChain.end())
2427 if (ViableTopFallthrough) {
2428 assert(std::next(ExitIt) != LoopChain.end() &&
2429 "Exit should not be last BB");
2438 std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end());
2454void MachineBlockPlacement::rotateLoopWithProfile(
2456 const BlockFilterSet &LoopBlockSet) {
2457 auto RotationPos = LoopChain.end();
2482 BlockChain *PredChain = BlockToChain[Pred];
2483 if (!LoopBlockSet.count(Pred) &&
2484 (!PredChain || Pred == *std::prev(PredChain->end()))) {
2485 auto EdgeFreq = MBFI->getBlockFreq(Pred) *
2487 auto FallThruCost = ScaleBlockFrequency(EdgeFreq,
MisfetchCost);
2491 FallThruCost += ScaleBlockFrequency(EdgeFreq,
JumpInstCost);
2492 HeaderFallThroughCost = std::max(HeaderFallThroughCost, FallThruCost);
2501 for (
auto *BB : LoopChain) {
2504 BlockChain *SuccChain = BlockToChain[Succ];
2505 if (!LoopBlockSet.count(Succ) &&
2506 (!SuccChain || Succ == *SuccChain->begin())) {
2508 LargestExitEdgeProb = std::max(LargestExitEdgeProb, SuccProb);
2512 auto ExitFreq = MBFI->getBlockFreq(BB) * LargestExitEdgeProb;
2520 for (
auto Iter = LoopChain.begin(), TailIter = std::prev(LoopChain.end()),
2521 EndIter = LoopChain.end();
2522 Iter != EndIter; Iter++, TailIter++) {
2525 if (TailIter == LoopChain.end())
2526 TailIter = LoopChain.begin();
2528 auto TailBB = *TailIter;
2536 if (Iter != LoopChain.begin())
2537 Cost += HeaderFallThroughCost;
2541 for (
auto &ExitWithFreq : ExitsWithFreq)
2542 if (TailBB != ExitWithFreq.first)
2543 Cost += ExitWithFreq.second;
2559 if (TailBB->isSuccessor(*Iter)) {
2560 auto TailBBFreq = MBFI->getBlockFreq(TailBB);
2561 if (TailBB->succ_size() == 1)
2563 else if (TailBB->succ_size() == 2) {
2565 auto TailToHeadFreq = TailBBFreq * TailToHeadProb;
2567 ? TailBBFreq * TailToHeadProb.
getCompl()
2578 if (
Cost < SmallestRotationCost) {
2579 SmallestRotationCost =
Cost;
2584 if (RotationPos != LoopChain.end()) {
2586 <<
" to the top\n");
2587 std::rotate(LoopChain.begin(), RotationPos, LoopChain.end());
2596MachineBlockPlacement::collectLoopBlockSet(
const MachineLoop &L) {
2602 return X->getNumber() <
Y->getNumber();
2605 std::set<const MachineBasicBlock *, MBBCompare> LoopBlockSet;
2618 for (
auto *LoopPred :
L.getHeader()->predecessors())
2619 if (!
L.contains(LoopPred))
2620 LoopFreq += MBFI->getBlockFreq(LoopPred) *
2624 if (LoopBlockSet.count(LoopBB))
2629 BlockChain *Chain = BlockToChain[LoopBB];
2631 LoopBlockSet.
insert(ChainBB);
2634 LoopBlockSet.insert(
L.block_begin(),
L.block_end());
2639 BlockFilterSet
Ret(LoopBlockSet.begin(), LoopBlockSet.end());
2649void MachineBlockPlacement::buildLoopChains(
const MachineLoop &L) {
2653 buildLoopChains(*InnerLoop);
2656 "BlockWorkList not empty when starting to build loop chains.");
2658 "EHPadWorkList not empty when starting to build loop chains.");
2659 BlockFilterSet LoopBlockSet = collectLoopBlockSet(L);
2664 bool RotateLoopWithProfile =
2680 PreferredLoopExit =
nullptr;
2682 if (!RotateLoopWithProfile && LoopTop ==
L.getHeader())
2683 PreferredLoopExit = findBestLoopExit(L, LoopBlockSet, ExitFreq);
2685 BlockChain &LoopChain = *BlockToChain[LoopTop];
2691 assert(LoopChain.UnscheduledPredecessors == 0 &&
2692 "LoopChain should not have unscheduled predecessors.");
2693 UpdatedPreds.
insert(&LoopChain);
2696 fillWorkLists(LoopBB, UpdatedPreds, &LoopBlockSet);
2698 buildChain(LoopTop, LoopChain, &LoopBlockSet);
2700 if (RotateLoopWithProfile)
2701 rotateLoopWithProfile(LoopChain, L, LoopBlockSet);
2703 rotateLoop(LoopChain, PreferredLoopExit, ExitFreq, LoopBlockSet);
2707 bool BadLoop =
false;
2708 if (LoopChain.UnscheduledPredecessors) {
2710 dbgs() <<
"Loop chain contains a block without its preds placed!\n"
2711 <<
" Loop header: " << getBlockName(*L.block_begin()) <<
"\n"
2712 <<
" Chain header: " << getBlockName(*LoopChain.begin()) <<
"\n";
2716 if (!LoopBlockSet.remove(ChainBB)) {
2720 dbgs() <<
"Loop chain contains a block not contained by the loop!\n"
2721 <<
" Loop header: " <<
getBlockName(*
L.block_begin()) <<
"\n"
2722 <<
" Chain header: " <<
getBlockName(*LoopChain.begin()) <<
"\n"
2727 if (!LoopBlockSet.empty()) {
2730 dbgs() <<
"Loop contains blocks never placed into a chain!\n"
2731 <<
" Loop header: " <<
getBlockName(*
L.block_begin()) <<
"\n"
2732 <<
" Chain header: " <<
getBlockName(*LoopChain.begin()) <<
"\n"
2735 assert(!BadLoop &&
"Detected problems with the placement of this loop.");
2738 BlockWorkList.
clear();
2739 EHPadWorkList.
clear();
2742void MachineBlockPlacement::buildCFGChains() {
2750 new (ChainAllocator.
Allocate()) BlockChain(BlockToChain, BB);
2763 assert(NextFI != FE &&
"Can't fallthrough past the last block.");
2764 LLVM_DEBUG(
dbgs() <<
"Pre-merging due to unanalyzable fallthrough: "
2767 Chain->merge(NextBB,
nullptr);
2769 BlocksWithUnanalyzableExits.
insert(&*BB);
2777 PreferredLoopExit =
nullptr;
2779 buildLoopChains(*L);
2782 "BlockWorkList should be empty before building final chain.");
2784 "EHPadWorkList should be empty before building final chain.");
2788 fillWorkLists(&
MBB, UpdatedPreds);
2790 BlockChain &FunctionChain = *BlockToChain[&
F->front()];
2791 buildChain(&
F->front(), FunctionChain);
2798 bool BadFunc =
false;
2799 FunctionBlockSetType FunctionBlockSet;
2801 FunctionBlockSet.insert(&
MBB);
2804 if (!FunctionBlockSet.erase(ChainBB)) {
2806 dbgs() <<
"Function chain contains a block not in the function!\n"
2807 <<
" Bad block: " << getBlockName(ChainBB) <<
"\n";
2810 if (!FunctionBlockSet.empty()) {
2812 for (MachineBasicBlock *RemainingBB : FunctionBlockSet)
2813 dbgs() <<
"Function contains blocks never placed into a chain!\n"
2814 <<
" Bad block: " << getBlockName(RemainingBB) <<
"\n";
2816 assert(!BadFunc &&
"Detected problems with the block placement.");
2822 F->getNumBlockIDs());
2825 for (
auto &
MBB : *
F) {
2826 if (LastMBB !=
nullptr)
2830 OriginalLayoutSuccessors[
F->back().getNumber()] =
nullptr;
2837 LLVM_DEBUG(
dbgs() << (ChainBB == *FunctionChain.begin() ?
"Placing chain "
2841 F->splice(InsertPos, ChainBB);
2846 if (ChainBB == *FunctionChain.begin())
2857 if (!BlocksWithUnanalyzableExits.
count(PrevBB)) {
2863 "Unexpected block with un-analyzable fallthrough!");
2865 TBB = FBB =
nullptr;
2903 BlockWorkList.
clear();
2904 EHPadWorkList.
clear();
2907void MachineBlockPlacement::optimizeBranches() {
2908 BlockChain &FunctionChain = *BlockToChain[&
F->front()];
2922 if (!
TBB || !FBB ||
Cond.empty())
2940 auto Dl = ChainBB->findBranchDebugLoc();
2946void MachineBlockPlacement::alignBlocks() {
2953 if (
F->getFunction().hasMinSize() ||
2958 BlockChain &FunctionChain = *BlockToChain[&
F->front()];
2960 if (FunctionChain.begin() == FunctionChain.end())
2967 if (ChainBB == *FunctionChain.begin())
2979 unsigned MDAlign = 1;
2980 MDNode *LoopID =
L->getLoopID();
2983 MDNode *MD = dyn_cast<MDNode>(MDO);
2989 if (S->
getString() ==
"llvm.loop.align") {
2991 "per-loop align metadata should have two operands.");
2993 mdconst::extract<ConstantInt>(MD->
getOperand(1))->getZExtValue();
2994 assert(MDAlign >= 1 &&
"per-loop align value must be positive.");
3000 const Align LoopAlign = std::max(TLIAlign,
Align(MDAlign));
3007 if (Freq < WeightedEntryFreq)
3014 if (Freq < (LoopHeaderFreq * ColdProb))
3027 auto DetermineMaxAlignmentPadding = [&]() {
3034 ChainBB->setMaxBytesForAlignment(MaxBytes);
3040 ChainBB->setAlignment(LoopAlign);
3041 DetermineMaxAlignmentPadding();
3051 BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb;
3052 if (LayoutEdgeFreq <= (Freq * ColdProb)) {
3053 ChainBB->setAlignment(LoopAlign);
3054 DetermineMaxAlignmentPadding();
3058 const bool HasMaxBytesOverride =
3064 if (HasMaxBytesOverride)
3073 for (
auto MBI = std::next(
F->begin()), MBE =
F->end(); MBI != MBE; ++MBI) {
3074 auto LayoutPred = std::prev(MBI);
3076 if (HasMaxBytesOverride)
3101bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
3105 BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt) {
3106 bool Removed, DuplicatedToLPred;
3107 bool DuplicatedToOriginalLPred;
3108 Removed = maybeTailDuplicateBlock(
3109 BB, LPred, Chain, BlockFilter, PrevUnplacedBlockIt,
3110 PrevUnplacedBlockInFilterIt, DuplicatedToLPred);
3113 DuplicatedToOriginalLPred = DuplicatedToLPred;
3118 while (DuplicatedToLPred && Removed) {
3125 BlockChain::iterator ChainEnd = Chain.
end();
3126 DupBB = *(--ChainEnd);
3128 if (ChainEnd == Chain.begin())
3130 DupPred = *std::prev(ChainEnd);
3131 Removed = maybeTailDuplicateBlock(
3132 DupBB, DupPred, Chain, BlockFilter, PrevUnplacedBlockIt,
3133 PrevUnplacedBlockInFilterIt, DuplicatedToLPred);
3140 LPred = *std::prev(Chain.end());
3141 if (DuplicatedToOriginalLPred)
3142 markBlockSuccessors(Chain, LPred, LoopHeaderBB, BlockFilter);
3159bool MachineBlockPlacement::maybeTailDuplicateBlock(
3162 BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt,
3163 bool &DuplicatedToLPred) {
3164 DuplicatedToLPred =
false;
3165 if (!shouldTailDuplicate(BB))
3173 bool Removed =
false;
3179 bool InWorkList =
true;
3181 if (BlockToChain.
count(RemBB)) {
3182 BlockChain *Chain = BlockToChain[RemBB];
3183 InWorkList = Chain->UnscheduledPredecessors == 0;
3184 Chain->remove(RemBB);
3185 BlockToChain.
erase(RemBB);
3189 if (&(*PrevUnplacedBlockIt) == RemBB) {
3190 PrevUnplacedBlockIt++;
3196 if (RemBB->isEHPad())
3197 RemoveList = EHPadWorkList;
3206 if (It != BlockFilter->end()) {
3207 if (It < PrevUnplacedBlockInFilterIt) {
3211 auto Distance = PrevUnplacedBlockInFilterIt - It - 1;
3212 PrevUnplacedBlockInFilterIt = BlockFilter->
erase(It) + Distance;
3213 assert(*PrevUnplacedBlockInFilterIt == PrevBB);
3215 }
else if (It == PrevUnplacedBlockInFilterIt)
3218 PrevUnplacedBlockInFilterIt = BlockFilter->erase(It);
3220 BlockFilter->erase(It);
3225 MLI->removeBlock(RemBB);
3226 if (RemBB == PreferredLoopExit)
3227 PreferredLoopExit =
nullptr;
3232 auto RemovalCallbackRef =
3239 if (
F->getFunction().hasProfileData()) {
3241 findDuplicateCandidates(CandidatePreds, BB, BlockFilter);
3242 if (CandidatePreds.
size() == 0)
3245 CandidatePtr = &CandidatePreds;
3248 &RemovalCallbackRef, CandidatePtr);
3251 DuplicatedToLPred =
false;
3254 BlockChain *PredChain = BlockToChain[Pred];
3256 DuplicatedToLPred =
true;
3257 if (Pred == LPred || (BlockFilter && !BlockFilter->count(Pred)) ||
3258 PredChain == &Chain)
3261 if (BlockFilter && !BlockFilter->count(NewSucc))
3263 BlockChain *NewChain = BlockToChain[NewSucc];
3264 if (NewChain != &Chain && NewChain != PredChain)
3265 NewChain->UnscheduledPredecessors++;
3275 if (!
MI.isPHI() && !
MI.isMetaInstruction())
3291 BlockFilterSet *BlockFilter) {
3294 if (BlockFilter && !BlockFilter->count(Pred))
3296 BlockChain *PredChain = BlockToChain[Pred];
3297 if (PredChain && (Pred != *std::prev(PredChain->end())))
3304 if (BlockFilter && !BlockFilter->count(Succ))
3306 BlockChain *SuccChain = BlockToChain[Succ];
3307 if (SuccChain && (Succ != *SuccChain->begin()))
3310 if (SuccProb > BestProb)
3311 BestProb = SuccProb;
3315 if (BBProb <= BestProb)
3322 return Gain > scaleThreshold(BB);
3327void MachineBlockPlacement::findDuplicateCandidates(
3329 BlockFilterSet *BlockFilter) {
3341 return MBFI->getBlockFreq(
A) > MBFI->getBlockFreq(
B);
3346 auto SuccIt = Succs.begin();
3347 if (SuccIt != Succs.end()) {
3398 if (!Fallthrough && isBestSuccessor(BB, Pred, BlockFilter)) {
3400 if (SuccIt != Succs.end())
3406 BlockFrequency OrigCost = PredFreq + PredFreq * DefaultBranchProb;
3408 if (SuccIt == Succs.end()) {
3410 if (Succs.size() > 0)
3411 DupCost += PredFreq;
3414 DupCost += PredFreq;
3418 assert(OrigCost >= DupCost);
3419 OrigCost -= DupCost;
3420 if (OrigCost > BBDupThreshold) {
3422 if (SuccIt != Succs.end())
3430 if ((Candidates.
size() < Preds.size()) && (Candidates.
size() > 0)) {
3431 Candidates[0] = Candidates.
back();
3437void MachineBlockPlacement::initTailDupThreshold() {
3439 if (
F->getFunction().hasProfileData()) {
3443 UseProfileCount =
true;
3457 UseProfileCount =
false;
3469 if (PassConfig->
getOptLevel() >= CodeGenOptLevel::Aggressive) {
3481 (PassConfig->
getOptLevel() < CodeGenOptLevel::Aggressive ||
3483 TailDupSize =
TII->getTailDuplicateSize(PassConfig->
getOptLevel());
3486bool MachineBlockPlacement::runOnMachineFunction(
MachineFunction &MF) {
3491 if (std::next(MF.
begin()) == MF.
end())
3495 MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
3496 MBFI = std::make_unique<MBFIWrapper>(
3497 getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI());
3498 MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
3502 PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
3503 PassConfig = &getAnalysis<TargetPassConfig>();
3507 PreferredLoopExit =
nullptr;
3510 "BlockToChain map should be empty before starting placement.");
3512 "Computed Edge map should be empty before starting placement.");
3515 initTailDupThreshold();
3517 const bool OptForSize =
3522 bool UseExtTspForPerf =
false;
3523 bool UseExtTspForSize =
false;
3532 if (allowTailDupPlacement()) {
3533 MPDT = &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
3536 const bool PreRegAlloc =
false;
3537 TailDup.
initMF(MF, PreRegAlloc, MBPI, MBFI.get(), PSI,
3539 if (!UseExtTspForSize)
3540 precomputeTriangleChains();
3544 if (!UseExtTspForSize)
3554 if (EnableTailMerge) {
3564 if (!UseExtTspForSize) {
3566 BlockToChain.
clear();
3567 ComputedEdges.
clear();
3577 if (UseExtTspForPerf || UseExtTspForSize) {
3579 !(UseExtTspForPerf && UseExtTspForSize) &&
3580 "UseExtTspForPerf and UseExtTspForSize can not be set simultaneously");
3581 applyExtTsp(UseExtTspForSize);
3582 createCFGChainExtTsp();
3588 BlockToChain.
clear();
3589 ComputedEdges.
clear();
3598 MBFI->view(
"MBP." + MF.
getName(),
false);
3606void MachineBlockPlacement::applyExtTsp(
bool OptForSize) {
3610 std::vector<const MachineBasicBlock *> CurrentBlockOrder;
3611 CurrentBlockOrder.reserve(
F->size());
3612 size_t NumBlocks = 0;
3614 BlockIndex[&
MBB] = NumBlocks++;
3615 CurrentBlockOrder.push_back(&
MBB);
3626 BlockCounts[BlockIndex[&
MBB]] = OptForSize ? 1 : BlockFreq.
getFrequency();
3635 size_t NumInsts = std::distance(NonDbgInsts.begin(), NonDbgInsts.end());
3636 BlockSizes[BlockIndex[&
MBB]] = 4 * NumInsts;
3652 if (FBB && FBB != FTB)
3661 JumpCounts.
push_back({BlockIndex[&
MBB], BlockIndex[Succ], Freq});
3672 LLVM_DEBUG(
dbgs() <<
"Applying ext-tsp layout for |V| = " <<
F->size()
3673 <<
" with profile = " <<
F->getFunction().hasProfileData()
3674 <<
" (" <<
F->getName() <<
")" <<
"\n");
3681 std::vector<const MachineBasicBlock *> NewBlockOrder;
3682 NewBlockOrder.reserve(
F->size());
3684 NewBlockOrder.push_back(CurrentBlockOrder[
Node]);
3686 const double OptScore =
calcExtTspScore(NewOrder, BlockSizes, JumpCounts);
3690 if (OptForSize && OrgScore > OptScore)
3691 assignBlockOrder(CurrentBlockOrder);
3693 assignBlockOrder(NewBlockOrder);
3696void MachineBlockPlacement::assignBlockOrder(
3697 const std::vector<const MachineBasicBlock *> &NewBlockOrder) {
3698 assert(
F->size() == NewBlockOrder.size() &&
"Incorrect size of block order");
3699 F->RenumberBlocks();
3706 bool HasChanges =
false;
3707 for (
size_t I = 0;
I < NewBlockOrder.size();
I++) {
3708 if (NewBlockOrder[
I] !=
F->getBlockNumbered(
I)) {
3718 for (
auto &
MBB : *
F) {
3725 NewIndex[
MBB] = NewIndex.
size();
3728 return NewIndex[&
L] < NewIndex[&
R];
3735 for (
auto &
MBB : *
F) {
3742 if (FTMBB && (NextMBB == EndIt || &*NextMBB != FTMBB)) {
3755void MachineBlockPlacement::createCFGChainExtTsp() {
3756 BlockToChain.
clear();
3757 ComputedEdges.
clear();
3761 BlockChain *FunctionChain =
3762 new (ChainAllocator.
Allocate()) BlockChain(BlockToChain, HeadBB);
3767 FunctionChain->merge(&
MBB,
nullptr);
3805char MachineBlockPlacementStats::ID = 0;
3810 "Basic Block Placement Stats",
false,
false)
3818 if (std::next(
F.begin()) ==
F.end())
3824 MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
3825 MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
3830 (
MBB.
succ_size() > 1) ? NumCondBranches : NumUncondBranches;
3832 (
MBB.
succ_size() > 1) ? CondBranchTakenFreq : UncondBranchTakenFreq;
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
This file defines the BumpPtrAllocator interface.
static cl::opt< unsigned > TailMergeSize("tail-merge-size", cl::desc("Min number of instructions to consider tail merging"), cl::init(3), cl::Hidden)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Declares methods and data structures for code layout algorithms.
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
static unsigned InstrCount
This file defines the DenseMap class.
DenseMap< Block *, BlockRelaxAux > Blocks
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
const HexagonInstrInfo * TII
static LoopDeletionResult merge(LoopDeletionResult A, LoopDeletionResult B)
block placement Basic Block Placement Stats
static BranchProbability getAdjustedProbability(BranchProbability OrigProb, BranchProbability AdjustedSumProb)
The helper function returns the branch probability that is adjusted or normalized over the new total ...
static cl::opt< bool > PreciseRotationCost("precise-rotation-cost", cl::desc("Model the cost of loop rotation more " "precisely by using profile data."), cl::init(false), cl::Hidden)
Branch Probability Basic Block Placement
static cl::opt< unsigned > ExtTspBlockPlacementMaxBlocks("ext-tsp-block-placement-max-blocks", cl::desc("Maximum number of basic blocks in a function to run ext-TSP " "block placement."), cl::init(UINT_MAX), cl::Hidden)
static cl::opt< unsigned > AlignAllBlock("align-all-blocks", cl::desc("Force the alignment of all blocks in the function in log2 format " "(e.g 4 means align on 16B boundaries)."), cl::init(0), cl::Hidden)
static BranchProbability getLayoutSuccessorProbThreshold(const MachineBasicBlock *BB)
static cl::opt< bool > ForceLoopColdBlock("force-loop-cold-block", cl::desc("Force outlining cold blocks from loops."), cl::init(false), cl::Hidden)
static cl::opt< unsigned > ExitBlockBias("block-placement-exit-block-bias", cl::desc("Block frequency percentage a loop exit block needs " "over the original exit to be considered the new exit."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > AlignAllNonFallThruBlocks("align-all-nofallthru-blocks", cl::desc("Force the alignment of all blocks that have no fall-through " "predecessors (i.e. don't add nops that are executed). In log2 " "format (e.g 4 means align on 16B boundaries)."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > TailDupPlacementThreshold("tail-dup-placement-threshold", cl::desc("Instruction cutoff for tail duplication during layout. " "Tail merging during layout is forced to have a threshold " "that won't conflict."), cl::init(2), cl::Hidden)
static cl::opt< unsigned > JumpInstCost("jump-inst-cost", cl::desc("Cost of jump instructions."), cl::init(1), cl::Hidden)
static cl::opt< unsigned > TailDupPlacementPenalty("tail-dup-placement-penalty", cl::desc("Cost penalty for blocks that can avoid breaking CFG by copying. " "Copying can increase fallthrough, but it also increases icache " "pressure. This parameter controls the penalty to account for that. " "Percent as integer."), cl::init(2), cl::Hidden)
static bool greaterWithBias(BlockFrequency A, BlockFrequency B, BlockFrequency EntryFreq)
Compare 2 BlockFrequency's with a small penalty for A.
static cl::opt< unsigned > MisfetchCost("misfetch-cost", cl::desc("Cost that models the probabilistic risk of an instruction " "misfetch due to a jump comparing to falling through, whose cost " "is zero."), cl::init(1), cl::Hidden)
static cl::opt< unsigned > MaxBytesForAlignmentOverride("max-bytes-for-alignment", cl::desc("Forces the maximum bytes allowed to be emitted when padding for " "alignment"), cl::init(0), cl::Hidden)
static cl::opt< bool > BranchFoldPlacement("branch-fold-placement", cl::desc("Perform branch folding during placement. " "Reduces code size."), cl::init(true), cl::Hidden)
static cl::opt< unsigned > TailDupProfilePercentThreshold("tail-dup-profile-percent-threshold", cl::desc("If profile count information is used in tail duplication cost " "model, the gained fall through number from tail duplication " "should be at least this percent of hot count."), cl::init(50), cl::Hidden)
static cl::opt< unsigned > TriangleChainCount("triangle-chain-count", cl::desc("Number of triangle-shaped-CFG's that need to be in a row for the " "triangle tail duplication heuristic to kick in. 0 to disable."), cl::init(2), cl::Hidden)
Branch Probability Basic Block static false std::string getBlockName(const MachineBasicBlock *BB)
Helper to print the name of a MBB.
static cl::opt< bool > ApplyExtTspForSize("apply-ext-tsp-for-size", cl::init(false), cl::Hidden, cl::desc("Use ext-tsp for size-aware block placement."))
static bool hasSameSuccessors(MachineBasicBlock &BB, SmallPtrSetImpl< const MachineBasicBlock * > &Successors)
Check if BB has exactly the successors in Successors.
static cl::opt< bool > TailDupPlacement("tail-dup-placement", cl::desc("Perform tail duplication during placement. " "Creates more fallthrough opportunites in " "outline branches."), cl::init(true), cl::Hidden)
static uint64_t countMBBInstruction(MachineBasicBlock *MBB)
static cl::opt< unsigned > LoopToColdBlockRatio("loop-to-cold-block-ratio", cl::desc("Outline loop blocks from loop chain if (frequency of loop) / " "(frequency of block) is greater than this ratio"), cl::init(5), cl::Hidden)
static cl::opt< bool > RenumberBlocksBeforeView("renumber-blocks-before-view", cl::desc("If true, basic blocks are re-numbered before MBP layout is printed " "into a dot graph. Only used when a function is being printed."), cl::init(false), cl::Hidden)
static cl::opt< unsigned > TailDupPlacementAggressiveThreshold("tail-dup-placement-aggressive-threshold", cl::desc("Instruction cutoff for aggressive tail duplication during " "layout. Used at -O3. Tail merging during layout is forced to " "have a threshold that won't conflict."), cl::init(4), cl::Hidden)
static cl::opt< bool > ForcePreciseRotationCost("force-precise-rotation-cost", cl::desc("Force the use of precise cost " "loop rotation strategy."), cl::init(false), cl::Hidden)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
unify loop Fixup each natural loop to have a single exit block
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
static BlockFrequency max()
Returns the maximum possible frequency, the saturation value.
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
static BranchProbability getOne()
uint32_t getNumerator() const
BranchProbability getCompl() const
static BranchProbability getZero()
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
bool erase(const KeyT &Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
Remove the branching code at the end of the specific MBB.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
Reverses the branch condition of the specified condition list, returning false on success and true if...
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
Insert branch code into the end of the specified MachineBasicBlock.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
unsigned getNumOperands() const
Return number of MDNode operands.
Tracking metadata reference owned by Metadata.
StringRef getString() const
unsigned pred_size() const
bool isEHPad() const
Returns true if the block is a landing pad.
instr_iterator instr_begin()
MachineBasicBlock * getFallThrough(bool JumpToFallThrough=true)
Return the fallthrough block if the block can implicitly transfer control to the block after it by fa...
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
void updateTerminator(MachineBasicBlock *PreviousLayoutSuccessor)
Update the terminator instructions in block to account for changes to block layout which may have bee...
bool canFallThrough()
Return true if the block can implicitly transfer control to the block after it by falling off the end...
succ_iterator succ_begin()
unsigned succ_size() const
void setAlignment(Align A)
Set alignment of the basic block.
bool isEntryBlock() const
Returns true if this is the entry block of the function.
pred_iterator pred_begin()
succ_reverse_iterator succ_rbegin()
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
DebugLoc findBranchDebugLoc()
Find and return the merged DebugLoc of the branch instructions of the block.
iterator_range< succ_iterator > successors()
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
iterator_range< pred_iterator > predecessors()
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const
getblockFreq - Return block frequency.
BranchProbability getEdgeProbability(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Function & getFunction()
Return the LLVM function that this machine code represents.
void RenumberBlocks(MachineBasicBlock *MBBFrom=nullptr)
RenumberBlocks - This discards all of the MachineBasicBlock numbers and recomputes them.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Representation of each machine instruction.
MachinePostDominatorTree - an analysis pass wrapper for DominatorTree used to compute the post-domina...
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
uint64_t getOrCompHotCountThreshold() const
Returns HotCountThreshold if set.
typename vector_type::const_iterator iterator
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
typename SuperClass::const_iterator const_iterator
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A BumpPtrAllocator that allows only elements of a specific type to be allocated.
T * Allocate(size_t num=1)
Allocate space for an array of objects without constructing them.
void DestroyAll()
Call the destructor of each allocated object and deallocate all but the current slab and reset the cu...
Utility class to perform tail duplication.
void initMF(MachineFunction &MF, bool PreRegAlloc, const MachineBranchProbabilityInfo *MBPI, MBFIWrapper *MBFI, ProfileSummaryInfo *PSI, bool LayoutMode, unsigned TailDupSize=0)
Prepare to run on a specific machine function.
bool tailDuplicateAndUpdate(bool IsSimple, MachineBasicBlock *MBB, MachineBasicBlock *ForcedLayoutPred, SmallVectorImpl< MachineBasicBlock * > *DuplicatedPreds=nullptr, function_ref< void(MachineBasicBlock *)> *RemovalCallback=nullptr, SmallVectorImpl< MachineBasicBlock * > *CandidatePtr=nullptr)
Tail duplicate a single basic block into its predecessors, and then clean up.
static bool isSimpleBB(MachineBasicBlock *TailBB)
True if this BB has only one unconditional jump.
bool canTailDuplicate(MachineBasicBlock *TailBB, MachineBasicBlock *PredBB)
Returns true if TailBB can successfully be duplicated into PredBB.
bool shouldTailDuplicate(bool IsSimple, MachineBasicBlock &TailBB)
Determine if it is profitable to duplicate this block.
TargetInstrInfo - Interface to description of machine instruction set.
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
virtual unsigned getMaxPermittedBytesForAlignment(MachineBasicBlock *MBB) const
Return the maximum amount of bytes allowed to be emitted when padding for alignment.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
virtual bool alignLoopsWithOptSize() const
Should loops be aligned even when the function is marked OptSize (but not MinSize).
bool requiresStructuredCFG() const
Target-Independent Code Generator Pass Configuration Options.
bool getEnableTailMerge() const
CodeGenOptLevel getOptLevel() const
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetLowering * getTargetLowering() const
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
A raw_ostream that writes to an std::string.
@ C
The default llvm calling convention, compatible with C.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
initializer< Ty > init(const Ty &Val)
double calcExtTspScore(ArrayRef< uint64_t > Order, ArrayRef< uint64_t > NodeSizes, ArrayRef< EdgeCount > EdgeCounts)
Estimate the "quality" of a given node order in CFG.
std::vector< uint64_t > computeExtTspLayout(ArrayRef< uint64_t > NodeSizes, ArrayRef< uint64_t > NodeCounts, ArrayRef< EdgeCount > EdgeCounts)
Find a layout of nodes (basic blocks) of a given CFG optimizing jump locality and thus processor I-ca...
void append(SmallVectorImpl< char > &path, const Twine &a, const Twine &b="", const Twine &c="", const Twine &d="")
Append to path.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
void stable_sort(R &&Range)
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
cl::opt< bool > ApplyExtTspWithoutProfile
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
void initializeMachineBlockPlacementStatsPass(PassRegistry &)
cl::opt< unsigned > ProfileLikelyProb
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
cl::opt< std::string > ViewBlockFreqFuncName("view-bfi-func-name", cl::Hidden, cl::desc("The option to specify " "the name of the function " "whose CFG will be displayed."))
auto reverse(ContainerTy &&C)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
cl::opt< GVDAGType > ViewBlockLayoutWithBFI("view-block-layout-with-bfi", cl::Hidden, cl::desc("Pop up a window to show a dag displaying MBP layout and associated " "block frequencies of the CFG."), cl::values(clEnumValN(GVDT_None, "none", "do not display graphs."), clEnumValN(GVDT_Fraction, "fraction", "display a graph using the " "fractional block frequency representation."), clEnumValN(GVDT_Integer, "integer", "display a graph using the raw " "integer fractional block frequency representation."), clEnumValN(GVDT_Count, "count", "display a graph using the real " "profile count if available.")))
bool isFunctionInPrintList(StringRef FunctionName)
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
cl::opt< unsigned > StaticLikelyProb
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Printable printBlockFreq(const BlockFrequencyInfo &BFI, BlockFrequency Freq)
Print the block frequency Freq relative to the current functions entry frequency.
char & MachineBlockPlacementID
MachineBlockPlacement - This pass places basic blocks based on branch probabilities.
cl::opt< bool > EnableExtTspBlockPlacement
char & MachineBlockPlacementStatsID
MachineBlockPlacementStats - This pass collects statistics about the basic block placement using bran...
void initializeMachineBlockPlacementPass(PassRegistry &)
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.