78#define DEBUG_TYPE "block-placement"
80STATISTIC(NumCondBranches,
"Number of conditional branches");
81STATISTIC(NumUncondBranches,
"Number of unconditional branches");
83 "Potential frequency of taking conditional branches");
85 "Potential frequency of taking unconditional branches");
89 cl::desc(
"Force the alignment of all blocks in the function in log2 format "
90 "(e.g 4 means align on 16B boundaries)."),
94 "align-all-nofallthru-blocks",
95 cl::desc(
"Force the alignment of all blocks that have no fall-through "
96 "predecessors (i.e. don't add nops that are executed). In log2 "
97 "format (e.g 4 means align on 16B boundaries)."),
101 "max-bytes-for-alignment",
102 cl::desc(
"Forces the maximum bytes allowed to be emitted when padding for "
108 "block-placement-exit-block-bias",
109 cl::desc(
"Block frequency percentage a loop exit block needs "
110 "over the original exit to be considered the new exit."),
117 "loop-to-cold-block-ratio",
118 cl::desc(
"Outline loop blocks from loop chain if (frequency of loop) / "
119 "(frequency of block) is greater than this ratio"),
123 "force-loop-cold-block",
124 cl::desc(
"Force outlining cold blocks from loops."),
129 cl::desc(
"Model the cost of loop rotation more "
130 "precisely by using profile data."),
135 cl::desc(
"Force the use of precise cost "
136 "loop rotation strategy."),
141 cl::desc(
"Cost that models the probabilistic risk of an instruction "
142 "misfetch due to a jump comparing to falling through, whose cost "
147 cl::desc(
"Cost of jump instructions."),
151 cl::desc(
"Perform tail duplication during placement. "
152 "Creates more fallthrough opportunites in "
153 "outline branches."),
158 cl::desc(
"Perform branch folding during placement. "
159 "Reduces code size."),
164 "tail-dup-placement-threshold",
165 cl::desc(
"Instruction cutoff for tail duplication during layout. "
166 "Tail merging during layout is forced to have a threshold "
167 "that won't conflict."),
cl::init(2),
172 "tail-dup-placement-aggressive-threshold",
173 cl::desc(
"Instruction cutoff for aggressive tail duplication during "
174 "layout. Used at -O3. Tail merging during layout is forced to "
175 "have a threshold that won't conflict."),
cl::init(4),
180 "tail-dup-placement-penalty",
181 cl::desc(
"Cost penalty for blocks that can avoid breaking CFG by copying. "
182 "Copying can increase fallthrough, but it also increases icache "
183 "pressure. This parameter controls the penalty to account for that. "
184 "Percent as integer."),
190 "tail-dup-profile-percent-threshold",
191 cl::desc(
"If profile count information is used in tail duplication cost "
192 "model, the gained fall through number from tail duplication "
193 "should be at least this percent of hot count."),
198 "triangle-chain-count",
199 cl::desc(
"Number of triangle-shaped-CFG's that need to be in a row for the "
200 "triangle tail duplication heuristic to kick in. 0 to disable."),
210 "renumber-blocks-before-view",
212 "If true, basic blocks are re-numbered before MBP layout is printed "
213 "into a dot graph. Only used when a function is being printed."),
217 "ext-tsp-block-placement-max-blocks",
218 cl::desc(
"Maximum number of basic blocks in a function to run ext-TSP "
269 BlockToChainMapType &BlockToChain;
278 :
Blocks(1, BB), BlockToChain(BlockToChain) {
279 assert(BB &&
"Cannot create a chain with a null basic block");
280 BlockToChain[BB] =
this;
288 iterator begin() {
return Blocks.begin(); }
292 iterator end() {
return Blocks.end(); }
296 for(iterator i = begin(); i != end(); ++i) {
312 assert(BB &&
"Can't merge a null block.");
313 assert(!
Blocks.empty() &&
"Can't merge into an empty chain.");
317 assert(!BlockToChain[BB] &&
318 "Passed chain is null, but BB has entry in BlockToChain.");
320 BlockToChain[BB] =
this;
324 assert(BB == *Chain->
begin() &&
"Passed BB is not head of Chain.");
325 assert(Chain->begin() != Chain->end());
330 Blocks.push_back(ChainBB);
331 assert(BlockToChain[ChainBB] == Chain &&
"Incoming blocks not in chain.");
332 BlockToChain[ChainBB] =
this;
353 unsigned UnscheduledPredecessors = 0;
361 struct BlockAndTailDupResult {
367 struct WeightedEdge {
387 std::unique_ptr<MBFIWrapper> MBFI;
420 bool UseProfileCount =
false;
450 if (UseProfileCount) {
451 auto Count = MBFI->getBlockProfileCount(BB);
457 return MBFI->getBlockFreq(BB);
462 void initDupThreshold();
466 void markChainSuccessors(
468 const BlockFilterSet *BlockFilter =
nullptr);
472 void markBlockSuccessors(
475 const BlockFilterSet *BlockFilter =
nullptr);
478 collectViableSuccessors(
480 const BlockFilterSet *BlockFilter,
483 BlockFilterSet *BlockFilter);
486 BlockFilterSet *BlockFilter);
487 bool repeatedlyTailDuplicateBlock(
490 BlockFilterSet *BlockFilter,
495 BlockChain &Chain, BlockFilterSet *BlockFilter,
498 bool &DuplicatedToLPred);
499 bool hasBetterLayoutPredecessor(
503 const BlockFilterSet *BlockFilter);
504 BlockAndTailDupResult selectBestSuccessor(
506 const BlockFilterSet *BlockFilter);
510 getFirstUnplacedBlock(
const BlockChain &PlacedChain,
513 getFirstUnplacedBlock(
const BlockChain &PlacedChain,
515 const BlockFilterSet *BlockFilter);
524 const BlockFilterSet *BlockFilter);
527 BlockFilterSet *BlockFilter =
nullptr);
531 const BlockFilterSet &LoopBlockSet);
533 const BlockFilterSet &LoopBlockSet);
537 const BlockFilterSet &LoopBlockSet);
539 const MachineLoop &L,
const BlockFilterSet &LoopBlockSet);
541 const MachineLoop &L,
const BlockFilterSet &LoopBlockSet);
543 const MachineLoop &L,
const BlockFilterSet &LoopBlockSet,
545 BlockFilterSet collectLoopBlockSet(
const MachineLoop &L);
550 void rotateLoopWithProfile(
552 const BlockFilterSet &LoopBlockSet);
553 void buildCFGChains();
554 void optimizeBranches();
561 bool isProfitableToTailDup(
564 const BlockChain &Chain,
const BlockFilterSet *BlockFilter);
569 const BlockChain &Chain,
const BlockFilterSet *BlockFilter);
572 BlockAndTailDupResult getBestTrellisSuccessor(
576 const BlockFilterSet *BlockFilter);
579 static std::pair<WeightedEdge, WeightedEdge> getBestNonConflictingEdges(
585 bool canTailDuplicateUnplacedPreds(
587 const BlockChain &Chain,
const BlockFilterSet *BlockFilter);
591 void precomputeTriangleChains();
597 void assignBlockOrder(
const std::vector<const MachineBasicBlock *> &NewOrder);
600 void createCFGChainExtTsp();
611 bool allowTailDupPlacement()
const {
630char MachineBlockPlacement::ID = 0;
635 "Branch Probability Basic Block Placement",
false,
false)
664void MachineBlockPlacement::markChainSuccessors(
666 const BlockFilterSet *BlockFilter) {
670 markBlockSuccessors(Chain,
MBB, LoopHeaderBB, BlockFilter);
680void MachineBlockPlacement::markBlockSuccessors(
688 if (BlockFilter && !BlockFilter->count(Succ))
690 BlockChain &SuccChain = *BlockToChain[Succ];
692 if (&Chain == &SuccChain || Succ == LoopHeaderBB)
697 if (SuccChain.UnscheduledPredecessors == 0 ||
698 --SuccChain.UnscheduledPredecessors > 0)
701 auto *NewBB = *SuccChain.
begin();
702 if (NewBB->isEHPad())
715 const BlockFilterSet *BlockFilter,
735 bool SkipSucc =
false;
736 if (Succ->isEHPad() || (BlockFilter && !BlockFilter->count(Succ))) {
739 BlockChain *SuccChain = BlockToChain[Succ];
740 if (SuccChain == &Chain) {
742 }
else if (Succ != *SuccChain->begin()) {
744 <<
" -> Mid chain!\n");
754 return AdjustedSumProb;
765 if (SuccProbN >= SuccProbD)
780 if (Successors.
count(&BB))
783 if (!Successors.
count(Succ))
812 return (Gain / ThresholdProb) >= EntryFreq;
820bool MachineBlockPlacement::isProfitableToTailDup(
823 const BlockChain &Chain,
const BlockFilterSet *BlockFilter) {
850 auto AdjustedSuccSumProb =
851 collectViableSuccessors(Succ, Chain, BlockFilter, SuccSuccs);
853 auto BBFreq = MBFI->getBlockFreq(BB);
854 auto SuccFreq = MBFI->getBlockFreq(Succ);
860 if (SuccSuccs.
size() == 0)
867 if (Prob > BestSuccSucc)
879 if (SuccPred == Succ || SuccPred == BB
880 || BlockToChain[SuccPred] == &Chain
881 || (BlockFilter && !BlockFilter->count(SuccPred)))
883 auto Freq = MBFI->getBlockFreq(SuccPred)
885 if (Freq > SuccBestPred)
953 if (UProb > AdjustedSuccSumProb / 2 &&
954 !hasBetterLayoutPredecessor(Succ, PDom, *BlockToChain[PDom], UProb, UProb,
958 (
P + V), (Qout + std::max(Qin,
F) * VProb + std::min(Qin,
F) * UProb),
962 (Qout + std::min(Qin,
F) * AdjustedSuccSumProb +
963 std::max(Qin,
F) * UProb),
974bool MachineBlockPlacement::isTrellis(
977 const BlockChain &Chain,
const BlockFilterSet *BlockFilter) {
992 if (Successors.count(SuccPred)) {
995 if (!Successors.count(CheckSucc))
999 const BlockChain *PredChain = BlockToChain[SuccPred];
1000 if (SuccPred == BB || (BlockFilter && !BlockFilter->count(SuccPred)) ||
1001 PredChain == &Chain || PredChain == BlockToChain[Succ])
1005 if (!SeenPreds.
insert(SuccPred).second)
1023std::pair<MachineBlockPlacement::WeightedEdge,
1024 MachineBlockPlacement::WeightedEdge>
1025MachineBlockPlacement::getBestNonConflictingEdges(
1036 auto Cmp = [](WeightedEdge
A, WeightedEdge
B) {
return A.Weight >
B.Weight; };
1040 auto BestA = Edges[0].begin();
1041 auto BestB = Edges[1].begin();
1044 if (BestA->Src == BestB->Src) {
1046 auto SecondBestA = std::next(BestA);
1047 auto SecondBestB = std::next(BestB);
1050 if (BestAScore < BestBScore)
1051 BestA = SecondBestA;
1053 BestB = SecondBestB;
1056 if (BestB->Src == BB)
1058 return std::make_pair(*BestA, *BestB);
1068MachineBlockPlacement::BlockAndTailDupResult
1069MachineBlockPlacement::getBestTrellisSuccessor(
1073 const BlockFilterSet *BlockFilter) {
1075 BlockAndTailDupResult
Result = {
nullptr,
false};
1082 if (Successors.
size() != 2 || ViableSuccs.
size() != 2)
1088 for (
auto *Succ : ViableSuccs) {
1092 if ((BlockFilter && !BlockFilter->count(SuccPred)) ||
1093 BlockToChain[SuccPred] == &Chain ||
1094 BlockToChain[SuccPred] == BlockToChain[Succ])
1098 Edges[SuccIndex].
push_back({EdgeFreq, SuccPred, Succ});
1104 WeightedEdge BestA, BestB;
1105 std::tie(BestA, BestB) = getBestNonConflictingEdges(BB, Edges);
1107 if (BestA.Src != BB) {
1111 LLVM_DEBUG(
dbgs() <<
"Trellis, but not one of the chosen edges.\n");
1118 if (BestA.Dest == BestB.Src) {
1124 if (allowTailDupPlacement() && shouldTailDuplicate(Succ2) &&
1125 canTailDuplicateUnplacedPreds(BB, Succ2, Chain, BlockFilter) &&
1127 Chain, BlockFilter)) {
1131 <<
", probability: " << Succ2Prob
1132 <<
" (Tail Duplicate)\n");
1134 Result.ShouldTailDup =
true;
1140 ComputedEdges[BestB.Src] = { BestB.Dest,
false };
1142 auto TrellisSucc = BestA.Dest;
1146 <<
", probability: " << SuccProb <<
" (Trellis)\n");
1154bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
1156 const BlockChain &Chain,
const BlockFilterSet *BlockFilter) {
1157 if (!shouldTailDuplicate(Succ))
1161 bool Duplicate =
true;
1163 unsigned int NumDup = 0;
1172 if (Pred == BB || (BlockFilter && !BlockFilter->count(Pred))
1173 || (BlockToChain[Pred] == &Chain && !Succ->
succ_empty()))
1223 if (
F->getFunction().hasProfileData())
1254 if ((NumDup > Succ->
succ_size()) || !Duplicate)
1277void MachineBlockPlacement::precomputeTriangleChains() {
1278 struct TriangleChain {
1279 std::vector<MachineBasicBlock *> Edges;
1282 : Edges({src, dst}) {}
1285 assert(getKey()->isSuccessor(dst) &&
1286 "Attempting to append a block that is not a successor.");
1287 Edges.push_back(dst);
1290 unsigned count()
const {
return Edges.size() - 1; }
1293 return Edges.
back();
1317 if (PDom ==
nullptr)
1324 if (!shouldTailDuplicate(PDom))
1326 bool CanTailDuplicate =
true;
1333 CanTailDuplicate =
false;
1339 if (!CanTailDuplicate)
1346 auto Found = TriangleChainMap.
find(&BB);
1349 if (Found != TriangleChainMap.
end()) {
1350 TriangleChain Chain = std::move(Found->second);
1351 TriangleChainMap.
erase(Found);
1353 TriangleChainMap.
insert(std::make_pair(Chain.getKey(), std::move(Chain)));
1355 auto InsertResult = TriangleChainMap.
try_emplace(PDom, &BB, PDom);
1356 assert(InsertResult.second &&
"Block seen twice.");
1364 for (
auto &ChainPair : TriangleChainMap) {
1365 TriangleChain &Chain = ChainPair.second;
1372 Chain.Edges.pop_back();
1376 <<
" as pre-computed based on triangles.\n");
1378 auto InsertResult = ComputedEdges.
insert({src, {dst,
true}});
1379 assert(InsertResult.second &&
"Block seen twice.");
1421bool MachineBlockPlacement::hasBetterLayoutPredecessor(
1425 const BlockFilterSet *BlockFilter) {
1428 if (SuccChain.UnscheduledPredecessors == 0)
1548 BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb;
1549 bool BadCFGConflict =
false;
1552 BlockChain *PredChain = BlockToChain[Pred];
1553 if (Pred == Succ || PredChain == &SuccChain ||
1554 (BlockFilter && !BlockFilter->count(Pred)) ||
1555 PredChain == &Chain || Pred != *std::prev(PredChain->end()) ||
1576 if (PredEdgeFreq * HotProb >= CandidateEdgeFreq * HotProb.
getCompl()) {
1577 BadCFGConflict =
true;
1582 if (BadCFGConflict) {
1584 << SuccProb <<
" (prob) (non-cold CFG conflict)\n");
1601MachineBlockPlacement::BlockAndTailDupResult
1602MachineBlockPlacement::selectBestSuccessor(
1604 const BlockFilterSet *BlockFilter) {
1607 BlockAndTailDupResult BestSucc = {
nullptr,
false };
1611 auto AdjustedSumProb =
1612 collectViableSuccessors(BB, Chain, BlockFilter, Successors);
1619 auto FoundEdge = ComputedEdges.
find(BB);
1620 if (FoundEdge != ComputedEdges.
end()) {
1622 ComputedEdges.
erase(FoundEdge);
1623 BlockChain *SuccChain = BlockToChain[Succ];
1624 if (BB->
isSuccessor(Succ) && (!BlockFilter || BlockFilter->count(Succ)) &&
1625 SuccChain != &Chain && Succ == *SuccChain->begin())
1626 return FoundEdge->second;
1631 if (isTrellis(BB, Successors, Chain, BlockFilter))
1632 return getBestTrellisSuccessor(BB, Successors, AdjustedSumProb, Chain,
1645 BlockChain &SuccChain = *BlockToChain[Succ];
1648 if (hasBetterLayoutPredecessor(BB, Succ, SuccChain, SuccProb, RealSuccProb,
1649 Chain, BlockFilter)) {
1651 if (allowTailDupPlacement() && shouldTailDuplicate(Succ))
1658 <<
", probability: " << SuccProb
1659 << (SuccChain.UnscheduledPredecessors != 0 ?
" (CFG break)" :
"")
1662 if (BestSucc.BB && BestProb >= SuccProb) {
1669 BestProb = SuccProb;
1677 [](std::tuple<BranchProbability, MachineBasicBlock *> L,
1678 std::tuple<BranchProbability, MachineBasicBlock *> R) {
1679 return std::get<0>(L) > std::get<0>(R);
1681 for (
auto &Tup : DupCandidates) {
1684 std::tie(DupProb, Succ) = Tup;
1685 if (DupProb < BestProb)
1687 if (canTailDuplicateUnplacedPreds(BB, Succ, Chain, BlockFilter)
1688 && (isProfitableToTailDup(BB, Succ, BestProb, Chain, BlockFilter))) {
1690 <<
", probability: " << DupProb
1691 <<
" (Tail Duplicate)\n");
1693 BestSucc.ShouldTailDup =
true;
1721 return BlockToChain.
lookup(BB) == &Chain;
1724 if (WorkList.
empty())
1727 bool IsEHPad = WorkList[0]->isEHPad();
1733 "EHPad mismatch between block and work list.");
1735 BlockChain &SuccChain = *BlockToChain[
MBB];
1736 if (&SuccChain == &Chain)
1739 assert(SuccChain.UnscheduledPredecessors == 0 &&
1740 "Found CFG-violating block");
1765 if (BestBlock && (IsEHPad ^ (BestFreq >= CandidateFreq)))
1769 BestFreq = CandidateFreq;
1783 const BlockChain &PlacedChain,
1788 if (BlockToChain[&*
I] != &PlacedChain) {
1789 PrevUnplacedBlockIt =
I;
1793 return *BlockToChain[&*
I]->
begin();
1810 const BlockChain &PlacedChain,
1811 BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt,
1812 const BlockFilterSet *BlockFilter) {
1814 for (; PrevUnplacedBlockInFilterIt != BlockFilter->end();
1815 ++PrevUnplacedBlockInFilterIt) {
1816 BlockChain *
C = BlockToChain[*PrevUnplacedBlockInFilterIt];
1817 if (
C != &PlacedChain) {
1824void MachineBlockPlacement::fillWorkLists(
1827 const BlockFilterSet *BlockFilter =
nullptr) {
1828 BlockChain &Chain = *BlockToChain[
MBB];
1829 if (!UpdatedPreds.
insert(&Chain).second)
1833 Chain.UnscheduledPredecessors == 0 &&
1834 "Attempting to place block with unscheduled predecessors in worklist.");
1836 assert(BlockToChain[ChainBB] == &Chain &&
1837 "Block in chain doesn't match BlockToChain map.");
1839 if (BlockFilter && !BlockFilter->count(Pred))
1841 if (BlockToChain[Pred] == &Chain)
1843 ++Chain.UnscheduledPredecessors;
1847 if (Chain.UnscheduledPredecessors != 0)
1857void MachineBlockPlacement::buildChain(
1859 BlockFilterSet *BlockFilter) {
1860 assert(HeadBB &&
"BB must not be null.\n");
1861 assert(BlockToChain[HeadBB] == &Chain &&
"BlockToChainMap mis-match.\n");
1863 BlockFilterSet::iterator PrevUnplacedBlockInFilterIt;
1865 PrevUnplacedBlockInFilterIt = BlockFilter->begin();
1868 markChainSuccessors(Chain, LoopHeaderBB, BlockFilter);
1871 assert(BB &&
"null block found at end of chain in loop.");
1872 assert(BlockToChain[BB] == &Chain &&
"BlockToChainMap mis-match in loop.");
1873 assert(*std::prev(Chain.end()) == BB &&
"BB Not found at end of chain.");
1878 auto Result = selectBestSuccessor(BB, Chain, BlockFilter);
1880 bool ShouldTailDup =
Result.ShouldTailDup;
1881 if (allowTailDupPlacement())
1882 ShouldTailDup |= (BestSucc && canTailDuplicateUnplacedPreds(BB, BestSucc,
1890 BestSucc = selectBestCandidateBlock(Chain, BlockWorkList);
1892 BestSucc = selectBestCandidateBlock(Chain, EHPadWorkList);
1896 BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockInFilterIt,
1899 BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockIt);
1903 LLVM_DEBUG(
dbgs() <<
"Unnatural loop CFG detected, forcibly merging the "
1904 "layout successor until the CFG reduces\n");
1909 if (allowTailDupPlacement() && BestSucc && ShouldTailDup) {
1910 repeatedlyTailDuplicateBlock(BestSucc, BB, LoopHeaderBB, Chain,
1911 BlockFilter, PrevUnplacedBlockIt,
1912 PrevUnplacedBlockInFilterIt);
1920 BlockChain &SuccChain = *BlockToChain[BestSucc];
1923 SuccChain.UnscheduledPredecessors = 0;
1926 markChainSuccessors(SuccChain, LoopHeaderBB, BlockFilter);
1927 Chain.merge(BestSucc, &SuccChain);
1928 BB = *std::prev(Chain.end());
1950MachineBlockPlacement::canMoveBottomBlockToTop(
1960 if (OtherBB == BottomBlock)
1962 if (OtherBB == OldTop)
1970MachineBlockPlacement::TopFallThroughFreq(
1972 const BlockFilterSet &LoopBlockSet) {
1975 BlockChain *PredChain = BlockToChain[Pred];
1976 if (!LoopBlockSet.count(Pred) &&
1977 (!PredChain || Pred == *std::prev(PredChain->end()))) {
1984 BlockChain *SuccChain = BlockToChain[Succ];
1987 if (!LoopBlockSet.count(Succ) && (SuccProb > TopProb) &&
1988 (!SuccChain || Succ == *SuccChain->begin())) {
1996 if (EdgeFreq > MaxFreq)
2026MachineBlockPlacement::FallThroughGains(
2030 const BlockFilterSet &LoopBlockSet) {
2031 BlockFrequency FallThrough2Top = TopFallThroughFreq(OldTop, LoopBlockSet);
2034 FallThrough2Exit = MBFI->getBlockFreq(NewTop) *
2043 if (!LoopBlockSet.count(Pred))
2045 BlockChain *PredChain = BlockToChain[Pred];
2046 if (!PredChain || Pred == *std::prev(PredChain->end())) {
2049 if (EdgeFreq > FallThroughFromPred) {
2050 FallThroughFromPred = EdgeFreq;
2061 if ((Succ == NewTop) || (Succ == BestPred) || !LoopBlockSet.count(Succ))
2065 BlockChain *SuccChain = BlockToChain[Succ];
2066 if ((SuccChain && (Succ != *SuccChain->begin())) ||
2067 (SuccChain == BlockToChain[BestPred]))
2071 if (EdgeFreq > NewFreq)
2076 if (NewFreq > OrigEdgeFreq) {
2088 FallThrough2Top + FallThrough2Exit + FallThroughFromPred;
2117MachineBlockPlacement::findBestLoopTopHelper(
2120 const BlockFilterSet &LoopBlockSet) {
2124 BlockChain &HeaderChain = *BlockToChain[OldTop];
2125 if (!LoopBlockSet.count(*HeaderChain.begin()))
2127 if (OldTop != *HeaderChain.begin())
2136 if (!LoopBlockSet.count(Pred))
2138 if (Pred ==
L.getHeader())
2149 if (OtherBB == OldTop)
2153 if (!canMoveBottomBlockToTop(Pred, OldTop))
2159 (Gains > BestGains ||
2174 (*BestPred->
pred_begin())->succ_size() == 1 &&
2187MachineBlockPlacement::findBestLoopTop(
const MachineLoop &L,
2188 const BlockFilterSet &LoopBlockSet) {
2196 bool OptForSize =
F->getFunction().hasOptSize() ||
2199 return L.getHeader();
2203 while (NewTop != OldTop) {
2205 NewTop = findBestLoopTopHelper(OldTop, L, LoopBlockSet);
2206 if (NewTop != OldTop)
2207 ComputedEdges[NewTop] = { OldTop,
false };
2218MachineBlockPlacement::findBestLoopExit(
const MachineLoop &L,
2219 const BlockFilterSet &LoopBlockSet,
2229 BlockChain &HeaderChain = *BlockToChain[
L.getHeader()];
2230 if (!LoopBlockSet.count(*HeaderChain.begin()))
2234 unsigned BestExitLoopDepth = 0;
2244 BlockChain &Chain = *BlockToChain[
MBB];
2247 if (
MBB != *std::prev(Chain.end()))
2256 bool HasLoopingSucc =
false;
2262 BlockChain &SuccChain = *BlockToChain[Succ];
2264 if (&Chain == &SuccChain) {
2271 if (LoopBlockSet.count(Succ)) {
2274 HasLoopingSucc =
true;
2278 unsigned SuccLoopDepth = 0;
2280 SuccLoopDepth = ExitLoop->getLoopDepth();
2281 if (ExitLoop->contains(&L))
2288 <<
getBlockName(Succ) <<
" [L:" << SuccLoopDepth <<
"] ("
2295 if (!ExitingBB || SuccLoopDepth > BestExitLoopDepth ||
2296 ExitEdgeFreq > BestExitEdgeFreq ||
2298 !(ExitEdgeFreq < BestExitEdgeFreq * Bias))) {
2299 BestExitEdgeFreq = ExitEdgeFreq;
2304 if (!HasLoopingSucc) {
2306 ExitingBB = OldExitingBB;
2307 BestExitEdgeFreq = OldBestExitEdgeFreq;
2314 dbgs() <<
" No other candidate exit blocks, using loop header\n");
2317 if (
L.getNumBlocks() == 1) {
2318 LLVM_DEBUG(
dbgs() <<
" Loop has 1 block, using loop header as exit\n");
2325 if (!BlocksExitingToOuterLoop.
empty() &&
2326 !BlocksExitingToOuterLoop.
count(ExitingBB))
2331 ExitFreq = BestExitEdgeFreq;
2340MachineBlockPlacement::hasViableTopFallthrough(
2342 const BlockFilterSet &LoopBlockSet) {
2344 BlockChain *PredChain = BlockToChain[Pred];
2345 if (!LoopBlockSet.count(Pred) &&
2346 (!PredChain || Pred == *std::prev(PredChain->end()))) {
2353 BlockChain *SuccChain = BlockToChain[Succ];
2356 if ((!SuccChain || Succ == *SuccChain->begin()) && SuccProb > TopProb) {
2374void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
2377 const BlockFilterSet &LoopBlockSet) {
2385 if (Bottom == ExitingBB)
2392 bool ViableTopFallthrough = hasViableTopFallthrough(Top, LoopBlockSet);
2397 if (ViableTopFallthrough) {
2399 BlockChain *SuccChain = BlockToChain[Succ];
2400 if (!LoopBlockSet.count(Succ) &&
2401 (!SuccChain || Succ == *SuccChain->begin()))
2407 BlockFrequency FallThrough2Top = TopFallThroughFreq(Top, LoopBlockSet);
2408 if (FallThrough2Top >= ExitFreq)
2412 BlockChain::iterator ExitIt =
llvm::find(LoopChain, ExitingBB);
2413 if (ExitIt == LoopChain.end())
2435 if (ViableTopFallthrough) {
2436 assert(std::next(ExitIt) != LoopChain.end() &&
2437 "Exit should not be last BB");
2446 std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end());
2462void MachineBlockPlacement::rotateLoopWithProfile(
2464 const BlockFilterSet &LoopBlockSet) {
2465 auto RotationPos = LoopChain.end();
2490 BlockChain *PredChain = BlockToChain[Pred];
2491 if (!LoopBlockSet.count(Pred) &&
2492 (!PredChain || Pred == *std::prev(PredChain->end()))) {
2493 auto EdgeFreq = MBFI->getBlockFreq(Pred) *
2495 auto FallThruCost = ScaleBlockFrequency(EdgeFreq,
MisfetchCost);
2499 FallThruCost += ScaleBlockFrequency(EdgeFreq,
JumpInstCost);
2500 HeaderFallThroughCost = std::max(HeaderFallThroughCost, FallThruCost);
2509 for (
auto *BB : LoopChain) {
2512 BlockChain *SuccChain = BlockToChain[Succ];
2513 if (!LoopBlockSet.count(Succ) &&
2514 (!SuccChain || Succ == *SuccChain->begin())) {
2516 LargestExitEdgeProb = std::max(LargestExitEdgeProb, SuccProb);
2520 auto ExitFreq = MBFI->getBlockFreq(BB) * LargestExitEdgeProb;
2528 for (
auto Iter = LoopChain.begin(), TailIter = std::prev(LoopChain.end()),
2529 EndIter = LoopChain.end();
2530 Iter != EndIter; Iter++, TailIter++) {
2533 if (TailIter == LoopChain.end())
2534 TailIter = LoopChain.begin();
2536 auto TailBB = *TailIter;
2544 if (Iter != LoopChain.begin())
2545 Cost += HeaderFallThroughCost;
2549 for (
auto &ExitWithFreq : ExitsWithFreq)
2550 if (TailBB != ExitWithFreq.first)
2551 Cost += ExitWithFreq.second;
2567 if (TailBB->isSuccessor(*Iter)) {
2568 auto TailBBFreq = MBFI->getBlockFreq(TailBB);
2569 if (TailBB->succ_size() == 1)
2571 else if (TailBB->succ_size() == 2) {
2573 auto TailToHeadFreq = TailBBFreq * TailToHeadProb;
2575 ? TailBBFreq * TailToHeadProb.
getCompl()
2586 if (
Cost < SmallestRotationCost) {
2587 SmallestRotationCost =
Cost;
2592 if (RotationPos != LoopChain.end()) {
2594 <<
" to the top\n");
2595 std::rotate(LoopChain.begin(), RotationPos, LoopChain.end());
2604MachineBlockPlacement::collectLoopBlockSet(
const MachineLoop &L) {
2610 return X->getNumber() <
Y->getNumber();
2613 std::set<const MachineBasicBlock *, MBBCompare> LoopBlockSet;
2626 for (
auto *LoopPred :
L.getHeader()->predecessors())
2627 if (!
L.contains(LoopPred))
2628 LoopFreq += MBFI->getBlockFreq(LoopPred) *
2632 if (LoopBlockSet.count(LoopBB))
2637 BlockChain *Chain = BlockToChain[LoopBB];
2639 LoopBlockSet.
insert(ChainBB);
2642 LoopBlockSet.insert(
L.block_begin(),
L.block_end());
2647 BlockFilterSet
Ret(LoopBlockSet.begin(), LoopBlockSet.end());
2657void MachineBlockPlacement::buildLoopChains(
const MachineLoop &L) {
2661 buildLoopChains(*InnerLoop);
2664 "BlockWorkList not empty when starting to build loop chains.");
2666 "EHPadWorkList not empty when starting to build loop chains.");
2667 BlockFilterSet LoopBlockSet = collectLoopBlockSet(L);
2672 bool RotateLoopWithProfile =
2688 PreferredLoopExit =
nullptr;
2690 if (!RotateLoopWithProfile && LoopTop ==
L.getHeader())
2691 PreferredLoopExit = findBestLoopExit(L, LoopBlockSet, ExitFreq);
2693 BlockChain &LoopChain = *BlockToChain[LoopTop];
2699 assert(LoopChain.UnscheduledPredecessors == 0 &&
2700 "LoopChain should not have unscheduled predecessors.");
2701 UpdatedPreds.
insert(&LoopChain);
2704 fillWorkLists(LoopBB, UpdatedPreds, &LoopBlockSet);
2706 buildChain(LoopTop, LoopChain, &LoopBlockSet);
2708 if (RotateLoopWithProfile)
2709 rotateLoopWithProfile(LoopChain, L, LoopBlockSet);
2711 rotateLoop(LoopChain, PreferredLoopExit, ExitFreq, LoopBlockSet);
2715 bool BadLoop =
false;
2716 if (LoopChain.UnscheduledPredecessors) {
2718 dbgs() <<
"Loop chain contains a block without its preds placed!\n"
2719 <<
" Loop header: " << getBlockName(*L.block_begin()) <<
"\n"
2720 <<
" Chain header: " << getBlockName(*LoopChain.begin()) <<
"\n";
2724 if (!LoopBlockSet.remove(ChainBB)) {
2728 dbgs() <<
"Loop chain contains a block not contained by the loop!\n"
2729 <<
" Loop header: " <<
getBlockName(*
L.block_begin()) <<
"\n"
2730 <<
" Chain header: " <<
getBlockName(*LoopChain.begin()) <<
"\n"
2735 if (!LoopBlockSet.empty()) {
2738 dbgs() <<
"Loop contains blocks never placed into a chain!\n"
2739 <<
" Loop header: " <<
getBlockName(*
L.block_begin()) <<
"\n"
2740 <<
" Chain header: " <<
getBlockName(*LoopChain.begin()) <<
"\n"
2743 assert(!BadLoop &&
"Detected problems with the placement of this loop.");
2746 BlockWorkList.
clear();
2747 EHPadWorkList.
clear();
2750void MachineBlockPlacement::buildCFGChains() {
2758 new (ChainAllocator.
Allocate()) BlockChain(BlockToChain, BB);
2771 assert(NextFI != FE &&
"Can't fallthrough past the last block.");
2772 LLVM_DEBUG(
dbgs() <<
"Pre-merging due to unanalyzable fallthrough: "
2775 Chain->merge(NextBB,
nullptr);
2777 BlocksWithUnanalyzableExits.
insert(&*BB);
2785 PreferredLoopExit =
nullptr;
2787 buildLoopChains(*L);
2790 "BlockWorkList should be empty before building final chain.");
2792 "EHPadWorkList should be empty before building final chain.");
2796 fillWorkLists(&
MBB, UpdatedPreds);
2798 BlockChain &FunctionChain = *BlockToChain[&
F->front()];
2799 buildChain(&
F->front(), FunctionChain);
2806 bool BadFunc =
false;
2807 FunctionBlockSetType FunctionBlockSet;
2809 FunctionBlockSet.insert(&
MBB);
2812 if (!FunctionBlockSet.erase(ChainBB)) {
2814 dbgs() <<
"Function chain contains a block not in the function!\n"
2815 <<
" Bad block: " << getBlockName(ChainBB) <<
"\n";
2818 if (!FunctionBlockSet.empty()) {
2820 for (MachineBasicBlock *RemainingBB : FunctionBlockSet)
2821 dbgs() <<
"Function contains blocks never placed into a chain!\n"
2822 <<
" Bad block: " << getBlockName(RemainingBB) <<
"\n";
2824 assert(!BadFunc &&
"Detected problems with the block placement.");
2830 F->getNumBlockIDs());
2833 for (
auto &
MBB : *
F) {
2834 if (LastMBB !=
nullptr)
2838 OriginalLayoutSuccessors[
F->back().getNumber()] =
nullptr;
2845 LLVM_DEBUG(
dbgs() << (ChainBB == *FunctionChain.begin() ?
"Placing chain "
2849 F->splice(InsertPos, ChainBB);
2854 if (ChainBB == *FunctionChain.begin())
2865 if (!BlocksWithUnanalyzableExits.
count(PrevBB)) {
2871 "Unexpected block with un-analyzable fallthrough!");
2873 TBB = FBB =
nullptr;
2911 BlockWorkList.
clear();
2912 EHPadWorkList.
clear();
2915void MachineBlockPlacement::optimizeBranches() {
2916 BlockChain &FunctionChain = *BlockToChain[&
F->front()];
2931 if (
TBB && !
Cond.empty() && FBB &&
2948void MachineBlockPlacement::alignBlocks() {
2954 if (
F->getFunction().hasMinSize() ||
2957 BlockChain &FunctionChain = *BlockToChain[&
F->front()];
2958 if (FunctionChain.begin() == FunctionChain.end())
2965 if (ChainBB == *FunctionChain.begin())
2977 unsigned MDAlign = 1;
2978 MDNode *LoopID =
L->getLoopID();
2981 MDNode *MD = dyn_cast<MDNode>(MDO);
2987 if (S->
getString() ==
"llvm.loop.align") {
2989 "per-loop align metadata should have two operands.");
2991 mdconst::extract<ConstantInt>(MD->
getOperand(1))->getZExtValue();
2992 assert(MDAlign >= 1 &&
"per-loop align value must be positive.");
2998 const Align LoopAlign = std::max(TLIAlign,
Align(MDAlign));
3005 if (Freq < WeightedEntryFreq)
3012 if (Freq < (LoopHeaderFreq * ColdProb))
3025 auto DetermineMaxAlignmentPadding = [&]() {
3032 ChainBB->setMaxBytesForAlignment(MaxBytes);
3038 ChainBB->setAlignment(LoopAlign);
3039 DetermineMaxAlignmentPadding();
3049 BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb;
3050 if (LayoutEdgeFreq <= (Freq * ColdProb)) {
3051 ChainBB->setAlignment(LoopAlign);
3052 DetermineMaxAlignmentPadding();
3072bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
3076 BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt) {
3077 bool Removed, DuplicatedToLPred;
3078 bool DuplicatedToOriginalLPred;
3079 Removed = maybeTailDuplicateBlock(
3080 BB, LPred, Chain, BlockFilter, PrevUnplacedBlockIt,
3081 PrevUnplacedBlockInFilterIt, DuplicatedToLPred);
3084 DuplicatedToOriginalLPred = DuplicatedToLPred;
3089 while (DuplicatedToLPred && Removed) {
3096 BlockChain::iterator ChainEnd = Chain.
end();
3097 DupBB = *(--ChainEnd);
3099 if (ChainEnd == Chain.begin())
3101 DupPred = *std::prev(ChainEnd);
3102 Removed = maybeTailDuplicateBlock(
3103 DupBB, DupPred, Chain, BlockFilter, PrevUnplacedBlockIt,
3104 PrevUnplacedBlockInFilterIt, DuplicatedToLPred);
3111 LPred = *std::prev(Chain.end());
3112 if (DuplicatedToOriginalLPred)
3113 markBlockSuccessors(Chain, LPred, LoopHeaderBB, BlockFilter);
3130bool MachineBlockPlacement::maybeTailDuplicateBlock(
3133 BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt,
3134 bool &DuplicatedToLPred) {
3135 DuplicatedToLPred =
false;
3136 if (!shouldTailDuplicate(BB))
3144 bool Removed =
false;
3145 auto RemovalCallback =
3151 bool InWorkList =
true;
3153 if (BlockToChain.
count(RemBB)) {
3154 BlockChain *Chain = BlockToChain[RemBB];
3155 InWorkList = Chain->UnscheduledPredecessors == 0;
3156 Chain->remove(RemBB);
3157 BlockToChain.
erase(RemBB);
3161 if (&(*PrevUnplacedBlockIt) == RemBB) {
3162 PrevUnplacedBlockIt++;
3168 if (RemBB->isEHPad())
3169 RemoveList = EHPadWorkList;
3178 if (It != BlockFilter->end()) {
3179 if (It < PrevUnplacedBlockInFilterIt) {
3183 auto Distance = PrevUnplacedBlockInFilterIt - It - 1;
3184 PrevUnplacedBlockInFilterIt = BlockFilter->
erase(It) + Distance;
3185 assert(*PrevUnplacedBlockInFilterIt == PrevBB);
3187 }
else if (It == PrevUnplacedBlockInFilterIt)
3190 PrevUnplacedBlockInFilterIt = BlockFilter->erase(It);
3192 BlockFilter->erase(It);
3197 MLI->removeBlock(RemBB);
3198 if (RemBB == PreferredLoopExit)
3199 PreferredLoopExit =
nullptr;
3204 auto RemovalCallbackRef =
3211 if (
F->getFunction().hasProfileData()) {
3213 findDuplicateCandidates(CandidatePreds, BB, BlockFilter);
3214 if (CandidatePreds.
size() == 0)
3217 CandidatePtr = &CandidatePreds;
3220 &RemovalCallbackRef, CandidatePtr);
3223 DuplicatedToLPred =
false;
3226 BlockChain* PredChain = BlockToChain[Pred];
3228 DuplicatedToLPred =
true;
3229 if (Pred == LPred || (BlockFilter && !BlockFilter->count(Pred))
3230 || PredChain == &Chain)
3233 if (BlockFilter && !BlockFilter->count(NewSucc))
3235 BlockChain *NewChain = BlockToChain[NewSucc];
3236 if (NewChain != &Chain && NewChain != PredChain)
3237 NewChain->UnscheduledPredecessors++;
3247 if (!
MI.isPHI() && !
MI.isMetaInstruction())
3263 BlockFilterSet *BlockFilter) {
3266 if (BlockFilter && !BlockFilter->count(Pred))
3268 BlockChain *PredChain = BlockToChain[Pred];
3269 if (PredChain && (Pred != *std::prev(PredChain->end())))
3276 if (BlockFilter && !BlockFilter->count(Succ))
3278 BlockChain *SuccChain = BlockToChain[Succ];
3279 if (SuccChain && (Succ != *SuccChain->begin()))
3282 if (SuccProb > BestProb)
3283 BestProb = SuccProb;
3287 if (BBProb <= BestProb)
3294 return Gain > scaleThreshold(BB);
3299void MachineBlockPlacement::findDuplicateCandidates(
3302 BlockFilterSet *BlockFilter) {
3314 return MBFI->getBlockFreq(
A) > MBFI->getBlockFreq(
B);
3319 auto SuccIt = Succs.begin();
3320 if (SuccIt != Succs.end()) {
3371 if (!Fallthrough && isBestSuccessor(BB, Pred, BlockFilter)) {
3373 if (SuccIt != Succs.end())
3379 BlockFrequency OrigCost = PredFreq + PredFreq * DefaultBranchProb;
3381 if (SuccIt == Succs.end()) {
3383 if (Succs.size() > 0)
3384 DupCost += PredFreq;
3387 DupCost += PredFreq;
3391 assert(OrigCost >= DupCost);
3392 OrigCost -= DupCost;
3393 if (OrigCost > BBDupThreshold) {
3395 if (SuccIt != Succs.end())
3403 if ((Candidates.
size() < Preds.size()) && (Candidates.
size() > 0)) {
3404 Candidates[0] = Candidates.
back();
3410void MachineBlockPlacement::initDupThreshold() {
3412 if (!
F->getFunction().hasProfileData())
3418 UseProfileCount =
true;
3434 UseProfileCount =
false;
3437bool MachineBlockPlacement::runOnMachineFunction(
MachineFunction &MF) {
3442 if (std::next(MF.
begin()) == MF.
end())
3446 MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
3447 MBFI = std::make_unique<MBFIWrapper>(
3448 getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI());
3449 MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
3453 PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
3459 PreferredLoopExit =
nullptr;
3462 "BlockToChain map should be empty before starting placement.");
3464 "Computed Edge map should be empty before starting placement.");
3475 if (PassConfig->
getOptLevel() >= CodeGenOptLevel::Aggressive) {
3487 (PassConfig->
getOptLevel() < CodeGenOptLevel::Aggressive ||
3489 TailDupSize =
TII->getTailDuplicateSize(PassConfig->
getOptLevel());
3491 if (allowTailDupPlacement()) {
3492 MPDT = &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
3497 bool PreRegAlloc =
false;
3498 TailDup.
initMF(MF, PreRegAlloc, MBPI, MBFI.get(), PSI,
3500 precomputeTriangleChains();
3512 if (MF.
size() > 3 && EnableTailMerge) {
3520 BlockToChain.
clear();
3521 ComputedEdges.
clear();
3538 createCFGChainExtTsp();
3544 BlockToChain.
clear();
3545 ComputedEdges.
clear();
3548 bool HasMaxBytesOverride =
3554 if (HasMaxBytesOverride)
3563 for (
auto MBI = std::next(MF.begin()), MBE = MF.end(); MBI != MBE; ++MBI) {
3564 auto LayoutPred = std::prev(MBI);
3566 if (HasMaxBytesOverride)
3578 MF.RenumberBlocks();
3579 MBFI->view(
"MBP." + MF.getName(),
false);
3587void MachineBlockPlacement::applyExtTsp() {
3591 std::vector<const MachineBasicBlock *> CurrentBlockOrder;
3592 CurrentBlockOrder.reserve(
F->size());
3593 size_t NumBlocks = 0;
3595 BlockIndex[&
MBB] = NumBlocks++;
3596 CurrentBlockOrder.push_back(&
MBB);
3599 auto BlockSizes = std::vector<uint64_t>(
F->size());
3600 auto BlockCounts = std::vector<uint64_t>(
F->size());
3601 std::vector<codelayout::EdgeCount> JumpCounts;
3614 int NumInsts = std::distance(NonDbgInsts.begin(), NonDbgInsts.end());
3615 BlockSizes[BlockIndex[&
MBB]] = 4 * NumInsts;
3620 JumpCounts.push_back(
3625 LLVM_DEBUG(
dbgs() <<
"Applying ext-tsp layout for |V| = " <<
F->size()
3626 <<
" with profile = " <<
F->getFunction().hasProfileData()
3627 <<
" (" <<
F->getName().str() <<
")"
3630 dbgs() <<
format(
" original layout score: %0.2f\n",
3635 std::vector<const MachineBasicBlock *> NewBlockOrder;
3636 NewBlockOrder.reserve(
F->size());
3638 NewBlockOrder.push_back(CurrentBlockOrder[
Node]);
3645 assignBlockOrder(NewBlockOrder);
3648void MachineBlockPlacement::assignBlockOrder(
3649 const std::vector<const MachineBasicBlock *> &NewBlockOrder) {
3650 assert(
F->size() == NewBlockOrder.size() &&
"Incorrect size of block order");
3651 F->RenumberBlocks();
3658 bool HasChanges =
false;
3659 for (
size_t I = 0;
I < NewBlockOrder.size();
I++) {
3660 if (NewBlockOrder[
I] !=
F->getBlockNumbered(
I)) {
3670 for (
auto &
MBB : *
F) {
3677 NewIndex[
MBB] = NewIndex.
size();
3680 return NewIndex[&
L] < NewIndex[&
R];
3687 for (
auto &
MBB : *
F) {
3694 if (FTMBB && (NextMBB == EndIt || &*NextMBB != FTMBB)) {
3708 F->verify(
this,
"After optimized block reordering");
3712void MachineBlockPlacement::createCFGChainExtTsp() {
3713 BlockToChain.
clear();
3714 ComputedEdges.
clear();
3718 BlockChain *FunctionChain =
3719 new (ChainAllocator.
Allocate()) BlockChain(BlockToChain, HeadBB);
3724 FunctionChain->merge(&
MBB,
nullptr);
3762char MachineBlockPlacementStats::ID = 0;
3767 "Basic Block Placement Stats",
false,
false)
3775 if (std::next(
F.begin()) ==
F.end())
3781 MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
3782 MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
3787 (
MBB.
succ_size() > 1) ? NumCondBranches : NumUncondBranches;
3789 (
MBB.
succ_size() > 1) ? CondBranchTakenFreq : UncondBranchTakenFreq;
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
This file defines the BumpPtrAllocator interface.
static cl::opt< unsigned > TailMergeSize("tail-merge-size", cl::desc("Min number of instructions to consider tail merging"), cl::init(3), cl::Hidden)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Declares methods and data structures for code layout algorithms.
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
static unsigned InstrCount
This file defines the DenseMap class.
DenseMap< Block *, BlockRelaxAux > Blocks
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
const HexagonInstrInfo * TII
static LoopDeletionResult merge(LoopDeletionResult A, LoopDeletionResult B)
block placement Basic Block Placement Stats
static BranchProbability getAdjustedProbability(BranchProbability OrigProb, BranchProbability AdjustedSumProb)
The helper function returns the branch probability that is adjusted or normalized over the new total ...
static cl::opt< bool > PreciseRotationCost("precise-rotation-cost", cl::desc("Model the cost of loop rotation more " "precisely by using profile data."), cl::init(false), cl::Hidden)
Branch Probability Basic Block Placement
static cl::opt< unsigned > ExtTspBlockPlacementMaxBlocks("ext-tsp-block-placement-max-blocks", cl::desc("Maximum number of basic blocks in a function to run ext-TSP " "block placement."), cl::init(UINT_MAX), cl::Hidden)
static cl::opt< unsigned > AlignAllBlock("align-all-blocks", cl::desc("Force the alignment of all blocks in the function in log2 format " "(e.g 4 means align on 16B boundaries)."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > TailDupPlacementPenalty("tail-dup-placement-penalty", cl::desc("Cost penalty for blocks that can avoid breaking CFG by copying. " "Copying can increase fallthrough, but it also increases icache " "pressure. This parameter controls the penalty to account for that. " "Percent as integer."), cl::init(2), cl::Hidden)
static BranchProbability getLayoutSuccessorProbThreshold(const MachineBasicBlock *BB)
static cl::opt< bool > ForceLoopColdBlock("force-loop-cold-block", cl::desc("Force outlining cold blocks from loops."), cl::init(false), cl::Hidden)
static cl::opt< unsigned > ExitBlockBias("block-placement-exit-block-bias", cl::desc("Block frequency percentage a loop exit block needs " "over the original exit to be considered the new exit."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > AlignAllNonFallThruBlocks("align-all-nofallthru-blocks", cl::desc("Force the alignment of all blocks that have no fall-through " "predecessors (i.e. don't add nops that are executed). In log2 " "format (e.g 4 means align on 16B boundaries)."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > TailDupPlacementThreshold("tail-dup-placement-threshold", cl::desc("Instruction cutoff for tail duplication during layout. " "Tail merging during layout is forced to have a threshold " "that won't conflict."), cl::init(2), cl::Hidden)
static cl::opt< unsigned > JumpInstCost("jump-inst-cost", cl::desc("Cost of jump instructions."), cl::init(1), cl::Hidden)
static bool greaterWithBias(BlockFrequency A, BlockFrequency B, BlockFrequency EntryFreq)
Compare 2 BlockFrequency's with a small penalty for A.
static cl::opt< unsigned > MisfetchCost("misfetch-cost", cl::desc("Cost that models the probabilistic risk of an instruction " "misfetch due to a jump comparing to falling through, whose cost " "is zero."), cl::init(1), cl::Hidden)
static cl::opt< unsigned > MaxBytesForAlignmentOverride("max-bytes-for-alignment", cl::desc("Forces the maximum bytes allowed to be emitted when padding for " "alignment"), cl::init(0), cl::Hidden)
static cl::opt< bool > BranchFoldPlacement("branch-fold-placement", cl::desc("Perform branch folding during placement. " "Reduces code size."), cl::init(true), cl::Hidden)
static cl::opt< unsigned > TailDupProfilePercentThreshold("tail-dup-profile-percent-threshold", cl::desc("If profile count information is used in tail duplication cost " "model, the gained fall through number from tail duplication " "should be at least this percent of hot count."), cl::init(50), cl::Hidden)
static cl::opt< unsigned > TriangleChainCount("triangle-chain-count", cl::desc("Number of triangle-shaped-CFG's that need to be in a row for the " "triangle tail duplication heuristic to kick in. 0 to disable."), cl::init(2), cl::Hidden)
Branch Probability Basic Block static false std::string getBlockName(const MachineBasicBlock *BB)
Helper to print the name of a MBB.
static bool hasSameSuccessors(MachineBasicBlock &BB, SmallPtrSetImpl< const MachineBasicBlock * > &Successors)
Check if BB has exactly the successors in Successors.
static cl::opt< bool > TailDupPlacement("tail-dup-placement", cl::desc("Perform tail duplication during placement. " "Creates more fallthrough opportunites in " "outline branches."), cl::init(true), cl::Hidden)
static uint64_t countMBBInstruction(MachineBasicBlock *MBB)
static cl::opt< unsigned > LoopToColdBlockRatio("loop-to-cold-block-ratio", cl::desc("Outline loop blocks from loop chain if (frequency of loop) / " "(frequency of block) is greater than this ratio"), cl::init(5), cl::Hidden)
static cl::opt< bool > RenumberBlocksBeforeView("renumber-blocks-before-view", cl::desc("If true, basic blocks are re-numbered before MBP layout is printed " "into a dot graph. Only used when a function is being printed."), cl::init(false), cl::Hidden)
static cl::opt< unsigned > TailDupPlacementAggressiveThreshold("tail-dup-placement-aggressive-threshold", cl::desc("Instruction cutoff for aggressive tail duplication during " "layout. Used at -O3. Tail merging during layout is forced to " "have a threshold that won't conflict."), cl::init(4), cl::Hidden)
static cl::opt< bool > ForcePreciseRotationCost("force-precise-rotation-cost", cl::desc("Force the use of precise cost " "loop rotation strategy."), cl::init(false), cl::Hidden)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
unify loop Fixup each natural loop to have a single exit block
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
static BlockFrequency max()
Returns the maximum possible frequency, the saturation value.
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
static BranchProbability getOne()
uint32_t getNumerator() const
BranchProbability getCompl() const
static BranchProbability getZero()
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
bool erase(const KeyT &Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
Remove the branching code at the end of the specific MBB.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
Reverses the branch condition of the specified condition list, returning false on success and true if...
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
Insert branch code into the end of the specified MachineBasicBlock.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
unsigned getNumOperands() const
Return number of MDNode operands.
Tracking metadata reference owned by Metadata.
StringRef getString() const
unsigned pred_size() const
bool isEHPad() const
Returns true if the block is a landing pad.
instr_iterator instr_begin()
MachineBasicBlock * getFallThrough(bool JumpToFallThrough=true)
Return the fallthrough block if the block can implicitly transfer control to the block after it by fa...
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
void updateTerminator(MachineBasicBlock *PreviousLayoutSuccessor)
Update the terminator instructions in block to account for changes to block layout which may have bee...
bool canFallThrough()
Return true if the block can implicitly transfer control to the block after it by falling off the end...
succ_iterator succ_begin()
unsigned succ_size() const
void setAlignment(Align A)
Set alignment of the basic block.
bool isEntryBlock() const
Returns true if this is the entry block of the function.
pred_iterator pred_begin()
succ_reverse_iterator succ_rbegin()
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
DebugLoc findBranchDebugLoc()
Find and return the merged DebugLoc of the branch instructions of the block.
iterator_range< succ_iterator > successors()
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
iterator_range< pred_iterator > predecessors()
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const
getblockFreq - Return block frequency.
BranchProbability getEdgeProbability(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Representation of each machine instruction.
MachinePostDominatorTree - an analysis pass wrapper for DominatorTree used to compute the post-domina...
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
uint64_t getOrCompHotCountThreshold() const
Returns HotCountThreshold if set.
typename vector_type::const_iterator iterator
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
typename SuperClass::const_iterator const_iterator
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A BumpPtrAllocator that allows only elements of a specific type to be allocated.
T * Allocate(size_t num=1)
Allocate space for an array of objects without constructing them.
void DestroyAll()
Call the destructor of each allocated object and deallocate all but the current slab and reset the cu...
Utility class to perform tail duplication.
void initMF(MachineFunction &MF, bool PreRegAlloc, const MachineBranchProbabilityInfo *MBPI, MBFIWrapper *MBFI, ProfileSummaryInfo *PSI, bool LayoutMode, unsigned TailDupSize=0)
Prepare to run on a specific machine function.
bool tailDuplicateAndUpdate(bool IsSimple, MachineBasicBlock *MBB, MachineBasicBlock *ForcedLayoutPred, SmallVectorImpl< MachineBasicBlock * > *DuplicatedPreds=nullptr, function_ref< void(MachineBasicBlock *)> *RemovalCallback=nullptr, SmallVectorImpl< MachineBasicBlock * > *CandidatePtr=nullptr)
Tail duplicate a single basic block into its predecessors, and then clean up.
static bool isSimpleBB(MachineBasicBlock *TailBB)
True if this BB has only one unconditional jump.
bool canTailDuplicate(MachineBasicBlock *TailBB, MachineBasicBlock *PredBB)
Returns true if TailBB can successfully be duplicated into PredBB.
bool shouldTailDuplicate(bool IsSimple, MachineBasicBlock &TailBB)
Determine if it is profitable to duplicate this block.
TargetInstrInfo - Interface to description of machine instruction set.
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
virtual unsigned getMaxPermittedBytesForAlignment(MachineBasicBlock *MBB) const
Return the maximum amount of bytes allowed to be emitted when padding for alignment.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
virtual bool alignLoopsWithOptSize() const
Should loops be aligned even when the function is marked OptSize (but not MinSize).
bool requiresStructuredCFG() const
Target-Independent Code Generator Pass Configuration Options.
bool getEnableTailMerge() const
CodeGenOptLevel getOptLevel() const
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetLowering * getTargetLowering() const
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
A raw_ostream that writes to an std::string.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
initializer< Ty > init(const Ty &Val)
double calcExtTspScore(ArrayRef< uint64_t > Order, ArrayRef< uint64_t > NodeSizes, ArrayRef< uint64_t > NodeCounts, ArrayRef< EdgeCount > EdgeCounts)
Estimate the "quality" of a given node order in CFG.
std::vector< uint64_t > computeExtTspLayout(ArrayRef< uint64_t > NodeSizes, ArrayRef< uint64_t > NodeCounts, ArrayRef< EdgeCount > EdgeCounts)
Find a layout of nodes (basic blocks) of a given CFG optimizing jump locality and thus processor I-ca...
void append(SmallVectorImpl< char > &path, const Twine &a, const Twine &b="", const Twine &c="", const Twine &d="")
Append to path.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
void stable_sort(R &&Range)
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
cl::opt< bool > ApplyExtTspWithoutProfile
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
void initializeMachineBlockPlacementStatsPass(PassRegistry &)
cl::opt< unsigned > ProfileLikelyProb
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
cl::opt< std::string > ViewBlockFreqFuncName("view-bfi-func-name", cl::Hidden, cl::desc("The option to specify " "the name of the function " "whose CFG will be displayed."))
auto reverse(ContainerTy &&C)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
cl::opt< GVDAGType > ViewBlockLayoutWithBFI("view-block-layout-with-bfi", cl::Hidden, cl::desc("Pop up a window to show a dag displaying MBP layout and associated " "block frequencies of the CFG."), cl::values(clEnumValN(GVDT_None, "none", "do not display graphs."), clEnumValN(GVDT_Fraction, "fraction", "display a graph using the " "fractional block frequency representation."), clEnumValN(GVDT_Integer, "integer", "display a graph using the raw " "integer fractional block frequency representation."), clEnumValN(GVDT_Count, "count", "display a graph using the real " "profile count if available.")))
bool isFunctionInPrintList(StringRef FunctionName)
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
cl::opt< unsigned > StaticLikelyProb
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Printable printBlockFreq(const BlockFrequencyInfo &BFI, BlockFrequency Freq)
Print the block frequency Freq relative to the current functions entry frequency.
char & MachineBlockPlacementID
MachineBlockPlacement - This pass places basic blocks based on branch probabilities.
cl::opt< bool > EnableExtTspBlockPlacement
char & MachineBlockPlacementStatsID
MachineBlockPlacementStats - This pass collects statistics about the basic block placement using bran...
void initializeMachineBlockPlacementPass(PassRegistry &)
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.