78#define DEBUG_TYPE "block-placement"
80STATISTIC(NumCondBranches,
"Number of conditional branches");
81STATISTIC(NumUncondBranches,
"Number of unconditional branches");
83 "Potential frequency of taking conditional branches");
85 "Potential frequency of taking unconditional branches");
89 cl::desc(
"Force the alignment of all blocks in the function in log2 format "
90 "(e.g 4 means align on 16B boundaries)."),
94 "align-all-nofallthru-blocks",
95 cl::desc(
"Force the alignment of all blocks that have no fall-through "
96 "predecessors (i.e. don't add nops that are executed). In log2 "
97 "format (e.g 4 means align on 16B boundaries)."),
101 "max-bytes-for-alignment",
102 cl::desc(
"Forces the maximum bytes allowed to be emitted when padding for "
108 "block-placement-exit-block-bias",
109 cl::desc(
"Block frequency percentage a loop exit block needs "
110 "over the original exit to be considered the new exit."),
117 "loop-to-cold-block-ratio",
118 cl::desc(
"Outline loop blocks from loop chain if (frequency of loop) / "
119 "(frequency of block) is greater than this ratio"),
123 "force-loop-cold-block",
124 cl::desc(
"Force outlining cold blocks from loops."),
129 cl::desc(
"Model the cost of loop rotation more "
130 "precisely by using profile data."),
135 cl::desc(
"Force the use of precise cost "
136 "loop rotation strategy."),
141 cl::desc(
"Cost that models the probabilistic risk of an instruction "
142 "misfetch due to a jump comparing to falling through, whose cost "
147 cl::desc(
"Cost of jump instructions."),
151 cl::desc(
"Perform tail duplication during placement. "
152 "Creates more fallthrough opportunites in "
153 "outline branches."),
158 cl::desc(
"Perform branch folding during placement. "
159 "Reduces code size."),
164 "tail-dup-placement-threshold",
165 cl::desc(
"Instruction cutoff for tail duplication during layout. "
166 "Tail merging during layout is forced to have a threshold "
167 "that won't conflict."),
cl::init(2),
172 "tail-dup-placement-aggressive-threshold",
173 cl::desc(
"Instruction cutoff for aggressive tail duplication during "
174 "layout. Used at -O3. Tail merging during layout is forced to "
175 "have a threshold that won't conflict."),
cl::init(4),
180 "tail-dup-placement-penalty",
181 cl::desc(
"Cost penalty for blocks that can avoid breaking CFG by copying. "
182 "Copying can increase fallthrough, but it also increases icache "
183 "pressure. This parameter controls the penalty to account for that. "
184 "Percent as integer."),
190 "tail-dup-profile-percent-threshold",
191 cl::desc(
"If profile count information is used in tail duplication cost "
192 "model, the gained fall through number from tail duplication "
193 "should be at least this percent of hot count."),
198 "triangle-chain-count",
199 cl::desc(
"Number of triangle-shaped-CFG's that need to be in a row for the "
200 "triangle tail duplication heuristic to kick in. 0 to disable."),
210 "renumber-blocks-before-view",
212 "If true, basic blocks are re-numbered before MBP layout is printed "
213 "into a dot graph. Only used when a function is being printed."),
263 BlockToChainMapType &BlockToChain;
272 :
Blocks(1, BB), BlockToChain(BlockToChain) {
273 assert(BB &&
"Cannot create a chain with a null basic block");
274 BlockToChain[BB] =
this;
282 iterator begin() {
return Blocks.begin(); }
286 iterator end() {
return Blocks.end(); }
290 for(iterator i = begin(); i != end(); ++i) {
306 assert(BB &&
"Can't merge a null block.");
307 assert(!
Blocks.empty() &&
"Can't merge into an empty chain.");
311 assert(!BlockToChain[BB] &&
312 "Passed chain is null, but BB has entry in BlockToChain.");
314 BlockToChain[BB] =
this;
318 assert(BB == *Chain->
begin() &&
"Passed BB is not head of Chain.");
319 assert(Chain->begin() != Chain->end());
324 Blocks.push_back(ChainBB);
325 assert(BlockToChain[ChainBB] == Chain &&
"Incoming blocks not in chain.");
326 BlockToChain[ChainBB] =
this;
347 unsigned UnscheduledPredecessors = 0;
355 struct BlockAndTailDupResult {
361 struct WeightedEdge {
381 std::unique_ptr<MBFIWrapper> MBFI;
414 bool UseProfileCount =
false;
444 if (UseProfileCount) {
445 auto Count = MBFI->getBlockProfileCount(BB);
451 return MBFI->getBlockFreq(BB);
456 void initDupThreshold();
460 void markChainSuccessors(
462 const BlockFilterSet *BlockFilter =
nullptr);
466 void markBlockSuccessors(
469 const BlockFilterSet *BlockFilter =
nullptr);
472 collectViableSuccessors(
474 const BlockFilterSet *BlockFilter,
477 BlockFilterSet *BlockFilter);
480 BlockFilterSet *BlockFilter);
481 bool repeatedlyTailDuplicateBlock(
484 BlockChain &Chain, BlockFilterSet *BlockFilter,
486 bool maybeTailDuplicateBlock(
488 BlockChain &Chain, BlockFilterSet *BlockFilter,
490 bool &DuplicatedToLPred);
491 bool hasBetterLayoutPredecessor(
495 const BlockFilterSet *BlockFilter);
496 BlockAndTailDupResult selectBestSuccessor(
498 const BlockFilterSet *BlockFilter);
502 const BlockChain &PlacedChain,
504 const BlockFilterSet *BlockFilter);
513 const BlockFilterSet *BlockFilter);
516 BlockFilterSet *BlockFilter =
nullptr);
520 const BlockFilterSet &LoopBlockSet);
522 const BlockFilterSet &LoopBlockSet);
526 const BlockFilterSet &LoopBlockSet);
528 const MachineLoop &L,
const BlockFilterSet &LoopBlockSet);
530 const MachineLoop &L,
const BlockFilterSet &LoopBlockSet);
532 const MachineLoop &L,
const BlockFilterSet &LoopBlockSet,
534 BlockFilterSet collectLoopBlockSet(
const MachineLoop &L);
539 void rotateLoopWithProfile(
541 const BlockFilterSet &LoopBlockSet);
542 void buildCFGChains();
543 void optimizeBranches();
550 bool isProfitableToTailDup(
553 const BlockChain &Chain,
const BlockFilterSet *BlockFilter);
558 const BlockChain &Chain,
const BlockFilterSet *BlockFilter);
561 BlockAndTailDupResult getBestTrellisSuccessor(
565 const BlockFilterSet *BlockFilter);
568 static std::pair<WeightedEdge, WeightedEdge> getBestNonConflictingEdges(
574 bool canTailDuplicateUnplacedPreds(
576 const BlockChain &Chain,
const BlockFilterSet *BlockFilter);
580 void precomputeTriangleChains();
586 void assignBlockOrder(
const std::vector<const MachineBasicBlock *> &NewOrder);
589 void createCFGChainExtTsp();
600 bool allowTailDupPlacement()
const {
619char MachineBlockPlacement::ID = 0;
624 "Branch Probability Basic Block Placement",
false,
false)
653void MachineBlockPlacement::markChainSuccessors(
655 const BlockFilterSet *BlockFilter) {
659 markBlockSuccessors(Chain,
MBB, LoopHeaderBB, BlockFilter);
669void MachineBlockPlacement::markBlockSuccessors(
677 if (BlockFilter && !BlockFilter->count(Succ))
679 BlockChain &SuccChain = *BlockToChain[Succ];
681 if (&Chain == &SuccChain || Succ == LoopHeaderBB)
686 if (SuccChain.UnscheduledPredecessors == 0 ||
687 --SuccChain.UnscheduledPredecessors > 0)
690 auto *NewBB = *SuccChain.
begin();
691 if (NewBB->isEHPad())
704 const BlockFilterSet *BlockFilter,
724 bool SkipSucc =
false;
725 if (Succ->isEHPad() || (BlockFilter && !BlockFilter->count(Succ))) {
728 BlockChain *SuccChain = BlockToChain[Succ];
729 if (SuccChain == &Chain) {
731 }
else if (Succ != *SuccChain->begin()) {
733 <<
" -> Mid chain!\n");
743 return AdjustedSumProb;
754 if (SuccProbN >= SuccProbD)
769 if (Successors.
count(&BB))
772 if (!Successors.
count(Succ))
801 return (Gain / ThresholdProb) >= EntryFreq;
809bool MachineBlockPlacement::isProfitableToTailDup(
812 const BlockChain &Chain,
const BlockFilterSet *BlockFilter) {
839 auto AdjustedSuccSumProb =
840 collectViableSuccessors(Succ, Chain, BlockFilter, SuccSuccs);
842 auto BBFreq = MBFI->getBlockFreq(BB);
843 auto SuccFreq = MBFI->getBlockFreq(Succ);
849 if (SuccSuccs.
size() == 0)
856 if (Prob > BestSuccSucc)
868 if (SuccPred == Succ || SuccPred == BB
869 || BlockToChain[SuccPred] == &Chain
870 || (BlockFilter && !BlockFilter->count(SuccPred)))
872 auto Freq = MBFI->getBlockFreq(SuccPred)
874 if (Freq > SuccBestPred)
942 if (UProb > AdjustedSuccSumProb / 2 &&
943 !hasBetterLayoutPredecessor(Succ, PDom, *BlockToChain[PDom], UProb, UProb,
947 (
P + V), (Qout + std::max(Qin,
F) * VProb + std::min(Qin,
F) * UProb),
951 (Qout + std::min(Qin,
F) * AdjustedSuccSumProb +
952 std::max(Qin,
F) * UProb),
963bool MachineBlockPlacement::isTrellis(
966 const BlockChain &Chain,
const BlockFilterSet *BlockFilter) {
981 if (Successors.count(SuccPred)) {
984 if (!Successors.count(CheckSucc))
988 const BlockChain *PredChain = BlockToChain[SuccPred];
989 if (SuccPred == BB || (BlockFilter && !BlockFilter->count(SuccPred)) ||
990 PredChain == &Chain || PredChain == BlockToChain[Succ])
994 if (!SeenPreds.
insert(SuccPred).second)
1012std::pair<MachineBlockPlacement::WeightedEdge,
1013 MachineBlockPlacement::WeightedEdge>
1014MachineBlockPlacement::getBestNonConflictingEdges(
1025 auto Cmp = [](WeightedEdge
A, WeightedEdge
B) {
return A.Weight >
B.Weight; };
1029 auto BestA = Edges[0].begin();
1030 auto BestB = Edges[1].begin();
1033 if (BestA->Src == BestB->Src) {
1035 auto SecondBestA = std::next(BestA);
1036 auto SecondBestB = std::next(BestB);
1039 if (BestAScore < BestBScore)
1040 BestA = SecondBestA;
1042 BestB = SecondBestB;
1045 if (BestB->Src == BB)
1047 return std::make_pair(*BestA, *BestB);
1057MachineBlockPlacement::BlockAndTailDupResult
1058MachineBlockPlacement::getBestTrellisSuccessor(
1062 const BlockFilterSet *BlockFilter) {
1064 BlockAndTailDupResult
Result = {
nullptr,
false};
1071 if (Successors.
size() != 2 || ViableSuccs.
size() != 2)
1077 for (
auto *Succ : ViableSuccs) {
1081 if ((BlockFilter && !BlockFilter->count(SuccPred)) ||
1082 BlockToChain[SuccPred] == &Chain ||
1083 BlockToChain[SuccPred] == BlockToChain[Succ])
1087 Edges[SuccIndex].
push_back({EdgeFreq, SuccPred, Succ});
1093 WeightedEdge BestA, BestB;
1094 std::tie(BestA, BestB) = getBestNonConflictingEdges(BB, Edges);
1096 if (BestA.Src != BB) {
1100 LLVM_DEBUG(
dbgs() <<
"Trellis, but not one of the chosen edges.\n");
1107 if (BestA.Dest == BestB.Src) {
1113 if (allowTailDupPlacement() && shouldTailDuplicate(Succ2) &&
1114 canTailDuplicateUnplacedPreds(BB, Succ2, Chain, BlockFilter) &&
1116 Chain, BlockFilter)) {
1120 <<
", probability: " << Succ2Prob
1121 <<
" (Tail Duplicate)\n");
1123 Result.ShouldTailDup =
true;
1129 ComputedEdges[BestB.Src] = { BestB.Dest,
false };
1131 auto TrellisSucc = BestA.Dest;
1135 <<
", probability: " << SuccProb <<
" (Trellis)\n");
1143bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
1145 const BlockChain &Chain,
const BlockFilterSet *BlockFilter) {
1146 if (!shouldTailDuplicate(Succ))
1150 bool Duplicate =
true;
1152 unsigned int NumDup = 0;
1161 if (Pred == BB || (BlockFilter && !BlockFilter->count(Pred))
1162 || (BlockToChain[Pred] == &Chain && !Succ->
succ_empty()))
1212 if (
F->getFunction().hasProfileData())
1243 if ((NumDup > Succ->
succ_size()) || !Duplicate)
1266void MachineBlockPlacement::precomputeTriangleChains() {
1267 struct TriangleChain {
1268 std::vector<MachineBasicBlock *> Edges;
1271 : Edges({src, dst}) {}
1274 assert(getKey()->isSuccessor(dst) &&
1275 "Attempting to append a block that is not a successor.");
1276 Edges.push_back(dst);
1279 unsigned count()
const {
return Edges.size() - 1; }
1282 return Edges.
back();
1306 if (PDom ==
nullptr)
1313 if (!shouldTailDuplicate(PDom))
1315 bool CanTailDuplicate =
true;
1322 CanTailDuplicate =
false;
1328 if (!CanTailDuplicate)
1335 auto Found = TriangleChainMap.
find(&BB);
1338 if (Found != TriangleChainMap.
end()) {
1339 TriangleChain Chain = std::move(Found->second);
1340 TriangleChainMap.
erase(Found);
1342 TriangleChainMap.
insert(std::make_pair(Chain.getKey(), std::move(Chain)));
1344 auto InsertResult = TriangleChainMap.
try_emplace(PDom, &BB, PDom);
1345 assert(InsertResult.second &&
"Block seen twice.");
1353 for (
auto &ChainPair : TriangleChainMap) {
1354 TriangleChain &Chain = ChainPair.second;
1361 Chain.Edges.pop_back();
1365 <<
" as pre-computed based on triangles.\n");
1367 auto InsertResult = ComputedEdges.
insert({src, {dst,
true}});
1368 assert(InsertResult.second &&
"Block seen twice.");
1410bool MachineBlockPlacement::hasBetterLayoutPredecessor(
1414 const BlockFilterSet *BlockFilter) {
1417 if (SuccChain.UnscheduledPredecessors == 0)
1537 BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb;
1538 bool BadCFGConflict =
false;
1541 BlockChain *PredChain = BlockToChain[Pred];
1542 if (Pred == Succ || PredChain == &SuccChain ||
1543 (BlockFilter && !BlockFilter->count(Pred)) ||
1544 PredChain == &Chain || Pred != *std::prev(PredChain->end()) ||
1565 if (PredEdgeFreq * HotProb >= CandidateEdgeFreq * HotProb.
getCompl()) {
1566 BadCFGConflict =
true;
1571 if (BadCFGConflict) {
1573 << SuccProb <<
" (prob) (non-cold CFG conflict)\n");
1590MachineBlockPlacement::BlockAndTailDupResult
1591MachineBlockPlacement::selectBestSuccessor(
1593 const BlockFilterSet *BlockFilter) {
1596 BlockAndTailDupResult BestSucc = {
nullptr,
false };
1600 auto AdjustedSumProb =
1601 collectViableSuccessors(BB, Chain, BlockFilter, Successors);
1608 auto FoundEdge = ComputedEdges.
find(BB);
1609 if (FoundEdge != ComputedEdges.
end()) {
1611 ComputedEdges.
erase(FoundEdge);
1612 BlockChain *SuccChain = BlockToChain[Succ];
1613 if (BB->
isSuccessor(Succ) && (!BlockFilter || BlockFilter->count(Succ)) &&
1614 SuccChain != &Chain && Succ == *SuccChain->begin())
1615 return FoundEdge->second;
1620 if (isTrellis(BB, Successors, Chain, BlockFilter))
1621 return getBestTrellisSuccessor(BB, Successors, AdjustedSumProb, Chain,
1634 BlockChain &SuccChain = *BlockToChain[Succ];
1637 if (hasBetterLayoutPredecessor(BB, Succ, SuccChain, SuccProb, RealSuccProb,
1638 Chain, BlockFilter)) {
1640 if (allowTailDupPlacement() && shouldTailDuplicate(Succ))
1647 <<
", probability: " << SuccProb
1648 << (SuccChain.UnscheduledPredecessors != 0 ?
" (CFG break)" :
"")
1651 if (BestSucc.BB && BestProb >= SuccProb) {
1658 BestProb = SuccProb;
1666 [](std::tuple<BranchProbability, MachineBasicBlock *> L,
1667 std::tuple<BranchProbability, MachineBasicBlock *> R) {
1668 return std::get<0>(L) > std::get<0>(R);
1670 for (
auto &Tup : DupCandidates) {
1673 std::tie(DupProb, Succ) = Tup;
1674 if (DupProb < BestProb)
1676 if (canTailDuplicateUnplacedPreds(BB, Succ, Chain, BlockFilter)
1677 && (isProfitableToTailDup(BB, Succ, BestProb, Chain, BlockFilter))) {
1679 <<
", probability: " << DupProb
1680 <<
" (Tail Duplicate)\n");
1682 BestSucc.ShouldTailDup =
true;
1710 return BlockToChain.
lookup(BB) == &Chain;
1713 if (WorkList.
empty())
1716 bool IsEHPad = WorkList[0]->isEHPad();
1722 "EHPad mismatch between block and work list.");
1724 BlockChain &SuccChain = *BlockToChain[
MBB];
1725 if (&SuccChain == &Chain)
1728 assert(SuccChain.UnscheduledPredecessors == 0 &&
1729 "Found CFG-violating block");
1754 if (BestBlock && (IsEHPad ^ (BestFreq >= CandidateFreq)))
1758 BestFreq = CandidateFreq;
1772 const BlockChain &PlacedChain,
1774 const BlockFilterSet *BlockFilter) {
1777 if (BlockFilter && !BlockFilter->count(&*
I))
1779 if (BlockToChain[&*
I] != &PlacedChain) {
1780 PrevUnplacedBlockIt =
I;
1784 return *BlockToChain[&*
I]->
begin();
1790void MachineBlockPlacement::fillWorkLists(
1793 const BlockFilterSet *BlockFilter =
nullptr) {
1794 BlockChain &Chain = *BlockToChain[
MBB];
1795 if (!UpdatedPreds.
insert(&Chain).second)
1799 Chain.UnscheduledPredecessors == 0 &&
1800 "Attempting to place block with unscheduled predecessors in worklist.");
1802 assert(BlockToChain[ChainBB] == &Chain &&
1803 "Block in chain doesn't match BlockToChain map.");
1805 if (BlockFilter && !BlockFilter->count(Pred))
1807 if (BlockToChain[Pred] == &Chain)
1809 ++Chain.UnscheduledPredecessors;
1813 if (Chain.UnscheduledPredecessors != 0)
1823void MachineBlockPlacement::buildChain(
1825 BlockFilterSet *BlockFilter) {
1826 assert(HeadBB &&
"BB must not be null.\n");
1827 assert(BlockToChain[HeadBB] == &Chain &&
"BlockToChainMap mis-match.\n");
1831 markChainSuccessors(Chain, LoopHeaderBB, BlockFilter);
1834 assert(BB &&
"null block found at end of chain in loop.");
1835 assert(BlockToChain[BB] == &Chain &&
"BlockToChainMap mis-match in loop.");
1836 assert(*std::prev(Chain.end()) == BB &&
"BB Not found at end of chain.");
1841 auto Result = selectBestSuccessor(BB, Chain, BlockFilter);
1843 bool ShouldTailDup =
Result.ShouldTailDup;
1844 if (allowTailDupPlacement())
1845 ShouldTailDup |= (BestSucc && canTailDuplicateUnplacedPreds(BB, BestSucc,
1853 BestSucc = selectBestCandidateBlock(Chain, BlockWorkList);
1855 BestSucc = selectBestCandidateBlock(Chain, EHPadWorkList);
1858 BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockIt, BlockFilter);
1862 LLVM_DEBUG(
dbgs() <<
"Unnatural loop CFG detected, forcibly merging the "
1863 "layout successor until the CFG reduces\n");
1868 if (allowTailDupPlacement() && BestSucc && ShouldTailDup) {
1869 repeatedlyTailDuplicateBlock(BestSucc, BB, LoopHeaderBB, Chain,
1870 BlockFilter, PrevUnplacedBlockIt);
1878 BlockChain &SuccChain = *BlockToChain[BestSucc];
1881 SuccChain.UnscheduledPredecessors = 0;
1884 markChainSuccessors(SuccChain, LoopHeaderBB, BlockFilter);
1885 Chain.merge(BestSucc, &SuccChain);
1886 BB = *std::prev(Chain.end());
1908MachineBlockPlacement::canMoveBottomBlockToTop(
1918 if (OtherBB == BottomBlock)
1920 if (OtherBB == OldTop)
1928MachineBlockPlacement::TopFallThroughFreq(
1930 const BlockFilterSet &LoopBlockSet) {
1933 BlockChain *PredChain = BlockToChain[Pred];
1934 if (!LoopBlockSet.count(Pred) &&
1935 (!PredChain || Pred == *std::prev(PredChain->end()))) {
1942 BlockChain *SuccChain = BlockToChain[Succ];
1945 if (!LoopBlockSet.count(Succ) && (SuccProb > TopProb) &&
1946 (!SuccChain || Succ == *SuccChain->begin())) {
1954 if (EdgeFreq > MaxFreq)
1984MachineBlockPlacement::FallThroughGains(
1988 const BlockFilterSet &LoopBlockSet) {
1989 BlockFrequency FallThrough2Top = TopFallThroughFreq(OldTop, LoopBlockSet);
1992 FallThrough2Exit = MBFI->getBlockFreq(NewTop) *
2001 if (!LoopBlockSet.count(Pred))
2003 BlockChain *PredChain = BlockToChain[Pred];
2004 if (!PredChain || Pred == *std::prev(PredChain->end())) {
2007 if (EdgeFreq > FallThroughFromPred) {
2008 FallThroughFromPred = EdgeFreq;
2019 if ((Succ == NewTop) || (Succ == BestPred) || !LoopBlockSet.count(Succ))
2023 BlockChain *SuccChain = BlockToChain[Succ];
2024 if ((SuccChain && (Succ != *SuccChain->begin())) ||
2025 (SuccChain == BlockToChain[BestPred]))
2029 if (EdgeFreq > NewFreq)
2034 if (NewFreq > OrigEdgeFreq) {
2046 FallThrough2Top + FallThrough2Exit + FallThroughFromPred;
2075MachineBlockPlacement::findBestLoopTopHelper(
2078 const BlockFilterSet &LoopBlockSet) {
2082 BlockChain &HeaderChain = *BlockToChain[OldTop];
2083 if (!LoopBlockSet.count(*HeaderChain.begin()))
2085 if (OldTop != *HeaderChain.begin())
2094 if (!LoopBlockSet.count(Pred))
2096 if (Pred ==
L.getHeader())
2107 if (OtherBB == OldTop)
2111 if (!canMoveBottomBlockToTop(Pred, OldTop))
2117 (Gains > BestGains ||
2132 (*BestPred->
pred_begin())->succ_size() == 1 &&
2145MachineBlockPlacement::findBestLoopTop(
const MachineLoop &L,
2146 const BlockFilterSet &LoopBlockSet) {
2154 bool OptForSize =
F->getFunction().hasOptSize() ||
2157 return L.getHeader();
2161 while (NewTop != OldTop) {
2163 NewTop = findBestLoopTopHelper(OldTop, L, LoopBlockSet);
2164 if (NewTop != OldTop)
2165 ComputedEdges[NewTop] = { OldTop,
false };
2176MachineBlockPlacement::findBestLoopExit(
const MachineLoop &L,
2177 const BlockFilterSet &LoopBlockSet,
2187 BlockChain &HeaderChain = *BlockToChain[
L.getHeader()];
2188 if (!LoopBlockSet.count(*HeaderChain.begin()))
2192 unsigned BestExitLoopDepth = 0;
2202 BlockChain &Chain = *BlockToChain[
MBB];
2205 if (
MBB != *std::prev(Chain.end()))
2214 bool HasLoopingSucc =
false;
2220 BlockChain &SuccChain = *BlockToChain[Succ];
2222 if (&Chain == &SuccChain) {
2229 if (LoopBlockSet.count(Succ)) {
2232 HasLoopingSucc =
true;
2236 unsigned SuccLoopDepth = 0;
2238 SuccLoopDepth = ExitLoop->getLoopDepth();
2239 if (ExitLoop->contains(&L))
2246 <<
getBlockName(Succ) <<
" [L:" << SuccLoopDepth <<
"] ("
2253 if (!ExitingBB || SuccLoopDepth > BestExitLoopDepth ||
2254 ExitEdgeFreq > BestExitEdgeFreq ||
2256 !(ExitEdgeFreq < BestExitEdgeFreq * Bias))) {
2257 BestExitEdgeFreq = ExitEdgeFreq;
2262 if (!HasLoopingSucc) {
2264 ExitingBB = OldExitingBB;
2265 BestExitEdgeFreq = OldBestExitEdgeFreq;
2272 dbgs() <<
" No other candidate exit blocks, using loop header\n");
2275 if (
L.getNumBlocks() == 1) {
2276 LLVM_DEBUG(
dbgs() <<
" Loop has 1 block, using loop header as exit\n");
2283 if (!BlocksExitingToOuterLoop.
empty() &&
2284 !BlocksExitingToOuterLoop.
count(ExitingBB))
2289 ExitFreq = BestExitEdgeFreq;
2298MachineBlockPlacement::hasViableTopFallthrough(
2300 const BlockFilterSet &LoopBlockSet) {
2302 BlockChain *PredChain = BlockToChain[Pred];
2303 if (!LoopBlockSet.count(Pred) &&
2304 (!PredChain || Pred == *std::prev(PredChain->end()))) {
2311 BlockChain *SuccChain = BlockToChain[Succ];
2314 if ((!SuccChain || Succ == *SuccChain->begin()) && SuccProb > TopProb) {
2332void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
2335 const BlockFilterSet &LoopBlockSet) {
2343 if (Bottom == ExitingBB)
2350 bool ViableTopFallthrough = hasViableTopFallthrough(Top, LoopBlockSet);
2355 if (ViableTopFallthrough) {
2357 BlockChain *SuccChain = BlockToChain[Succ];
2358 if (!LoopBlockSet.count(Succ) &&
2359 (!SuccChain || Succ == *SuccChain->begin()))
2365 BlockFrequency FallThrough2Top = TopFallThroughFreq(Top, LoopBlockSet);
2366 if (FallThrough2Top >= ExitFreq)
2370 BlockChain::iterator ExitIt =
llvm::find(LoopChain, ExitingBB);
2371 if (ExitIt == LoopChain.end())
2393 if (ViableTopFallthrough) {
2394 assert(std::next(ExitIt) != LoopChain.end() &&
2395 "Exit should not be last BB");
2404 std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end());
2420void MachineBlockPlacement::rotateLoopWithProfile(
2422 const BlockFilterSet &LoopBlockSet) {
2423 auto RotationPos = LoopChain.end();
2448 BlockChain *PredChain = BlockToChain[Pred];
2449 if (!LoopBlockSet.count(Pred) &&
2450 (!PredChain || Pred == *std::prev(PredChain->end()))) {
2451 auto EdgeFreq = MBFI->getBlockFreq(Pred) *
2453 auto FallThruCost = ScaleBlockFrequency(EdgeFreq,
MisfetchCost);
2457 FallThruCost += ScaleBlockFrequency(EdgeFreq,
JumpInstCost);
2458 HeaderFallThroughCost = std::max(HeaderFallThroughCost, FallThruCost);
2467 for (
auto *BB : LoopChain) {
2470 BlockChain *SuccChain = BlockToChain[Succ];
2471 if (!LoopBlockSet.count(Succ) &&
2472 (!SuccChain || Succ == *SuccChain->begin())) {
2474 LargestExitEdgeProb = std::max(LargestExitEdgeProb, SuccProb);
2478 auto ExitFreq = MBFI->getBlockFreq(BB) * LargestExitEdgeProb;
2486 for (
auto Iter = LoopChain.begin(), TailIter = std::prev(LoopChain.end()),
2487 EndIter = LoopChain.end();
2488 Iter != EndIter; Iter++, TailIter++) {
2491 if (TailIter == LoopChain.end())
2492 TailIter = LoopChain.begin();
2494 auto TailBB = *TailIter;
2502 if (Iter != LoopChain.begin())
2503 Cost += HeaderFallThroughCost;
2507 for (
auto &ExitWithFreq : ExitsWithFreq)
2508 if (TailBB != ExitWithFreq.first)
2509 Cost += ExitWithFreq.second;
2525 if (TailBB->isSuccessor(*Iter)) {
2526 auto TailBBFreq = MBFI->getBlockFreq(TailBB);
2527 if (TailBB->succ_size() == 1)
2529 else if (TailBB->succ_size() == 2) {
2531 auto TailToHeadFreq = TailBBFreq * TailToHeadProb;
2533 ? TailBBFreq * TailToHeadProb.
getCompl()
2544 if (
Cost < SmallestRotationCost) {
2545 SmallestRotationCost =
Cost;
2550 if (RotationPos != LoopChain.end()) {
2552 <<
" to the top\n");
2553 std::rotate(LoopChain.begin(), RotationPos, LoopChain.end());
2562MachineBlockPlacement::collectLoopBlockSet(
const MachineLoop &L) {
2563 BlockFilterSet LoopBlockSet;
2576 for (
auto *LoopPred :
L.getHeader()->predecessors())
2577 if (!
L.contains(LoopPred))
2578 LoopFreq += MBFI->getBlockFreq(LoopPred) *
2582 if (LoopBlockSet.count(LoopBB))
2587 BlockChain *Chain = BlockToChain[LoopBB];
2589 LoopBlockSet.
insert(ChainBB);
2592 LoopBlockSet.insert(
L.block_begin(),
L.block_end());
2594 return LoopBlockSet;
2603void MachineBlockPlacement::buildLoopChains(
const MachineLoop &L) {
2607 buildLoopChains(*InnerLoop);
2610 "BlockWorkList not empty when starting to build loop chains.");
2612 "EHPadWorkList not empty when starting to build loop chains.");
2613 BlockFilterSet LoopBlockSet = collectLoopBlockSet(L);
2618 bool RotateLoopWithProfile =
2634 PreferredLoopExit =
nullptr;
2636 if (!RotateLoopWithProfile && LoopTop ==
L.getHeader())
2637 PreferredLoopExit = findBestLoopExit(L, LoopBlockSet, ExitFreq);
2639 BlockChain &LoopChain = *BlockToChain[LoopTop];
2645 assert(LoopChain.UnscheduledPredecessors == 0 &&
2646 "LoopChain should not have unscheduled predecessors.");
2647 UpdatedPreds.
insert(&LoopChain);
2650 fillWorkLists(LoopBB, UpdatedPreds, &LoopBlockSet);
2652 buildChain(LoopTop, LoopChain, &LoopBlockSet);
2654 if (RotateLoopWithProfile)
2655 rotateLoopWithProfile(LoopChain, L, LoopBlockSet);
2657 rotateLoop(LoopChain, PreferredLoopExit, ExitFreq, LoopBlockSet);
2661 bool BadLoop =
false;
2662 if (LoopChain.UnscheduledPredecessors) {
2664 dbgs() <<
"Loop chain contains a block without its preds placed!\n"
2665 <<
" Loop header: " << getBlockName(*L.block_begin()) <<
"\n"
2666 <<
" Chain header: " << getBlockName(*LoopChain.begin()) <<
"\n";
2670 if (!LoopBlockSet.remove(ChainBB)) {
2674 dbgs() <<
"Loop chain contains a block not contained by the loop!\n"
2675 <<
" Loop header: " <<
getBlockName(*
L.block_begin()) <<
"\n"
2676 <<
" Chain header: " <<
getBlockName(*LoopChain.begin()) <<
"\n"
2681 if (!LoopBlockSet.empty()) {
2684 dbgs() <<
"Loop contains blocks never placed into a chain!\n"
2685 <<
" Loop header: " <<
getBlockName(*
L.block_begin()) <<
"\n"
2686 <<
" Chain header: " <<
getBlockName(*LoopChain.begin()) <<
"\n"
2689 assert(!BadLoop &&
"Detected problems with the placement of this loop.");
2692 BlockWorkList.
clear();
2693 EHPadWorkList.
clear();
2696void MachineBlockPlacement::buildCFGChains() {
2704 new (ChainAllocator.
Allocate()) BlockChain(BlockToChain, BB);
2717 assert(NextFI != FE &&
"Can't fallthrough past the last block.");
2718 LLVM_DEBUG(
dbgs() <<
"Pre-merging due to unanalyzable fallthrough: "
2721 Chain->merge(NextBB,
nullptr);
2723 BlocksWithUnanalyzableExits.
insert(&*BB);
2731 PreferredLoopExit =
nullptr;
2733 buildLoopChains(*L);
2736 "BlockWorkList should be empty before building final chain.");
2738 "EHPadWorkList should be empty before building final chain.");
2742 fillWorkLists(&
MBB, UpdatedPreds);
2744 BlockChain &FunctionChain = *BlockToChain[&
F->front()];
2745 buildChain(&
F->front(), FunctionChain);
2752 bool BadFunc =
false;
2753 FunctionBlockSetType FunctionBlockSet;
2755 FunctionBlockSet.insert(&
MBB);
2758 if (!FunctionBlockSet.erase(ChainBB)) {
2760 dbgs() <<
"Function chain contains a block not in the function!\n"
2761 <<
" Bad block: " << getBlockName(ChainBB) <<
"\n";
2764 if (!FunctionBlockSet.empty()) {
2766 for (MachineBasicBlock *RemainingBB : FunctionBlockSet)
2767 dbgs() <<
"Function contains blocks never placed into a chain!\n"
2768 <<
" Bad block: " << getBlockName(RemainingBB) <<
"\n";
2770 assert(!BadFunc &&
"Detected problems with the block placement.");
2776 F->getNumBlockIDs());
2779 for (
auto &
MBB : *
F) {
2780 if (LastMBB !=
nullptr)
2784 OriginalLayoutSuccessors[
F->back().getNumber()] =
nullptr;
2791 LLVM_DEBUG(
dbgs() << (ChainBB == *FunctionChain.begin() ?
"Placing chain "
2795 F->splice(InsertPos, ChainBB);
2800 if (ChainBB == *FunctionChain.begin())
2811 if (!BlocksWithUnanalyzableExits.
count(PrevBB)) {
2817 "Unexpected block with un-analyzable fallthrough!");
2819 TBB = FBB =
nullptr;
2857 BlockWorkList.
clear();
2858 EHPadWorkList.
clear();
2861void MachineBlockPlacement::optimizeBranches() {
2862 BlockChain &FunctionChain = *BlockToChain[&
F->front()];
2877 if (
TBB && !
Cond.empty() && FBB &&
2894void MachineBlockPlacement::alignBlocks() {
2900 if (
F->getFunction().hasMinSize() ||
2903 BlockChain &FunctionChain = *BlockToChain[&
F->front()];
2904 if (FunctionChain.begin() == FunctionChain.end())
2911 if (ChainBB == *FunctionChain.begin())
2923 unsigned MDAlign = 1;
2924 MDNode *LoopID =
L->getLoopID();
2933 if (S->
getString() ==
"llvm.loop.align") {
2935 "per-loop align metadata should have two operands.");
2937 mdconst::extract<ConstantInt>(MD->
getOperand(1))->getZExtValue();
2938 assert(MDAlign >= 1 &&
"per-loop align value must be positive.");
2944 const Align LoopAlign = std::max(TLIAlign,
Align(MDAlign));
2951 if (Freq < WeightedEntryFreq)
2958 if (Freq < (LoopHeaderFreq * ColdProb))
2971 auto DetermineMaxAlignmentPadding = [&]() {
2978 ChainBB->setMaxBytesForAlignment(MaxBytes);
2984 ChainBB->setAlignment(LoopAlign);
2985 DetermineMaxAlignmentPadding();
2995 BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb;
2996 if (LayoutEdgeFreq <= (Freq * ColdProb)) {
2997 ChainBB->setAlignment(LoopAlign);
2998 DetermineMaxAlignmentPadding();
3018bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
3021 BlockChain &Chain, BlockFilterSet *BlockFilter,
3023 bool Removed, DuplicatedToLPred;
3024 bool DuplicatedToOriginalLPred;
3025 Removed = maybeTailDuplicateBlock(BB, LPred, Chain, BlockFilter,
3026 PrevUnplacedBlockIt,
3030 DuplicatedToOriginalLPred = DuplicatedToLPred;
3035 while (DuplicatedToLPred && Removed) {
3042 BlockChain::iterator ChainEnd = Chain.
end();
3043 DupBB = *(--ChainEnd);
3045 if (ChainEnd == Chain.begin())
3047 DupPred = *std::prev(ChainEnd);
3048 Removed = maybeTailDuplicateBlock(DupBB, DupPred, Chain, BlockFilter,
3049 PrevUnplacedBlockIt,
3057 LPred = *std::prev(Chain.end());
3058 if (DuplicatedToOriginalLPred)
3059 markBlockSuccessors(Chain, LPred, LoopHeaderBB, BlockFilter);
3076bool MachineBlockPlacement::maybeTailDuplicateBlock(
3078 BlockChain &Chain, BlockFilterSet *BlockFilter,
3080 bool &DuplicatedToLPred) {
3081 DuplicatedToLPred =
false;
3082 if (!shouldTailDuplicate(BB))
3090 bool Removed =
false;
3091 auto RemovalCallback =
3097 bool InWorkList =
true;
3099 if (BlockToChain.
count(RemBB)) {
3100 BlockChain *Chain = BlockToChain[RemBB];
3101 InWorkList = Chain->UnscheduledPredecessors == 0;
3102 Chain->remove(RemBB);
3103 BlockToChain.
erase(RemBB);
3107 if (&(*PrevUnplacedBlockIt) == RemBB) {
3108 PrevUnplacedBlockIt++;
3114 if (RemBB->isEHPad())
3115 RemoveList = EHPadWorkList;
3121 BlockFilter->remove(RemBB);
3125 MLI->removeBlock(RemBB);
3126 if (RemBB == PreferredLoopExit)
3127 PreferredLoopExit =
nullptr;
3132 auto RemovalCallbackRef =
3139 if (
F->getFunction().hasProfileData()) {
3141 findDuplicateCandidates(CandidatePreds, BB, BlockFilter);
3142 if (CandidatePreds.
size() == 0)
3145 CandidatePtr = &CandidatePreds;
3148 &RemovalCallbackRef, CandidatePtr);
3151 DuplicatedToLPred =
false;
3154 BlockChain* PredChain = BlockToChain[Pred];
3156 DuplicatedToLPred =
true;
3157 if (Pred == LPred || (BlockFilter && !BlockFilter->count(Pred))
3158 || PredChain == &Chain)
3161 if (BlockFilter && !BlockFilter->count(NewSucc))
3163 BlockChain *NewChain = BlockToChain[NewSucc];
3164 if (NewChain != &Chain && NewChain != PredChain)
3165 NewChain->UnscheduledPredecessors++;
3175 if (!
MI.isPHI() && !
MI.isMetaInstruction())
3191 BlockFilterSet *BlockFilter) {
3194 if (BlockFilter && !BlockFilter->count(Pred))
3196 BlockChain *PredChain = BlockToChain[Pred];
3197 if (PredChain && (Pred != *std::prev(PredChain->end())))
3204 if (BlockFilter && !BlockFilter->count(Succ))
3206 BlockChain *SuccChain = BlockToChain[Succ];
3207 if (SuccChain && (Succ != *SuccChain->begin()))
3210 if (SuccProb > BestProb)
3211 BestProb = SuccProb;
3215 if (BBProb <= BestProb)
3222 return Gain > scaleThreshold(BB);
3227void MachineBlockPlacement::findDuplicateCandidates(
3230 BlockFilterSet *BlockFilter) {
3242 return MBFI->getBlockFreq(
A) > MBFI->getBlockFreq(
B);
3247 auto SuccIt = Succs.begin();
3248 if (SuccIt != Succs.end()) {
3299 if (!Fallthrough && isBestSuccessor(BB, Pred, BlockFilter)) {
3301 if (SuccIt != Succs.end())
3307 BlockFrequency OrigCost = PredFreq + PredFreq * DefaultBranchProb;
3309 if (SuccIt == Succs.end()) {
3311 if (Succs.size() > 0)
3312 DupCost += PredFreq;
3315 DupCost += PredFreq;
3319 assert(OrigCost >= DupCost);
3320 OrigCost -= DupCost;
3321 if (OrigCost > BBDupThreshold) {
3323 if (SuccIt != Succs.end())
3331 if ((Candidates.
size() < Preds.size()) && (Candidates.
size() > 0)) {
3332 Candidates[0] = Candidates.
back();
3338void MachineBlockPlacement::initDupThreshold() {
3340 if (!
F->getFunction().hasProfileData())
3346 UseProfileCount =
true;
3362 UseProfileCount =
false;
3365bool MachineBlockPlacement::runOnMachineFunction(
MachineFunction &MF) {
3370 if (std::next(MF.
begin()) == MF.
end())
3374 MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
3375 MBFI = std::make_unique<MBFIWrapper>(
3376 getAnalysis<MachineBlockFrequencyInfo>());
3377 MLI = &getAnalysis<MachineLoopInfo>();
3381 PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
3387 PreferredLoopExit =
nullptr;
3390 "BlockToChain map should be empty before starting placement.");
3392 "Computed Edge map should be empty before starting placement.");
3403 if (PassConfig->
getOptLevel() >= CodeGenOptLevel::Aggressive) {
3415 (PassConfig->
getOptLevel() < CodeGenOptLevel::Aggressive ||
3417 TailDupSize =
TII->getTailDuplicateSize(PassConfig->
getOptLevel());
3419 if (allowTailDupPlacement()) {
3420 MPDT = &getAnalysis<MachinePostDominatorTree>();
3425 bool PreRegAlloc =
false;
3426 TailDup.
initMF(MF, PreRegAlloc, MBPI, MBFI.get(), PSI,
3428 precomputeTriangleChains();
3440 if (MF.
size() > 3 && EnableTailMerge) {
3448 BlockToChain.
clear();
3449 ComputedEdges.
clear();
3465 createCFGChainExtTsp();
3471 BlockToChain.
clear();
3472 ComputedEdges.
clear();
3475 bool HasMaxBytesOverride =
3481 if (HasMaxBytesOverride)
3490 for (
auto MBI = std::next(MF.begin()), MBE = MF.end(); MBI != MBE; ++MBI) {
3491 auto LayoutPred = std::prev(MBI);
3493 if (HasMaxBytesOverride)
3505 MF.RenumberBlocks();
3506 MBFI->view(
"MBP." + MF.getName(),
false);
3514void MachineBlockPlacement::applyExtTsp() {
3518 std::vector<const MachineBasicBlock *> CurrentBlockOrder;
3519 CurrentBlockOrder.reserve(
F->size());
3520 size_t NumBlocks = 0;
3522 BlockIndex[&
MBB] = NumBlocks++;
3523 CurrentBlockOrder.push_back(&
MBB);
3526 auto BlockSizes = std::vector<uint64_t>(
F->size());
3527 auto BlockCounts = std::vector<uint64_t>(
F->size());
3528 std::vector<codelayout::EdgeCount> JumpCounts;
3541 int NumInsts = std::distance(NonDbgInsts.begin(), NonDbgInsts.end());
3542 BlockSizes[BlockIndex[&
MBB]] = 4 * NumInsts;
3547 JumpCounts.push_back(
3552 LLVM_DEBUG(
dbgs() <<
"Applying ext-tsp layout for |V| = " <<
F->size()
3553 <<
" with profile = " <<
F->getFunction().hasProfileData()
3554 <<
" (" <<
F->getName().str() <<
")"
3557 dbgs() <<
format(
" original layout score: %0.2f\n",
3562 std::vector<const MachineBasicBlock *> NewBlockOrder;
3563 NewBlockOrder.reserve(
F->size());
3565 NewBlockOrder.push_back(CurrentBlockOrder[
Node]);
3572 assignBlockOrder(NewBlockOrder);
3575void MachineBlockPlacement::assignBlockOrder(
3576 const std::vector<const MachineBasicBlock *> &NewBlockOrder) {
3577 assert(
F->size() == NewBlockOrder.size() &&
"Incorrect size of block order");
3578 F->RenumberBlocks();
3580 bool HasChanges =
false;
3581 for (
size_t I = 0;
I < NewBlockOrder.size();
I++) {
3582 if (NewBlockOrder[
I] !=
F->getBlockNumbered(
I)) {
3592 for (
auto &
MBB : *
F) {
3599 NewIndex[
MBB] = NewIndex.
size();
3602 return NewIndex[&
L] < NewIndex[&
R];
3609 for (
auto &
MBB : *
F) {
3616 if (FTMBB && (NextMBB == EndIt || &*NextMBB != FTMBB)) {
3630 F->verify(
this,
"After optimized block reordering");
3634void MachineBlockPlacement::createCFGChainExtTsp() {
3635 BlockToChain.
clear();
3636 ComputedEdges.
clear();
3640 BlockChain *FunctionChain =
3641 new (ChainAllocator.
Allocate()) BlockChain(BlockToChain, HeadBB);
3646 FunctionChain->merge(&
MBB,
nullptr);
3684char MachineBlockPlacementStats::ID = 0;
3689 "Basic Block Placement Stats",
false,
false)
3697 if (std::next(
F.begin()) ==
F.end())
3703 MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
3704 MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
3709 (
MBB.
succ_size() > 1) ? NumCondBranches : NumUncondBranches;
3711 (
MBB.
succ_size() > 1) ? CondBranchTakenFreq : UncondBranchTakenFreq;
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
This file defines the BumpPtrAllocator interface.
static cl::opt< unsigned > TailMergeSize("tail-merge-size", cl::desc("Min number of instructions to consider tail merging"), cl::init(3), cl::Hidden)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Declares methods and data structures for code layout algorithms.
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
static unsigned InstrCount
This file defines the DenseMap class.
DenseMap< Block *, BlockRelaxAux > Blocks
const HexagonInstrInfo * TII
static LoopDeletionResult merge(LoopDeletionResult A, LoopDeletionResult B)
block placement Basic Block Placement Stats
static BranchProbability getAdjustedProbability(BranchProbability OrigProb, BranchProbability AdjustedSumProb)
The helper function returns the branch probability that is adjusted or normalized over the new total ...
static cl::opt< bool > PreciseRotationCost("precise-rotation-cost", cl::desc("Model the cost of loop rotation more " "precisely by using profile data."), cl::init(false), cl::Hidden)
Branch Probability Basic Block Placement
static cl::opt< unsigned > AlignAllBlock("align-all-blocks", cl::desc("Force the alignment of all blocks in the function in log2 format " "(e.g 4 means align on 16B boundaries)."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > TailDupPlacementPenalty("tail-dup-placement-penalty", cl::desc("Cost penalty for blocks that can avoid breaking CFG by copying. " "Copying can increase fallthrough, but it also increases icache " "pressure. This parameter controls the penalty to account for that. " "Percent as integer."), cl::init(2), cl::Hidden)
static BranchProbability getLayoutSuccessorProbThreshold(const MachineBasicBlock *BB)
static cl::opt< bool > ForceLoopColdBlock("force-loop-cold-block", cl::desc("Force outlining cold blocks from loops."), cl::init(false), cl::Hidden)
static cl::opt< unsigned > ExitBlockBias("block-placement-exit-block-bias", cl::desc("Block frequency percentage a loop exit block needs " "over the original exit to be considered the new exit."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > AlignAllNonFallThruBlocks("align-all-nofallthru-blocks", cl::desc("Force the alignment of all blocks that have no fall-through " "predecessors (i.e. don't add nops that are executed). In log2 " "format (e.g 4 means align on 16B boundaries)."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > TailDupPlacementThreshold("tail-dup-placement-threshold", cl::desc("Instruction cutoff for tail duplication during layout. " "Tail merging during layout is forced to have a threshold " "that won't conflict."), cl::init(2), cl::Hidden)
static cl::opt< unsigned > JumpInstCost("jump-inst-cost", cl::desc("Cost of jump instructions."), cl::init(1), cl::Hidden)
static bool greaterWithBias(BlockFrequency A, BlockFrequency B, BlockFrequency EntryFreq)
Compare 2 BlockFrequency's with a small penalty for A.
static cl::opt< unsigned > MisfetchCost("misfetch-cost", cl::desc("Cost that models the probabilistic risk of an instruction " "misfetch due to a jump comparing to falling through, whose cost " "is zero."), cl::init(1), cl::Hidden)
static cl::opt< unsigned > MaxBytesForAlignmentOverride("max-bytes-for-alignment", cl::desc("Forces the maximum bytes allowed to be emitted when padding for " "alignment"), cl::init(0), cl::Hidden)
static cl::opt< bool > BranchFoldPlacement("branch-fold-placement", cl::desc("Perform branch folding during placement. " "Reduces code size."), cl::init(true), cl::Hidden)
static cl::opt< unsigned > TailDupProfilePercentThreshold("tail-dup-profile-percent-threshold", cl::desc("If profile count information is used in tail duplication cost " "model, the gained fall through number from tail duplication " "should be at least this percent of hot count."), cl::init(50), cl::Hidden)
static cl::opt< unsigned > TriangleChainCount("triangle-chain-count", cl::desc("Number of triangle-shaped-CFG's that need to be in a row for the " "triangle tail duplication heuristic to kick in. 0 to disable."), cl::init(2), cl::Hidden)
Branch Probability Basic Block static false std::string getBlockName(const MachineBasicBlock *BB)
Helper to print the name of a MBB.
static bool hasSameSuccessors(MachineBasicBlock &BB, SmallPtrSetImpl< const MachineBasicBlock * > &Successors)
Check if BB has exactly the successors in Successors.
static cl::opt< bool > TailDupPlacement("tail-dup-placement", cl::desc("Perform tail duplication during placement. " "Creates more fallthrough opportunites in " "outline branches."), cl::init(true), cl::Hidden)
static uint64_t countMBBInstruction(MachineBasicBlock *MBB)
static cl::opt< unsigned > LoopToColdBlockRatio("loop-to-cold-block-ratio", cl::desc("Outline loop blocks from loop chain if (frequency of loop) / " "(frequency of block) is greater than this ratio"), cl::init(5), cl::Hidden)
static cl::opt< bool > RenumberBlocksBeforeView("renumber-blocks-before-view", cl::desc("If true, basic blocks are re-numbered before MBP layout is printed " "into a dot graph. Only used when a function is being printed."), cl::init(false), cl::Hidden)
static cl::opt< unsigned > TailDupPlacementAggressiveThreshold("tail-dup-placement-aggressive-threshold", cl::desc("Instruction cutoff for aggressive tail duplication during " "layout. Used at -O3. Tail merging during layout is forced to " "have a threshold that won't conflict."), cl::init(4), cl::Hidden)
static cl::opt< bool > ForcePreciseRotationCost("force-precise-rotation-cost", cl::desc("Force the use of precise cost " "loop rotation strategy."), cl::init(false), cl::Hidden)
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
unify loop Fixup each natural loop to have a single exit block
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
static BlockFrequency max()
Returns the maximum possible frequency, the saturation value.
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
static BranchProbability getOne()
uint32_t getNumerator() const
BranchProbability getCompl() const
static BranchProbability getZero()
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
bool erase(const KeyT &Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
Remove the branching code at the end of the specific MBB.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
Reverses the branch condition of the specified condition list, returning false on success and true if...
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
Insert branch code into the end of the specified MachineBasicBlock.
const MDOperand & getOperand(unsigned I) const
unsigned getNumOperands() const
Return number of MDNode operands.
StringRef getString() const
unsigned pred_size() const
bool isEHPad() const
Returns true if the block is a landing pad.
instr_iterator instr_begin()
MachineBasicBlock * getFallThrough(bool JumpToFallThrough=true)
Return the fallthrough block if the block can implicitly transfer control to the block after it by fa...
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
void updateTerminator(MachineBasicBlock *PreviousLayoutSuccessor)
Update the terminator instructions in block to account for changes to block layout which may have bee...
bool canFallThrough()
Return true if the block can implicitly transfer control to the block after it by falling off the end...
succ_iterator succ_begin()
unsigned succ_size() const
void setAlignment(Align A)
Set alignment of the basic block.
bool isEntryBlock() const
Returns true if this is the entry block of the function.
pred_iterator pred_begin()
succ_reverse_iterator succ_rbegin()
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
DebugLoc findBranchDebugLoc()
Find and return the merged DebugLoc of the branch instructions of the block.
iterator_range< succ_iterator > successors()
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
iterator_range< pred_iterator > predecessors()
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const
getblockFreq - Return block frequency.
BranchProbability getEdgeProbability(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Representation of each machine instruction.
MachineLoop * getLoopFor(const MachineBasicBlock *BB) const
Return the innermost loop that BB lives in.
MachinePostDominatorTree - an analysis pass wrapper for DominatorTree used to compute the post-domina...
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
uint64_t getOrCompHotCountThreshold() const
Returns HotCountThreshold if set.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
typename SuperClass::const_iterator const_iterator
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A BumpPtrAllocator that allows only elements of a specific type to be allocated.
T * Allocate(size_t num=1)
Allocate space for an array of objects without constructing them.
void DestroyAll()
Call the destructor of each allocated object and deallocate all but the current slab and reset the cu...
Utility class to perform tail duplication.
void initMF(MachineFunction &MF, bool PreRegAlloc, const MachineBranchProbabilityInfo *MBPI, MBFIWrapper *MBFI, ProfileSummaryInfo *PSI, bool LayoutMode, unsigned TailDupSize=0)
Prepare to run on a specific machine function.
bool tailDuplicateAndUpdate(bool IsSimple, MachineBasicBlock *MBB, MachineBasicBlock *ForcedLayoutPred, SmallVectorImpl< MachineBasicBlock * > *DuplicatedPreds=nullptr, function_ref< void(MachineBasicBlock *)> *RemovalCallback=nullptr, SmallVectorImpl< MachineBasicBlock * > *CandidatePtr=nullptr)
Tail duplicate a single basic block into its predecessors, and then clean up.
static bool isSimpleBB(MachineBasicBlock *TailBB)
True if this BB has only one unconditional jump.
bool canTailDuplicate(MachineBasicBlock *TailBB, MachineBasicBlock *PredBB)
Returns true if TailBB can successfully be duplicated into PredBB.
bool shouldTailDuplicate(bool IsSimple, MachineBasicBlock &TailBB)
Determine if it is profitable to duplicate this block.
TargetInstrInfo - Interface to description of machine instruction set.
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
virtual unsigned getMaxPermittedBytesForAlignment(MachineBasicBlock *MBB) const
Return the maximum amount of bytes allowed to be emitted when padding for alignment.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
virtual bool alignLoopsWithOptSize() const
Should loops be aligned even when the function is marked OptSize (but not MinSize).
bool requiresStructuredCFG() const
Target-Independent Code Generator Pass Configuration Options.
bool getEnableTailMerge() const
CodeGenOptLevel getOptLevel() const
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetLowering * getTargetLowering() const
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
A raw_ostream that writes to an std::string.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
initializer< Ty > init(const Ty &Val)
double calcExtTspScore(ArrayRef< uint64_t > Order, ArrayRef< uint64_t > NodeSizes, ArrayRef< uint64_t > NodeCounts, ArrayRef< EdgeCount > EdgeCounts)
Estimate the "quality" of a given node order in CFG.
std::vector< uint64_t > computeExtTspLayout(ArrayRef< uint64_t > NodeSizes, ArrayRef< uint64_t > NodeCounts, ArrayRef< EdgeCount > EdgeCounts)
Find a layout of nodes (basic blocks) of a given CFG optimizing jump locality and thus processor I-ca...
void append(SmallVectorImpl< char > &path, const Twine &a, const Twine &b="", const Twine &c="", const Twine &d="")
Append to path.
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
void stable_sort(R &&Range)
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
cl::opt< bool > ApplyExtTspWithoutProfile
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
void initializeMachineBlockPlacementStatsPass(PassRegistry &)
cl::opt< unsigned > ProfileLikelyProb
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
cl::opt< std::string > ViewBlockFreqFuncName("view-bfi-func-name", cl::Hidden, cl::desc("The option to specify " "the name of the function " "whose CFG will be displayed."))
auto reverse(ContainerTy &&C)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
cl::opt< GVDAGType > ViewBlockLayoutWithBFI("view-block-layout-with-bfi", cl::Hidden, cl::desc("Pop up a window to show a dag displaying MBP layout and associated " "block frequencies of the CFG."), cl::values(clEnumValN(GVDT_None, "none", "do not display graphs."), clEnumValN(GVDT_Fraction, "fraction", "display a graph using the " "fractional block frequency representation."), clEnumValN(GVDT_Integer, "integer", "display a graph using the raw " "integer fractional block frequency representation."), clEnumValN(GVDT_Count, "count", "display a graph using the real " "profile count if available.")))
bool isFunctionInPrintList(StringRef FunctionName)
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
cl::opt< unsigned > StaticLikelyProb
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Printable printBlockFreq(const BlockFrequencyInfo &BFI, BlockFrequency Freq)
Print the block frequency Freq relative to the current functions entry frequency.
char & MachineBlockPlacementID
MachineBlockPlacement - This pass places basic blocks based on branch probabilities.
cl::opt< bool > EnableExtTspBlockPlacement
char & MachineBlockPlacementStatsID
MachineBlockPlacementStats - This pass collects statistics about the basic block placement using bran...
void initializeMachineBlockPlacementPass(PassRegistry &)
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.