79#define DEBUG_TYPE "block-placement"
81STATISTIC(NumCondBranches,
"Number of conditional branches");
82STATISTIC(NumUncondBranches,
"Number of unconditional branches");
84 "Potential frequency of taking conditional branches");
86 "Potential frequency of taking unconditional branches");
90 cl::desc(
"Force the alignment of all blocks in the function in log2 format "
91 "(e.g 4 means align on 16B boundaries)."),
95 "align-all-nofallthru-blocks",
96 cl::desc(
"Force the alignment of all blocks that have no fall-through "
97 "predecessors (i.e. don't add nops that are executed). In log2 "
98 "format (e.g 4 means align on 16B boundaries)."),
102 "max-bytes-for-alignment",
103 cl::desc(
"Forces the maximum bytes allowed to be emitted when padding for "
108 "block-placement-predecessor-limit",
109 cl::desc(
"For blocks with more predecessors, certain layout optimizations"
110 "will be disabled to prevent quadratic compile time."),
115 "block-placement-exit-block-bias",
116 cl::desc(
"Block frequency percentage a loop exit block needs "
117 "over the original exit to be considered the new exit."),
124 "loop-to-cold-block-ratio",
125 cl::desc(
"Outline loop blocks from loop chain if (frequency of loop) / "
126 "(frequency of block) is greater than this ratio"),
131 cl::desc(
"Force outlining cold blocks from loops."),
136 cl::desc(
"Model the cost of loop rotation more "
137 "precisely by using profile data."),
142 cl::desc(
"Force the use of precise cost "
143 "loop rotation strategy."),
148 cl::desc(
"Cost that models the probabilistic risk of an instruction "
149 "misfetch due to a jump comparing to falling through, whose cost "
154 cl::desc(
"Cost of jump instructions."),
158 cl::desc(
"Perform tail duplication during placement. "
159 "Creates more fallthrough opportunities in "
160 "outline branches."),
165 cl::desc(
"Perform branch folding during placement. "
166 "Reduces code size."),
171 "tail-dup-placement-threshold",
172 cl::desc(
"Instruction cutoff for tail duplication during layout. "
173 "Tail merging during layout is forced to have a threshold "
174 "that won't conflict."),
179 "tail-dup-placement-aggressive-threshold",
180 cl::desc(
"Instruction cutoff for aggressive tail duplication during "
181 "layout. Used at -O3. Tail merging during layout is forced to "
182 "have a threshold that won't conflict."),
187 "tail-dup-placement-penalty",
189 "Cost penalty for blocks that can avoid breaking CFG by copying. "
190 "Copying can increase fallthrough, but it also increases icache "
191 "pressure. This parameter controls the penalty to account for that. "
192 "Percent as integer."),
197 "tail-dup-profile-percent-threshold",
198 cl::desc(
"If profile count information is used in tail duplication cost "
199 "model, the gained fall through number from tail duplication "
200 "should be at least this percent of hot count."),
205 "triangle-chain-count",
206 cl::desc(
"Number of triangle-shaped-CFG's that need to be in a row for the "
207 "triangle tail duplication heuristic to kick in. 0 to disable."),
216 "renumber-blocks-before-view",
218 "If true, basic blocks are re-numbered before MBP layout is printed "
219 "into a dot graph. Only used when a function is being printed."),
223 "ext-tsp-block-placement-max-blocks",
224 cl::desc(
"Maximum number of basic blocks in a function to run ext-TSP "
231 cl::desc(
"Use ext-tsp for size-aware block placement."));
280 BlockToChainMapType &BlockToChain;
289 : Blocks(1, BB), BlockToChain(BlockToChain) {
290 assert(BB &&
"Cannot create a chain with a null basic block");
291 BlockToChain[BB] =
this;
307 for (
iterator i = begin(); i != end(); ++i) {
323 assert(BB &&
"Can't merge a null block.");
324 assert(!Blocks.
empty() &&
"Can't merge into an empty chain.");
328 assert(!BlockToChain[BB] &&
329 "Passed chain is null, but BB has entry in BlockToChain.");
331 BlockToChain[BB] =
this;
335 assert(BB == *Chain->begin() &&
"Passed BB is not head of Chain.");
336 assert(Chain->begin() != Chain->end());
342 assert(BlockToChain[ChainBB] == Chain &&
"Incoming blocks not in chain.");
343 BlockToChain[ChainBB] =
this;
364 unsigned UnscheduledPredecessors = 0;
367class MachineBlockPlacement {
369 using BlockFilterSet = SmallSetVector<const MachineBasicBlock *, 16>;
372 struct BlockAndTailDupResult {
373 MachineBasicBlock *BB =
nullptr;
378 struct WeightedEdge {
379 BlockFrequency Weight;
380 MachineBasicBlock *Src =
nullptr;
381 MachineBasicBlock *Dest =
nullptr;
389 DenseMap<const MachineBasicBlock *, BlockAndTailDupResult> ComputedEdges;
392 MachineFunction *F =
nullptr;
395 const MachineBranchProbabilityInfo *MBPI =
nullptr;
398 std::unique_ptr<MBFIWrapper> MBFI;
401 MachineLoopInfo *MLI =
nullptr;
406 MachineBasicBlock *PreferredLoopExit =
nullptr;
409 const TargetInstrInfo *TII =
nullptr;
412 const TargetLoweringBase *TLI =
nullptr;
415 MachinePostDominatorTree *MPDT =
nullptr;
417 ProfileSummaryInfo *PSI =
nullptr;
430 TailDuplicator TailDup;
433 BlockFrequency DupThreshold;
435 unsigned TailDupSize;
439 bool UseProfileCount =
false;
448 SpecificBumpPtrAllocator<BlockChain> ChainAllocator;
456 DenseMap<const MachineBasicBlock *, BlockChain *> BlockToChain;
463 SmallPtrSet<MachineBasicBlock *, 4> BlocksWithUnanalyzableExits;
468 BlockFrequency getBlockCountOrFrequency(
const MachineBasicBlock *BB) {
469 if (UseProfileCount) {
470 auto Count = MBFI->getBlockProfileCount(BB);
472 return BlockFrequency(*
Count);
474 return BlockFrequency(0);
476 return MBFI->getBlockFreq(BB);
480 BlockFrequency scaleThreshold(MachineBasicBlock *BB);
481 void initTailDupThreshold();
485 void markChainSuccessors(
const BlockChain &Chain,
486 const MachineBasicBlock *LoopHeaderBB,
487 const BlockFilterSet *BlockFilter =
nullptr);
491 void markBlockSuccessors(
const BlockChain &Chain,
const MachineBasicBlock *BB,
492 const MachineBasicBlock *LoopHeaderBB,
493 const BlockFilterSet *BlockFilter =
nullptr);
496 collectViableSuccessors(
const MachineBasicBlock *BB,
const BlockChain &Chain,
497 const BlockFilterSet *BlockFilter,
498 SmallVector<MachineBasicBlock *, 4> &Successors);
499 bool isBestSuccessor(MachineBasicBlock *BB, MachineBasicBlock *Pred,
500 BlockFilterSet *BlockFilter);
501 void findDuplicateCandidates(SmallVectorImpl<MachineBasicBlock *> &Candidates,
502 MachineBasicBlock *BB,
503 BlockFilterSet *BlockFilter);
504 bool repeatedlyTailDuplicateBlock(
505 MachineBasicBlock *BB, MachineBasicBlock *&LPred,
506 const MachineBasicBlock *LoopHeaderBB, BlockChain &Chain,
507 BlockFilterSet *BlockFilter,
511 maybeTailDuplicateBlock(MachineBasicBlock *BB, MachineBasicBlock *LPred,
512 BlockChain &Chain, BlockFilterSet *BlockFilter,
515 bool &DuplicatedToLPred);
516 bool hasBetterLayoutPredecessor(
const MachineBasicBlock *BB,
517 const MachineBasicBlock *Succ,
518 const BlockChain &SuccChain,
519 BranchProbability SuccProb,
520 BranchProbability RealSuccProb,
521 const BlockChain &Chain,
522 const BlockFilterSet *BlockFilter);
523 BlockAndTailDupResult selectBestSuccessor(
const MachineBasicBlock *BB,
524 const BlockChain &Chain,
525 const BlockFilterSet *BlockFilter);
527 selectBestCandidateBlock(
const BlockChain &Chain,
528 SmallVectorImpl<MachineBasicBlock *> &WorkList);
530 getFirstUnplacedBlock(
const BlockChain &PlacedChain,
533 getFirstUnplacedBlock(
const BlockChain &PlacedChain,
535 const BlockFilterSet *BlockFilter);
542 void fillWorkLists(
const MachineBasicBlock *
MBB,
543 SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
544 const BlockFilterSet *BlockFilter);
546 void buildChain(
const MachineBasicBlock *BB, BlockChain &Chain,
547 BlockFilterSet *BlockFilter =
nullptr);
548 bool canMoveBottomBlockToTop(
const MachineBasicBlock *BottomBlock,
549 const MachineBasicBlock *OldTop);
550 bool hasViableTopFallthrough(
const MachineBasicBlock *Top,
551 const BlockFilterSet &LoopBlockSet);
552 BlockFrequency TopFallThroughFreq(
const MachineBasicBlock *Top,
553 const BlockFilterSet &LoopBlockSet);
554 BlockFrequency FallThroughGains(
const MachineBasicBlock *NewTop,
555 const MachineBasicBlock *OldTop,
556 const MachineBasicBlock *ExitBB,
557 const BlockFilterSet &LoopBlockSet);
558 MachineBasicBlock *findBestLoopTopHelper(MachineBasicBlock *OldTop,
559 const MachineLoop &L,
560 const BlockFilterSet &LoopBlockSet);
561 MachineBasicBlock *findBestLoopTop(
const MachineLoop &L,
562 const BlockFilterSet &LoopBlockSet);
563 MachineBasicBlock *findBestLoopExit(
const MachineLoop &L,
564 const BlockFilterSet &LoopBlockSet,
565 BlockFrequency &ExitFreq);
566 BlockFilterSet collectLoopBlockSet(
const MachineLoop &L);
567 void buildLoopChains(
const MachineLoop &L);
568 void rotateLoop(BlockChain &LoopChain,
const MachineBasicBlock *ExitingBB,
569 BlockFrequency ExitFreq,
const BlockFilterSet &LoopBlockSet);
570 void rotateLoopWithProfile(BlockChain &LoopChain,
const MachineLoop &L,
571 const BlockFilterSet &LoopBlockSet);
572 void buildCFGChains();
573 void optimizeBranches();
577 bool shouldTailDuplicate(MachineBasicBlock *BB);
580 bool isProfitableToTailDup(
const MachineBasicBlock *BB,
581 const MachineBasicBlock *Succ,
582 BranchProbability QProb,
const BlockChain &Chain,
583 const BlockFilterSet *BlockFilter);
586 bool isTrellis(
const MachineBasicBlock *BB,
587 const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
588 const BlockChain &Chain,
const BlockFilterSet *BlockFilter);
591 BlockAndTailDupResult getBestTrellisSuccessor(
592 const MachineBasicBlock *BB,
593 const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
594 BranchProbability AdjustedSumProb,
const BlockChain &Chain,
595 const BlockFilterSet *BlockFilter);
598 static std::pair<WeightedEdge, WeightedEdge> getBestNonConflictingEdges(
599 const MachineBasicBlock *BB,
604 bool canTailDuplicateUnplacedPreds(
const MachineBasicBlock *BB,
605 MachineBasicBlock *Succ,
606 const BlockChain &Chain,
607 const BlockFilterSet *BlockFilter);
611 void precomputeTriangleChains();
614 void applyExtTsp(
bool OptForSize);
617 void assignBlockOrder(
const std::vector<const MachineBasicBlock *> &NewOrder);
620 void createCFGChainExtTsp();
623 MachineBlockPlacement(
const MachineBranchProbabilityInfo *MBPI,
624 MachineLoopInfo *MLI, ProfileSummaryInfo *PSI,
625 std::unique_ptr<MBFIWrapper> MBFI,
626 MachinePostDominatorTree *MPDT,
bool AllowTailMerge)
627 : MBPI(MBPI), MBFI(std::
move(MBFI)), MLI(MLI), MPDT(MPDT), PSI(PSI),
628 AllowTailMerge(AllowTailMerge) {};
630 bool run(MachineFunction &F);
632 static bool allowTailDupPlacement(MachineFunction &MF) {
641 MachineBlockPlacementLegacy() : MachineFunctionPass(ID) {
645 bool runOnMachineFunction(MachineFunction &MF)
override {
650 &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
651 auto MBFI = std::make_unique<MBFIWrapper>(
652 getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI());
653 auto *MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
654 auto *MPDT = MachineBlockPlacement::allowTailDupPlacement(MF)
655 ? &getAnalysis<MachinePostDominatorTreeWrapperPass>()
658 auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
659 auto *PassConfig = &getAnalysis<TargetPassConfig>();
660 bool AllowTailMerge = PassConfig->getEnableTailMerge();
661 return MachineBlockPlacement(MBPI, MLI, PSI, std::move(MBFI), MPDT,
666 void getAnalysisUsage(AnalysisUsage &AU)
const override {
667 AU.
addRequired<MachineBranchProbabilityInfoWrapperPass>();
668 AU.
addRequired<MachineBlockFrequencyInfoWrapperPass>();
670 AU.
addRequired<MachinePostDominatorTreeWrapperPass>();
680char MachineBlockPlacementLegacy::ID = 0;
685 "Branch Probability Basic Block Placement",
false,
false)
702 OS <<
" ('" << BB->
getName() <<
"')";
713void MachineBlockPlacement::markChainSuccessors(
715 const BlockFilterSet *BlockFilter) {
718 for (MachineBasicBlock *
MBB : Chain) {
719 markBlockSuccessors(Chain,
MBB, LoopHeaderBB, BlockFilter);
729void MachineBlockPlacement::markBlockSuccessors(
730 const BlockChain &Chain,
const MachineBasicBlock *
MBB,
731 const MachineBasicBlock *LoopHeaderBB,
const BlockFilterSet *BlockFilter) {
737 if (BlockFilter && !BlockFilter->count(Succ))
739 BlockChain &SuccChain = *BlockToChain[Succ];
741 if (&Chain == &SuccChain || Succ == LoopHeaderBB)
746 if (SuccChain.UnscheduledPredecessors == 0 ||
747 --SuccChain.UnscheduledPredecessors > 0)
750 auto *NewBB = *SuccChain.begin();
751 if (NewBB->isEHPad())
762BranchProbability MachineBlockPlacement::collectViableSuccessors(
763 const MachineBasicBlock *BB,
const BlockChain &Chain,
764 const BlockFilterSet *BlockFilter,
765 SmallVector<MachineBasicBlock *, 4> &Successors) {
783 for (MachineBasicBlock *Succ : BB->
successors()) {
784 bool SkipSucc =
false;
785 if (Succ->isEHPad() || (BlockFilter && !BlockFilter->count(Succ))) {
788 BlockChain *SuccChain = BlockToChain[Succ];
789 if (SuccChain == &Chain) {
791 }
else if (Succ != *SuccChain->begin()) {
793 <<
" -> Mid chain!\n");
803 return AdjustedSumProb;
808static BranchProbability
814 if (SuccProbN >= SuccProbD)
829 if (Successors.
count(&BB))
832 if (!Successors.
count(Succ))
840bool MachineBlockPlacement::shouldTailDuplicate(MachineBasicBlock *BB) {
861 return (Gain / ThresholdProb) >= EntryFreq;
869bool MachineBlockPlacement::isProfitableToTailDup(
870 const MachineBasicBlock *BB,
const MachineBasicBlock *Succ,
871 BranchProbability QProb,
const BlockChain &Chain,
872 const BlockFilterSet *BlockFilter) {
896 MachineBasicBlock *PDom =
nullptr;
897 SmallVector<MachineBasicBlock *, 4> SuccSuccs;
899 auto AdjustedSuccSumProb =
900 collectViableSuccessors(Succ, Chain, BlockFilter, SuccSuccs);
902 auto BBFreq = MBFI->getBlockFreq(BB);
903 auto SuccFreq = MBFI->getBlockFreq(Succ);
904 BlockFrequency
P =
BBFreq * PProb;
905 BlockFrequency Qout =
BBFreq * QProb;
906 BlockFrequency EntryFreq = MBFI->getEntryFreq();
909 if (SuccSuccs.
size() == 0)
914 for (MachineBasicBlock *SuccSucc : SuccSuccs) {
916 if (Prob > BestSuccSucc)
926 auto SuccBestPred = BlockFrequency(0);
927 for (MachineBasicBlock *SuccPred : Succ->
predecessors()) {
928 if (SuccPred == Succ || SuccPred == BB ||
929 BlockToChain[SuccPred] == &Chain ||
930 (BlockFilter && !BlockFilter->count(SuccPred)))
934 if (Freq > SuccBestPred)
938 BlockFrequency Qin = SuccBestPred;
959 BranchProbability UProb = BestSuccSucc;
960 BranchProbability VProb = AdjustedSuccSumProb - UProb;
961 BlockFrequency
F = SuccFreq - Qin;
962 BlockFrequency
V = SuccFreq * VProb;
963 BlockFrequency QinU = std::min(Qin,
F) * UProb;
964 BlockFrequency BaseCost =
P +
V;
965 BlockFrequency DupCost = Qout + QinU + std::max(Qin,
F) * VProb;
969 BranchProbability VProb = AdjustedSuccSumProb - UProb;
970 BlockFrequency
U = SuccFreq * UProb;
971 BlockFrequency
V = SuccFreq * VProb;
972 BlockFrequency
F = SuccFreq - Qin;
1002 if (UProb > AdjustedSuccSumProb / 2 &&
1003 !hasBetterLayoutPredecessor(Succ, PDom, *BlockToChain[PDom], UProb, UProb,
1004 Chain, BlockFilter))
1007 (
P + V), (Qout + std::max(Qin,
F) * VProb + std::min(Qin,
F) * UProb),
1011 (Qout + std::min(Qin,
F) * AdjustedSuccSumProb +
1012 std::max(Qin,
F) * UProb),
1023bool MachineBlockPlacement::isTrellis(
1024 const MachineBasicBlock *BB,
1025 const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
1026 const BlockChain &Chain,
const BlockFilterSet *BlockFilter) {
1035 SmallPtrSet<const MachineBasicBlock *, 8> SeenPreds;
1037 for (MachineBasicBlock *Succ : ViableSuccs) {
1046 if (Successors.count(SuccPred)) {
1048 for (MachineBasicBlock *CheckSucc : SuccPred->successors())
1049 if (!Successors.count(CheckSucc))
1053 const BlockChain *PredChain = BlockToChain[SuccPred];
1054 if (SuccPred == BB || (BlockFilter && !BlockFilter->count(SuccPred)) ||
1055 PredChain == &Chain || PredChain == BlockToChain[Succ])
1059 if (!SeenPreds.
insert(SuccPred).second)
1077std::pair<MachineBlockPlacement::WeightedEdge,
1078 MachineBlockPlacement::WeightedEdge>
1079MachineBlockPlacement::getBestNonConflictingEdges(
1080 const MachineBasicBlock *BB,
1090 auto Cmp = [](WeightedEdge
A, WeightedEdge
B) {
return A.Weight >
B.Weight; };
1094 auto BestA = Edges[0].begin();
1095 auto BestB = Edges[1].begin();
1098 if (BestA->Src == BestB->Src) {
1100 auto SecondBestA = std::next(BestA);
1101 auto SecondBestB = std::next(BestB);
1102 BlockFrequency BestAScore = BestA->Weight + SecondBestB->Weight;
1103 BlockFrequency BestBScore = BestB->Weight + SecondBestA->Weight;
1104 if (BestAScore < BestBScore)
1105 BestA = SecondBestA;
1107 BestB = SecondBestB;
1110 if (BestB->Src == BB)
1112 return std::make_pair(*BestA, *BestB);
1122MachineBlockPlacement::BlockAndTailDupResult
1123MachineBlockPlacement::getBestTrellisSuccessor(
1124 const MachineBasicBlock *BB,
1125 const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
1126 BranchProbability AdjustedSumProb,
const BlockChain &Chain,
1127 const BlockFilterSet *BlockFilter) {
1129 BlockAndTailDupResult
Result = {
nullptr,
false};
1136 if (Successors.
size() != 2 || ViableSuccs.
size() != 2)
1142 for (
auto *Succ : ViableSuccs) {
1143 for (MachineBasicBlock *SuccPred : Succ->predecessors()) {
1145 if (SuccPred != BB) {
1146 if (BlockFilter && !BlockFilter->count(SuccPred))
1148 const BlockChain *SuccPredChain = BlockToChain[SuccPred];
1149 if (SuccPredChain == &Chain || SuccPredChain == BlockToChain[Succ])
1152 BlockFrequency EdgeFreq = MBFI->getBlockFreq(SuccPred) *
1154 Edges[SuccIndex].
push_back({EdgeFreq, SuccPred, Succ});
1160 WeightedEdge BestA, BestB;
1161 std::tie(BestA, BestB) = getBestNonConflictingEdges(BB, Edges);
1163 if (BestA.Src != BB) {
1167 LLVM_DEBUG(
dbgs() <<
"Trellis, but not one of the chosen edges.\n");
1174 if (BestA.Dest == BestB.Src) {
1177 MachineBasicBlock *Succ1 = BestA.Dest;
1178 MachineBasicBlock *Succ2 = BestB.Dest;
1180 if (allowTailDupPlacement(*
F) && shouldTailDuplicate(Succ2) &&
1181 canTailDuplicateUnplacedPreds(BB, Succ2, Chain, BlockFilter) &&
1183 Chain, BlockFilter)) {
1187 <<
", probability: " << Succ2Prob
1188 <<
" (Tail Duplicate)\n");
1190 Result.ShouldTailDup =
true;
1196 ComputedEdges[BestB.Src] = {BestB.Dest,
false};
1198 auto TrellisSucc = BestA.Dest;
1202 <<
", probability: " << SuccProb <<
" (Trellis)\n");
1210bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
1211 const MachineBasicBlock *BB, MachineBasicBlock *Succ,
1212 const BlockChain &Chain,
const BlockFilterSet *BlockFilter) {
1213 if (!shouldTailDuplicate(Succ))
1217 bool Duplicate =
true;
1219 unsigned int NumDup = 0;
1228 if (Pred == BB || (BlockFilter && !BlockFilter->count(Pred)) ||
1229 (BlockToChain[Pred] == &Chain && !Succ->
succ_empty()))
1279 if (
F->getFunction().hasProfileData())
1310 if ((NumDup > Succ->
succ_size()) || !Duplicate)
1333void MachineBlockPlacement::precomputeTriangleChains() {
1334 struct TriangleChain {
1335 std::vector<MachineBasicBlock *> Edges;
1337 TriangleChain(MachineBasicBlock *src, MachineBasicBlock *dst)
1338 : Edges({src, dst}) {}
1340 void append(MachineBasicBlock *dst) {
1341 assert(getKey()->isSuccessor(dst) &&
1342 "Attempting to append a block that is not a successor.");
1343 Edges.push_back(dst);
1346 unsigned count()
const {
return Edges.size() - 1; }
1348 MachineBasicBlock *getKey()
const {
return Edges.back(); }
1357 DenseMap<const MachineBasicBlock *, TriangleChain> TriangleChainMap;
1358 for (MachineBasicBlock &BB : *
F) {
1362 MachineBasicBlock *PDom =
nullptr;
1363 for (MachineBasicBlock *Succ : BB.
successors()) {
1371 if (PDom ==
nullptr)
1378 if (!shouldTailDuplicate(PDom))
1380 bool CanTailDuplicate =
true;
1387 CanTailDuplicate =
false;
1393 if (!CanTailDuplicate)
1400 auto Found = TriangleChainMap.
find(&BB);
1403 if (Found != TriangleChainMap.
end()) {
1404 TriangleChain Chain = std::move(Found->second);
1405 TriangleChainMap.
erase(Found);
1407 TriangleChainMap.
insert(std::make_pair(Chain.getKey(), std::move(Chain)));
1409 auto InsertResult = TriangleChainMap.
try_emplace(PDom, &BB, PDom);
1410 assert(InsertResult.second &&
"Block seen twice.");
1418 for (
auto &ChainPair : TriangleChainMap) {
1419 TriangleChain &Chain = ChainPair.second;
1425 MachineBasicBlock *dst = Chain.Edges.back();
1426 Chain.Edges.pop_back();
1427 for (MachineBasicBlock *src :
reverse(Chain.Edges)) {
1430 <<
" as pre-computed based on triangles.\n");
1432 auto InsertResult = ComputedEdges.
insert({src, {dst,
true}});
1433 assert(InsertResult.second &&
"Block seen twice.");
1443static BranchProbability
1475bool MachineBlockPlacement::hasBetterLayoutPredecessor(
1476 const MachineBasicBlock *BB,
const MachineBasicBlock *Succ,
1477 const BlockChain &SuccChain, BranchProbability SuccProb,
1478 BranchProbability RealSuccProb,
const BlockChain &Chain,
1479 const BlockFilterSet *BlockFilter) {
1482 if (SuccChain.UnscheduledPredecessors == 0)
1607 BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb;
1608 bool BadCFGConflict =
false;
1611 BlockChain *PredChain = BlockToChain[Pred];
1612 if (Pred == Succ || PredChain == &SuccChain ||
1613 (BlockFilter && !BlockFilter->count(Pred)) || PredChain == &Chain ||
1614 Pred != *std::prev(PredChain->end()) ||
1633 BlockFrequency PredEdgeFreq =
1635 if (PredEdgeFreq * HotProb >= CandidateEdgeFreq * HotProb.
getCompl()) {
1636 BadCFGConflict =
true;
1641 if (BadCFGConflict) {
1643 << SuccProb <<
" (prob) (non-cold CFG conflict)\n");
1660MachineBlockPlacement::BlockAndTailDupResult
1661MachineBlockPlacement::selectBestSuccessor(
const MachineBasicBlock *BB,
1662 const BlockChain &Chain,
1663 const BlockFilterSet *BlockFilter) {
1666 BlockAndTailDupResult BestSucc = {
nullptr,
false};
1669 SmallVector<MachineBasicBlock *, 4> Successors;
1670 auto AdjustedSumProb =
1671 collectViableSuccessors(BB, Chain, BlockFilter, Successors);
1678 auto FoundEdge = ComputedEdges.
find(BB);
1679 if (FoundEdge != ComputedEdges.
end()) {
1680 MachineBasicBlock *Succ = FoundEdge->second.BB;
1681 ComputedEdges.
erase(FoundEdge);
1682 BlockChain *SuccChain = BlockToChain[Succ];
1683 if (BB->
isSuccessor(Succ) && (!BlockFilter || BlockFilter->count(Succ)) &&
1684 SuccChain != &Chain && Succ == *SuccChain->begin())
1685 return FoundEdge->second;
1690 if (isTrellis(BB, Successors, Chain, BlockFilter))
1691 return getBestTrellisSuccessor(BB, Successors, AdjustedSumProb, Chain,
1699 for (MachineBasicBlock *Succ : Successors) {
1701 BranchProbability SuccProb =
1704 BlockChain &SuccChain = *BlockToChain[Succ];
1707 if (hasBetterLayoutPredecessor(BB, Succ, SuccChain, SuccProb, RealSuccProb,
1708 Chain, BlockFilter)) {
1710 if (allowTailDupPlacement(*
F) && shouldTailDuplicate(Succ))
1717 <<
", probability: " << SuccProb
1718 << (SuccChain.UnscheduledPredecessors != 0 ?
" (CFG break)" :
"")
1721 if (BestSucc.BB && BestProb >= SuccProb) {
1728 BestProb = SuccProb;
1736 [](std::tuple<BranchProbability, MachineBasicBlock *> L,
1737 std::tuple<BranchProbability, MachineBasicBlock *> R) {
1738 return std::get<0>(L) > std::get<0>(R);
1740 for (
auto &Tup : DupCandidates) {
1741 BranchProbability DupProb;
1742 MachineBasicBlock *Succ;
1743 std::tie(DupProb, Succ) = Tup;
1744 if (DupProb < BestProb)
1746 if (canTailDuplicateUnplacedPreds(BB, Succ, Chain, BlockFilter) &&
1747 (isProfitableToTailDup(BB, Succ, BestProb, Chain, BlockFilter))) {
1749 <<
", probability: " << DupProb
1750 <<
" (Tail Duplicate)\n");
1752 BestSucc.ShouldTailDup =
true;
1773MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
1774 const BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList) {
1780 return BlockToChain.
lookup(BB) == &Chain;
1783 if (WorkList.
empty())
1786 bool IsEHPad = WorkList[0]->isEHPad();
1788 MachineBasicBlock *BestBlock =
nullptr;
1789 BlockFrequency BestFreq;
1790 for (MachineBasicBlock *
MBB : WorkList) {
1792 "EHPad mismatch between block and work list.");
1794 BlockChain &SuccChain = *BlockToChain[
MBB];
1795 if (&SuccChain == &Chain)
1798 assert(SuccChain.UnscheduledPredecessors == 0 &&
1799 "Found CFG-violating block");
1801 BlockFrequency CandidateFreq = MBFI->getBlockFreq(
MBB);
1824 if (BestBlock && (IsEHPad ^ (BestFreq >= CandidateFreq)))
1828 BestFreq = CandidateFreq;
1841MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
1842 const BlockChain &PlacedChain,
1847 if (BlockChain *Chain = BlockToChain[&*
I]; Chain != &PlacedChain) {
1848 PrevUnplacedBlockIt =
I;
1852 return *Chain->begin();
1868MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
1869 const BlockChain &PlacedChain,
1870 BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt,
1871 const BlockFilterSet *BlockFilter) {
1873 for (; PrevUnplacedBlockInFilterIt != BlockFilter->end();
1874 ++PrevUnplacedBlockInFilterIt) {
1875 BlockChain *
C = BlockToChain[*PrevUnplacedBlockInFilterIt];
1876 if (
C != &PlacedChain) {
1883void MachineBlockPlacement::fillWorkLists(
1884 const MachineBasicBlock *
MBB, SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
1885 const BlockFilterSet *BlockFilter =
nullptr) {
1886 BlockChain &Chain = *BlockToChain[
MBB];
1887 if (!UpdatedPreds.
insert(&Chain).second)
1891 Chain.UnscheduledPredecessors == 0 &&
1892 "Attempting to place block with unscheduled predecessors in worklist.");
1893 for (MachineBasicBlock *ChainBB : Chain) {
1894 assert(BlockToChain[ChainBB] == &Chain &&
1895 "Block in chain doesn't match BlockToChain map.");
1896 for (MachineBasicBlock *Pred : ChainBB->predecessors()) {
1897 if (BlockFilter && !BlockFilter->count(Pred))
1899 if (BlockToChain[Pred] == &Chain)
1901 ++Chain.UnscheduledPredecessors;
1905 if (Chain.UnscheduledPredecessors != 0)
1908 MachineBasicBlock *BB = *Chain.
begin();
1915void MachineBlockPlacement::buildChain(
const MachineBasicBlock *HeadBB,
1917 BlockFilterSet *BlockFilter) {
1918 assert(HeadBB &&
"BB must not be null.\n");
1919 assert(BlockToChain[HeadBB] == &Chain &&
"BlockToChainMap mis-match.\n");
1921 BlockFilterSet::iterator PrevUnplacedBlockInFilterIt;
1923 PrevUnplacedBlockInFilterIt = BlockFilter->begin();
1925 const MachineBasicBlock *LoopHeaderBB = HeadBB;
1926 markChainSuccessors(Chain, LoopHeaderBB, BlockFilter);
1927 MachineBasicBlock *BB = *std::prev(Chain.end());
1929 assert(BB &&
"null block found at end of chain in loop.");
1930 assert(BlockToChain[BB] == &Chain &&
"BlockToChainMap mis-match in loop.");
1931 assert(*std::prev(Chain.end()) == BB &&
"BB Not found at end of chain.");
1935 auto Result = selectBestSuccessor(BB, Chain, BlockFilter);
1936 MachineBasicBlock *BestSucc =
Result.BB;
1937 bool ShouldTailDup =
Result.ShouldTailDup;
1938 if (allowTailDupPlacement(*
F))
1939 ShouldTailDup |= (BestSucc && canTailDuplicateUnplacedPreds(
1940 BB, BestSucc, Chain, BlockFilter));
1946 BestSucc = selectBestCandidateBlock(Chain, BlockWorkList);
1948 BestSucc = selectBestCandidateBlock(Chain, EHPadWorkList);
1952 BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockInFilterIt,
1955 BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockIt);
1959 LLVM_DEBUG(
dbgs() <<
"Unnatural loop CFG detected, forcibly merging the "
1960 "layout successor until the CFG reduces\n");
1965 if (allowTailDupPlacement(*
F) && BestSucc && ShouldTailDup) {
1966 repeatedlyTailDuplicateBlock(BestSucc, BB, LoopHeaderBB, Chain,
1967 BlockFilter, PrevUnplacedBlockIt,
1968 PrevUnplacedBlockInFilterIt);
1976 BlockChain &SuccChain = *BlockToChain[BestSucc];
1979 SuccChain.UnscheduledPredecessors = 0;
1982 markChainSuccessors(SuccChain, LoopHeaderBB, BlockFilter);
1983 Chain.merge(BestSucc, &SuccChain);
1984 BB = *std::prev(Chain.end());
2005bool MachineBlockPlacement::canMoveBottomBlockToTop(
2006 const MachineBasicBlock *BottomBlock,
const MachineBasicBlock *OldTop) {
2009 MachineBasicBlock *Pred = *BottomBlock->
pred_begin();
2013 MachineBasicBlock *OtherBB = *Pred->
succ_begin();
2014 if (OtherBB == BottomBlock)
2016 if (OtherBB == OldTop)
2024MachineBlockPlacement::TopFallThroughFreq(
const MachineBasicBlock *Top,
2025 const BlockFilterSet &LoopBlockSet) {
2026 BlockFrequency MaxFreq = BlockFrequency(0);
2028 BlockChain *PredChain = BlockToChain[Pred];
2029 if (!LoopBlockSet.count(Pred) &&
2030 (!PredChain || Pred == *std::prev(PredChain->end()))) {
2035 for (MachineBasicBlock *Succ : Pred->
successors()) {
2037 BlockChain *SuccChain = BlockToChain[Succ];
2040 if (!LoopBlockSet.count(Succ) && (SuccProb > TopProb) &&
2041 (!SuccChain || Succ == *SuccChain->begin())) {
2047 BlockFrequency EdgeFreq =
2049 if (EdgeFreq > MaxFreq)
2078BlockFrequency MachineBlockPlacement::FallThroughGains(
2079 const MachineBasicBlock *NewTop,
const MachineBasicBlock *OldTop,
2080 const MachineBasicBlock *ExitBB,
const BlockFilterSet &LoopBlockSet) {
2081 BlockFrequency FallThrough2Top = TopFallThroughFreq(OldTop, LoopBlockSet);
2082 BlockFrequency FallThrough2Exit = BlockFrequency(0);
2086 BlockFrequency BackEdgeFreq =
2090 MachineBasicBlock *BestPred =
nullptr;
2091 BlockFrequency FallThroughFromPred = BlockFrequency(0);
2092 for (MachineBasicBlock *Pred : NewTop->
predecessors()) {
2093 if (!LoopBlockSet.count(Pred))
2095 BlockChain *PredChain = BlockToChain[Pred];
2096 if (!PredChain || Pred == *std::prev(PredChain->end())) {
2097 BlockFrequency EdgeFreq =
2099 if (EdgeFreq > FallThroughFromPred) {
2100 FallThroughFromPred = EdgeFreq;
2108 BlockFrequency NewFreq = BlockFrequency(0);
2110 for (MachineBasicBlock *Succ : BestPred->
successors()) {
2111 if ((Succ == NewTop) || (Succ == BestPred) || !LoopBlockSet.count(Succ))
2115 BlockChain *SuccChain = BlockToChain[Succ];
2116 if ((SuccChain && (Succ != *SuccChain->begin())) ||
2117 (SuccChain == BlockToChain[BestPred]))
2119 BlockFrequency EdgeFreq = MBFI->getBlockFreq(BestPred) *
2121 if (EdgeFreq > NewFreq)
2124 BlockFrequency OrigEdgeFreq = MBFI->getBlockFreq(BestPred) *
2126 if (NewFreq > OrigEdgeFreq) {
2130 NewFreq = BlockFrequency(0);
2131 FallThroughFromPred = BlockFrequency(0);
2135 BlockFrequency
Result = BlockFrequency(0);
2136 BlockFrequency Gains = BackEdgeFreq + NewFreq;
2137 BlockFrequency Lost =
2138 FallThrough2Top + FallThrough2Exit + FallThroughFromPred;
2166MachineBasicBlock *MachineBlockPlacement::findBestLoopTopHelper(
2167 MachineBasicBlock *OldTop,
const MachineLoop &L,
2168 const BlockFilterSet &LoopBlockSet) {
2172 BlockChain &HeaderChain = *BlockToChain[OldTop];
2173 if (!LoopBlockSet.count(*HeaderChain.begin()))
2175 if (OldTop != *HeaderChain.begin())
2181 BlockFrequency BestGains = BlockFrequency(0);
2182 MachineBasicBlock *BestPred =
nullptr;
2183 for (MachineBasicBlock *Pred : OldTop->
predecessors()) {
2184 if (!LoopBlockSet.count(Pred))
2186 if (Pred ==
L.getHeader())
2194 MachineBasicBlock *OtherBB =
nullptr;
2197 if (OtherBB == OldTop)
2201 if (!canMoveBottomBlockToTop(Pred, OldTop))
2204 BlockFrequency Gains =
2205 FallThroughGains(Pred, OldTop, OtherBB, LoopBlockSet);
2206 if ((Gains > BlockFrequency(0)) &&
2207 (Gains > BestGains ||
2222 (*BestPred->
pred_begin())->succ_size() == 1 &&
2235MachineBlockPlacement::findBestLoopTop(
const MachineLoop &L,
2236 const BlockFilterSet &LoopBlockSet) {
2245 return L.getHeader();
2247 MachineBasicBlock *OldTop =
nullptr;
2248 MachineBasicBlock *NewTop =
L.getHeader();
2249 while (NewTop != OldTop) {
2251 NewTop = findBestLoopTopHelper(OldTop, L, LoopBlockSet);
2252 if (NewTop != OldTop)
2253 ComputedEdges[NewTop] = {OldTop,
false};
2264MachineBlockPlacement::findBestLoopExit(
const MachineLoop &L,
2265 const BlockFilterSet &LoopBlockSet,
2266 BlockFrequency &ExitFreq) {
2275 BlockChain &HeaderChain = *BlockToChain[
L.getHeader()];
2276 if (!LoopBlockSet.count(*HeaderChain.begin()))
2279 BlockFrequency BestExitEdgeFreq;
2280 unsigned BestExitLoopDepth = 0;
2281 MachineBasicBlock *ExitingBB =
nullptr;
2285 SmallPtrSet<MachineBasicBlock *, 4> BlocksExitingToOuterLoop;
2289 for (MachineBasicBlock *
MBB :
L.getBlocks()) {
2290 BlockChain &Chain = *BlockToChain[
MBB];
2293 if (
MBB != *std::prev(Chain.end()))
2300 MachineBasicBlock *OldExitingBB = ExitingBB;
2301 BlockFrequency OldBestExitEdgeFreq = BestExitEdgeFreq;
2302 bool HasLoopingSucc =
false;
2308 BlockChain &SuccChain = *BlockToChain[Succ];
2310 if (&Chain == &SuccChain) {
2317 if (LoopBlockSet.count(Succ)) {
2320 HasLoopingSucc =
true;
2324 unsigned SuccLoopDepth = 0;
2325 if (MachineLoop *ExitLoop = MLI->
getLoopFor(Succ)) {
2326 SuccLoopDepth = ExitLoop->getLoopDepth();
2327 if (ExitLoop->contains(&L))
2331 BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(
MBB) * SuccProb;
2334 <<
getBlockName(Succ) <<
" [L:" << SuccLoopDepth <<
"] ("
2341 if (!ExitingBB || SuccLoopDepth > BestExitLoopDepth ||
2342 ExitEdgeFreq > BestExitEdgeFreq ||
2344 !(ExitEdgeFreq < BestExitEdgeFreq * Bias))) {
2345 BestExitEdgeFreq = ExitEdgeFreq;
2350 if (!HasLoopingSucc) {
2352 ExitingBB = OldExitingBB;
2353 BestExitEdgeFreq = OldBestExitEdgeFreq;
2360 dbgs() <<
" No other candidate exit blocks, using loop header\n");
2363 if (
L.getNumBlocks() == 1) {
2364 LLVM_DEBUG(
dbgs() <<
" Loop has 1 block, using loop header as exit\n");
2371 if (!BlocksExitingToOuterLoop.
empty() &&
2372 !BlocksExitingToOuterLoop.
count(ExitingBB))
2377 ExitFreq = BestExitEdgeFreq;
2385bool MachineBlockPlacement::hasViableTopFallthrough(
2386 const MachineBasicBlock *Top,
const BlockFilterSet &LoopBlockSet) {
2388 BlockChain *PredChain = BlockToChain[Pred];
2389 if (!LoopBlockSet.count(Pred) &&
2390 (!PredChain || Pred == *std::prev(PredChain->end()))) {
2395 for (MachineBasicBlock *Succ : Pred->
successors()) {
2397 BlockChain *SuccChain = BlockToChain[Succ];
2400 if ((!SuccChain || Succ == *SuccChain->begin()) && SuccProb > TopProb) {
2418void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
2419 const MachineBasicBlock *ExitingBB,
2420 BlockFrequency ExitFreq,
2421 const BlockFilterSet &LoopBlockSet) {
2425 MachineBasicBlock *Top = *LoopChain.begin();
2426 MachineBasicBlock *Bottom = *std::prev(LoopChain.end());
2429 if (Bottom == ExitingBB)
2436 bool ViableTopFallthrough = hasViableTopFallthrough(Top, LoopBlockSet);
2441 if (ViableTopFallthrough) {
2442 for (MachineBasicBlock *Succ : Bottom->
successors()) {
2443 BlockChain *SuccChain = BlockToChain[Succ];
2444 if (!LoopBlockSet.count(Succ) &&
2445 (!SuccChain || Succ == *SuccChain->begin()))
2451 BlockFrequency FallThrough2Top = TopFallThroughFreq(Top, LoopBlockSet);
2452 if (FallThrough2Top >= ExitFreq)
2456 BlockChain::iterator ExitIt =
llvm::find(LoopChain, ExitingBB);
2457 if (ExitIt == LoopChain.end())
2479 if (ViableTopFallthrough) {
2480 assert(std::next(ExitIt) != LoopChain.end() &&
2481 "Exit should not be last BB");
2482 MachineBasicBlock *NextBlockInChain = *std::next(ExitIt);
2490 std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end());
2506void MachineBlockPlacement::rotateLoopWithProfile(
2507 BlockChain &LoopChain,
const MachineLoop &L,
2508 const BlockFilterSet &LoopBlockSet) {
2509 auto RotationPos = LoopChain.end();
2510 MachineBasicBlock *ChainHeaderBB = *LoopChain.begin();
2520 auto ScaleBlockFrequency = [](BlockFrequency Freq,
2521 unsigned Scale) -> BlockFrequency {
2523 return BlockFrequency(0);
2526 return Freq / BranchProbability(1, Scale);
2532 BlockFrequency HeaderFallThroughCost(0);
2534 BlockChain *PredChain = BlockToChain[Pred];
2535 if (!LoopBlockSet.count(Pred) &&
2536 (!PredChain || Pred == *std::prev(PredChain->end()))) {
2537 auto EdgeFreq = MBFI->getBlockFreq(Pred) *
2539 auto FallThruCost = ScaleBlockFrequency(EdgeFreq,
MisfetchCost);
2543 FallThruCost += ScaleBlockFrequency(EdgeFreq,
JumpInstCost);
2544 HeaderFallThroughCost = std::max(HeaderFallThroughCost, FallThruCost);
2553 for (
auto *BB : LoopChain) {
2556 BlockChain *SuccChain = BlockToChain[Succ];
2557 if (!LoopBlockSet.count(Succ) &&
2558 (!SuccChain || Succ == *SuccChain->begin())) {
2560 LargestExitEdgeProb = std::max(LargestExitEdgeProb, SuccProb);
2564 auto ExitFreq = MBFI->getBlockFreq(BB) * LargestExitEdgeProb;
2572 for (
auto Iter = LoopChain.begin(), TailIter = std::prev(LoopChain.end()),
2573 EndIter = LoopChain.end();
2574 Iter != EndIter; Iter++, TailIter++) {
2577 if (TailIter == LoopChain.end())
2578 TailIter = LoopChain.begin();
2580 auto TailBB = *TailIter;
2583 BlockFrequency
Cost = BlockFrequency(0);
2588 if (Iter != LoopChain.begin())
2589 Cost += HeaderFallThroughCost;
2593 for (
auto &ExitWithFreq : ExitsWithFreq)
2594 if (TailBB != ExitWithFreq.first)
2595 Cost += ExitWithFreq.second;
2611 if (TailBB->isSuccessor(*Iter)) {
2612 auto TailBBFreq = MBFI->getBlockFreq(TailBB);
2613 if (TailBB->succ_size() == 1)
2615 else if (TailBB->succ_size() == 2) {
2617 auto TailToHeadFreq = TailBBFreq * TailToHeadProb;
2618 auto ColderEdgeFreq = TailToHeadProb > BranchProbability(1, 2)
2619 ? TailBBFreq * TailToHeadProb.
getCompl()
2630 if (
Cost < SmallestRotationCost) {
2631 SmallestRotationCost =
Cost;
2636 if (RotationPos != LoopChain.end()) {
2638 <<
" to the top\n");
2639 std::rotate(LoopChain.begin(), RotationPos, LoopChain.end());
2647MachineBlockPlacement::BlockFilterSet
2648MachineBlockPlacement::collectLoopBlockSet(
const MachineLoop &L) {
2652 bool operator()(
const MachineBasicBlock *
X,
2653 const MachineBasicBlock *
Y)
const {
2654 return X->getNumber() <
Y->getNumber();
2657 std::set<const MachineBasicBlock *, MBBCompare> LoopBlockSet;
2669 BlockFrequency LoopFreq(0);
2670 for (
auto *LoopPred :
L.getHeader()->predecessors())
2671 if (!
L.contains(LoopPred))
2672 LoopFreq += MBFI->getBlockFreq(LoopPred) *
2675 for (MachineBasicBlock *LoopBB :
L.getBlocks()) {
2676 if (LoopBlockSet.count(LoopBB))
2681 BlockChain *Chain = BlockToChain[LoopBB];
2682 for (MachineBasicBlock *ChainBB : *Chain)
2683 LoopBlockSet.insert(ChainBB);
2686 LoopBlockSet.insert(
L.block_begin(),
L.block_end());
2691 BlockFilterSet Ret(LoopBlockSet.begin(), LoopBlockSet.end());
2701void MachineBlockPlacement::buildLoopChains(
const MachineLoop &L) {
2704 for (
const MachineLoop *InnerLoop : L)
2705 buildLoopChains(*InnerLoop);
2708 "BlockWorkList not empty when starting to build loop chains.");
2710 "EHPadWorkList not empty when starting to build loop chains.");
2711 BlockFilterSet LoopBlockSet = collectLoopBlockSet(L);
2716 bool RotateLoopWithProfile =
2724 MachineBasicBlock *LoopTop = findBestLoopTop(L, LoopBlockSet);
2732 PreferredLoopExit =
nullptr;
2733 BlockFrequency ExitFreq;
2734 if (!RotateLoopWithProfile && LoopTop ==
L.getHeader())
2735 PreferredLoopExit = findBestLoopExit(L, LoopBlockSet, ExitFreq);
2737 BlockChain &LoopChain = *BlockToChain[LoopTop];
2742 SmallPtrSet<BlockChain *, 4> UpdatedPreds;
2743 assert(LoopChain.UnscheduledPredecessors == 0 &&
2744 "LoopChain should not have unscheduled predecessors.");
2745 UpdatedPreds.
insert(&LoopChain);
2747 for (
const MachineBasicBlock *LoopBB : LoopBlockSet)
2748 fillWorkLists(LoopBB, UpdatedPreds, &LoopBlockSet);
2750 buildChain(LoopTop, LoopChain, &LoopBlockSet);
2752 if (RotateLoopWithProfile)
2753 rotateLoopWithProfile(LoopChain, L, LoopBlockSet);
2755 rotateLoop(LoopChain, PreferredLoopExit, ExitFreq, LoopBlockSet);
2759 bool BadLoop =
false;
2760 if (LoopChain.UnscheduledPredecessors) {
2762 dbgs() <<
"Loop chain contains a block without its preds placed!\n"
2763 <<
" Loop header: " <<
getBlockName(*
L.block_begin()) <<
"\n"
2764 <<
" Chain header: " <<
getBlockName(*LoopChain.begin()) <<
"\n";
2766 for (MachineBasicBlock *ChainBB : LoopChain) {
2768 if (!LoopBlockSet.remove(ChainBB)) {
2772 dbgs() <<
"Loop chain contains a block not contained by the loop!\n"
2773 <<
" Loop header: " <<
getBlockName(*
L.block_begin()) <<
"\n"
2774 <<
" Chain header: " <<
getBlockName(*LoopChain.begin()) <<
"\n"
2779 if (!LoopBlockSet.empty()) {
2781 for (
const MachineBasicBlock *LoopBB : LoopBlockSet)
2782 dbgs() <<
"Loop contains blocks never placed into a chain!\n"
2783 <<
" Loop header: " <<
getBlockName(*
L.block_begin()) <<
"\n"
2784 <<
" Chain header: " <<
getBlockName(*LoopChain.begin()) <<
"\n"
2787 assert(!BadLoop &&
"Detected problems with the placement of this loop.");
2790 BlockWorkList.
clear();
2791 EHPadWorkList.
clear();
2794void MachineBlockPlacement::buildCFGChains() {
2800 MachineBasicBlock *BB = &*FI;
2802 new (ChainAllocator.
Allocate()) BlockChain(BlockToChain, BB);
2807 MachineBasicBlock *
TBB =
nullptr, *FBB =
nullptr;
2812 MachineBasicBlock *NextBB = &*NextFI;
2815 assert(NextFI != FE &&
"Can't fallthrough past the last block.");
2816 LLVM_DEBUG(
dbgs() <<
"Pre-merging due to unanalyzable fallthrough: "
2819 Chain->merge(NextBB,
nullptr);
2821 BlocksWithUnanalyzableExits.
insert(&*BB);
2829 PreferredLoopExit =
nullptr;
2830 for (MachineLoop *L : *MLI)
2831 buildLoopChains(*L);
2834 "BlockWorkList should be empty before building final chain.");
2836 "EHPadWorkList should be empty before building final chain.");
2838 SmallPtrSet<BlockChain *, 4> UpdatedPreds;
2839 for (MachineBasicBlock &
MBB : *
F)
2840 fillWorkLists(&
MBB, UpdatedPreds);
2842 BlockChain &FunctionChain = *BlockToChain[&
F->front()];
2843 buildChain(&
F->front(), FunctionChain);
2846 using FunctionBlockSetType = SmallPtrSet<MachineBasicBlock *, 16>;
2850 bool BadFunc =
false;
2851 FunctionBlockSetType FunctionBlockSet;
2852 for (MachineBasicBlock &
MBB : *
F)
2853 FunctionBlockSet.insert(&
MBB);
2855 for (MachineBasicBlock *ChainBB : FunctionChain)
2856 if (!FunctionBlockSet.erase(ChainBB)) {
2858 dbgs() <<
"Function chain contains a block not in the function!\n"
2862 if (!FunctionBlockSet.empty()) {
2864 for (MachineBasicBlock *RemainingBB : FunctionBlockSet)
2865 dbgs() <<
"Function contains blocks never placed into a chain!\n"
2866 <<
" Bad block: " <<
getBlockName(RemainingBB) <<
"\n";
2868 assert(!BadFunc &&
"Detected problems with the block placement.");
2873 SmallVector<MachineBasicBlock *, 4> OriginalLayoutSuccessors(
2874 F->getNumBlockIDs());
2876 MachineBasicBlock *LastMBB =
nullptr;
2877 for (
auto &
MBB : *
F) {
2878 if (LastMBB !=
nullptr)
2882 OriginalLayoutSuccessors[
F->back().getNumber()] =
nullptr;
2888 for (MachineBasicBlock *ChainBB : FunctionChain) {
2889 LLVM_DEBUG(
dbgs() << (ChainBB == *FunctionChain.begin() ?
"Placing chain "
2893 F->splice(InsertPos, ChainBB);
2898 if (ChainBB == *FunctionChain.begin())
2906 MachineBasicBlock *
TBB =
nullptr, *FBB =
nullptr;
2909 if (!BlocksWithUnanalyzableExits.
count(PrevBB)) {
2915 "Unexpected block with un-analyzable fallthrough!");
2917 TBB = FBB =
nullptr;
2949 MachineBasicBlock *
TBB =
nullptr, *FBB =
nullptr;
2951 MachineBasicBlock *PrevBB = &
F->
back();
2955 BlockWorkList.
clear();
2956 EHPadWorkList.
clear();
2959void MachineBlockPlacement::optimizeBranches() {
2960 BlockChain &FunctionChain = *BlockToChain[&
F->front()];
2969 for (MachineBasicBlock *ChainBB : FunctionChain) {
2971 MachineBasicBlock *
TBB =
nullptr, *FBB =
nullptr;
2992 auto Dl = ChainBB->findBranchDebugLoc();
2998void MachineBlockPlacement::alignBlocks() {
3005 if (
F->getFunction().hasMinSize() ||
3010 BlockChain &FunctionChain = *BlockToChain[&
F->front()];
3012 if (FunctionChain.begin() == FunctionChain.end())
3015 const BranchProbability ColdProb(1, 5);
3016 BlockFrequency EntryFreq = MBFI->getBlockFreq(&
F->front());
3017 BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb;
3018 for (MachineBasicBlock *ChainBB : FunctionChain) {
3019 if (ChainBB == *FunctionChain.begin())
3026 MachineLoop *
L = MLI->getLoopFor(ChainBB);
3031 unsigned MDAlign = 1;
3032 MDNode *LoopID =
L->getLoopID();
3041 if (S->
getString() ==
"llvm.loop.align") {
3043 "per-loop align metadata should have two operands.");
3046 assert(MDAlign >= 1 &&
"per-loop align value must be positive.");
3052 const Align LoopAlign = std::max(TLIAlign,
Align(MDAlign));
3058 BlockFrequency Freq = MBFI->getBlockFreq(ChainBB);
3059 if (Freq < WeightedEntryFreq)
3064 MachineBasicBlock *LoopHeader =
L->getHeader();
3065 BlockFrequency LoopHeaderFreq = MBFI->getBlockFreq(LoopHeader);
3066 if (Freq < (LoopHeaderFreq * ColdProb))
3076 MachineBasicBlock *LayoutPred =
3079 auto DetermineMaxAlignmentPadding = [&]() {
3086 ChainBB->setMaxBytesForAlignment(MaxBytes);
3092 ChainBB->setAlignment(LoopAlign);
3093 DetermineMaxAlignmentPadding();
3101 BranchProbability LayoutProb =
3103 BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb;
3104 if (LayoutEdgeFreq <= (Freq * ColdProb)) {
3105 ChainBB->setAlignment(LoopAlign);
3106 DetermineMaxAlignmentPadding();
3110 const bool HasMaxBytesOverride =
3115 for (MachineBasicBlock &
MBB : *
F) {
3116 if (HasMaxBytesOverride)
3125 for (
auto MBI = std::next(
F->begin()), MBE =
F->end(); MBI != MBE; ++MBI) {
3126 auto LayoutPred = std::prev(MBI);
3128 if (HasMaxBytesOverride)
3153bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
3154 MachineBasicBlock *BB, MachineBasicBlock *&LPred,
3155 const MachineBasicBlock *LoopHeaderBB, BlockChain &Chain,
3157 BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt) {
3158 bool Removed, DuplicatedToLPred;
3159 bool DuplicatedToOriginalLPred;
3160 Removed = maybeTailDuplicateBlock(
3161 BB, LPred, Chain, BlockFilter, PrevUnplacedBlockIt,
3162 PrevUnplacedBlockInFilterIt, DuplicatedToLPred);
3165 DuplicatedToOriginalLPred = DuplicatedToLPred;
3170 while (DuplicatedToLPred && Removed) {
3171 MachineBasicBlock *DupBB, *DupPred;
3177 BlockChain::iterator ChainEnd = Chain.end();
3178 DupBB = *(--ChainEnd);
3180 if (ChainEnd == Chain.begin())
3182 DupPred = *std::prev(ChainEnd);
3183 Removed = maybeTailDuplicateBlock(
3184 DupBB, DupPred, Chain, BlockFilter, PrevUnplacedBlockIt,
3185 PrevUnplacedBlockInFilterIt, DuplicatedToLPred);
3192 LPred = *std::prev(Chain.end());
3193 if (DuplicatedToOriginalLPred)
3194 markBlockSuccessors(Chain, LPred, LoopHeaderBB, BlockFilter);
3211bool MachineBlockPlacement::maybeTailDuplicateBlock(
3212 MachineBasicBlock *BB, MachineBasicBlock *LPred, BlockChain &Chain,
3214 BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt,
3215 bool &DuplicatedToLPred) {
3216 DuplicatedToLPred =
false;
3217 if (!shouldTailDuplicate(BB))
3225 bool Removed =
false;
3226 auto RemovalCallback = [&](MachineBasicBlock *RemBB) {
3231 if (
auto It = BlockToChain.
find(RemBB); It != BlockToChain.
end()) {
3232 It->second->remove(RemBB);
3233 BlockToChain.
erase(It);
3237 if (&(*PrevUnplacedBlockIt) == RemBB) {
3238 PrevUnplacedBlockIt++;
3242 if (RemBB->isEHPad()) {
3253 if (It != BlockFilter->end()) {
3254 if (It < PrevUnplacedBlockInFilterIt) {
3255 const MachineBasicBlock *PrevBB = *PrevUnplacedBlockInFilterIt;
3258 auto Distance = PrevUnplacedBlockInFilterIt - It - 1;
3259 PrevUnplacedBlockInFilterIt = BlockFilter->
erase(It) + Distance;
3260 assert(*PrevUnplacedBlockInFilterIt == PrevBB);
3262 }
else if (It == PrevUnplacedBlockInFilterIt)
3265 PrevUnplacedBlockInFilterIt = BlockFilter->erase(It);
3267 BlockFilter->erase(It);
3272 MLI->removeBlock(RemBB);
3273 if (RemBB == PreferredLoopExit)
3274 PreferredLoopExit =
nullptr;
3279 auto RemovalCallbackRef =
3280 function_ref<void(MachineBasicBlock *)>(RemovalCallback);
3282 SmallVector<MachineBasicBlock *, 8> DuplicatedPreds;
3284 SmallVector<MachineBasicBlock *, 8> CandidatePreds;
3285 SmallVectorImpl<MachineBasicBlock *> *CandidatePtr =
nullptr;
3286 if (
F->getFunction().hasProfileData()) {
3288 findDuplicateCandidates(CandidatePreds, BB, BlockFilter);
3289 if (CandidatePreds.
size() == 0)
3292 CandidatePtr = &CandidatePreds;
3295 &RemovalCallbackRef, CandidatePtr);
3298 DuplicatedToLPred =
false;
3299 for (MachineBasicBlock *Pred : DuplicatedPreds) {
3301 BlockChain *PredChain = BlockToChain[Pred];
3303 DuplicatedToLPred =
true;
3304 if (Pred == LPred || (BlockFilter && !BlockFilter->count(Pred)) ||
3305 PredChain == &Chain)
3307 for (MachineBasicBlock *NewSucc : Pred->
successors()) {
3308 if (BlockFilter && !BlockFilter->count(NewSucc))
3310 BlockChain *NewChain = BlockToChain[NewSucc];
3311 if (NewChain != &Chain && NewChain != PredChain)
3312 NewChain->UnscheduledPredecessors++;
3322 if (!
MI.isPHI() && !
MI.isMetaInstruction())
3331BlockFrequency MachineBlockPlacement::scaleThreshold(MachineBasicBlock *BB) {
3336bool MachineBlockPlacement::isBestSuccessor(MachineBasicBlock *BB,
3337 MachineBasicBlock *Pred,
3338 BlockFilterSet *BlockFilter) {
3341 if (BlockFilter && !BlockFilter->count(Pred))
3343 BlockChain *PredChain = BlockToChain[Pred];
3344 if (PredChain && (Pred != *std::prev(PredChain->end())))
3349 for (MachineBasicBlock *Succ : Pred->
successors())
3351 if (BlockFilter && !BlockFilter->count(Succ))
3353 BlockChain *SuccChain = BlockToChain[Succ];
3354 if (SuccChain && (Succ != *SuccChain->begin()))
3357 if (SuccProb > BestProb)
3358 BestProb = SuccProb;
3362 if (BBProb <= BestProb)
3367 BlockFrequency PredFreq = getBlockCountOrFrequency(Pred);
3368 BlockFrequency Gain = PredFreq * (BBProb - BestProb);
3369 return Gain > scaleThreshold(BB);
3374void MachineBlockPlacement::findDuplicateCandidates(
3375 SmallVectorImpl<MachineBasicBlock *> &Candidates, MachineBasicBlock *BB,
3376 BlockFilterSet *BlockFilter) {
3377 MachineBasicBlock *Fallthrough =
nullptr;
3379 BlockFrequency BBDupThreshold(scaleThreshold(BB));
3380 SmallVector<MachineBasicBlock *, 8> Preds(BB->
predecessors());
3381 SmallVector<MachineBasicBlock *, 8> Succs(BB->
successors());
3384 auto CmpSucc = [&](MachineBasicBlock *
A, MachineBasicBlock *
B) {
3387 auto CmpPred = [&](MachineBasicBlock *
A, MachineBasicBlock *
B) {
3388 return MBFI->getBlockFreq(
A) > MBFI->getBlockFreq(
B);
3393 auto SuccIt = Succs.begin();
3394 if (SuccIt != Succs.end()) {
3439 for (MachineBasicBlock *Pred : Preds) {
3440 BlockFrequency PredFreq = getBlockCountOrFrequency(Pred);
3445 if (!Fallthrough && isBestSuccessor(BB, Pred, BlockFilter)) {
3447 if (SuccIt != Succs.end())
3453 BlockFrequency OrigCost = PredFreq + PredFreq * DefaultBranchProb;
3454 BlockFrequency DupCost;
3455 if (SuccIt == Succs.end()) {
3457 if (Succs.size() > 0)
3458 DupCost += PredFreq;
3461 DupCost += PredFreq;
3465 assert(OrigCost >= DupCost);
3466 OrigCost -= DupCost;
3467 if (OrigCost > BBDupThreshold) {
3469 if (SuccIt != Succs.end())
3477 if ((Candidates.
size() < Preds.size()) && (Candidates.
size() > 0)) {
3478 Candidates[0] = Candidates.
back();
3484void MachineBlockPlacement::initTailDupThreshold() {
3485 DupThreshold = BlockFrequency(0);
3486 if (
F->getFunction().hasProfileData()) {
3490 UseProfileCount =
true;
3495 BlockFrequency MaxFreq = BlockFrequency(0);
3496 for (MachineBasicBlock &
MBB : *
F) {
3497 BlockFrequency Freq = MBFI->getBlockFreq(&
MBB);
3503 DupThreshold = BlockFrequency(MaxFreq * ThresholdProb);
3504 UseProfileCount =
false;
3516 if (OptLevel >= CodeGenOptLevel::Aggressive) {
3528 (OptLevel < CodeGenOptLevel::Aggressive ||
3530 TailDupSize =
TII->getTailDuplicateSize(OptLevel);
3537 auto MBFI = std::make_unique<MBFIWrapper>(
3540 auto *MPDT = MachineBlockPlacement::allowTailDupPlacement(MF)
3544 .getCachedResult<ProfileSummaryAnalysis>(
3550 MachineBlockPlacement MBP(MBPI, MLI, PSI, std::move(MBFI), MPDT,
3562 OS << MapClassName2PassName(
name());
3563 if (!AllowTailMerge)
3564 OS <<
"<no-tail-merge>";
3570 if (std::next(MF.
begin()) == MF.
end())
3574 OptLevel =
F->getTarget().getOptLevel();
3581 PreferredLoopExit =
nullptr;
3584 "BlockToChain map should be empty before starting placement.");
3586 "Computed Edge map should be empty before starting placement.");
3589 initTailDupThreshold();
3591 const bool OptForSize =
3596 bool UseExtTspForPerf =
false;
3597 bool UseExtTspForSize =
false;
3606 if (allowTailDupPlacement(*
F)) {
3609 const bool PreRegAlloc =
false;
3610 TailDup.
initMF(MF, PreRegAlloc, MBPI, MBFI.get(), PSI,
3612 if (!UseExtTspForSize)
3613 precomputeTriangleChains();
3617 if (!UseExtTspForSize)
3627 if (EnableTailMerge) {
3629 BranchFolder BF(
true,
false,
3637 if (!UseExtTspForSize) {
3639 BlockToChain.
clear();
3640 ComputedEdges.
clear();
3650 if (UseExtTspForPerf || UseExtTspForSize) {
3652 !(UseExtTspForPerf && UseExtTspForSize) &&
3653 "UseExtTspForPerf and UseExtTspForSize can not be set simultaneously");
3654 applyExtTsp(UseExtTspForSize);
3655 createCFGChainExtTsp();
3661 BlockToChain.
clear();
3662 ComputedEdges.
clear();
3671 MBFI->view(
"MBP." + MF.
getName(),
false);
3679void MachineBlockPlacement::applyExtTsp(
bool OptForSize) {
3681 DenseMap<const MachineBasicBlock *, uint64_t> BlockIndex;
3683 std::vector<const MachineBasicBlock *> CurrentBlockOrder;
3684 CurrentBlockOrder.reserve(
F->size());
3685 size_t NumBlocks = 0;
3686 for (
const MachineBasicBlock &
MBB : *
F) {
3687 BlockIndex[&
MBB] = NumBlocks++;
3688 CurrentBlockOrder.push_back(&
MBB);
3691 SmallVector<uint64_t, 0> BlockCounts(
F->size());
3692 SmallVector<uint64_t, 0> BlockSizes(
F->size());
3696 for (MachineBasicBlock &
MBB : *
F) {
3698 BlockFrequency BlockFreq = MBFI->getBlockFreq(&
MBB);
3699 BlockCounts[BlockIndex[&
MBB]] = OptForSize ? 1 : BlockFreq.
getFrequency();
3708 size_t NumInsts = std::distance(NonDbgInsts.begin(), NonDbgInsts.end());
3709 BlockSizes[BlockIndex[&
MBB]] = 4 * NumInsts;
3714 MachineBasicBlock *
TBB =
nullptr, *FBB =
nullptr;
3725 if (FBB && FBB != FTB)
3732 const uint64_t Freq = Succs.
size() == 1 ? 110 : 100;
3733 for (
const MachineBasicBlock *Succ : Succs)
3734 JumpCounts.
push_back({BlockIndex[&
MBB], BlockIndex[Succ], Freq});
3738 BlockFrequency JumpFreq = BlockFreq * EP;
3745 LLVM_DEBUG(
dbgs() <<
"Applying ext-tsp layout for |V| = " <<
F->size()
3746 <<
" with profile = " <<
F->getFunction().hasProfileData()
3747 <<
" (" <<
F->getName() <<
")" <<
"\n");
3754 std::vector<const MachineBasicBlock *> NewBlockOrder;
3755 NewBlockOrder.reserve(
F->size());
3756 for (uint64_t Node : NewOrder) {
3757 NewBlockOrder.push_back(CurrentBlockOrder[Node]);
3759 const double OptScore =
calcExtTspScore(NewOrder, BlockSizes, JumpCounts);
3763 if (OptForSize && OrgScore > OptScore)
3764 assignBlockOrder(CurrentBlockOrder);
3766 assignBlockOrder(NewBlockOrder);
3769void MachineBlockPlacement::assignBlockOrder(
3770 const std::vector<const MachineBasicBlock *> &NewBlockOrder) {
3771 assert(
F->size() == NewBlockOrder.size() &&
"Incorrect size of block order");
3772 F->RenumberBlocks();
3779 bool HasChanges =
false;
3780 for (
size_t I = 0;
I < NewBlockOrder.size();
I++) {
3781 if (NewBlockOrder[
I] !=
F->getBlockNumbered(
I)) {
3790 SmallVector<MachineBasicBlock *, 4> PrevFallThroughs(
F->getNumBlockIDs());
3791 for (
auto &
MBB : *
F) {
3796 DenseMap<const MachineBasicBlock *, size_t> NewIndex;
3797 for (
const MachineBasicBlock *
MBB : NewBlockOrder) {
3798 NewIndex[
MBB] = NewIndex.
size();
3800 F->sort([&](MachineBasicBlock &L, MachineBasicBlock &R) {
3801 return NewIndex[&
L] < NewIndex[&
R];
3806 const TargetInstrInfo *
TII =
F->getSubtarget().getInstrInfo();
3808 for (
auto &
MBB : *
F) {
3815 if (FTMBB && (NextMBB == EndIt || &*NextMBB != FTMBB)) {
3821 MachineBasicBlock *
TBB =
nullptr, *FBB =
nullptr;
3828void MachineBlockPlacement::createCFGChainExtTsp() {
3829 BlockToChain.
clear();
3830 ComputedEdges.
clear();
3833 MachineBasicBlock *HeadBB = &
F->
front();
3834 BlockChain *FunctionChain =
3835 new (ChainAllocator.
Allocate()) BlockChain(BlockToChain, HeadBB);
3837 for (MachineBasicBlock &
MBB : *
F) {
3840 FunctionChain->merge(&
MBB,
nullptr);
3852class MachineBlockPlacementStats {
3854 const MachineBranchProbabilityInfo *MBPI;
3857 const MachineBlockFrequencyInfo *MBFI;
3860 MachineBlockPlacementStats(
const MachineBranchProbabilityInfo *MBPI,
3861 const MachineBlockFrequencyInfo *MBFI)
3862 : MBPI(MBPI), MBFI(MBFI) {}
3863 bool run(MachineFunction &MF);
3866class MachineBlockPlacementStatsLegacy :
public MachineFunctionPass {
3870 MachineBlockPlacementStatsLegacy() : MachineFunctionPass(
ID) {
3875 bool runOnMachineFunction(MachineFunction &
F)
override {
3877 &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
3878 auto *MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
3879 return MachineBlockPlacementStats(MBPI, MBFI).run(
F);
3882 void getAnalysisUsage(AnalysisUsage &AU)
const override {
3883 AU.
addRequired<MachineBranchProbabilityInfoWrapperPass>();
3884 AU.
addRequired<MachineBlockFrequencyInfoWrapperPass>();
3892char MachineBlockPlacementStatsLegacy::ID = 0;
3897 "Basic Block Placement Stats",
false,
false)
3909 MachineBlockPlacementStats(&MBPI, &MBFI).
run(MF);
3915 if (std::next(
F.begin()) ==
F.end())
3924 (
MBB.succ_size() > 1) ? NumCondBranches : NumUncondBranches;
3926 (
MBB.succ_size() > 1) ? CondBranchTakenFreq : UncondBranchTakenFreq;
3929 if (
MBB.isLayoutSuccessor(Succ))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file defines the BumpPtrAllocator interface.
static cl::opt< unsigned > TailMergeSize("tail-merge-size", cl::desc("Min number of instructions to consider tail merging"), cl::init(3), cl::Hidden)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Declares methods and data structures for code layout algorithms.
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
static unsigned InstrCount
This file defines the DenseMap class.
const HexagonInstrInfo * TII
static LoopDeletionResult merge(LoopDeletionResult A, LoopDeletionResult B)
static BranchProbability getAdjustedProbability(BranchProbability OrigProb, BranchProbability AdjustedSumProb)
The helper function returns the branch probability that is adjusted or normalized over the new total ...
static cl::opt< bool > PreciseRotationCost("precise-rotation-cost", cl::desc("Model the cost of loop rotation more " "precisely by using profile data."), cl::init(false), cl::Hidden)
static cl::opt< unsigned > ExtTspBlockPlacementMaxBlocks("ext-tsp-block-placement-max-blocks", cl::desc("Maximum number of basic blocks in a function to run ext-TSP " "block placement."), cl::init(UINT_MAX), cl::Hidden)
static cl::opt< unsigned > AlignAllBlock("align-all-blocks", cl::desc("Force the alignment of all blocks in the function in log2 format " "(e.g 4 means align on 16B boundaries)."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > PredecessorLimit("block-placement-predecessor-limit", cl::desc("For blocks with more predecessors, certain layout optimizations" "will be disabled to prevent quadratic compile time."), cl::init(1000), cl::Hidden)
static BranchProbability getLayoutSuccessorProbThreshold(const MachineBasicBlock *BB)
static cl::opt< bool > ForceLoopColdBlock("force-loop-cold-block", cl::desc("Force outlining cold blocks from loops."), cl::init(false), cl::Hidden)
static cl::opt< unsigned > ExitBlockBias("block-placement-exit-block-bias", cl::desc("Block frequency percentage a loop exit block needs " "over the original exit to be considered the new exit."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > AlignAllNonFallThruBlocks("align-all-nofallthru-blocks", cl::desc("Force the alignment of all blocks that have no fall-through " "predecessors (i.e. don't add nops that are executed). In log2 " "format (e.g 4 means align on 16B boundaries)."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > TailDupPlacementThreshold("tail-dup-placement-threshold", cl::desc("Instruction cutoff for tail duplication during layout. " "Tail merging during layout is forced to have a threshold " "that won't conflict."), cl::init(2), cl::Hidden)
static cl::opt< unsigned > JumpInstCost("jump-inst-cost", cl::desc("Cost of jump instructions."), cl::init(1), cl::Hidden)
static cl::opt< unsigned > TailDupPlacementPenalty("tail-dup-placement-penalty", cl::desc("Cost penalty for blocks that can avoid breaking CFG by copying. " "Copying can increase fallthrough, but it also increases icache " "pressure. This parameter controls the penalty to account for that. " "Percent as integer."), cl::init(2), cl::Hidden)
static bool greaterWithBias(BlockFrequency A, BlockFrequency B, BlockFrequency EntryFreq)
Compare 2 BlockFrequency's with a small penalty for A.
static cl::opt< unsigned > MisfetchCost("misfetch-cost", cl::desc("Cost that models the probabilistic risk of an instruction " "misfetch due to a jump comparing to falling through, whose cost " "is zero."), cl::init(1), cl::Hidden)
static cl::opt< unsigned > MaxBytesForAlignmentOverride("max-bytes-for-alignment", cl::desc("Forces the maximum bytes allowed to be emitted when padding for " "alignment"), cl::init(0), cl::Hidden)
static cl::opt< bool > BranchFoldPlacement("branch-fold-placement", cl::desc("Perform branch folding during placement. " "Reduces code size."), cl::init(true), cl::Hidden)
static cl::opt< unsigned > TailDupProfilePercentThreshold("tail-dup-profile-percent-threshold", cl::desc("If profile count information is used in tail duplication cost " "model, the gained fall through number from tail duplication " "should be at least this percent of hot count."), cl::init(50), cl::Hidden)
static cl::opt< unsigned > TriangleChainCount("triangle-chain-count", cl::desc("Number of triangle-shaped-CFG's that need to be in a row for the " "triangle tail duplication heuristic to kick in. 0 to disable."), cl::init(2), cl::Hidden)
Branch Probability Basic Block static false std::string getBlockName(const MachineBasicBlock *BB)
Helper to print the name of a MBB.
static cl::opt< bool > ApplyExtTspForSize("apply-ext-tsp-for-size", cl::init(false), cl::Hidden, cl::desc("Use ext-tsp for size-aware block placement."))
static bool hasSameSuccessors(MachineBasicBlock &BB, SmallPtrSetImpl< const MachineBasicBlock * > &Successors)
Check if BB has exactly the successors in Successors.
static cl::opt< bool > TailDupPlacement("tail-dup-placement", cl::desc("Perform tail duplication during placement. " "Creates more fallthrough opportunities in " "outline branches."), cl::init(true), cl::Hidden)
static uint64_t countMBBInstruction(MachineBasicBlock *MBB)
static cl::opt< unsigned > LoopToColdBlockRatio("loop-to-cold-block-ratio", cl::desc("Outline loop blocks from loop chain if (frequency of loop) / " "(frequency of block) is greater than this ratio"), cl::init(5), cl::Hidden)
static cl::opt< bool > RenumberBlocksBeforeView("renumber-blocks-before-view", cl::desc("If true, basic blocks are re-numbered before MBP layout is printed " "into a dot graph. Only used when a function is being printed."), cl::init(false), cl::Hidden)
static cl::opt< unsigned > TailDupPlacementAggressiveThreshold("tail-dup-placement-aggressive-threshold", cl::desc("Instruction cutoff for aggressive tail duplication during " "layout. Used at -O3. Tail merging during layout is forced to " "have a threshold that won't conflict."), cl::init(4), cl::Hidden)
static cl::opt< bool > ForcePreciseRotationCost("force-precise-rotation-cost", cl::desc("Force the use of precise cost " "loop rotation strategy."), cl::init(false), cl::Hidden)
static bool hasProfileData(const Function &F, const FunctionOutliningInfo &OI)
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
static BlockFrequency max()
Returns the maximum possible frequency, the saturation value.
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
static BranchProbability getOne()
uint32_t getNumerator() const
BranchProbability getCompl() const
static BranchProbability getZero()
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
bool erase(const KeyT &Val)
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
Module * getParent()
Get the module that this global value is contained inside of...
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
Remove the branching code at the end of the specific MBB.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
Reverses the branch condition of the specified condition list, returning false on success and true if...
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
Insert branch code into the end of the specified MachineBasicBlock.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
unsigned getNumOperands() const
Return number of MDNode operands.
LLVM_ABI StringRef getString() const
unsigned pred_size() const
bool isEHPad() const
Returns true if the block is a landing pad.
instr_iterator instr_begin()
LLVM_ABI MachineBasicBlock * getFallThrough(bool JumpToFallThrough=true)
Return the fallthrough block if the block can implicitly transfer control to the block after it by fa...
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
LLVM_ABI void updateTerminator(MachineBasicBlock *PreviousLayoutSuccessor)
Update the terminator instructions in block to account for changes to block layout which may have bee...
LLVM_ABI bool canFallThrough()
Return true if the block can implicitly transfer control to the block after it by falling off the end...
succ_iterator succ_begin()
unsigned succ_size() const
void setAlignment(Align A)
Set alignment of the basic block.
LLVM_ABI bool isEntryBlock() const
Returns true if this is the entry block of the function.
pred_iterator pred_begin()
succ_reverse_iterator succ_rbegin()
LLVM_ABI bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
LLVM_ABI DebugLoc findBranchDebugLoc()
Find and return the merged DebugLoc of the branch instructions of the block.
iterator_range< succ_iterator > successors()
LLVM_ABI bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
iterator_range< pred_iterator > predecessors()
LLVM_ABI StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
LLVM_ABI Result run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName) const
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
BranchProbability getEdgeProbability(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
void RenumberBlocks(MachineBasicBlock *MBBFrom=nullptr)
RenumberBlocks - This discards all of the MachineBasicBlock numbers and recomputes them.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Representation of each machine instruction.
Analysis pass that exposes the MachineLoopInfo for a machine function.
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
LLVM_ABI uint64_t getOrCompHotCountThreshold() const
Returns HotCountThreshold if set.
typename vector_type::const_iterator iterator
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
reference emplace_back(ArgTypes &&... Args)
iterator erase(const_iterator CI)
typename SuperClass::const_iterator const_iterator
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
T * Allocate(size_t num=1)
Allocate space for an array of objects without constructing them.
void DestroyAll()
Call the destructor of each allocated object and deallocate all but the current slab and reset the cu...
StringRef - Represent a constant reference to a string, i.e.
void initMF(MachineFunction &MF, bool PreRegAlloc, const MachineBranchProbabilityInfo *MBPI, MBFIWrapper *MBFI, ProfileSummaryInfo *PSI, bool LayoutMode, unsigned TailDupSize=0)
Prepare to run on a specific machine function.
bool tailDuplicateAndUpdate(bool IsSimple, MachineBasicBlock *MBB, MachineBasicBlock *ForcedLayoutPred, SmallVectorImpl< MachineBasicBlock * > *DuplicatedPreds=nullptr, function_ref< void(MachineBasicBlock *)> *RemovalCallback=nullptr, SmallVectorImpl< MachineBasicBlock * > *CandidatePtr=nullptr)
Tail duplicate a single basic block into its predecessors, and then clean up.
static bool isSimpleBB(MachineBasicBlock *TailBB)
True if this BB has only one unconditional jump.
bool canTailDuplicate(MachineBasicBlock *TailBB, MachineBasicBlock *PredBB)
Returns true if TailBB can successfully be duplicated into PredBB.
bool shouldTailDuplicate(bool IsSimple, MachineBasicBlock &TailBB)
Determine if it is profitable to duplicate this block.
virtual unsigned getMaxPermittedBytesForAlignment(MachineBasicBlock *MBB) const
Return the maximum amount of bytes allowed to be emitted when padding for alignment.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
virtual bool alignLoopsWithOptSize() const
Should loops be aligned even when the function is marked OptSize (but not MinSize).
bool requiresStructuredCFG() const
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to an std::string.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
initializer< Ty > init(const Ty &Val)
LLVM_ABI double calcExtTspScore(ArrayRef< uint64_t > Order, ArrayRef< uint64_t > NodeSizes, ArrayRef< EdgeCount > EdgeCounts)
Estimate the "quality" of a given node order in CFG.
LLVM_ABI std::vector< uint64_t > computeExtTspLayout(ArrayRef< uint64_t > NodeSizes, ArrayRef< uint64_t > NodeCounts, ArrayRef< EdgeCount > EdgeCounts)
Find a layout of nodes (basic blocks) of a given CFG optimizing jump locality and thus processor I-ca...
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
LLVM_ABI void append(SmallVectorImpl< char > &path, const Twine &a, const Twine &b="", const Twine &c="", const Twine &d="")
Append to path.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
LLVM_ABI void initializeMachineBlockPlacementStatsLegacyPass(PassRegistry &)
void stable_sort(R &&Range)
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
OuterAnalysisManagerProxy< ModuleAnalysisManager, MachineFunction > ModuleAnalysisManagerMachineFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
cl::opt< bool > ApplyExtTspWithoutProfile
constexpr from_range_t from_range
LLVM_ABI bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
cl::opt< unsigned > ProfileLikelyProb
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
cl::opt< std::string > ViewBlockFreqFuncName("view-bfi-func-name", cl::Hidden, cl::desc("The option to specify " "the name of the function " "whose CFG will be displayed."))
auto reverse(ContainerTy &&C)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
cl::opt< GVDAGType > ViewBlockLayoutWithBFI("view-block-layout-with-bfi", cl::Hidden, cl::desc("Pop up a window to show a dag displaying MBP layout and associated " "block frequencies of the CFG."), cl::values(clEnumValN(GVDT_None, "none", "do not display graphs."), clEnumValN(GVDT_Fraction, "fraction", "display a graph using the " "fractional block frequency representation."), clEnumValN(GVDT_Integer, "integer", "display a graph using the raw " "integer fractional block frequency representation."), clEnumValN(GVDT_Count, "count", "display a graph using the real " "profile count if available.")))
bool isFunctionInPrintList(StringRef FunctionName)
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
FunctionAddr VTableAddr Count
CodeGenOptLevel
Code generation optimization level.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
LLVM_ABI void initializeMachineBlockPlacementLegacyPass(PassRegistry &)
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
cl::opt< unsigned > StaticLikelyProb
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
LLVM_ABI Printable printBlockFreq(const BlockFrequencyInfo &BFI, BlockFrequency Freq)
Print the block frequency Freq relative to the current functions entry frequency.
LLVM_ABI char & MachineBlockPlacementID
MachineBlockPlacement - This pass places basic blocks based on branch probabilities.
cl::opt< bool > EnableExtTspBlockPlacement
LLVM_ABI char & MachineBlockPlacementStatsID
MachineBlockPlacementStats - This pass collects statistics about the basic block placement using bran...
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.