79#define DEBUG_TYPE "block-placement"
81STATISTIC(NumCondBranches,
"Number of conditional branches");
82STATISTIC(NumUncondBranches,
"Number of unconditional branches");
84 "Potential frequency of taking conditional branches");
86 "Potential frequency of taking unconditional branches");
90 cl::desc(
"Force the alignment of all blocks in the function in log2 format "
91 "(e.g 4 means align on 16B boundaries)."),
95 "align-all-nofallthru-blocks",
96 cl::desc(
"Force the alignment of all blocks that have no fall-through "
97 "predecessors (i.e. don't add nops that are executed). In log2 "
98 "format (e.g 4 means align on 16B boundaries)."),
102 "max-bytes-for-alignment",
103 cl::desc(
"Forces the maximum bytes allowed to be emitted when padding for "
108 "block-placement-predecessor-limit",
109 cl::desc(
"For blocks with more predecessors, certain layout optimizations"
110 "will be disabled to prevent quadratic compile time."),
115 "block-placement-exit-block-bias",
116 cl::desc(
"Block frequency percentage a loop exit block needs "
117 "over the original exit to be considered the new exit."),
124 "loop-to-cold-block-ratio",
125 cl::desc(
"Outline loop blocks from loop chain if (frequency of loop) / "
126 "(frequency of block) is greater than this ratio"),
131 cl::desc(
"Force outlining cold blocks from loops."),
136 cl::desc(
"Model the cost of loop rotation more "
137 "precisely by using profile data."),
142 cl::desc(
"Force the use of precise cost "
143 "loop rotation strategy."),
148 cl::desc(
"Cost that models the probabilistic risk of an instruction "
149 "misfetch due to a jump comparing to falling through, whose cost "
154 cl::desc(
"Cost of jump instructions."),
158 cl::desc(
"Perform tail duplication during placement. "
159 "Creates more fallthrough opportunities in "
160 "outline branches."),
165 cl::desc(
"Perform branch folding during placement. "
166 "Reduces code size."),
171 "tail-dup-placement-threshold",
172 cl::desc(
"Instruction cutoff for tail duplication during layout. "
173 "Tail merging during layout is forced to have a threshold "
174 "that won't conflict."),
179 "tail-dup-placement-aggressive-threshold",
180 cl::desc(
"Instruction cutoff for aggressive tail duplication during "
181 "layout. Used at -O3. Tail merging during layout is forced to "
182 "have a threshold that won't conflict."),
187 "tail-dup-placement-penalty",
189 "Cost penalty for blocks that can avoid breaking CFG by copying. "
190 "Copying can increase fallthrough, but it also increases icache "
191 "pressure. This parameter controls the penalty to account for that. "
192 "Percent as integer."),
197 "tail-dup-profile-percent-threshold",
198 cl::desc(
"If profile count information is used in tail duplication cost "
199 "model, the gained fall through number from tail duplication "
200 "should be at least this percent of hot count."),
205 "triangle-chain-count",
206 cl::desc(
"Number of triangle-shaped-CFG's that need to be in a row for the "
207 "triangle tail duplication heuristic to kick in. 0 to disable."),
216 "renumber-blocks-before-view",
218 "If true, basic blocks are re-numbered before MBP layout is printed "
219 "into a dot graph. Only used when a function is being printed."),
223 "ext-tsp-block-placement-max-blocks",
224 cl::desc(
"Maximum number of basic blocks in a function to run ext-TSP "
231 cl::desc(
"Use ext-tsp for size-aware block placement."));
280 BlockToChainMapType &BlockToChain;
289 : Blocks(1, BB), BlockToChain(BlockToChain) {
290 assert(BB &&
"Cannot create a chain with a null basic block");
291 BlockToChain[BB] =
this;
307 for (
iterator i = begin(); i != end(); ++i) {
323 assert(BB &&
"Can't merge a null block.");
324 assert(!Blocks.
empty() &&
"Can't merge into an empty chain.");
328 assert(!BlockToChain[BB] &&
329 "Passed chain is null, but BB has entry in BlockToChain.");
331 BlockToChain[BB] =
this;
335 assert(BB == *Chain->begin() &&
"Passed BB is not head of Chain.");
336 assert(Chain->begin() != Chain->end());
342 assert(BlockToChain[ChainBB] == Chain &&
"Incoming blocks not in chain.");
343 BlockToChain[ChainBB] =
this;
364 unsigned UnscheduledPredecessors = 0;
367class MachineBlockPlacement {
369 using BlockFilterSet = SmallSetVector<const MachineBasicBlock *, 16>;
372 struct BlockAndTailDupResult {
373 MachineBasicBlock *BB =
nullptr;
378 struct WeightedEdge {
379 BlockFrequency Weight;
380 MachineBasicBlock *Src =
nullptr;
381 MachineBasicBlock *Dest =
nullptr;
389 DenseMap<const MachineBasicBlock *, BlockAndTailDupResult> ComputedEdges;
392 MachineFunction *F =
nullptr;
395 const MachineBranchProbabilityInfo *MBPI =
nullptr;
398 std::unique_ptr<MBFIWrapper> MBFI;
401 MachineLoopInfo *MLI =
nullptr;
406 MachineBasicBlock *PreferredLoopExit =
nullptr;
409 const TargetInstrInfo *TII =
nullptr;
412 const TargetLoweringBase *TLI =
nullptr;
415 MachinePostDominatorTree *MPDT =
nullptr;
417 ProfileSummaryInfo *PSI =
nullptr;
430 TailDuplicator TailDup;
433 BlockFrequency DupThreshold;
435 unsigned TailDupSize;
439 bool UseProfileCount =
false;
448 SpecificBumpPtrAllocator<BlockChain> ChainAllocator;
456 DenseMap<const MachineBasicBlock *, BlockChain *> BlockToChain;
463 SmallPtrSet<MachineBasicBlock *, 4> BlocksWithUnanalyzableExits;
468 BlockFrequency getBlockCountOrFrequency(
const MachineBasicBlock *BB) {
469 if (UseProfileCount) {
470 auto Count = MBFI->getBlockProfileCount(BB);
472 return BlockFrequency(*
Count);
474 return BlockFrequency(0);
476 return MBFI->getBlockFreq(BB);
480 BlockFrequency scaleThreshold(MachineBasicBlock *BB);
481 void initTailDupThreshold();
485 void markChainSuccessors(
const BlockChain &Chain,
486 const MachineBasicBlock *LoopHeaderBB,
487 const BlockFilterSet *BlockFilter =
nullptr);
491 void markBlockSuccessors(
const BlockChain &Chain,
const MachineBasicBlock *BB,
492 const MachineBasicBlock *LoopHeaderBB,
493 const BlockFilterSet *BlockFilter =
nullptr);
496 collectViableSuccessors(
const MachineBasicBlock *BB,
const BlockChain &Chain,
497 const BlockFilterSet *BlockFilter,
498 SmallVector<MachineBasicBlock *, 4> &Successors);
499 bool isBestSuccessor(MachineBasicBlock *BB, MachineBasicBlock *Pred,
500 BlockFilterSet *BlockFilter);
501 void findDuplicateCandidates(SmallVectorImpl<MachineBasicBlock *> &Candidates,
502 MachineBasicBlock *BB,
503 BlockFilterSet *BlockFilter);
504 bool repeatedlyTailDuplicateBlock(
505 MachineBasicBlock *BB, MachineBasicBlock *&LPred,
506 const MachineBasicBlock *LoopHeaderBB, BlockChain &Chain,
507 BlockFilterSet *BlockFilter,
511 maybeTailDuplicateBlock(MachineBasicBlock *BB, MachineBasicBlock *LPred,
512 BlockChain &Chain, BlockFilterSet *BlockFilter,
515 bool &DuplicatedToLPred);
516 bool hasBetterLayoutPredecessor(
const MachineBasicBlock *BB,
517 const MachineBasicBlock *Succ,
518 const BlockChain &SuccChain,
519 BranchProbability SuccProb,
520 BranchProbability RealSuccProb,
521 const BlockChain &Chain,
522 const BlockFilterSet *BlockFilter);
523 BlockAndTailDupResult selectBestSuccessor(
const MachineBasicBlock *BB,
524 const BlockChain &Chain,
525 const BlockFilterSet *BlockFilter);
527 selectBestCandidateBlock(
const BlockChain &Chain,
528 SmallVectorImpl<MachineBasicBlock *> &WorkList);
530 getFirstUnplacedBlock(
const BlockChain &PlacedChain,
533 getFirstUnplacedBlock(
const BlockChain &PlacedChain,
535 const BlockFilterSet *BlockFilter);
542 void fillWorkLists(
const MachineBasicBlock *
MBB,
543 SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
544 const BlockFilterSet *BlockFilter);
546 void buildChain(
const MachineBasicBlock *BB, BlockChain &Chain,
547 BlockFilterSet *BlockFilter =
nullptr);
548 bool canMoveBottomBlockToTop(
const MachineBasicBlock *BottomBlock,
549 const MachineBasicBlock *OldTop);
550 bool hasViableTopFallthrough(
const MachineBasicBlock *Top,
551 const BlockFilterSet &LoopBlockSet);
552 BlockFrequency TopFallThroughFreq(
const MachineBasicBlock *Top,
553 const BlockFilterSet &LoopBlockSet);
554 BlockFrequency FallThroughGains(
const MachineBasicBlock *NewTop,
555 const MachineBasicBlock *OldTop,
556 const MachineBasicBlock *ExitBB,
557 const BlockFilterSet &LoopBlockSet);
558 MachineBasicBlock *findBestLoopTopHelper(MachineBasicBlock *OldTop,
559 const MachineLoop &L,
560 const BlockFilterSet &LoopBlockSet);
561 MachineBasicBlock *findBestLoopTop(
const MachineLoop &L,
562 const BlockFilterSet &LoopBlockSet);
563 MachineBasicBlock *findBestLoopExit(
const MachineLoop &L,
564 const BlockFilterSet &LoopBlockSet,
565 BlockFrequency &ExitFreq);
566 BlockFilterSet collectLoopBlockSet(
const MachineLoop &L);
567 void buildLoopChains(
const MachineLoop &L);
568 void rotateLoop(BlockChain &LoopChain,
const MachineBasicBlock *ExitingBB,
569 BlockFrequency ExitFreq,
const BlockFilterSet &LoopBlockSet);
570 void rotateLoopWithProfile(BlockChain &LoopChain,
const MachineLoop &L,
571 const BlockFilterSet &LoopBlockSet);
572 void buildCFGChains();
573 void optimizeBranches();
577 bool shouldTailDuplicate(MachineBasicBlock *BB);
580 bool isProfitableToTailDup(
const MachineBasicBlock *BB,
581 const MachineBasicBlock *Succ,
582 BranchProbability QProb,
const BlockChain &Chain,
583 const BlockFilterSet *BlockFilter);
586 bool isTrellis(
const MachineBasicBlock *BB,
587 const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
588 const BlockChain &Chain,
const BlockFilterSet *BlockFilter);
591 BlockAndTailDupResult getBestTrellisSuccessor(
592 const MachineBasicBlock *BB,
593 const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
594 BranchProbability AdjustedSumProb,
const BlockChain &Chain,
595 const BlockFilterSet *BlockFilter);
598 static std::pair<WeightedEdge, WeightedEdge> getBestNonConflictingEdges(
599 const MachineBasicBlock *BB,
604 bool canTailDuplicateUnplacedPreds(
const MachineBasicBlock *BB,
605 MachineBasicBlock *Succ,
606 const BlockChain &Chain,
607 const BlockFilterSet *BlockFilter);
611 void precomputeTriangleChains();
614 void applyExtTsp(
bool OptForSize);
617 void assignBlockOrder(
const std::vector<const MachineBasicBlock *> &NewOrder);
620 void createCFGChainExtTsp();
623 MachineBlockPlacement(
const MachineBranchProbabilityInfo *MBPI,
624 MachineLoopInfo *MLI, ProfileSummaryInfo *PSI,
625 std::unique_ptr<MBFIWrapper> MBFI,
626 MachinePostDominatorTree *MPDT,
bool AllowTailMerge)
627 : MBPI(MBPI), MBFI(std::
move(MBFI)), MLI(MLI), MPDT(MPDT), PSI(PSI),
628 AllowTailMerge(AllowTailMerge) {};
630 bool run(MachineFunction &F);
632 static bool allowTailDupPlacement(MachineFunction &MF) {
641 MachineBlockPlacementLegacy() : MachineFunctionPass(ID) {}
643 bool runOnMachineFunction(MachineFunction &MF)
override {
648 &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
649 auto MBFI = std::make_unique<MBFIWrapper>(
650 getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI());
651 auto *MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
652 auto *MPDT = MachineBlockPlacement::allowTailDupPlacement(MF)
653 ? &getAnalysis<MachinePostDominatorTreeWrapperPass>()
656 auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
657 auto *PassConfig = &getAnalysis<TargetPassConfig>();
658 bool AllowTailMerge = PassConfig->getEnableTailMerge();
659 return MachineBlockPlacement(MBPI, MLI, PSI, std::move(MBFI), MPDT,
664 void getAnalysisUsage(AnalysisUsage &AU)
const override {
665 AU.
addRequired<MachineBranchProbabilityInfoWrapperPass>();
666 AU.
addRequired<MachineBlockFrequencyInfoWrapperPass>();
668 AU.
addRequired<MachinePostDominatorTreeWrapperPass>();
678char MachineBlockPlacementLegacy::ID = 0;
683 "Branch Probability Basic Block Placement",
false,
false)
700 OS <<
" ('" << BB->
getName() <<
"')";
711void MachineBlockPlacement::markChainSuccessors(
713 const BlockFilterSet *BlockFilter) {
716 for (MachineBasicBlock *
MBB : Chain) {
717 markBlockSuccessors(Chain,
MBB, LoopHeaderBB, BlockFilter);
727void MachineBlockPlacement::markBlockSuccessors(
728 const BlockChain &Chain,
const MachineBasicBlock *
MBB,
729 const MachineBasicBlock *LoopHeaderBB,
const BlockFilterSet *BlockFilter) {
735 if (BlockFilter && !BlockFilter->count(Succ))
737 BlockChain &SuccChain = *BlockToChain[Succ];
739 if (&Chain == &SuccChain || Succ == LoopHeaderBB)
744 if (SuccChain.UnscheduledPredecessors == 0 ||
745 --SuccChain.UnscheduledPredecessors > 0)
748 auto *NewBB = *SuccChain.begin();
749 if (NewBB->isEHPad())
760BranchProbability MachineBlockPlacement::collectViableSuccessors(
761 const MachineBasicBlock *BB,
const BlockChain &Chain,
762 const BlockFilterSet *BlockFilter,
763 SmallVector<MachineBasicBlock *, 4> &Successors) {
781 for (MachineBasicBlock *Succ : BB->
successors()) {
782 bool SkipSucc =
false;
783 if (Succ->isEHPad() || (BlockFilter && !BlockFilter->count(Succ))) {
786 BlockChain *SuccChain = BlockToChain[Succ];
787 if (SuccChain == &Chain) {
789 }
else if (Succ != *SuccChain->begin()) {
791 <<
" -> Mid chain!\n");
801 return AdjustedSumProb;
806static BranchProbability
812 if (SuccProbN >= SuccProbD)
827 if (Successors.
count(&BB))
830 if (!Successors.
count(Succ))
838bool MachineBlockPlacement::shouldTailDuplicate(MachineBasicBlock *BB) {
859 return (Gain / ThresholdProb) >= EntryFreq;
867bool MachineBlockPlacement::isProfitableToTailDup(
868 const MachineBasicBlock *BB,
const MachineBasicBlock *Succ,
869 BranchProbability QProb,
const BlockChain &Chain,
870 const BlockFilterSet *BlockFilter) {
894 MachineBasicBlock *PDom =
nullptr;
895 SmallVector<MachineBasicBlock *, 4> SuccSuccs;
897 auto AdjustedSuccSumProb =
898 collectViableSuccessors(Succ, Chain, BlockFilter, SuccSuccs);
900 auto BBFreq = MBFI->getBlockFreq(BB);
901 auto SuccFreq = MBFI->getBlockFreq(Succ);
902 BlockFrequency
P =
BBFreq * PProb;
903 BlockFrequency Qout =
BBFreq * QProb;
904 BlockFrequency EntryFreq = MBFI->getEntryFreq();
907 if (SuccSuccs.
size() == 0)
912 for (MachineBasicBlock *SuccSucc : SuccSuccs) {
914 if (Prob > BestSuccSucc)
924 auto SuccBestPred = BlockFrequency(0);
925 for (MachineBasicBlock *SuccPred : Succ->
predecessors()) {
926 if (SuccPred == Succ || SuccPred == BB ||
927 BlockToChain[SuccPred] == &Chain ||
928 (BlockFilter && !BlockFilter->count(SuccPred)))
932 if (Freq > SuccBestPred)
936 BlockFrequency Qin = SuccBestPred;
957 BranchProbability UProb = BestSuccSucc;
958 BranchProbability VProb = AdjustedSuccSumProb - UProb;
959 BlockFrequency
F = SuccFreq - Qin;
960 BlockFrequency
V = SuccFreq * VProb;
961 BlockFrequency QinU = std::min(Qin,
F) * UProb;
962 BlockFrequency BaseCost =
P +
V;
963 BlockFrequency DupCost = Qout + QinU + std::max(Qin,
F) * VProb;
967 BranchProbability VProb = AdjustedSuccSumProb - UProb;
968 BlockFrequency
U = SuccFreq * UProb;
969 BlockFrequency
V = SuccFreq * VProb;
970 BlockFrequency
F = SuccFreq - Qin;
1000 if (UProb > AdjustedSuccSumProb / 2 &&
1001 !hasBetterLayoutPredecessor(Succ, PDom, *BlockToChain[PDom], UProb, UProb,
1002 Chain, BlockFilter))
1005 (
P + V), (Qout + std::max(Qin,
F) * VProb + std::min(Qin,
F) * UProb),
1009 (Qout + std::min(Qin,
F) * AdjustedSuccSumProb +
1010 std::max(Qin,
F) * UProb),
1021bool MachineBlockPlacement::isTrellis(
1022 const MachineBasicBlock *BB,
1023 const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
1024 const BlockChain &Chain,
const BlockFilterSet *BlockFilter) {
1033 SmallPtrSet<const MachineBasicBlock *, 8> SeenPreds;
1035 for (MachineBasicBlock *Succ : ViableSuccs) {
1044 if (Successors.count(SuccPred)) {
1046 for (MachineBasicBlock *CheckSucc : SuccPred->successors())
1047 if (!Successors.count(CheckSucc))
1051 const BlockChain *PredChain = BlockToChain[SuccPred];
1052 if (SuccPred == BB || (BlockFilter && !BlockFilter->count(SuccPred)) ||
1053 PredChain == &Chain || PredChain == BlockToChain[Succ])
1057 if (!SeenPreds.
insert(SuccPred).second)
1075std::pair<MachineBlockPlacement::WeightedEdge,
1076 MachineBlockPlacement::WeightedEdge>
1077MachineBlockPlacement::getBestNonConflictingEdges(
1078 const MachineBasicBlock *BB,
1088 auto Cmp = [](WeightedEdge
A, WeightedEdge
B) {
return A.Weight >
B.Weight; };
1092 auto BestA = Edges[0].begin();
1093 auto BestB = Edges[1].begin();
1096 if (BestA->Src == BestB->Src) {
1098 auto SecondBestA = std::next(BestA);
1099 auto SecondBestB = std::next(BestB);
1100 BlockFrequency BestAScore = BestA->Weight + SecondBestB->Weight;
1101 BlockFrequency BestBScore = BestB->Weight + SecondBestA->Weight;
1102 if (BestAScore < BestBScore)
1103 BestA = SecondBestA;
1105 BestB = SecondBestB;
1108 if (BestB->Src == BB)
1110 return std::make_pair(*BestA, *BestB);
1120MachineBlockPlacement::BlockAndTailDupResult
1121MachineBlockPlacement::getBestTrellisSuccessor(
1122 const MachineBasicBlock *BB,
1123 const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
1124 BranchProbability AdjustedSumProb,
const BlockChain &Chain,
1125 const BlockFilterSet *BlockFilter) {
1127 BlockAndTailDupResult
Result = {
nullptr,
false};
1134 if (Successors.
size() != 2 || ViableSuccs.
size() != 2)
1140 for (
auto *Succ : ViableSuccs) {
1141 for (MachineBasicBlock *SuccPred : Succ->predecessors()) {
1143 if (SuccPred != BB) {
1144 if (BlockFilter && !BlockFilter->count(SuccPred))
1146 const BlockChain *SuccPredChain = BlockToChain[SuccPred];
1147 if (SuccPredChain == &Chain || SuccPredChain == BlockToChain[Succ])
1150 BlockFrequency EdgeFreq = MBFI->getBlockFreq(SuccPred) *
1152 Edges[SuccIndex].
push_back({EdgeFreq, SuccPred, Succ});
1158 WeightedEdge BestA, BestB;
1159 std::tie(BestA, BestB) = getBestNonConflictingEdges(BB, Edges);
1161 if (BestA.Src != BB) {
1165 LLVM_DEBUG(
dbgs() <<
"Trellis, but not one of the chosen edges.\n");
1172 if (BestA.Dest == BestB.Src) {
1175 MachineBasicBlock *Succ1 = BestA.Dest;
1176 MachineBasicBlock *Succ2 = BestB.Dest;
1178 if (allowTailDupPlacement(*
F) && shouldTailDuplicate(Succ2) &&
1179 canTailDuplicateUnplacedPreds(BB, Succ2, Chain, BlockFilter) &&
1181 Chain, BlockFilter)) {
1185 <<
", probability: " << Succ2Prob
1186 <<
" (Tail Duplicate)\n");
1188 Result.ShouldTailDup =
true;
1194 ComputedEdges[BestB.Src] = {BestB.Dest,
false};
1196 auto TrellisSucc = BestA.Dest;
1200 <<
", probability: " << SuccProb <<
" (Trellis)\n");
1208bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
1209 const MachineBasicBlock *BB, MachineBasicBlock *Succ,
1210 const BlockChain &Chain,
const BlockFilterSet *BlockFilter) {
1211 if (!shouldTailDuplicate(Succ))
1215 bool Duplicate =
true;
1217 unsigned int NumDup = 0;
1226 if (Pred == BB || (BlockFilter && !BlockFilter->count(Pred)) ||
1227 (BlockToChain[Pred] == &Chain && !Succ->
succ_empty()))
1277 if (
F->getFunction().hasProfileData())
1308 if ((NumDup > Succ->
succ_size()) || !Duplicate)
1331void MachineBlockPlacement::precomputeTriangleChains() {
1332 struct TriangleChain {
1333 std::vector<MachineBasicBlock *> Edges;
1335 TriangleChain(MachineBasicBlock *src, MachineBasicBlock *dst)
1336 : Edges({src, dst}) {}
1338 void append(MachineBasicBlock *dst) {
1339 assert(getKey()->isSuccessor(dst) &&
1340 "Attempting to append a block that is not a successor.");
1341 Edges.push_back(dst);
1344 unsigned count()
const {
return Edges.size() - 1; }
1346 MachineBasicBlock *getKey()
const {
return Edges.back(); }
1355 DenseMap<const MachineBasicBlock *, TriangleChain> TriangleChainMap;
1356 for (MachineBasicBlock &BB : *
F) {
1360 MachineBasicBlock *PDom =
nullptr;
1361 for (MachineBasicBlock *Succ : BB.
successors()) {
1369 if (PDom ==
nullptr)
1376 if (!shouldTailDuplicate(PDom))
1378 bool CanTailDuplicate =
true;
1385 CanTailDuplicate =
false;
1391 if (!CanTailDuplicate)
1398 auto Found = TriangleChainMap.
find(&BB);
1401 if (Found != TriangleChainMap.
end()) {
1402 TriangleChain Chain = std::move(Found->second);
1403 TriangleChainMap.
erase(Found);
1405 TriangleChainMap.
insert(std::make_pair(Chain.getKey(), std::move(Chain)));
1407 auto InsertResult = TriangleChainMap.
try_emplace(PDom, &BB, PDom);
1408 assert(InsertResult.second &&
"Block seen twice.");
1416 for (
auto &ChainPair : TriangleChainMap) {
1417 TriangleChain &Chain = ChainPair.second;
1423 MachineBasicBlock *dst = Chain.Edges.back();
1424 Chain.Edges.pop_back();
1425 for (MachineBasicBlock *src :
reverse(Chain.Edges)) {
1428 <<
" as pre-computed based on triangles.\n");
1430 auto InsertResult = ComputedEdges.
insert({src, {dst,
true}});
1431 assert(InsertResult.second &&
"Block seen twice.");
1441static BranchProbability
1473bool MachineBlockPlacement::hasBetterLayoutPredecessor(
1474 const MachineBasicBlock *BB,
const MachineBasicBlock *Succ,
1475 const BlockChain &SuccChain, BranchProbability SuccProb,
1476 BranchProbability RealSuccProb,
const BlockChain &Chain,
1477 const BlockFilterSet *BlockFilter) {
1480 if (SuccChain.UnscheduledPredecessors == 0)
1605 BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb;
1606 bool BadCFGConflict =
false;
1609 BlockChain *PredChain = BlockToChain[Pred];
1610 if (Pred == Succ || PredChain == &SuccChain ||
1611 (BlockFilter && !BlockFilter->count(Pred)) || PredChain == &Chain ||
1612 Pred != *std::prev(PredChain->end()) ||
1631 BlockFrequency PredEdgeFreq =
1633 if (PredEdgeFreq * HotProb >= CandidateEdgeFreq * HotProb.
getCompl()) {
1634 BadCFGConflict =
true;
1639 if (BadCFGConflict) {
1641 << SuccProb <<
" (prob) (non-cold CFG conflict)\n");
1658MachineBlockPlacement::BlockAndTailDupResult
1659MachineBlockPlacement::selectBestSuccessor(
const MachineBasicBlock *BB,
1660 const BlockChain &Chain,
1661 const BlockFilterSet *BlockFilter) {
1664 BlockAndTailDupResult BestSucc = {
nullptr,
false};
1667 SmallVector<MachineBasicBlock *, 4> Successors;
1668 auto AdjustedSumProb =
1669 collectViableSuccessors(BB, Chain, BlockFilter, Successors);
1676 auto FoundEdge = ComputedEdges.
find(BB);
1677 if (FoundEdge != ComputedEdges.
end()) {
1678 MachineBasicBlock *Succ = FoundEdge->second.BB;
1679 ComputedEdges.
erase(FoundEdge);
1680 BlockChain *SuccChain = BlockToChain[Succ];
1681 if (BB->
isSuccessor(Succ) && (!BlockFilter || BlockFilter->count(Succ)) &&
1682 SuccChain != &Chain && Succ == *SuccChain->begin())
1683 return FoundEdge->second;
1688 if (isTrellis(BB, Successors, Chain, BlockFilter))
1689 return getBestTrellisSuccessor(BB, Successors, AdjustedSumProb, Chain,
1697 for (MachineBasicBlock *Succ : Successors) {
1699 BranchProbability SuccProb =
1702 BlockChain &SuccChain = *BlockToChain[Succ];
1705 if (hasBetterLayoutPredecessor(BB, Succ, SuccChain, SuccProb, RealSuccProb,
1706 Chain, BlockFilter)) {
1708 if (allowTailDupPlacement(*
F) && shouldTailDuplicate(Succ))
1715 <<
", probability: " << SuccProb
1716 << (SuccChain.UnscheduledPredecessors != 0 ?
" (CFG break)" :
"")
1719 if (BestSucc.BB && BestProb >= SuccProb) {
1726 BestProb = SuccProb;
1734 [](std::tuple<BranchProbability, MachineBasicBlock *> L,
1735 std::tuple<BranchProbability, MachineBasicBlock *> R) {
1736 return std::get<0>(L) > std::get<0>(R);
1738 for (
auto &Tup : DupCandidates) {
1739 BranchProbability DupProb;
1740 MachineBasicBlock *Succ;
1741 std::tie(DupProb, Succ) = Tup;
1742 if (DupProb < BestProb)
1744 if (canTailDuplicateUnplacedPreds(BB, Succ, Chain, BlockFilter) &&
1745 (isProfitableToTailDup(BB, Succ, BestProb, Chain, BlockFilter))) {
1747 <<
", probability: " << DupProb
1748 <<
" (Tail Duplicate)\n");
1750 BestSucc.ShouldTailDup =
true;
1771MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
1772 const BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList) {
1778 return BlockToChain.
lookup(BB) == &Chain;
1781 if (WorkList.
empty())
1784 bool IsEHPad = WorkList[0]->isEHPad();
1786 MachineBasicBlock *BestBlock =
nullptr;
1787 BlockFrequency BestFreq;
1788 for (MachineBasicBlock *
MBB : WorkList) {
1790 "EHPad mismatch between block and work list.");
1792 BlockChain &SuccChain = *BlockToChain[
MBB];
1793 if (&SuccChain == &Chain)
1796 assert(SuccChain.UnscheduledPredecessors == 0 &&
1797 "Found CFG-violating block");
1799 BlockFrequency CandidateFreq = MBFI->getBlockFreq(
MBB);
1822 if (BestBlock && (IsEHPad ^ (BestFreq >= CandidateFreq)))
1826 BestFreq = CandidateFreq;
1839MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
1840 const BlockChain &PlacedChain,
1845 if (BlockChain *Chain = BlockToChain[&*
I]; Chain != &PlacedChain) {
1846 PrevUnplacedBlockIt =
I;
1850 return *Chain->begin();
1866MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
1867 const BlockChain &PlacedChain,
1868 BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt,
1869 const BlockFilterSet *BlockFilter) {
1871 for (; PrevUnplacedBlockInFilterIt != BlockFilter->end();
1872 ++PrevUnplacedBlockInFilterIt) {
1873 BlockChain *
C = BlockToChain[*PrevUnplacedBlockInFilterIt];
1874 if (
C != &PlacedChain) {
1881void MachineBlockPlacement::fillWorkLists(
1882 const MachineBasicBlock *
MBB, SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
1883 const BlockFilterSet *BlockFilter =
nullptr) {
1884 BlockChain &Chain = *BlockToChain[
MBB];
1885 if (!UpdatedPreds.
insert(&Chain).second)
1889 Chain.UnscheduledPredecessors == 0 &&
1890 "Attempting to place block with unscheduled predecessors in worklist.");
1891 for (MachineBasicBlock *ChainBB : Chain) {
1892 assert(BlockToChain[ChainBB] == &Chain &&
1893 "Block in chain doesn't match BlockToChain map.");
1894 for (MachineBasicBlock *Pred : ChainBB->predecessors()) {
1895 if (BlockFilter && !BlockFilter->count(Pred))
1897 if (BlockToChain[Pred] == &Chain)
1899 ++Chain.UnscheduledPredecessors;
1903 if (Chain.UnscheduledPredecessors != 0)
1906 MachineBasicBlock *BB = *Chain.
begin();
1913void MachineBlockPlacement::buildChain(
const MachineBasicBlock *HeadBB,
1915 BlockFilterSet *BlockFilter) {
1916 assert(HeadBB &&
"BB must not be null.\n");
1917 assert(BlockToChain[HeadBB] == &Chain &&
"BlockToChainMap mis-match.\n");
1919 BlockFilterSet::iterator PrevUnplacedBlockInFilterIt;
1921 PrevUnplacedBlockInFilterIt = BlockFilter->begin();
1923 const MachineBasicBlock *LoopHeaderBB = HeadBB;
1924 markChainSuccessors(Chain, LoopHeaderBB, BlockFilter);
1925 MachineBasicBlock *BB = *std::prev(Chain.end());
1927 assert(BB &&
"null block found at end of chain in loop.");
1928 assert(BlockToChain[BB] == &Chain &&
"BlockToChainMap mis-match in loop.");
1929 assert(*std::prev(Chain.end()) == BB &&
"BB Not found at end of chain.");
1933 auto Result = selectBestSuccessor(BB, Chain, BlockFilter);
1934 MachineBasicBlock *BestSucc =
Result.BB;
1935 bool ShouldTailDup =
Result.ShouldTailDup;
1936 if (allowTailDupPlacement(*
F))
1937 ShouldTailDup |= (BestSucc && canTailDuplicateUnplacedPreds(
1938 BB, BestSucc, Chain, BlockFilter));
1944 BestSucc = selectBestCandidateBlock(Chain, BlockWorkList);
1946 BestSucc = selectBestCandidateBlock(Chain, EHPadWorkList);
1950 BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockInFilterIt,
1953 BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockIt);
1957 LLVM_DEBUG(
dbgs() <<
"Unnatural loop CFG detected, forcibly merging the "
1958 "layout successor until the CFG reduces\n");
1963 if (allowTailDupPlacement(*
F) && BestSucc && ShouldTailDup) {
1964 repeatedlyTailDuplicateBlock(BestSucc, BB, LoopHeaderBB, Chain,
1965 BlockFilter, PrevUnplacedBlockIt,
1966 PrevUnplacedBlockInFilterIt);
1974 BlockChain &SuccChain = *BlockToChain[BestSucc];
1977 SuccChain.UnscheduledPredecessors = 0;
1980 markChainSuccessors(SuccChain, LoopHeaderBB, BlockFilter);
1981 Chain.merge(BestSucc, &SuccChain);
1982 BB = *std::prev(Chain.end());
2003bool MachineBlockPlacement::canMoveBottomBlockToTop(
2004 const MachineBasicBlock *BottomBlock,
const MachineBasicBlock *OldTop) {
2007 MachineBasicBlock *Pred = *BottomBlock->
pred_begin();
2011 MachineBasicBlock *OtherBB = *Pred->
succ_begin();
2012 if (OtherBB == BottomBlock)
2014 if (OtherBB == OldTop)
2022MachineBlockPlacement::TopFallThroughFreq(
const MachineBasicBlock *Top,
2023 const BlockFilterSet &LoopBlockSet) {
2024 BlockFrequency MaxFreq = BlockFrequency(0);
2026 BlockChain *PredChain = BlockToChain[Pred];
2027 if (!LoopBlockSet.count(Pred) &&
2028 (!PredChain || Pred == *std::prev(PredChain->end()))) {
2033 for (MachineBasicBlock *Succ : Pred->
successors()) {
2035 BlockChain *SuccChain = BlockToChain[Succ];
2038 if (!LoopBlockSet.count(Succ) && (SuccProb > TopProb) &&
2039 (!SuccChain || Succ == *SuccChain->begin())) {
2045 BlockFrequency EdgeFreq =
2047 if (EdgeFreq > MaxFreq)
2076BlockFrequency MachineBlockPlacement::FallThroughGains(
2077 const MachineBasicBlock *NewTop,
const MachineBasicBlock *OldTop,
2078 const MachineBasicBlock *ExitBB,
const BlockFilterSet &LoopBlockSet) {
2079 BlockFrequency FallThrough2Top = TopFallThroughFreq(OldTop, LoopBlockSet);
2080 BlockFrequency FallThrough2Exit = BlockFrequency(0);
2084 BlockFrequency BackEdgeFreq =
2088 MachineBasicBlock *BestPred =
nullptr;
2089 BlockFrequency FallThroughFromPred = BlockFrequency(0);
2090 for (MachineBasicBlock *Pred : NewTop->
predecessors()) {
2091 if (!LoopBlockSet.count(Pred))
2093 BlockChain *PredChain = BlockToChain[Pred];
2094 if (!PredChain || Pred == *std::prev(PredChain->end())) {
2095 BlockFrequency EdgeFreq =
2097 if (EdgeFreq > FallThroughFromPred) {
2098 FallThroughFromPred = EdgeFreq;
2106 BlockFrequency NewFreq = BlockFrequency(0);
2108 for (MachineBasicBlock *Succ : BestPred->
successors()) {
2109 if ((Succ == NewTop) || (Succ == BestPred) || !LoopBlockSet.count(Succ))
2113 BlockChain *SuccChain = BlockToChain[Succ];
2114 if ((SuccChain && (Succ != *SuccChain->begin())) ||
2115 (SuccChain == BlockToChain[BestPred]))
2117 BlockFrequency EdgeFreq = MBFI->getBlockFreq(BestPred) *
2119 if (EdgeFreq > NewFreq)
2122 BlockFrequency OrigEdgeFreq = MBFI->getBlockFreq(BestPred) *
2124 if (NewFreq > OrigEdgeFreq) {
2128 NewFreq = BlockFrequency(0);
2129 FallThroughFromPred = BlockFrequency(0);
2133 BlockFrequency
Result = BlockFrequency(0);
2134 BlockFrequency Gains = BackEdgeFreq + NewFreq;
2135 BlockFrequency Lost =
2136 FallThrough2Top + FallThrough2Exit + FallThroughFromPred;
2164MachineBasicBlock *MachineBlockPlacement::findBestLoopTopHelper(
2165 MachineBasicBlock *OldTop,
const MachineLoop &L,
2166 const BlockFilterSet &LoopBlockSet) {
2170 BlockChain &HeaderChain = *BlockToChain[OldTop];
2171 if (!LoopBlockSet.count(*HeaderChain.begin()))
2173 if (OldTop != *HeaderChain.begin())
2179 BlockFrequency BestGains = BlockFrequency(0);
2180 MachineBasicBlock *BestPred =
nullptr;
2181 for (MachineBasicBlock *Pred : OldTop->
predecessors()) {
2182 if (!LoopBlockSet.count(Pred))
2184 if (Pred ==
L.getHeader())
2192 MachineBasicBlock *OtherBB =
nullptr;
2195 if (OtherBB == OldTop)
2199 if (!canMoveBottomBlockToTop(Pred, OldTop))
2202 BlockFrequency Gains =
2203 FallThroughGains(Pred, OldTop, OtherBB, LoopBlockSet);
2204 if ((Gains > BlockFrequency(0)) &&
2205 (Gains > BestGains ||
2220 (*BestPred->
pred_begin())->succ_size() == 1 &&
2233MachineBlockPlacement::findBestLoopTop(
const MachineLoop &L,
2234 const BlockFilterSet &LoopBlockSet) {
2243 return L.getHeader();
2245 MachineBasicBlock *OldTop =
nullptr;
2246 MachineBasicBlock *NewTop =
L.getHeader();
2247 while (NewTop != OldTop) {
2249 NewTop = findBestLoopTopHelper(OldTop, L, LoopBlockSet);
2250 if (NewTop != OldTop)
2251 ComputedEdges[NewTop] = {OldTop,
false};
2262MachineBlockPlacement::findBestLoopExit(
const MachineLoop &L,
2263 const BlockFilterSet &LoopBlockSet,
2264 BlockFrequency &ExitFreq) {
2273 BlockChain &HeaderChain = *BlockToChain[
L.getHeader()];
2274 if (!LoopBlockSet.count(*HeaderChain.begin()))
2277 BlockFrequency BestExitEdgeFreq;
2278 unsigned BestExitLoopDepth = 0;
2279 MachineBasicBlock *ExitingBB =
nullptr;
2283 SmallPtrSet<MachineBasicBlock *, 4> BlocksExitingToOuterLoop;
2287 for (MachineBasicBlock *
MBB :
L.getBlocks()) {
2288 BlockChain &Chain = *BlockToChain[
MBB];
2291 if (
MBB != *std::prev(Chain.end()))
2298 MachineBasicBlock *OldExitingBB = ExitingBB;
2299 BlockFrequency OldBestExitEdgeFreq = BestExitEdgeFreq;
2300 bool HasLoopingSucc =
false;
2306 BlockChain &SuccChain = *BlockToChain[Succ];
2308 if (&Chain == &SuccChain) {
2315 if (LoopBlockSet.count(Succ)) {
2318 HasLoopingSucc =
true;
2322 unsigned SuccLoopDepth = 0;
2323 if (MachineLoop *ExitLoop = MLI->
getLoopFor(Succ)) {
2324 SuccLoopDepth = ExitLoop->getLoopDepth();
2325 if (ExitLoop->contains(&L))
2329 BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(
MBB) * SuccProb;
2332 <<
getBlockName(Succ) <<
" [L:" << SuccLoopDepth <<
"] ("
2339 if (!ExitingBB || SuccLoopDepth > BestExitLoopDepth ||
2340 ExitEdgeFreq > BestExitEdgeFreq ||
2342 !(ExitEdgeFreq < BestExitEdgeFreq * Bias))) {
2343 BestExitEdgeFreq = ExitEdgeFreq;
2348 if (!HasLoopingSucc) {
2350 ExitingBB = OldExitingBB;
2351 BestExitEdgeFreq = OldBestExitEdgeFreq;
2358 dbgs() <<
" No other candidate exit blocks, using loop header\n");
2361 if (
L.getNumBlocks() == 1) {
2362 LLVM_DEBUG(
dbgs() <<
" Loop has 1 block, using loop header as exit\n");
2369 if (!BlocksExitingToOuterLoop.
empty() &&
2370 !BlocksExitingToOuterLoop.
count(ExitingBB))
2375 ExitFreq = BestExitEdgeFreq;
2383bool MachineBlockPlacement::hasViableTopFallthrough(
2384 const MachineBasicBlock *Top,
const BlockFilterSet &LoopBlockSet) {
2386 BlockChain *PredChain = BlockToChain[Pred];
2387 if (!LoopBlockSet.count(Pred) &&
2388 (!PredChain || Pred == *std::prev(PredChain->end()))) {
2393 for (MachineBasicBlock *Succ : Pred->
successors()) {
2395 BlockChain *SuccChain = BlockToChain[Succ];
2398 if ((!SuccChain || Succ == *SuccChain->begin()) && SuccProb > TopProb) {
2416void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
2417 const MachineBasicBlock *ExitingBB,
2418 BlockFrequency ExitFreq,
2419 const BlockFilterSet &LoopBlockSet) {
2423 MachineBasicBlock *Top = *LoopChain.begin();
2424 MachineBasicBlock *Bottom = *std::prev(LoopChain.end());
2427 if (Bottom == ExitingBB)
2434 bool ViableTopFallthrough = hasViableTopFallthrough(Top, LoopBlockSet);
2439 if (ViableTopFallthrough) {
2440 for (MachineBasicBlock *Succ : Bottom->
successors()) {
2441 BlockChain *SuccChain = BlockToChain[Succ];
2442 if (!LoopBlockSet.count(Succ) &&
2443 (!SuccChain || Succ == *SuccChain->begin()))
2449 BlockFrequency FallThrough2Top = TopFallThroughFreq(Top, LoopBlockSet);
2450 if (FallThrough2Top >= ExitFreq)
2454 BlockChain::iterator ExitIt =
llvm::find(LoopChain, ExitingBB);
2455 if (ExitIt == LoopChain.end())
2477 if (ViableTopFallthrough) {
2478 assert(std::next(ExitIt) != LoopChain.end() &&
2479 "Exit should not be last BB");
2480 MachineBasicBlock *NextBlockInChain = *std::next(ExitIt);
2488 std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end());
2504void MachineBlockPlacement::rotateLoopWithProfile(
2505 BlockChain &LoopChain,
const MachineLoop &L,
2506 const BlockFilterSet &LoopBlockSet) {
2507 auto RotationPos = LoopChain.end();
2508 MachineBasicBlock *ChainHeaderBB = *LoopChain.begin();
2518 auto ScaleBlockFrequency = [](BlockFrequency Freq,
2519 unsigned Scale) -> BlockFrequency {
2521 return BlockFrequency(0);
2524 return Freq / BranchProbability(1, Scale);
2530 BlockFrequency HeaderFallThroughCost(0);
2532 BlockChain *PredChain = BlockToChain[Pred];
2533 if (!LoopBlockSet.count(Pred) &&
2534 (!PredChain || Pred == *std::prev(PredChain->end()))) {
2535 auto EdgeFreq = MBFI->getBlockFreq(Pred) *
2537 auto FallThruCost = ScaleBlockFrequency(EdgeFreq,
MisfetchCost);
2541 FallThruCost += ScaleBlockFrequency(EdgeFreq,
JumpInstCost);
2542 HeaderFallThroughCost = std::max(HeaderFallThroughCost, FallThruCost);
2551 for (
auto *BB : LoopChain) {
2554 BlockChain *SuccChain = BlockToChain[Succ];
2555 if (!LoopBlockSet.count(Succ) &&
2556 (!SuccChain || Succ == *SuccChain->begin())) {
2558 LargestExitEdgeProb = std::max(LargestExitEdgeProb, SuccProb);
2562 auto ExitFreq = MBFI->getBlockFreq(BB) * LargestExitEdgeProb;
2570 for (
auto Iter = LoopChain.begin(), TailIter = std::prev(LoopChain.end()),
2571 EndIter = LoopChain.end();
2572 Iter != EndIter; Iter++, TailIter++) {
2575 if (TailIter == LoopChain.end())
2576 TailIter = LoopChain.begin();
2578 auto TailBB = *TailIter;
2581 BlockFrequency
Cost = BlockFrequency(0);
2586 if (Iter != LoopChain.begin())
2587 Cost += HeaderFallThroughCost;
2591 for (
auto &ExitWithFreq : ExitsWithFreq)
2592 if (TailBB != ExitWithFreq.first)
2593 Cost += ExitWithFreq.second;
2609 if (TailBB->isSuccessor(*Iter)) {
2610 auto TailBBFreq = MBFI->getBlockFreq(TailBB);
2611 if (TailBB->succ_size() == 1)
2613 else if (TailBB->succ_size() == 2) {
2615 auto TailToHeadFreq = TailBBFreq * TailToHeadProb;
2616 auto ColderEdgeFreq = TailToHeadProb > BranchProbability(1, 2)
2617 ? TailBBFreq * TailToHeadProb.
getCompl()
2628 if (
Cost < SmallestRotationCost) {
2629 SmallestRotationCost =
Cost;
2634 if (RotationPos != LoopChain.end()) {
2636 <<
" to the top\n");
2637 std::rotate(LoopChain.begin(), RotationPos, LoopChain.end());
2645MachineBlockPlacement::BlockFilterSet
2646MachineBlockPlacement::collectLoopBlockSet(
const MachineLoop &L) {
2650 bool operator()(
const MachineBasicBlock *
X,
2651 const MachineBasicBlock *
Y)
const {
2652 return X->getNumber() <
Y->getNumber();
2655 std::set<const MachineBasicBlock *, MBBCompare> LoopBlockSet;
2667 BlockFrequency LoopFreq(0);
2668 for (
auto *LoopPred :
L.getHeader()->predecessors())
2669 if (!
L.contains(LoopPred))
2670 LoopFreq += MBFI->getBlockFreq(LoopPred) *
2673 for (MachineBasicBlock *LoopBB :
L.getBlocks()) {
2674 if (LoopBlockSet.count(LoopBB))
2679 BlockChain *Chain = BlockToChain[LoopBB];
2680 for (MachineBasicBlock *ChainBB : *Chain)
2681 LoopBlockSet.insert(ChainBB);
2684 LoopBlockSet.insert(
L.block_begin(),
L.block_end());
2689 BlockFilterSet Ret(LoopBlockSet.begin(), LoopBlockSet.end());
2699void MachineBlockPlacement::buildLoopChains(
const MachineLoop &L) {
2702 for (
const MachineLoop *InnerLoop : L)
2703 buildLoopChains(*InnerLoop);
2706 "BlockWorkList not empty when starting to build loop chains.");
2708 "EHPadWorkList not empty when starting to build loop chains.");
2709 BlockFilterSet LoopBlockSet = collectLoopBlockSet(L);
2714 bool RotateLoopWithProfile =
2722 MachineBasicBlock *LoopTop = findBestLoopTop(L, LoopBlockSet);
2730 PreferredLoopExit =
nullptr;
2731 BlockFrequency ExitFreq;
2732 if (!RotateLoopWithProfile && LoopTop ==
L.getHeader())
2733 PreferredLoopExit = findBestLoopExit(L, LoopBlockSet, ExitFreq);
2735 BlockChain &LoopChain = *BlockToChain[LoopTop];
2740 SmallPtrSet<BlockChain *, 4> UpdatedPreds;
2741 assert(LoopChain.UnscheduledPredecessors == 0 &&
2742 "LoopChain should not have unscheduled predecessors.");
2743 UpdatedPreds.
insert(&LoopChain);
2745 for (
const MachineBasicBlock *LoopBB : LoopBlockSet)
2746 fillWorkLists(LoopBB, UpdatedPreds, &LoopBlockSet);
2748 buildChain(LoopTop, LoopChain, &LoopBlockSet);
2750 if (RotateLoopWithProfile)
2751 rotateLoopWithProfile(LoopChain, L, LoopBlockSet);
2753 rotateLoop(LoopChain, PreferredLoopExit, ExitFreq, LoopBlockSet);
2757 bool BadLoop =
false;
2758 if (LoopChain.UnscheduledPredecessors) {
2760 dbgs() <<
"Loop chain contains a block without its preds placed!\n"
2761 <<
" Loop header: " <<
getBlockName(*
L.block_begin()) <<
"\n"
2762 <<
" Chain header: " <<
getBlockName(*LoopChain.begin()) <<
"\n";
2764 for (MachineBasicBlock *ChainBB : LoopChain) {
2766 if (!LoopBlockSet.remove(ChainBB)) {
2770 dbgs() <<
"Loop chain contains a block not contained by the loop!\n"
2771 <<
" Loop header: " <<
getBlockName(*
L.block_begin()) <<
"\n"
2772 <<
" Chain header: " <<
getBlockName(*LoopChain.begin()) <<
"\n"
2777 if (!LoopBlockSet.empty()) {
2779 for (
const MachineBasicBlock *LoopBB : LoopBlockSet)
2780 dbgs() <<
"Loop contains blocks never placed into a chain!\n"
2781 <<
" Loop header: " <<
getBlockName(*
L.block_begin()) <<
"\n"
2782 <<
" Chain header: " <<
getBlockName(*LoopChain.begin()) <<
"\n"
2785 assert(!BadLoop &&
"Detected problems with the placement of this loop.");
2788 BlockWorkList.
clear();
2789 EHPadWorkList.
clear();
2792void MachineBlockPlacement::buildCFGChains() {
2798 MachineBasicBlock *BB = &*FI;
2800 new (ChainAllocator.
Allocate()) BlockChain(BlockToChain, BB);
2805 MachineBasicBlock *
TBB =
nullptr, *FBB =
nullptr;
2810 MachineBasicBlock *NextBB = &*NextFI;
2813 assert(NextFI != FE &&
"Can't fallthrough past the last block.");
2814 LLVM_DEBUG(
dbgs() <<
"Pre-merging due to unanalyzable fallthrough: "
2817 Chain->merge(NextBB,
nullptr);
2819 BlocksWithUnanalyzableExits.
insert(&*BB);
2827 PreferredLoopExit =
nullptr;
2828 for (MachineLoop *L : *MLI)
2829 buildLoopChains(*L);
2832 "BlockWorkList should be empty before building final chain.");
2834 "EHPadWorkList should be empty before building final chain.");
2836 SmallPtrSet<BlockChain *, 4> UpdatedPreds;
2837 for (MachineBasicBlock &
MBB : *
F)
2838 fillWorkLists(&
MBB, UpdatedPreds);
2840 BlockChain &FunctionChain = *BlockToChain[&
F->front()];
2841 buildChain(&
F->front(), FunctionChain);
2844 using FunctionBlockSetType = SmallPtrSet<MachineBasicBlock *, 16>;
2848 bool BadFunc =
false;
2849 FunctionBlockSetType FunctionBlockSet;
2850 for (MachineBasicBlock &
MBB : *
F)
2851 FunctionBlockSet.insert(&
MBB);
2853 for (MachineBasicBlock *ChainBB : FunctionChain)
2854 if (!FunctionBlockSet.erase(ChainBB)) {
2856 dbgs() <<
"Function chain contains a block not in the function!\n"
2860 if (!FunctionBlockSet.empty()) {
2862 for (MachineBasicBlock *RemainingBB : FunctionBlockSet)
2863 dbgs() <<
"Function contains blocks never placed into a chain!\n"
2864 <<
" Bad block: " <<
getBlockName(RemainingBB) <<
"\n";
2866 assert(!BadFunc &&
"Detected problems with the block placement.");
2871 SmallVector<MachineBasicBlock *, 4> OriginalLayoutSuccessors(
2872 F->getNumBlockIDs());
2874 MachineBasicBlock *LastMBB =
nullptr;
2875 for (
auto &
MBB : *
F) {
2876 if (LastMBB !=
nullptr)
2880 OriginalLayoutSuccessors[
F->back().getNumber()] =
nullptr;
2886 for (MachineBasicBlock *ChainBB : FunctionChain) {
2887 LLVM_DEBUG(
dbgs() << (ChainBB == *FunctionChain.begin() ?
"Placing chain "
2891 F->splice(InsertPos, ChainBB);
2896 if (ChainBB == *FunctionChain.begin())
2904 MachineBasicBlock *
TBB =
nullptr, *FBB =
nullptr;
2907 if (!BlocksWithUnanalyzableExits.
count(PrevBB)) {
2913 "Unexpected block with un-analyzable fallthrough!");
2915 TBB = FBB =
nullptr;
2947 MachineBasicBlock *
TBB =
nullptr, *FBB =
nullptr;
2949 MachineBasicBlock *PrevBB = &
F->
back();
2953 BlockWorkList.
clear();
2954 EHPadWorkList.
clear();
2957void MachineBlockPlacement::optimizeBranches() {
2958 BlockChain &FunctionChain = *BlockToChain[&
F->front()];
2967 for (MachineBasicBlock *ChainBB : FunctionChain) {
2969 MachineBasicBlock *
TBB =
nullptr, *FBB =
nullptr;
2990 auto Dl = ChainBB->findBranchDebugLoc();
2996void MachineBlockPlacement::alignBlocks() {
3003 if (
F->getFunction().hasMinSize() ||
3008 BlockChain &FunctionChain = *BlockToChain[&
F->front()];
3010 if (FunctionChain.begin() == FunctionChain.end())
3013 const BranchProbability ColdProb(1, 5);
3014 BlockFrequency EntryFreq = MBFI->getBlockFreq(&
F->front());
3015 BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb;
3016 for (MachineBasicBlock *ChainBB : FunctionChain) {
3017 if (ChainBB == *FunctionChain.begin())
3024 MachineLoop *
L = MLI->getLoopFor(ChainBB);
3029 unsigned MDAlign = 1;
3030 MDNode *LoopID =
L->getLoopID();
3039 if (S->
getString() ==
"llvm.loop.align") {
3041 "per-loop align metadata should have two operands.");
3044 assert(MDAlign >= 1 &&
"per-loop align value must be positive.");
3050 const Align LoopAlign = std::max(TLIAlign,
Align(MDAlign));
3056 BlockFrequency Freq = MBFI->getBlockFreq(ChainBB);
3057 if (Freq < WeightedEntryFreq)
3062 MachineBasicBlock *LoopHeader =
L->getHeader();
3063 BlockFrequency LoopHeaderFreq = MBFI->getBlockFreq(LoopHeader);
3064 if (Freq < (LoopHeaderFreq * ColdProb))
3074 MachineBasicBlock *LayoutPred =
3077 auto DetermineMaxAlignmentPadding = [&]() {
3084 ChainBB->setMaxBytesForAlignment(MaxBytes);
3090 ChainBB->setAlignment(LoopAlign);
3091 DetermineMaxAlignmentPadding();
3099 BranchProbability LayoutProb =
3101 BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb;
3102 if (LayoutEdgeFreq <= (Freq * ColdProb)) {
3103 ChainBB->setAlignment(LoopAlign);
3104 DetermineMaxAlignmentPadding();
3108 const bool HasMaxBytesOverride =
3113 for (MachineBasicBlock &
MBB : *
F) {
3114 if (HasMaxBytesOverride)
3123 for (
auto MBI = std::next(
F->begin()), MBE =
F->end(); MBI != MBE; ++MBI) {
3124 auto LayoutPred = std::prev(MBI);
3126 if (HasMaxBytesOverride)
3151bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
3152 MachineBasicBlock *BB, MachineBasicBlock *&LPred,
3153 const MachineBasicBlock *LoopHeaderBB, BlockChain &Chain,
3155 BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt) {
3156 bool Removed, DuplicatedToLPred;
3157 bool DuplicatedToOriginalLPred;
3158 Removed = maybeTailDuplicateBlock(
3159 BB, LPred, Chain, BlockFilter, PrevUnplacedBlockIt,
3160 PrevUnplacedBlockInFilterIt, DuplicatedToLPred);
3163 DuplicatedToOriginalLPred = DuplicatedToLPred;
3168 while (DuplicatedToLPred && Removed) {
3169 MachineBasicBlock *DupBB, *DupPred;
3175 BlockChain::iterator ChainEnd = Chain.end();
3176 DupBB = *(--ChainEnd);
3178 if (ChainEnd == Chain.begin())
3180 DupPred = *std::prev(ChainEnd);
3181 Removed = maybeTailDuplicateBlock(
3182 DupBB, DupPred, Chain, BlockFilter, PrevUnplacedBlockIt,
3183 PrevUnplacedBlockInFilterIt, DuplicatedToLPred);
3190 LPred = *std::prev(Chain.end());
3191 if (DuplicatedToOriginalLPred)
3192 markBlockSuccessors(Chain, LPred, LoopHeaderBB, BlockFilter);
3209bool MachineBlockPlacement::maybeTailDuplicateBlock(
3210 MachineBasicBlock *BB, MachineBasicBlock *LPred, BlockChain &Chain,
3212 BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt,
3213 bool &DuplicatedToLPred) {
3214 DuplicatedToLPred =
false;
3215 if (!shouldTailDuplicate(BB))
3223 bool Removed =
false;
3224 auto RemovalCallback = [&](MachineBasicBlock *RemBB) {
3229 if (
auto It = BlockToChain.
find(RemBB); It != BlockToChain.
end()) {
3230 It->second->remove(RemBB);
3231 BlockToChain.
erase(It);
3235 if (&(*PrevUnplacedBlockIt) == RemBB) {
3236 PrevUnplacedBlockIt++;
3240 if (RemBB->isEHPad()) {
3251 if (It != BlockFilter->end()) {
3252 if (It < PrevUnplacedBlockInFilterIt) {
3253 const MachineBasicBlock *PrevBB = *PrevUnplacedBlockInFilterIt;
3256 auto Distance = PrevUnplacedBlockInFilterIt - It - 1;
3257 PrevUnplacedBlockInFilterIt = BlockFilter->
erase(It) + Distance;
3258 assert(*PrevUnplacedBlockInFilterIt == PrevBB);
3260 }
else if (It == PrevUnplacedBlockInFilterIt)
3263 PrevUnplacedBlockInFilterIt = BlockFilter->erase(It);
3265 BlockFilter->erase(It);
3270 MLI->removeBlock(RemBB);
3271 if (RemBB == PreferredLoopExit)
3272 PreferredLoopExit =
nullptr;
3277 auto RemovalCallbackRef =
3278 function_ref<void(MachineBasicBlock *)>(RemovalCallback);
3280 SmallVector<MachineBasicBlock *, 8> DuplicatedPreds;
3282 SmallVector<MachineBasicBlock *, 8> CandidatePreds;
3283 SmallVectorImpl<MachineBasicBlock *> *CandidatePtr =
nullptr;
3284 if (
F->getFunction().hasProfileData()) {
3286 findDuplicateCandidates(CandidatePreds, BB, BlockFilter);
3287 if (CandidatePreds.
size() == 0)
3290 CandidatePtr = &CandidatePreds;
3293 &RemovalCallbackRef, CandidatePtr);
3296 DuplicatedToLPred =
false;
3297 for (MachineBasicBlock *Pred : DuplicatedPreds) {
3299 BlockChain *PredChain = BlockToChain[Pred];
3301 DuplicatedToLPred =
true;
3302 if (Pred == LPred || (BlockFilter && !BlockFilter->count(Pred)) ||
3303 PredChain == &Chain)
3305 for (MachineBasicBlock *NewSucc : Pred->
successors()) {
3306 if (BlockFilter && !BlockFilter->count(NewSucc))
3308 BlockChain *NewChain = BlockToChain[NewSucc];
3309 if (NewChain != &Chain && NewChain != PredChain)
3310 NewChain->UnscheduledPredecessors++;
3320 if (!
MI.isPHI() && !
MI.isMetaInstruction())
3329BlockFrequency MachineBlockPlacement::scaleThreshold(MachineBasicBlock *BB) {
3334bool MachineBlockPlacement::isBestSuccessor(MachineBasicBlock *BB,
3335 MachineBasicBlock *Pred,
3336 BlockFilterSet *BlockFilter) {
3339 if (BlockFilter && !BlockFilter->count(Pred))
3341 BlockChain *PredChain = BlockToChain[Pred];
3342 if (PredChain && (Pred != *std::prev(PredChain->end())))
3347 for (MachineBasicBlock *Succ : Pred->
successors())
3349 if (BlockFilter && !BlockFilter->count(Succ))
3351 BlockChain *SuccChain = BlockToChain[Succ];
3352 if (SuccChain && (Succ != *SuccChain->begin()))
3355 if (SuccProb > BestProb)
3356 BestProb = SuccProb;
3360 if (BBProb <= BestProb)
3365 BlockFrequency PredFreq = getBlockCountOrFrequency(Pred);
3366 BlockFrequency Gain = PredFreq * (BBProb - BestProb);
3367 return Gain > scaleThreshold(BB);
3372void MachineBlockPlacement::findDuplicateCandidates(
3373 SmallVectorImpl<MachineBasicBlock *> &Candidates, MachineBasicBlock *BB,
3374 BlockFilterSet *BlockFilter) {
3375 MachineBasicBlock *Fallthrough =
nullptr;
3377 BlockFrequency BBDupThreshold(scaleThreshold(BB));
3378 SmallVector<MachineBasicBlock *, 8> Preds(BB->
predecessors());
3379 SmallVector<MachineBasicBlock *, 8> Succs(BB->
successors());
3382 auto CmpSucc = [&](MachineBasicBlock *
A, MachineBasicBlock *
B) {
3385 auto CmpPred = [&](MachineBasicBlock *
A, MachineBasicBlock *
B) {
3386 return MBFI->getBlockFreq(
A) > MBFI->getBlockFreq(
B);
3391 auto SuccIt = Succs.begin();
3392 if (SuccIt != Succs.end()) {
3437 for (MachineBasicBlock *Pred : Preds) {
3438 BlockFrequency PredFreq = getBlockCountOrFrequency(Pred);
3443 if (!Fallthrough && isBestSuccessor(BB, Pred, BlockFilter)) {
3445 if (SuccIt != Succs.end())
3451 BlockFrequency OrigCost = PredFreq + PredFreq * DefaultBranchProb;
3452 BlockFrequency DupCost;
3453 if (SuccIt == Succs.end()) {
3455 if (Succs.size() > 0)
3456 DupCost += PredFreq;
3459 DupCost += PredFreq;
3463 assert(OrigCost >= DupCost);
3464 OrigCost -= DupCost;
3465 if (OrigCost > BBDupThreshold) {
3467 if (SuccIt != Succs.end())
3475 if ((Candidates.
size() < Preds.size()) && (Candidates.
size() > 0)) {
3476 Candidates[0] = Candidates.
back();
3482void MachineBlockPlacement::initTailDupThreshold() {
3483 DupThreshold = BlockFrequency(0);
3484 if (
F->getFunction().hasProfileData()) {
3488 UseProfileCount =
true;
3493 BlockFrequency MaxFreq = BlockFrequency(0);
3494 for (MachineBasicBlock &
MBB : *
F) {
3495 BlockFrequency Freq = MBFI->getBlockFreq(&
MBB);
3501 DupThreshold = BlockFrequency(MaxFreq * ThresholdProb);
3502 UseProfileCount =
false;
3514 if (OptLevel >= CodeGenOptLevel::Aggressive) {
3526 (OptLevel < CodeGenOptLevel::Aggressive ||
3528 TailDupSize =
TII->getTailDuplicateSize(OptLevel);
3535 auto MBFI = std::make_unique<MBFIWrapper>(
3538 auto *MPDT = MachineBlockPlacement::allowTailDupPlacement(MF)
3543 .getCachedResult<ProfileSummaryAnalysis>(
3548 MachineBlockPlacement MBP(MBPI, MLI, PSI, std::move(MBFI), MPDT,
3555 MDT->updateBlockNumbers();
3557 MPDT->updateBlockNumbers();
3564 OS << MapClassName2PassName(
name());
3565 if (!AllowTailMerge)
3566 OS <<
"<no-tail-merge>";
3572 if (std::next(MF.
begin()) == MF.
end())
3576 OptLevel =
F->getTarget().getOptLevel();
3583 PreferredLoopExit =
nullptr;
3586 "BlockToChain map should be empty before starting placement.");
3588 "Computed Edge map should be empty before starting placement.");
3591 initTailDupThreshold();
3593 const bool OptForSize =
3598 bool UseExtTspForPerf =
false;
3599 bool UseExtTspForSize =
false;
3608 if (allowTailDupPlacement(*
F)) {
3611 const bool PreRegAlloc =
false;
3612 TailDup.
initMF(MF, PreRegAlloc, MBPI, MBFI.get(), PSI,
3614 if (!UseExtTspForSize)
3615 precomputeTriangleChains();
3619 if (!UseExtTspForSize)
3629 if (EnableTailMerge) {
3631 BranchFolder BF(
true,
false,
3639 if (!UseExtTspForSize) {
3641 BlockToChain.
clear();
3642 ComputedEdges.
clear();
3652 if (UseExtTspForPerf || UseExtTspForSize) {
3654 !(UseExtTspForPerf && UseExtTspForSize) &&
3655 "UseExtTspForPerf and UseExtTspForSize can not be set simultaneously");
3656 applyExtTsp(UseExtTspForSize);
3657 createCFGChainExtTsp();
3663 BlockToChain.
clear();
3664 ComputedEdges.
clear();
3673 MBFI->view(
"MBP." + MF.
getName(),
false);
3681void MachineBlockPlacement::applyExtTsp(
bool OptForSize) {
3683 DenseMap<const MachineBasicBlock *, uint64_t> BlockIndex;
3685 std::vector<const MachineBasicBlock *> CurrentBlockOrder;
3686 CurrentBlockOrder.reserve(
F->size());
3687 size_t NumBlocks = 0;
3688 for (
const MachineBasicBlock &
MBB : *
F) {
3689 BlockIndex[&
MBB] = NumBlocks++;
3690 CurrentBlockOrder.push_back(&
MBB);
3693 SmallVector<uint64_t, 0> BlockCounts(
F->size());
3694 SmallVector<uint64_t, 0> BlockSizes(
F->size());
3698 for (MachineBasicBlock &
MBB : *
F) {
3700 BlockFrequency BlockFreq = MBFI->getBlockFreq(&
MBB);
3701 BlockCounts[BlockIndex[&
MBB]] = OptForSize ? 1 : BlockFreq.
getFrequency();
3710 size_t NumInsts = std::distance(NonDbgInsts.begin(), NonDbgInsts.end());
3711 BlockSizes[BlockIndex[&
MBB]] = 4 * NumInsts;
3716 MachineBasicBlock *
TBB =
nullptr, *FBB =
nullptr;
3727 if (FBB && FBB != FTB)
3734 const uint64_t Freq = Succs.
size() == 1 ? 110 : 100;
3735 for (
const MachineBasicBlock *Succ : Succs)
3736 JumpCounts.
push_back({BlockIndex[&
MBB], BlockIndex[Succ], Freq});
3740 BlockFrequency JumpFreq = BlockFreq * EP;
3747 LLVM_DEBUG(
dbgs() <<
"Applying ext-tsp layout for |V| = " <<
F->size()
3748 <<
" with profile = " <<
F->getFunction().hasProfileData()
3749 <<
" (" <<
F->getName() <<
")" <<
"\n");
3756 std::vector<const MachineBasicBlock *> NewBlockOrder;
3757 NewBlockOrder.reserve(
F->size());
3758 for (uint64_t Node : NewOrder) {
3759 NewBlockOrder.push_back(CurrentBlockOrder[Node]);
3761 const double OptScore =
calcExtTspScore(NewOrder, BlockSizes, JumpCounts);
3765 if (OptForSize && OrgScore > OptScore)
3766 assignBlockOrder(CurrentBlockOrder);
3768 assignBlockOrder(NewBlockOrder);
3771void MachineBlockPlacement::assignBlockOrder(
3772 const std::vector<const MachineBasicBlock *> &NewBlockOrder) {
3773 assert(
F->size() == NewBlockOrder.size() &&
"Incorrect size of block order");
3774 F->RenumberBlocks();
3781 bool HasChanges =
false;
3782 for (
size_t I = 0;
I < NewBlockOrder.size();
I++) {
3783 if (NewBlockOrder[
I] !=
F->getBlockNumbered(
I)) {
3792 SmallVector<MachineBasicBlock *, 4> PrevFallThroughs(
F->getNumBlockIDs());
3793 for (
auto &
MBB : *
F) {
3798 DenseMap<const MachineBasicBlock *, size_t> NewIndex;
3799 for (
const MachineBasicBlock *
MBB : NewBlockOrder) {
3800 NewIndex[
MBB] = NewIndex.
size();
3802 F->sort([&](MachineBasicBlock &L, MachineBasicBlock &R) {
3803 return NewIndex[&
L] < NewIndex[&
R];
3808 const TargetInstrInfo *
TII =
F->getSubtarget().getInstrInfo();
3810 for (
auto &
MBB : *
F) {
3817 if (FTMBB && (NextMBB == EndIt || &*NextMBB != FTMBB)) {
3823 MachineBasicBlock *
TBB =
nullptr, *FBB =
nullptr;
3830void MachineBlockPlacement::createCFGChainExtTsp() {
3831 BlockToChain.
clear();
3832 ComputedEdges.
clear();
3835 MachineBasicBlock *HeadBB = &
F->
front();
3836 BlockChain *FunctionChain =
3837 new (ChainAllocator.
Allocate()) BlockChain(BlockToChain, HeadBB);
3839 for (MachineBasicBlock &
MBB : *
F) {
3842 FunctionChain->merge(&
MBB,
nullptr);
3854class MachineBlockPlacementStats {
3856 const MachineBranchProbabilityInfo *MBPI;
3859 const MachineBlockFrequencyInfo *MBFI;
3862 MachineBlockPlacementStats(
const MachineBranchProbabilityInfo *MBPI,
3863 const MachineBlockFrequencyInfo *MBFI)
3864 : MBPI(MBPI), MBFI(MBFI) {}
3865 bool run(MachineFunction &MF);
3868class MachineBlockPlacementStatsLegacy :
public MachineFunctionPass {
3872 MachineBlockPlacementStatsLegacy() : MachineFunctionPass(
ID) {}
3874 bool runOnMachineFunction(MachineFunction &
F)
override {
3876 &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
3877 auto *MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
3878 return MachineBlockPlacementStats(MBPI, MBFI).run(
F);
3881 void getAnalysisUsage(AnalysisUsage &AU)
const override {
3882 AU.
addRequired<MachineBranchProbabilityInfoWrapperPass>();
3883 AU.
addRequired<MachineBlockFrequencyInfoWrapperPass>();
3891char MachineBlockPlacementStatsLegacy::ID = 0;
3896 "Basic Block Placement Stats",
false,
false)
3908 MachineBlockPlacementStats(&MBPI, &MBFI).
run(MF);
3914 if (std::next(
F.begin()) ==
F.end())
3923 (
MBB.succ_size() > 1) ? NumCondBranches : NumUncondBranches;
3925 (
MBB.succ_size() > 1) ? CondBranchTakenFreq : UncondBranchTakenFreq;
3928 if (
MBB.isLayoutSuccessor(Succ))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file defines the BumpPtrAllocator interface.
static cl::opt< unsigned > TailMergeSize("tail-merge-size", cl::desc("Min number of instructions to consider tail merging"), cl::init(3), cl::Hidden)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Declares methods and data structures for code layout algorithms.
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
static unsigned InstrCount
This file defines the DenseMap class.
const HexagonInstrInfo * TII
static LoopDeletionResult merge(LoopDeletionResult A, LoopDeletionResult B)
static BranchProbability getAdjustedProbability(BranchProbability OrigProb, BranchProbability AdjustedSumProb)
The helper function returns the branch probability that is adjusted or normalized over the new total ...
static cl::opt< bool > PreciseRotationCost("precise-rotation-cost", cl::desc("Model the cost of loop rotation more " "precisely by using profile data."), cl::init(false), cl::Hidden)
static cl::opt< unsigned > ExtTspBlockPlacementMaxBlocks("ext-tsp-block-placement-max-blocks", cl::desc("Maximum number of basic blocks in a function to run ext-TSP " "block placement."), cl::init(UINT_MAX), cl::Hidden)
static cl::opt< unsigned > AlignAllBlock("align-all-blocks", cl::desc("Force the alignment of all blocks in the function in log2 format " "(e.g 4 means align on 16B boundaries)."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > PredecessorLimit("block-placement-predecessor-limit", cl::desc("For blocks with more predecessors, certain layout optimizations" "will be disabled to prevent quadratic compile time."), cl::init(1000), cl::Hidden)
static BranchProbability getLayoutSuccessorProbThreshold(const MachineBasicBlock *BB)
static cl::opt< bool > ForceLoopColdBlock("force-loop-cold-block", cl::desc("Force outlining cold blocks from loops."), cl::init(false), cl::Hidden)
static cl::opt< unsigned > ExitBlockBias("block-placement-exit-block-bias", cl::desc("Block frequency percentage a loop exit block needs " "over the original exit to be considered the new exit."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > AlignAllNonFallThruBlocks("align-all-nofallthru-blocks", cl::desc("Force the alignment of all blocks that have no fall-through " "predecessors (i.e. don't add nops that are executed). In log2 " "format (e.g 4 means align on 16B boundaries)."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > TailDupPlacementThreshold("tail-dup-placement-threshold", cl::desc("Instruction cutoff for tail duplication during layout. " "Tail merging during layout is forced to have a threshold " "that won't conflict."), cl::init(2), cl::Hidden)
static cl::opt< unsigned > JumpInstCost("jump-inst-cost", cl::desc("Cost of jump instructions."), cl::init(1), cl::Hidden)
static cl::opt< unsigned > TailDupPlacementPenalty("tail-dup-placement-penalty", cl::desc("Cost penalty for blocks that can avoid breaking CFG by copying. " "Copying can increase fallthrough, but it also increases icache " "pressure. This parameter controls the penalty to account for that. " "Percent as integer."), cl::init(2), cl::Hidden)
static bool greaterWithBias(BlockFrequency A, BlockFrequency B, BlockFrequency EntryFreq)
Compare 2 BlockFrequency's with a small penalty for A.
static cl::opt< unsigned > MisfetchCost("misfetch-cost", cl::desc("Cost that models the probabilistic risk of an instruction " "misfetch due to a jump comparing to falling through, whose cost " "is zero."), cl::init(1), cl::Hidden)
static cl::opt< unsigned > MaxBytesForAlignmentOverride("max-bytes-for-alignment", cl::desc("Forces the maximum bytes allowed to be emitted when padding for " "alignment"), cl::init(0), cl::Hidden)
static cl::opt< bool > BranchFoldPlacement("branch-fold-placement", cl::desc("Perform branch folding during placement. " "Reduces code size."), cl::init(true), cl::Hidden)
static cl::opt< unsigned > TailDupProfilePercentThreshold("tail-dup-profile-percent-threshold", cl::desc("If profile count information is used in tail duplication cost " "model, the gained fall through number from tail duplication " "should be at least this percent of hot count."), cl::init(50), cl::Hidden)
static cl::opt< unsigned > TriangleChainCount("triangle-chain-count", cl::desc("Number of triangle-shaped-CFG's that need to be in a row for the " "triangle tail duplication heuristic to kick in. 0 to disable."), cl::init(2), cl::Hidden)
Branch Probability Basic Block static false std::string getBlockName(const MachineBasicBlock *BB)
Helper to print the name of a MBB.
static cl::opt< bool > ApplyExtTspForSize("apply-ext-tsp-for-size", cl::init(false), cl::Hidden, cl::desc("Use ext-tsp for size-aware block placement."))
static bool hasSameSuccessors(MachineBasicBlock &BB, SmallPtrSetImpl< const MachineBasicBlock * > &Successors)
Check if BB has exactly the successors in Successors.
static cl::opt< bool > TailDupPlacement("tail-dup-placement", cl::desc("Perform tail duplication during placement. " "Creates more fallthrough opportunities in " "outline branches."), cl::init(true), cl::Hidden)
static uint64_t countMBBInstruction(MachineBasicBlock *MBB)
static cl::opt< unsigned > LoopToColdBlockRatio("loop-to-cold-block-ratio", cl::desc("Outline loop blocks from loop chain if (frequency of loop) / " "(frequency of block) is greater than this ratio"), cl::init(5), cl::Hidden)
static cl::opt< bool > RenumberBlocksBeforeView("renumber-blocks-before-view", cl::desc("If true, basic blocks are re-numbered before MBP layout is printed " "into a dot graph. Only used when a function is being printed."), cl::init(false), cl::Hidden)
static cl::opt< unsigned > TailDupPlacementAggressiveThreshold("tail-dup-placement-aggressive-threshold", cl::desc("Instruction cutoff for aggressive tail duplication during " "layout. Used at -O3. Tail merging during layout is forced to " "have a threshold that won't conflict."), cl::init(4), cl::Hidden)
static cl::opt< bool > ForcePreciseRotationCost("force-precise-rotation-cost", cl::desc("Force the use of precise cost " "loop rotation strategy."), cl::init(false), cl::Hidden)
static bool hasProfileData(const Function &F, const FunctionOutliningInfo &OI)
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
static BlockFrequency max()
Returns the maximum possible frequency, the saturation value.
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
static BranchProbability getOne()
uint32_t getNumerator() const
BranchProbability getCompl() const
static BranchProbability getZero()
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
bool erase(const KeyT &Val)
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
Module * getParent()
Get the module that this global value is contained inside of...
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
Remove the branching code at the end of the specific MBB.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
Reverses the branch condition of the specified condition list, returning false on success and true if...
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
Insert branch code into the end of the specified MachineBasicBlock.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
unsigned getNumOperands() const
Return number of MDNode operands.
LLVM_ABI StringRef getString() const
unsigned pred_size() const
bool isEHPad() const
Returns true if the block is a landing pad.
instr_iterator instr_begin()
LLVM_ABI MachineBasicBlock * getFallThrough(bool JumpToFallThrough=true)
Return the fallthrough block if the block can implicitly transfer control to the block after it by fa...
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
LLVM_ABI void updateTerminator(MachineBasicBlock *PreviousLayoutSuccessor)
Update the terminator instructions in block to account for changes to block layout which may have bee...
LLVM_ABI bool canFallThrough()
Return true if the block can implicitly transfer control to the block after it by falling off the end...
succ_iterator succ_begin()
unsigned succ_size() const
void setAlignment(Align A)
Set alignment of the basic block.
LLVM_ABI bool isEntryBlock() const
Returns true if this is the entry block of the function.
pred_iterator pred_begin()
succ_reverse_iterator succ_rbegin()
LLVM_ABI bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
LLVM_ABI DebugLoc findBranchDebugLoc()
Find and return the merged DebugLoc of the branch instructions of the block.
iterator_range< succ_iterator > successors()
LLVM_ABI bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
iterator_range< pred_iterator > predecessors()
LLVM_ABI StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
LLVM_ABI Result run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName) const
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
BranchProbability getEdgeProbability(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const
Analysis pass which computes a MachineDominatorTree.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
void RenumberBlocks(MachineBasicBlock *MBBFrom=nullptr)
RenumberBlocks - This discards all of the MachineBasicBlock numbers and recomputes them.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Representation of each machine instruction.
Analysis pass that exposes the MachineLoopInfo for a machine function.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
LLVM_ABI uint64_t getOrCompHotCountThreshold() const
Returns HotCountThreshold if set.
typename vector_type::const_iterator iterator
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
reference emplace_back(ArgTypes &&... Args)
iterator erase(const_iterator CI)
typename SuperClass::const_iterator const_iterator
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
T * Allocate(size_t num=1)
Allocate space for an array of objects without constructing them.
void DestroyAll()
Call the destructor of each allocated object and deallocate all but the current slab and reset the cu...
StringRef - Represent a constant reference to a string, i.e.
void initMF(MachineFunction &MF, bool PreRegAlloc, const MachineBranchProbabilityInfo *MBPI, MBFIWrapper *MBFI, ProfileSummaryInfo *PSI, bool LayoutMode, unsigned TailDupSize=0)
Prepare to run on a specific machine function.
bool tailDuplicateAndUpdate(bool IsSimple, MachineBasicBlock *MBB, MachineBasicBlock *ForcedLayoutPred, SmallVectorImpl< MachineBasicBlock * > *DuplicatedPreds=nullptr, function_ref< void(MachineBasicBlock *)> *RemovalCallback=nullptr, SmallVectorImpl< MachineBasicBlock * > *CandidatePtr=nullptr)
Tail duplicate a single basic block into its predecessors, and then clean up.
static bool isSimpleBB(MachineBasicBlock *TailBB)
True if this BB has only one unconditional jump.
bool canTailDuplicate(MachineBasicBlock *TailBB, MachineBasicBlock *PredBB)
Returns true if TailBB can successfully be duplicated into PredBB.
bool shouldTailDuplicate(bool IsSimple, MachineBasicBlock &TailBB)
Determine if it is profitable to duplicate this block.
virtual unsigned getMaxPermittedBytesForAlignment(MachineBasicBlock *MBB) const
Return the maximum amount of bytes allowed to be emitted when padding for alignment.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
virtual bool alignLoopsWithOptSize() const
Should loops be aligned even when the function is marked OptSize (but not MinSize).
bool requiresStructuredCFG() const
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to an std::string.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
initializer< Ty > init(const Ty &Val)
LLVM_ABI double calcExtTspScore(ArrayRef< uint64_t > Order, ArrayRef< uint64_t > NodeSizes, ArrayRef< EdgeCount > EdgeCounts)
Estimate the "quality" of a given node order in CFG.
LLVM_ABI std::vector< uint64_t > computeExtTspLayout(ArrayRef< uint64_t > NodeSizes, ArrayRef< uint64_t > NodeCounts, ArrayRef< EdgeCount > EdgeCounts)
Find a layout of nodes (basic blocks) of a given CFG optimizing jump locality and thus processor I-ca...
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
LLVM_ABI void append(SmallVectorImpl< char > &path, const Twine &a, const Twine &b="", const Twine &c="", const Twine &d="")
Append to path.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
void stable_sort(R &&Range)
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
OuterAnalysisManagerProxy< ModuleAnalysisManager, MachineFunction > ModuleAnalysisManagerMachineFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
cl::opt< bool > ApplyExtTspWithoutProfile
constexpr from_range_t from_range
LLVM_ABI bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
cl::opt< unsigned > ProfileLikelyProb
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
cl::opt< std::string > ViewBlockFreqFuncName("view-bfi-func-name", cl::Hidden, cl::desc("The option to specify " "the name of the function " "whose CFG will be displayed."))
auto reverse(ContainerTy &&C)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
cl::opt< GVDAGType > ViewBlockLayoutWithBFI("view-block-layout-with-bfi", cl::Hidden, cl::desc("Pop up a window to show a dag displaying MBP layout and associated " "block frequencies of the CFG."), cl::values(clEnumValN(GVDT_None, "none", "do not display graphs."), clEnumValN(GVDT_Fraction, "fraction", "display a graph using the " "fractional block frequency representation."), clEnumValN(GVDT_Integer, "integer", "display a graph using the raw " "integer fractional block frequency representation."), clEnumValN(GVDT_Count, "count", "display a graph using the real " "profile count if available.")))
bool isFunctionInPrintList(StringRef FunctionName)
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
FunctionAddr VTableAddr Count
CodeGenOptLevel
Code generation optimization level.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
cl::opt< unsigned > StaticLikelyProb
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
LLVM_ABI Printable printBlockFreq(const BlockFrequencyInfo &BFI, BlockFrequency Freq)
Print the block frequency Freq relative to the current functions entry frequency.
LLVM_ABI char & MachineBlockPlacementID
MachineBlockPlacement - This pass places basic blocks based on branch probabilities.
cl::opt< bool > EnableExtTspBlockPlacement
LLVM_ABI char & MachineBlockPlacementStatsID
MachineBlockPlacementStats - This pass collects statistics about the basic block placement using bran...
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.