51 #define DEBUG_TYPE "block-placement"
53 STATISTIC(NumCondBranches,
"Number of conditional branches");
54 STATISTIC(NumUncondBranches,
"Number of uncondittional branches");
56 "Potential frequency of taking conditional branches");
58 "Potential frequency of taking unconditional branches");
61 cl::desc(
"Force the alignment of all "
62 "blocks in the function."),
67 "block-placement-exit-block-bias",
68 cl::desc(
"Block frequency percentage a loop exit block needs "
69 "over the original exit to be considered the new exit."),
73 "outline-optional-branches",
74 cl::desc(
"Put completely optional branches, i.e. branches with a common "
75 "post dominator, out of line."),
79 "outline-optional-threshold",
80 cl::desc(
"Don't outline optional branches that are a single block with an "
81 "instruction count below this threshold"),
115 BlockToChainMapType &BlockToChain;
124 : Blocks(1, BB), BlockToChain(BlockToChain), LoopPredecessors(0) {
125 assert(BB &&
"Cannot create a chain with a null basic block");
126 BlockToChain[BB] =
this;
136 iterator
end() {
return Blocks.
end(); }
146 assert(!Blocks.empty());
150 assert(!BlockToChain[BB]);
151 Blocks.push_back(BB);
152 BlockToChain[BB] =
this;
156 assert(BB == *Chain->begin());
157 assert(Chain->begin() != Chain->end());
162 Blocks.push_back(ChainBB);
163 assert(BlockToChain[ChainBB] == Chain &&
"Incoming blocks not in chain");
164 BlockToChain[ChainBB] =
this;
180 unsigned LoopPredecessors;
230 const BlockFilterSet *BlockFilter =
nullptr);
233 const BlockFilterSet *BlockFilter);
235 selectBestCandidateBlock(BlockChain &Chain,
237 const BlockFilterSet *BlockFilter);
241 const BlockFilterSet *BlockFilter);
244 const BlockFilterSet *BlockFilter =
nullptr);
246 const BlockFilterSet &LoopBlockSet);
248 const BlockFilterSet &LoopBlockSet);
251 const BlockFilterSet &LoopBlockSet);
275 "Branch Probability Basic Block Placement",
false,
false)
291 OS <<
" (derived from LLVM BB '" << BB->
getName() <<
"')";
314 void MachineBlockPlacement::markChainSuccessors(
317 const BlockFilterSet *BlockFilter) {
326 if (BlockFilter && !BlockFilter->count(Succ))
328 BlockChain &SuccChain = *BlockToChain[Succ];
330 if (&Chain == &SuccChain || Succ == LoopHeaderBB)
335 if (SuccChain.LoopPredecessors > 0 && --SuccChain.LoopPredecessors == 0)
336 BlockWorkList.
push_back(*SuccChain.begin());
353 const BlockFilterSet *BlockFilter) {
363 uint32_t BestWeight = 0;
364 uint32_t WeightScale = 0;
365 uint32_t SumWeight = MBPI->getSumForBlock(BB, WeightScale);
368 if (BlockFilter && !BlockFilter->count(Succ))
370 BlockChain &SuccChain = *BlockToChain[Succ];
371 if (&SuccChain == &Chain) {
375 if (Succ != *SuccChain.begin()) {
380 uint32_t SuccWeight = MBPI->getEdgeWeight(BB, Succ);
387 UnavoidableBlocks.count(Succ) > 0) {
388 auto HasShortOptionalBranch = [&]() {
391 if (Pred == Succ || (BlockFilter && !BlockFilter->count(Pred)) ||
392 BlockToChain[Pred] == &Chain)
395 if (Pred->pred_size() > 1 || *Pred->pred_begin() != BB)
403 if (!HasShortOptionalBranch())
409 if (SuccChain.LoopPredecessors != 0) {
410 if (SuccProb < HotProb) {
412 <<
" (prob) (CFG conflict)\n");
419 MBFI->getBlockFreq(BB) * SuccProb * HotProb.getCompl();
420 bool BadCFGConflict =
false;
422 if (Pred == Succ || (BlockFilter && !BlockFilter->count(Pred)) ||
423 BlockToChain[Pred] == &Chain)
426 MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, Succ);
427 if (PredEdgeFreq >= CandidateEdgeFreq) {
428 BadCFGConflict =
true;
432 if (BadCFGConflict) {
434 <<
" (prob) (non-cold CFG conflict)\n");
441 << (SuccChain.LoopPredecessors != 0 ?
" (CFG break)" :
"")
443 if (BestSucc && BestWeight >= SuccWeight)
446 BestWeight = SuccWeight;
463 const BlockFilterSet *BlockFilter) {
468 WorkList.
erase(std::remove_if(WorkList.
begin(), WorkList.
end(),
470 return BlockToChain.lookup(BB) == &Chain;
477 BlockChain &SuccChain = *BlockToChain[MBB];
478 if (&SuccChain == &Chain) {
482 assert(SuccChain.LoopPredecessors == 0 &&
"Found CFG-violating block");
486 MBFI->printBlockFreq(
dbgs(), CandidateFreq) <<
" (freq)\n");
487 if (BestBlock && BestFreq >= CandidateFreq)
490 BestFreq = CandidateFreq;
505 const BlockFilterSet *BlockFilter) {
508 if (BlockFilter && !BlockFilter->count(
I))
510 if (BlockToChain[
I] != &PlacedChain) {
511 PrevUnplacedBlockIt =
I;
515 return *BlockToChain[
I]->begin();
521 void MachineBlockPlacement::buildChain(
524 const BlockFilterSet *BlockFilter) {
526 assert(BlockToChain[BB] == &Chain);
531 markChainSuccessors(Chain, LoopHeaderBB, BlockWorkList, BlockFilter);
532 BB = *std::prev(Chain.end());
535 assert(BlockToChain[BB] == &Chain);
536 assert(*std::prev(Chain.end()) == BB);
546 BestSucc = selectBestCandidateBlock(Chain, BlockWorkList, BlockFilter);
550 getFirstUnplacedBlock(F, Chain, PrevUnplacedBlockIt, BlockFilter);
554 DEBUG(
dbgs() <<
"Unnatural loop CFG detected, forcibly merging the "
555 "layout successor until the CFG reduces\n");
559 BlockChain &SuccChain = *BlockToChain[BestSucc];
562 SuccChain.LoopPredecessors = 0;
565 markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, BlockFilter);
566 Chain.merge(BestSucc, &SuccChain);
567 BB = *std::prev(Chain.end());
570 DEBUG(
dbgs() <<
"Finished forming chain for header block "
585 MachineBlockPlacement::findBestLoopTop(
MachineLoop &L,
586 const BlockFilterSet &LoopBlockSet) {
590 BlockChain &HeaderChain = *BlockToChain[L.
getHeader()];
591 if (!LoopBlockSet.count(*HeaderChain.begin()))
600 if (!LoopBlockSet.count(Pred))
603 << Pred->succ_size() <<
" successors, ";
604 MBFI->printBlockFreq(
dbgs(), Pred) <<
" freq\n");
605 if (Pred->succ_size() > 1)
609 if (!BestPred || PredFreq > BestPredFreq ||
610 (!(PredFreq < BestPredFreq) &&
611 Pred->isLayoutSuccessor(L.
getHeader()))) {
613 BestPredFreq = PredFreq;
623 (*BestPred->
pred_begin())->succ_size() == 1 &&
638 const BlockFilterSet &LoopBlockSet) {
647 BlockChain &HeaderChain = *BlockToChain[L.
getHeader()];
648 if (!LoopBlockSet.count(*HeaderChain.begin()))
652 unsigned BestExitLoopDepth = 0;
662 BlockChain &Chain = *BlockToChain[MBB];
665 if (MBB != *std::prev(Chain.end()))
674 bool HasLoopingSucc =
false;
678 uint32_t WeightScale = 0;
679 uint32_t SumWeight = MBPI->getSumForBlock(MBB, WeightScale);
681 if (Succ->isLandingPad())
685 BlockChain &SuccChain = *BlockToChain[Succ];
687 if (&Chain == &SuccChain) {
693 uint32_t SuccWeight = MBPI->getEdgeWeight(MBB, Succ);
694 if (LoopBlockSet.count(Succ)) {
697 HasLoopingSucc =
true;
701 unsigned SuccLoopDepth = 0;
702 if (
MachineLoop *ExitLoop = MLI->getLoopFor(Succ)) {
703 SuccLoopDepth = ExitLoop->getLoopDepth();
704 if (ExitLoop->contains(&L))
705 BlocksExitingToOuterLoop.
insert(MBB);
711 <<
getBlockName(Succ) <<
" [L:" << SuccLoopDepth <<
"] (";
712 MBFI->printBlockFreq(
dbgs(), ExitEdgeFreq) <<
")\n");
718 if (!ExitingBB || SuccLoopDepth > BestExitLoopDepth ||
719 ExitEdgeFreq > BestExitEdgeFreq ||
720 (MBB->isLayoutSuccessor(Succ) &&
721 !(ExitEdgeFreq < BestExitEdgeFreq * Bias))) {
722 BestExitEdgeFreq = ExitEdgeFreq;
727 if (!HasLoopingSucc) {
729 ExitingBB = OldExitingBB;
730 BestExitEdgeFreq = OldBestExitEdgeFreq;
742 if (!BlocksExitingToOuterLoop.
empty() &&
743 !BlocksExitingToOuterLoop.
count(ExitingBB))
756 void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
758 const BlockFilterSet &LoopBlockSet) {
763 bool ViableTopFallthrough =
false;
765 BlockChain *PredChain = BlockToChain[Pred];
766 if (!LoopBlockSet.count(Pred) &&
767 (!PredChain || Pred == *std::prev(PredChain->end()))) {
768 ViableTopFallthrough =
true;
776 if (ViableTopFallthrough) {
779 BlockChain *SuccChain = BlockToChain[Succ];
780 if (!LoopBlockSet.count(Succ) &&
781 (!SuccChain || Succ == *SuccChain->begin()))
787 std::find(LoopChain.begin(), LoopChain.end(), ExitingBB);
788 if (ExitIt == LoopChain.end())
791 std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end());
805 buildLoopChains(F, *InnerLoop);
808 BlockFilterSet LoopBlockSet(L.block_begin(), L.block_end());
820 if (LoopTop == L.getHeader())
821 ExitingBB = findBestLoopExit(F, L, LoopBlockSet);
823 BlockChain &LoopChain = *BlockToChain[LoopTop];
829 assert(LoopChain.LoopPredecessors == 0);
830 UpdatedPreds.
insert(&LoopChain);
832 BlockChain &Chain = *BlockToChain[LoopBB];
833 if (!UpdatedPreds.
insert(&Chain).second)
836 assert(Chain.LoopPredecessors == 0);
838 assert(BlockToChain[ChainBB] == &Chain);
840 if (BlockToChain[Pred] == &Chain || !LoopBlockSet.count(Pred))
842 ++Chain.LoopPredecessors;
846 if (Chain.LoopPredecessors == 0)
850 buildChain(LoopTop, LoopChain, BlockWorkList, &LoopBlockSet);
851 rotateLoop(LoopChain, ExitingBB, LoopBlockSet);
855 bool BadLoop =
false;
856 if (LoopChain.LoopPredecessors) {
858 dbgs() <<
"Loop chain contains a block without its preds placed!\n"
859 <<
" Loop header: " <<
getBlockName(*L.block_begin()) <<
"\n"
860 <<
" Chain header: " <<
getBlockName(*LoopChain.begin()) <<
"\n";
864 if (!LoopBlockSet.erase(ChainBB)) {
868 dbgs() <<
"Loop chain contains a block not contained by the loop!\n"
869 <<
" Loop header: " <<
getBlockName(*L.block_begin()) <<
"\n"
870 <<
" Chain header: " <<
getBlockName(*LoopChain.begin()) <<
"\n"
875 if (!LoopBlockSet.empty()) {
878 dbgs() <<
"Loop contains blocks never placed into a chain!\n"
879 <<
" Loop header: " <<
getBlockName(*L.block_begin()) <<
"\n"
880 <<
" Chain header: " <<
getBlockName(*LoopChain.begin()) <<
"\n"
883 assert(!BadLoop &&
"Detected problems with the placement of this loop.");
894 new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB);
907 assert(NextFI != FE &&
"Can't fallthrough past the last block.");
908 DEBUG(
dbgs() <<
"Pre-merging due to unanalyzable fallthrough: "
911 Chain->merge(NextBB,
nullptr);
921 if (MBB.succ_size() == 0) {
922 if (Terminator ==
nullptr)
925 Terminator = MDT->findNearestCommonDominator(Terminator, &MBB);
930 UnavoidableBlocks.
clear();
932 if (MDT->dominates(&MBB, Terminator)) {
933 UnavoidableBlocks.insert(&MBB);
940 buildLoopChains(F, *L);
946 BlockChain &Chain = *BlockToChain[&MBB];
947 if (!UpdatedPreds.
insert(&Chain).second)
950 assert(Chain.LoopPredecessors == 0);
952 assert(BlockToChain[ChainBB] == &Chain);
954 if (BlockToChain[Pred] == &Chain)
956 ++Chain.LoopPredecessors;
960 if (Chain.LoopPredecessors == 0)
964 BlockChain &FunctionChain = *BlockToChain[&F.front()];
965 buildChain(&F.front(), FunctionChain, BlockWorkList);
972 bool BadFunc =
false;
973 FunctionBlockSetType FunctionBlockSet;
975 FunctionBlockSet.insert(&MBB);
978 if (!FunctionBlockSet.erase(ChainBB)) {
980 dbgs() <<
"Function chain contains a block not in the function!\n"
984 if (!FunctionBlockSet.empty()) {
987 dbgs() <<
"Function contains blocks never placed into a chain!\n"
990 assert(!BadFunc &&
"Detected problems with the block placement.");
996 DEBUG(
dbgs() << (ChainBB == *FunctionChain.begin() ?
"Placing chain "
1000 F.splice(InsertPos, ChainBB);
1005 if (ChainBB == *FunctionChain.begin())
1025 bool needUpdateBr =
true;
1026 if (!Cond.
empty() && (!FBB || FBB == ChainBB)) {
1028 needUpdateBr =
false;
1030 TBB = FBB =
nullptr;
1033 TBB = FBB =
nullptr;
1039 if (TBB && !Cond.
empty() && FBB &&
1040 MBPI->getEdgeWeight(PrevBB, FBB) > MBPI->getEdgeWeight(PrevBB, TBB) &&
1042 DEBUG(
dbgs() <<
"Reverse order of the two branches: "
1044 DEBUG(
dbgs() <<
" Edge weight: " << MBPI->getEdgeWeight(PrevBB, FBB)
1045 <<
" vs " << MBPI->getEdgeWeight(PrevBB, TBB) <<
"\n");
1049 needUpdateBr =
true;
1060 F.
back().updateTerminator();
1067 if (F.getFunction()->hasFnAttribute(Attribute::OptimizeForSize))
1069 if (FunctionChain.begin() == FunctionChain.end())
1076 if (ChainBB == *FunctionChain.begin())
1087 unsigned Align = TLI->getPrefLoopAlignment(L);
1094 if (Freq < WeightedEntryFreq)
1101 if (Freq < (LoopHeaderFreq * ColdProb))
1112 ChainBB->setAlignment(Align);
1121 MBPI->getEdgeProbability(LayoutPred, ChainBB);
1122 BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb;
1123 if (LayoutEdgeFreq <= (Freq * ColdProb))
1124 ChainBB->setAlignment(Align);
1128 bool MachineBlockPlacement::runOnMachineFunction(
MachineFunction &F) {
1130 if (std::next(F.
begin()) == F.
end())
1136 MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
1137 MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
1138 MLI = &getAnalysis<MachineLoopInfo>();
1141 MDT = &getAnalysis<MachineDominatorTree>();
1142 assert(BlockToChain.empty());
1146 BlockToChain.clear();
1147 ChainAllocator.DestroyAll();
1193 "Basic Block Placement Stats",
false,
false)
1197 "Basic Block Placement Stats", false, false)
1199 bool MachineBlockPlacementStats::runOnMachineFunction(
MachineFunction &F) {
1201 if (std::next(F.begin()) == F.end())
1204 MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
1205 MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
1210 (MBB.succ_size() > 1) ? NumCondBranches : NumUncondBranches;
1212 (MBB.succ_size() > 1) ? CondBranchTakenFreq : UncondBranchTakenFreq;
1215 if (MBB.isLayoutSuccessor(Succ))
1219 BlockFreq * MBPI->getEdgeProbability(&MBB, Succ);
static cl::opt< unsigned > AlignAllBlock("align-all-blocks", cl::desc("Force the alignment of all ""blocks in the function."), cl::init(0), cl::Hidden)
void push_back(const T &Elt)
const MachineFunction * getParent() const
getParent - Return the MachineFunction containing this basic block.
const_iterator end(StringRef path)
Get end iterator over path.
bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
bool LLVM_ATTRIBUTE_UNUSED_RESULT empty() const
STATISTIC(NumFunctions,"Total number of functions")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds...
int getNumber() const
getNumber - MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a M...
size_type count(PtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void initializeMachineBlockPlacementStatsPass(PassRegistry &)
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
const_iterator begin(StringRef path)
Get begin iterator over path.
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
INITIALIZE_PASS_BEGIN(MachineBlockPlacement,"block-placement","Branch Probability Basic Block Placement", false, false) INITIALIZE_PASS_END(MachineBlockPlacement
static std::string getBlockNum(MachineBasicBlock *BB)
Helper to print the number of a MBB.
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
const std::vector< BlockT * > & getBlocks() const
getBlocks - Get a list of the basic blocks which make up this loop.
char & MachineBlockPlacementStatsID
MachineBlockPlacementStats - This pass collects statistics about the basic block placement using bran...
BlockT * getHeader() const
This file defines the MallocAllocator and BumpPtrAllocator interfaces.
iterator_range< succ_iterator > successors()
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
block placement Basic Block Placement Stats
bool LLVM_ATTRIBUTE_UNUSED_RESULT empty() const
uint64_t rotate(uint64_t val, size_t shift)
Bitwise right rotate.
TargetInstrInfo - Interface to description of machine instruction set.
initializer< Ty > init(const Ty &Val)
friend const_iterator end(StringRef path)
Get end iterator over path.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
static cl::opt< unsigned > OutlineOptionalThreshold("outline-optional-threshold", cl::desc("Don't outline optional branches that are a single block with an ""instruction count below this threshold"), cl::init(4), cl::Hidden)
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
unsigned RemoveBranch(MachineBasicBlock &MBB) const override
bool ReverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Represent the analysis usage information of a pass.
iterator_range< pred_iterator > predecessors()
iterator erase(iterator I)
pred_iterator pred_begin()
void initializeMachineBlockPlacementPass(PassRegistry &)
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
block Branch Probability Basic Block Placement
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
friend const_iterator begin(StringRef path)
Get begin iterator over path.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
virtual const TargetLowering * getTargetLowering() const
void updateTerminator()
updateTerminator - Update the terminator instructions in block to account for changes to the layout...
bool isSuccessor(const MachineBasicBlock *MBB) const
isSuccessor - Return true if the specified MBB is a successor of this block.
block Branch Probability Basic Block static false std::string getBlockName(MachineBasicBlock *BB)
Helper to print the name of a MBB.
A BumpPtrAllocator that allows only elements of a specific type to be allocated.
static cl::opt< AlignMode > Align(cl::desc("Load/store alignment support"), cl::Hidden, cl::init(NoStrictAlign), cl::values(clEnumValN(StrictAlign,"aarch64-strict-align","Disallow all unaligned memory accesses"), clEnumValN(NoStrictAlign,"aarch64-no-strict-align","Allow unaligned memory accesses"), clEnumValEnd))
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
StringRef getName() const
getName - Return the name of the corresponding LLVM basic block, or "(null)".
void setPreservesAll()
Set by analyses that do not transform their input at all.
unsigned getNumBlocks() const
getNumBlocks - Get the number of blocks in this loop in constant time.
char & MachineBlockPlacementID
MachineBlockPlacement - This pass places basic blocks based on branch probabilities.
unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, DebugLoc DL) const override
static cl::opt< unsigned > ExitBlockBias("block-placement-exit-block-bias", cl::desc("Block frequency percentage a loop exit block needs ""over the original exit to be considered the new exit."), cl::init(0), cl::Hidden)
A raw_ostream that writes to an std::string.
virtual const TargetInstrInfo * getInstrInfo() const
block Branch Probability Basic Block false
BasicBlockListType::iterator iterator
static cl::opt< bool > OutlineOptionalBranches("outline-optional-branches", cl::desc("Put completely optional branches, i.e. branches with a common ""post dominator, out of line."), cl::init(false), cl::Hidden)
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
unsigned pred_size() const
This file describes how to lower LLVM code to machine code.