LLVM  3.7.0
MachineBlockPlacement.cpp
Go to the documentation of this file.
1 //===-- MachineBlockPlacement.cpp - Basic Block Code Layout optimization --===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements basic block placement transformations using the CFG
11 // structure and branch probability estimates.
12 //
13 // The pass strives to preserve the structure of the CFG (that is, retain
14 // a topological ordering of basic blocks) in the absence of a *strong* signal
15 // to the contrary from probabilities. However, within the CFG structure, it
16 // attempts to choose an ordering which favors placing more likely sequences of
17 // blocks adjacent to each other.
18 //
19 // The algorithm works from the inner-most loop within a function outward, and
20 // at each stage walks through the basic blocks, trying to coalesce them into
21 // sequential chains where allowed by the CFG (or demanded by heavy
22 // probabilities). Finally, it walks the blocks in topological order, and the
23 // first time it reaches a chain of basic blocks, it schedules them in the
24 // function in-order.
25 //
26 //===----------------------------------------------------------------------===//
27 
28 #include "llvm/CodeGen/Passes.h"
29 #include "llvm/ADT/DenseMap.h"
30 #include "llvm/ADT/SmallPtrSet.h"
31 #include "llvm/ADT/SmallVector.h"
32 #include "llvm/ADT/Statistic.h"
41 #include "llvm/Support/Allocator.h"
43 #include "llvm/Support/Debug.h"
48 #include <algorithm>
49 using namespace llvm;
50 
51 #define DEBUG_TYPE "block-placement"
52 
53 STATISTIC(NumCondBranches, "Number of conditional branches");
54 STATISTIC(NumUncondBranches, "Number of uncondittional branches");
55 STATISTIC(CondBranchTakenFreq,
56  "Potential frequency of taking conditional branches");
57 STATISTIC(UncondBranchTakenFreq,
58  "Potential frequency of taking unconditional branches");
59 
60 static cl::opt<unsigned> AlignAllBlock("align-all-blocks",
61  cl::desc("Force the alignment of all "
62  "blocks in the function."),
63  cl::init(0), cl::Hidden);
64 
65 // FIXME: Find a good default for this flag and remove the flag.
67  "block-placement-exit-block-bias",
68  cl::desc("Block frequency percentage a loop exit block needs "
69  "over the original exit to be considered the new exit."),
70  cl::init(0), cl::Hidden);
71 
73  "outline-optional-branches",
74  cl::desc("Put completely optional branches, i.e. branches with a common "
75  "post dominator, out of line."),
76  cl::init(false), cl::Hidden);
77 
79  "outline-optional-threshold",
80  cl::desc("Don't outline optional branches that are a single block with an "
81  "instruction count below this threshold"),
82  cl::init(4), cl::Hidden);
83 
84 namespace {
85 class BlockChain;
86 /// \brief Type for our function-wide basic block -> block chain mapping.
87 typedef DenseMap<MachineBasicBlock *, BlockChain *> BlockToChainMapType;
88 }
89 
90 namespace {
91 /// \brief A chain of blocks which will be laid out contiguously.
92 ///
93 /// This is the datastructure representing a chain of consecutive blocks that
94 /// are profitable to layout together in order to maximize fallthrough
95 /// probabilities and code locality. We also can use a block chain to represent
96 /// a sequence of basic blocks which have some external (correctness)
97 /// requirement for sequential layout.
98 ///
99 /// Chains can be built around a single basic block and can be merged to grow
100 /// them. They participate in a block-to-chain mapping, which is updated
101 /// automatically as chains are merged together.
102 class BlockChain {
103  /// \brief The sequence of blocks belonging to this chain.
104  ///
105  /// This is the sequence of blocks for a particular chain. These will be laid
106  /// out in-order within the function.
108 
109  /// \brief A handle to the function-wide basic block to block chain mapping.
110  ///
111  /// This is retained in each block chain to simplify the computation of child
112  /// block chains for SCC-formation and iteration. We store the edges to child
113  /// basic blocks, and map them back to their associated chains using this
114  /// structure.
115  BlockToChainMapType &BlockToChain;
116 
117 public:
118  /// \brief Construct a new BlockChain.
119  ///
120  /// This builds a new block chain representing a single basic block in the
121  /// function. It also registers itself as the chain that block participates
122  /// in with the BlockToChain mapping.
123  BlockChain(BlockToChainMapType &BlockToChain, MachineBasicBlock *BB)
124  : Blocks(1, BB), BlockToChain(BlockToChain), LoopPredecessors(0) {
125  assert(BB && "Cannot create a chain with a null basic block");
126  BlockToChain[BB] = this;
127  }
128 
129  /// \brief Iterator over blocks within the chain.
131 
132  /// \brief Beginning of blocks within the chain.
133  iterator begin() { return Blocks.begin(); }
134 
135  /// \brief End of blocks within the chain.
136  iterator end() { return Blocks.end(); }
137 
138  /// \brief Merge a block chain into this one.
139  ///
140  /// This routine merges a block chain into this one. It takes care of forming
141  /// a contiguous sequence of basic blocks, updating the edge list, and
142  /// updating the block -> chain mapping. It does not free or tear down the
143  /// old chain, but the old chain's block list is no longer valid.
144  void merge(MachineBasicBlock *BB, BlockChain *Chain) {
145  assert(BB);
146  assert(!Blocks.empty());
147 
148  // Fast path in case we don't have a chain already.
149  if (!Chain) {
150  assert(!BlockToChain[BB]);
151  Blocks.push_back(BB);
152  BlockToChain[BB] = this;
153  return;
154  }
155 
156  assert(BB == *Chain->begin());
157  assert(Chain->begin() != Chain->end());
158 
159  // Update the incoming blocks to point to this chain, and add them to the
160  // chain structure.
161  for (MachineBasicBlock *ChainBB : *Chain) {
162  Blocks.push_back(ChainBB);
163  assert(BlockToChain[ChainBB] == Chain && "Incoming blocks not in chain");
164  BlockToChain[ChainBB] = this;
165  }
166  }
167 
168 #ifndef NDEBUG
169  /// \brief Dump the blocks in this chain.
170  LLVM_DUMP_METHOD void dump() {
171  for (MachineBasicBlock *MBB : *this)
172  MBB->dump();
173  }
174 #endif // NDEBUG
175 
176  /// \brief Count of predecessors within the loop currently being processed.
177  ///
178  /// This count is updated at each loop we process to represent the number of
179  /// in-loop predecessors of this chain.
180  unsigned LoopPredecessors;
181 };
182 }
183 
184 namespace {
185 class MachineBlockPlacement : public MachineFunctionPass {
186  /// \brief A typedef for a block filter set.
187  typedef SmallPtrSet<MachineBasicBlock *, 16> BlockFilterSet;
188 
189  /// \brief A handle to the branch probability pass.
190  const MachineBranchProbabilityInfo *MBPI;
191 
192  /// \brief A handle to the function-wide block frequency pass.
193  const MachineBlockFrequencyInfo *MBFI;
194 
195  /// \brief A handle to the loop info.
196  const MachineLoopInfo *MLI;
197 
198  /// \brief A handle to the target's instruction info.
199  const TargetInstrInfo *TII;
200 
201  /// \brief A handle to the target's lowering info.
202  const TargetLoweringBase *TLI;
203 
204  /// \brief A handle to the post dominator tree.
206 
207  /// \brief A set of blocks that are unavoidably executed, i.e. they dominate
208  /// all terminators of the MachineFunction.
209  SmallPtrSet<MachineBasicBlock *, 4> UnavoidableBlocks;
210 
211  /// \brief Allocator and owner of BlockChain structures.
212  ///
213  /// We build BlockChains lazily while processing the loop structure of
214  /// a function. To reduce malloc traffic, we allocate them using this
215  /// slab-like allocator, and destroy them after the pass completes. An
216  /// important guarantee is that this allocator produces stable pointers to
217  /// the chains.
219 
220  /// \brief Function wide BasicBlock to BlockChain mapping.
221  ///
222  /// This mapping allows efficiently moving from any given basic block to the
223  /// BlockChain it participates in, if any. We use it to, among other things,
224  /// allow implicitly defining edges between chains as the existing edges
225  /// between basic blocks.
227 
228  void markChainSuccessors(BlockChain &Chain, MachineBasicBlock *LoopHeaderBB,
230  const BlockFilterSet *BlockFilter = nullptr);
231  MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB,
232  BlockChain &Chain,
233  const BlockFilterSet *BlockFilter);
235  selectBestCandidateBlock(BlockChain &Chain,
237  const BlockFilterSet *BlockFilter);
239  getFirstUnplacedBlock(MachineFunction &F, const BlockChain &PlacedChain,
240  MachineFunction::iterator &PrevUnplacedBlockIt,
241  const BlockFilterSet *BlockFilter);
242  void buildChain(MachineBasicBlock *BB, BlockChain &Chain,
244  const BlockFilterSet *BlockFilter = nullptr);
245  MachineBasicBlock *findBestLoopTop(MachineLoop &L,
246  const BlockFilterSet &LoopBlockSet);
247  MachineBasicBlock *findBestLoopExit(MachineFunction &F, MachineLoop &L,
248  const BlockFilterSet &LoopBlockSet);
249  void buildLoopChains(MachineFunction &F, MachineLoop &L);
250  void rotateLoop(BlockChain &LoopChain, MachineBasicBlock *ExitingBB,
251  const BlockFilterSet &LoopBlockSet);
252  void buildCFGChains(MachineFunction &F);
253 
254 public:
255  static char ID; // Pass identification, replacement for typeid
256  MachineBlockPlacement() : MachineFunctionPass(ID) {
258  }
259 
260  bool runOnMachineFunction(MachineFunction &F) override;
261 
262  void getAnalysisUsage(AnalysisUsage &AU) const override {
268  }
269 };
270 }
271 
274 INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement",
275  "Branch Probability Basic Block Placement", false, false)
280 INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement",
281  "Branch Probability Basic Block Placement", false, false)
282 
283 #ifndef NDEBUG
284 /// \brief Helper to print the name of a MBB.
285 ///
286 /// Only used by debug logging.
287 static std::string getBlockName(MachineBasicBlock *BB) {
288  std::string Result;
289  raw_string_ostream OS(Result);
290  OS << "BB#" << BB->getNumber();
291  OS << " (derived from LLVM BB '" << BB->getName() << "')";
292  OS.flush();
293  return Result;
294 }
295 
296 /// \brief Helper to print the number of a MBB.
297 ///
298 /// Only used by debug logging.
299 static std::string getBlockNum(MachineBasicBlock *BB) {
300  std::string Result;
301  raw_string_ostream OS(Result);
302  OS << "BB#" << BB->getNumber();
303  OS.flush();
304  return Result;
305 }
306 #endif
307 
308 /// \brief Mark a chain's successors as having one fewer preds.
309 ///
310 /// When a chain is being merged into the "placed" chain, this routine will
311 /// quickly walk the successors of each block in the chain and mark them as
312 /// having one fewer active predecessor. It also adds any successors of this
313 /// chain which reach the zero-predecessor state to the worklist passed in.
314 void MachineBlockPlacement::markChainSuccessors(
315  BlockChain &Chain, MachineBasicBlock *LoopHeaderBB,
317  const BlockFilterSet *BlockFilter) {
318  // Walk all the blocks in this chain, marking their successors as having
319  // a predecessor placed.
320  for (MachineBasicBlock *MBB : Chain) {
321  // Add any successors for which this is the only un-placed in-loop
322  // predecessor to the worklist as a viable candidate for CFG-neutral
323  // placement. No subsequent placement of this block will violate the CFG
324  // shape, so we get to use heuristics to choose a favorable placement.
325  for (MachineBasicBlock *Succ : MBB->successors()) {
326  if (BlockFilter && !BlockFilter->count(Succ))
327  continue;
328  BlockChain &SuccChain = *BlockToChain[Succ];
329  // Disregard edges within a fixed chain, or edges to the loop header.
330  if (&Chain == &SuccChain || Succ == LoopHeaderBB)
331  continue;
332 
333  // This is a cross-chain edge that is within the loop, so decrement the
334  // loop predecessor count of the destination chain.
335  if (SuccChain.LoopPredecessors > 0 && --SuccChain.LoopPredecessors == 0)
336  BlockWorkList.push_back(*SuccChain.begin());
337  }
338  }
339 }
340 
341 /// \brief Select the best successor for a block.
342 ///
343 /// This looks across all successors of a particular block and attempts to
344 /// select the "best" one to be the layout successor. It only considers direct
345 /// successors which also pass the block filter. It will attempt to avoid
346 /// breaking CFG structure, but cave and break such structures in the case of
347 /// very hot successor edges.
348 ///
349 /// \returns The best successor block found, or null if none are viable.
351 MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
352  BlockChain &Chain,
353  const BlockFilterSet *BlockFilter) {
354  const BranchProbability HotProb(4, 5); // 80%
355 
356  MachineBasicBlock *BestSucc = nullptr;
357  // FIXME: Due to the performance of the probability and weight routines in
358  // the MBPI analysis, we manually compute probabilities using the edge
359  // weights. This is suboptimal as it means that the somewhat subtle
360  // definition of edge weight semantics is encoded here as well. We should
361  // improve the MBPI interface to efficiently support query patterns such as
362  // this.
363  uint32_t BestWeight = 0;
364  uint32_t WeightScale = 0;
365  uint32_t SumWeight = MBPI->getSumForBlock(BB, WeightScale);
366  DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n");
367  for (MachineBasicBlock *Succ : BB->successors()) {
368  if (BlockFilter && !BlockFilter->count(Succ))
369  continue;
370  BlockChain &SuccChain = *BlockToChain[Succ];
371  if (&SuccChain == &Chain) {
372  DEBUG(dbgs() << " " << getBlockName(Succ) << " -> Already merged!\n");
373  continue;
374  }
375  if (Succ != *SuccChain.begin()) {
376  DEBUG(dbgs() << " " << getBlockName(Succ) << " -> Mid chain!\n");
377  continue;
378  }
379 
380  uint32_t SuccWeight = MBPI->getEdgeWeight(BB, Succ);
381  BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
382 
383  // If we outline optional branches, look whether Succ is unavoidable, i.e.
384  // dominates all terminators of the MachineFunction. If it does, other
385  // successors must be optional. Don't do this for cold branches.
386  if (OutlineOptionalBranches && SuccProb > HotProb.getCompl() &&
387  UnavoidableBlocks.count(Succ) > 0) {
388  auto HasShortOptionalBranch = [&]() {
389  for (MachineBasicBlock *Pred : Succ->predecessors()) {
390  // Check whether there is an unplaced optional branch.
391  if (Pred == Succ || (BlockFilter && !BlockFilter->count(Pred)) ||
392  BlockToChain[Pred] == &Chain)
393  continue;
394  // Check whether the optional branch has exactly one BB.
395  if (Pred->pred_size() > 1 || *Pred->pred_begin() != BB)
396  continue;
397  // Check whether the optional branch is small.
398  if (Pred->size() < OutlineOptionalThreshold)
399  return true;
400  }
401  return false;
402  };
403  if (!HasShortOptionalBranch())
404  return Succ;
405  }
406 
407  // Only consider successors which are either "hot", or wouldn't violate
408  // any CFG constraints.
409  if (SuccChain.LoopPredecessors != 0) {
410  if (SuccProb < HotProb) {
411  DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb
412  << " (prob) (CFG conflict)\n");
413  continue;
414  }
415 
416  // Make sure that a hot successor doesn't have a globally more
417  // important predecessor.
418  BlockFrequency CandidateEdgeFreq =
419  MBFI->getBlockFreq(BB) * SuccProb * HotProb.getCompl();
420  bool BadCFGConflict = false;
421  for (MachineBasicBlock *Pred : Succ->predecessors()) {
422  if (Pred == Succ || (BlockFilter && !BlockFilter->count(Pred)) ||
423  BlockToChain[Pred] == &Chain)
424  continue;
425  BlockFrequency PredEdgeFreq =
426  MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, Succ);
427  if (PredEdgeFreq >= CandidateEdgeFreq) {
428  BadCFGConflict = true;
429  break;
430  }
431  }
432  if (BadCFGConflict) {
433  DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb
434  << " (prob) (non-cold CFG conflict)\n");
435  continue;
436  }
437  }
438 
439  DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb
440  << " (prob)"
441  << (SuccChain.LoopPredecessors != 0 ? " (CFG break)" : "")
442  << "\n");
443  if (BestSucc && BestWeight >= SuccWeight)
444  continue;
445  BestSucc = Succ;
446  BestWeight = SuccWeight;
447  }
448  return BestSucc;
449 }
450 
451 /// \brief Select the best block from a worklist.
452 ///
453 /// This looks through the provided worklist as a list of candidate basic
454 /// blocks and select the most profitable one to place. The definition of
455 /// profitable only really makes sense in the context of a loop. This returns
456 /// the most frequently visited block in the worklist, which in the case of
457 /// a loop, is the one most desirable to be physically close to the rest of the
458 /// loop body in order to improve icache behavior.
459 ///
460 /// \returns The best block found, or null if none are viable.
461 MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
462  BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList,
463  const BlockFilterSet *BlockFilter) {
464  // Once we need to walk the worklist looking for a candidate, cleanup the
465  // worklist of already placed entries.
466  // FIXME: If this shows up on profiles, it could be folded (at the cost of
467  // some code complexity) into the loop below.
468  WorkList.erase(std::remove_if(WorkList.begin(), WorkList.end(),
469  [&](MachineBasicBlock *BB) {
470  return BlockToChain.lookup(BB) == &Chain;
471  }),
472  WorkList.end());
473 
474  MachineBasicBlock *BestBlock = nullptr;
475  BlockFrequency BestFreq;
476  for (MachineBasicBlock *MBB : WorkList) {
477  BlockChain &SuccChain = *BlockToChain[MBB];
478  if (&SuccChain == &Chain) {
479  DEBUG(dbgs() << " " << getBlockName(MBB) << " -> Already merged!\n");
480  continue;
481  }
482  assert(SuccChain.LoopPredecessors == 0 && "Found CFG-violating block");
483 
484  BlockFrequency CandidateFreq = MBFI->getBlockFreq(MBB);
485  DEBUG(dbgs() << " " << getBlockName(MBB) << " -> ";
486  MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n");
487  if (BestBlock && BestFreq >= CandidateFreq)
488  continue;
489  BestBlock = MBB;
490  BestFreq = CandidateFreq;
491  }
492  return BestBlock;
493 }
494 
495 /// \brief Retrieve the first unplaced basic block.
496 ///
497 /// This routine is called when we are unable to use the CFG to walk through
498 /// all of the basic blocks and form a chain due to unnatural loops in the CFG.
499 /// We walk through the function's blocks in order, starting from the
500 /// LastUnplacedBlockIt. We update this iterator on each call to avoid
501 /// re-scanning the entire sequence on repeated calls to this routine.
502 MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
503  MachineFunction &F, const BlockChain &PlacedChain,
504  MachineFunction::iterator &PrevUnplacedBlockIt,
505  const BlockFilterSet *BlockFilter) {
506  for (MachineFunction::iterator I = PrevUnplacedBlockIt, E = F.end(); I != E;
507  ++I) {
508  if (BlockFilter && !BlockFilter->count(I))
509  continue;
510  if (BlockToChain[I] != &PlacedChain) {
511  PrevUnplacedBlockIt = I;
512  // Now select the head of the chain to which the unplaced block belongs
513  // as the block to place. This will force the entire chain to be placed,
514  // and satisfies the requirements of merging chains.
515  return *BlockToChain[I]->begin();
516  }
517  }
518  return nullptr;
519 }
520 
521 void MachineBlockPlacement::buildChain(
522  MachineBasicBlock *BB, BlockChain &Chain,
524  const BlockFilterSet *BlockFilter) {
525  assert(BB);
526  assert(BlockToChain[BB] == &Chain);
527  MachineFunction &F = *BB->getParent();
528  MachineFunction::iterator PrevUnplacedBlockIt = F.begin();
529 
530  MachineBasicBlock *LoopHeaderBB = BB;
531  markChainSuccessors(Chain, LoopHeaderBB, BlockWorkList, BlockFilter);
532  BB = *std::prev(Chain.end());
533  for (;;) {
534  assert(BB);
535  assert(BlockToChain[BB] == &Chain);
536  assert(*std::prev(Chain.end()) == BB);
537 
538  // Look for the best viable successor if there is one to place immediately
539  // after this block.
540  MachineBasicBlock *BestSucc = selectBestSuccessor(BB, Chain, BlockFilter);
541 
542  // If an immediate successor isn't available, look for the best viable
543  // block among those we've identified as not violating the loop's CFG at
544  // this point. This won't be a fallthrough, but it will increase locality.
545  if (!BestSucc)
546  BestSucc = selectBestCandidateBlock(Chain, BlockWorkList, BlockFilter);
547 
548  if (!BestSucc) {
549  BestSucc =
550  getFirstUnplacedBlock(F, Chain, PrevUnplacedBlockIt, BlockFilter);
551  if (!BestSucc)
552  break;
553 
554  DEBUG(dbgs() << "Unnatural loop CFG detected, forcibly merging the "
555  "layout successor until the CFG reduces\n");
556  }
557 
558  // Place this block, updating the datastructures to reflect its placement.
559  BlockChain &SuccChain = *BlockToChain[BestSucc];
560  // Zero out LoopPredecessors for the successor we're about to merge in case
561  // we selected a successor that didn't fit naturally into the CFG.
562  SuccChain.LoopPredecessors = 0;
563  DEBUG(dbgs() << "Merging from " << getBlockNum(BB) << " to "
564  << getBlockNum(BestSucc) << "\n");
565  markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, BlockFilter);
566  Chain.merge(BestSucc, &SuccChain);
567  BB = *std::prev(Chain.end());
568  }
569 
570  DEBUG(dbgs() << "Finished forming chain for header block "
571  << getBlockNum(*Chain.begin()) << "\n");
572 }
573 
574 /// \brief Find the best loop top block for layout.
575 ///
576 /// Look for a block which is strictly better than the loop header for laying
577 /// out at the top of the loop. This looks for one and only one pattern:
578 /// a latch block with no conditional exit. This block will cause a conditional
579 /// jump around it or will be the bottom of the loop if we lay it out in place,
580 /// but if it it doesn't end up at the bottom of the loop for any reason,
581 /// rotation alone won't fix it. Because such a block will always result in an
582 /// unconditional jump (for the backedge) rotating it in front of the loop
583 /// header is always profitable.
585 MachineBlockPlacement::findBestLoopTop(MachineLoop &L,
586  const BlockFilterSet &LoopBlockSet) {
587  // Check that the header hasn't been fused with a preheader block due to
588  // crazy branches. If it has, we need to start with the header at the top to
589  // prevent pulling the preheader into the loop body.
590  BlockChain &HeaderChain = *BlockToChain[L.getHeader()];
591  if (!LoopBlockSet.count(*HeaderChain.begin()))
592  return L.getHeader();
593 
594  DEBUG(dbgs() << "Finding best loop top for: " << getBlockName(L.getHeader())
595  << "\n");
596 
597  BlockFrequency BestPredFreq;
598  MachineBasicBlock *BestPred = nullptr;
599  for (MachineBasicBlock *Pred : L.getHeader()->predecessors()) {
600  if (!LoopBlockSet.count(Pred))
601  continue;
602  DEBUG(dbgs() << " header pred: " << getBlockName(Pred) << ", "
603  << Pred->succ_size() << " successors, ";
604  MBFI->printBlockFreq(dbgs(), Pred) << " freq\n");
605  if (Pred->succ_size() > 1)
606  continue;
607 
608  BlockFrequency PredFreq = MBFI->getBlockFreq(Pred);
609  if (!BestPred || PredFreq > BestPredFreq ||
610  (!(PredFreq < BestPredFreq) &&
611  Pred->isLayoutSuccessor(L.getHeader()))) {
612  BestPred = Pred;
613  BestPredFreq = PredFreq;
614  }
615  }
616 
617  // If no direct predecessor is fine, just use the loop header.
618  if (!BestPred)
619  return L.getHeader();
620 
621  // Walk backwards through any straight line of predecessors.
622  while (BestPred->pred_size() == 1 &&
623  (*BestPred->pred_begin())->succ_size() == 1 &&
624  *BestPred->pred_begin() != L.getHeader())
625  BestPred = *BestPred->pred_begin();
626 
627  DEBUG(dbgs() << " final top: " << getBlockName(BestPred) << "\n");
628  return BestPred;
629 }
630 
631 /// \brief Find the best loop exiting block for layout.
632 ///
633 /// This routine implements the logic to analyze the loop looking for the best
634 /// block to layout at the top of the loop. Typically this is done to maximize
635 /// fallthrough opportunities.
637 MachineBlockPlacement::findBestLoopExit(MachineFunction &F, MachineLoop &L,
638  const BlockFilterSet &LoopBlockSet) {
639  // We don't want to layout the loop linearly in all cases. If the loop header
640  // is just a normal basic block in the loop, we want to look for what block
641  // within the loop is the best one to layout at the top. However, if the loop
642  // header has be pre-merged into a chain due to predecessors not having
643  // analyzable branches, *and* the predecessor it is merged with is *not* part
644  // of the loop, rotating the header into the middle of the loop will create
645  // a non-contiguous range of blocks which is Very Bad. So start with the
646  // header and only rotate if safe.
647  BlockChain &HeaderChain = *BlockToChain[L.getHeader()];
648  if (!LoopBlockSet.count(*HeaderChain.begin()))
649  return nullptr;
650 
651  BlockFrequency BestExitEdgeFreq;
652  unsigned BestExitLoopDepth = 0;
653  MachineBasicBlock *ExitingBB = nullptr;
654  // If there are exits to outer loops, loop rotation can severely limit
655  // fallthrough opportunites unless it selects such an exit. Keep a set of
656  // blocks where rotating to exit with that block will reach an outer loop.
657  SmallPtrSet<MachineBasicBlock *, 4> BlocksExitingToOuterLoop;
658 
659  DEBUG(dbgs() << "Finding best loop exit for: " << getBlockName(L.getHeader())
660  << "\n");
661  for (MachineBasicBlock *MBB : L.getBlocks()) {
662  BlockChain &Chain = *BlockToChain[MBB];
663  // Ensure that this block is at the end of a chain; otherwise it could be
664  // mid-way through an inner loop or a successor of an unanalyzable branch.
665  if (MBB != *std::prev(Chain.end()))
666  continue;
667 
668  // Now walk the successors. We need to establish whether this has a viable
669  // exiting successor and whether it has a viable non-exiting successor.
670  // We store the old exiting state and restore it if a viable looping
671  // successor isn't found.
672  MachineBasicBlock *OldExitingBB = ExitingBB;
673  BlockFrequency OldBestExitEdgeFreq = BestExitEdgeFreq;
674  bool HasLoopingSucc = false;
675  // FIXME: Due to the performance of the probability and weight routines in
676  // the MBPI analysis, we use the internal weights and manually compute the
677  // probabilities to avoid quadratic behavior.
678  uint32_t WeightScale = 0;
679  uint32_t SumWeight = MBPI->getSumForBlock(MBB, WeightScale);
680  for (MachineBasicBlock *Succ : MBB->successors()) {
681  if (Succ->isLandingPad())
682  continue;
683  if (Succ == MBB)
684  continue;
685  BlockChain &SuccChain = *BlockToChain[Succ];
686  // Don't split chains, either this chain or the successor's chain.
687  if (&Chain == &SuccChain) {
688  DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> "
689  << getBlockName(Succ) << " (chain conflict)\n");
690  continue;
691  }
692 
693  uint32_t SuccWeight = MBPI->getEdgeWeight(MBB, Succ);
694  if (LoopBlockSet.count(Succ)) {
695  DEBUG(dbgs() << " looping: " << getBlockName(MBB) << " -> "
696  << getBlockName(Succ) << " (" << SuccWeight << ")\n");
697  HasLoopingSucc = true;
698  continue;
699  }
700 
701  unsigned SuccLoopDepth = 0;
702  if (MachineLoop *ExitLoop = MLI->getLoopFor(Succ)) {
703  SuccLoopDepth = ExitLoop->getLoopDepth();
704  if (ExitLoop->contains(&L))
705  BlocksExitingToOuterLoop.insert(MBB);
706  }
707 
708  BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
709  BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(MBB) * SuccProb;
710  DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> "
711  << getBlockName(Succ) << " [L:" << SuccLoopDepth << "] (";
712  MBFI->printBlockFreq(dbgs(), ExitEdgeFreq) << ")\n");
713  // Note that we bias this toward an existing layout successor to retain
714  // incoming order in the absence of better information. The exit must have
715  // a frequency higher than the current exit before we consider breaking
716  // the layout.
717  BranchProbability Bias(100 - ExitBlockBias, 100);
718  if (!ExitingBB || SuccLoopDepth > BestExitLoopDepth ||
719  ExitEdgeFreq > BestExitEdgeFreq ||
720  (MBB->isLayoutSuccessor(Succ) &&
721  !(ExitEdgeFreq < BestExitEdgeFreq * Bias))) {
722  BestExitEdgeFreq = ExitEdgeFreq;
723  ExitingBB = MBB;
724  }
725  }
726 
727  if (!HasLoopingSucc) {
728  // Restore the old exiting state, no viable looping successor was found.
729  ExitingBB = OldExitingBB;
730  BestExitEdgeFreq = OldBestExitEdgeFreq;
731  continue;
732  }
733  }
734  // Without a candidate exiting block or with only a single block in the
735  // loop, just use the loop header to layout the loop.
736  if (!ExitingBB || L.getNumBlocks() == 1)
737  return nullptr;
738 
739  // Also, if we have exit blocks which lead to outer loops but didn't select
740  // one of them as the exiting block we are rotating toward, disable loop
741  // rotation altogether.
742  if (!BlocksExitingToOuterLoop.empty() &&
743  !BlocksExitingToOuterLoop.count(ExitingBB))
744  return nullptr;
745 
746  DEBUG(dbgs() << " Best exiting block: " << getBlockName(ExitingBB) << "\n");
747  return ExitingBB;
748 }
749 
750 /// \brief Attempt to rotate an exiting block to the bottom of the loop.
751 ///
752 /// Once we have built a chain, try to rotate it to line up the hot exit block
753 /// with fallthrough out of the loop if doing so doesn't introduce unnecessary
754 /// branches. For example, if the loop has fallthrough into its header and out
755 /// of its bottom already, don't rotate it.
756 void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
757  MachineBasicBlock *ExitingBB,
758  const BlockFilterSet &LoopBlockSet) {
759  if (!ExitingBB)
760  return;
761 
762  MachineBasicBlock *Top = *LoopChain.begin();
763  bool ViableTopFallthrough = false;
764  for (MachineBasicBlock *Pred : Top->predecessors()) {
765  BlockChain *PredChain = BlockToChain[Pred];
766  if (!LoopBlockSet.count(Pred) &&
767  (!PredChain || Pred == *std::prev(PredChain->end()))) {
768  ViableTopFallthrough = true;
769  break;
770  }
771  }
772 
773  // If the header has viable fallthrough, check whether the current loop
774  // bottom is a viable exiting block. If so, bail out as rotating will
775  // introduce an unnecessary branch.
776  if (ViableTopFallthrough) {
777  MachineBasicBlock *Bottom = *std::prev(LoopChain.end());
778  for (MachineBasicBlock *Succ : Bottom->successors()) {
779  BlockChain *SuccChain = BlockToChain[Succ];
780  if (!LoopBlockSet.count(Succ) &&
781  (!SuccChain || Succ == *SuccChain->begin()))
782  return;
783  }
784  }
785 
786  BlockChain::iterator ExitIt =
787  std::find(LoopChain.begin(), LoopChain.end(), ExitingBB);
788  if (ExitIt == LoopChain.end())
789  return;
790 
791  std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end());
792 }
793 
794 /// \brief Forms basic block chains from the natural loop structures.
795 ///
796 /// These chains are designed to preserve the existing *structure* of the code
797 /// as much as possible. We can then stitch the chains together in a way which
798 /// both preserves the topological structure and minimizes taken conditional
799 /// branches.
800 void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
801  MachineLoop &L) {
802  // First recurse through any nested loops, building chains for those inner
803  // loops.
804  for (MachineLoop *InnerLoop : L)
805  buildLoopChains(F, *InnerLoop);
806 
808  BlockFilterSet LoopBlockSet(L.block_begin(), L.block_end());
809 
810  // First check to see if there is an obviously preferable top block for the
811  // loop. This will default to the header, but may end up as one of the
812  // predecessors to the header if there is one which will result in strictly
813  // fewer branches in the loop body.
814  MachineBasicBlock *LoopTop = findBestLoopTop(L, LoopBlockSet);
815 
816  // If we selected just the header for the loop top, look for a potentially
817  // profitable exit block in the event that rotating the loop can eliminate
818  // branches by placing an exit edge at the bottom.
819  MachineBasicBlock *ExitingBB = nullptr;
820  if (LoopTop == L.getHeader())
821  ExitingBB = findBestLoopExit(F, L, LoopBlockSet);
822 
823  BlockChain &LoopChain = *BlockToChain[LoopTop];
824 
825  // FIXME: This is a really lame way of walking the chains in the loop: we
826  // walk the blocks, and use a set to prevent visiting a particular chain
827  // twice.
828  SmallPtrSet<BlockChain *, 4> UpdatedPreds;
829  assert(LoopChain.LoopPredecessors == 0);
830  UpdatedPreds.insert(&LoopChain);
831  for (MachineBasicBlock *LoopBB : L.getBlocks()) {
832  BlockChain &Chain = *BlockToChain[LoopBB];
833  if (!UpdatedPreds.insert(&Chain).second)
834  continue;
835 
836  assert(Chain.LoopPredecessors == 0);
837  for (MachineBasicBlock *ChainBB : Chain) {
838  assert(BlockToChain[ChainBB] == &Chain);
839  for (MachineBasicBlock *Pred : ChainBB->predecessors()) {
840  if (BlockToChain[Pred] == &Chain || !LoopBlockSet.count(Pred))
841  continue;
842  ++Chain.LoopPredecessors;
843  }
844  }
845 
846  if (Chain.LoopPredecessors == 0)
847  BlockWorkList.push_back(*Chain.begin());
848  }
849 
850  buildChain(LoopTop, LoopChain, BlockWorkList, &LoopBlockSet);
851  rotateLoop(LoopChain, ExitingBB, LoopBlockSet);
852 
853  DEBUG({
854  // Crash at the end so we get all of the debugging output first.
855  bool BadLoop = false;
856  if (LoopChain.LoopPredecessors) {
857  BadLoop = true;
858  dbgs() << "Loop chain contains a block without its preds placed!\n"
859  << " Loop header: " << getBlockName(*L.block_begin()) << "\n"
860  << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n";
861  }
862  for (MachineBasicBlock *ChainBB : LoopChain) {
863  dbgs() << " ... " << getBlockName(ChainBB) << "\n";
864  if (!LoopBlockSet.erase(ChainBB)) {
865  // We don't mark the loop as bad here because there are real situations
866  // where this can occur. For example, with an unanalyzable fallthrough
867  // from a loop block to a non-loop block or vice versa.
868  dbgs() << "Loop chain contains a block not contained by the loop!\n"
869  << " Loop header: " << getBlockName(*L.block_begin()) << "\n"
870  << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
871  << " Bad block: " << getBlockName(ChainBB) << "\n";
872  }
873  }
874 
875  if (!LoopBlockSet.empty()) {
876  BadLoop = true;
877  for (MachineBasicBlock *LoopBB : LoopBlockSet)
878  dbgs() << "Loop contains blocks never placed into a chain!\n"
879  << " Loop header: " << getBlockName(*L.block_begin()) << "\n"
880  << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
881  << " Bad block: " << getBlockName(LoopBB) << "\n";
882  }
883  assert(!BadLoop && "Detected problems with the placement of this loop.");
884  });
885 }
886 
887 void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
888  // Ensure that every BB in the function has an associated chain to simplify
889  // the assumptions of the remaining algorithm.
890  SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch.
891  for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
892  MachineBasicBlock *BB = FI;
893  BlockChain *Chain =
894  new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB);
895  // Also, merge any blocks which we cannot reason about and must preserve
896  // the exact fallthrough behavior for.
897  for (;;) {
898  Cond.clear();
899  MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch.
900  if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough())
901  break;
902 
903  MachineFunction::iterator NextFI(std::next(FI));
904  MachineBasicBlock *NextBB = NextFI;
905  // Ensure that the layout successor is a viable block, as we know that
906  // fallthrough is a possibility.
907  assert(NextFI != FE && "Can't fallthrough past the last block.");
908  DEBUG(dbgs() << "Pre-merging due to unanalyzable fallthrough: "
909  << getBlockName(BB) << " -> " << getBlockName(NextBB)
910  << "\n");
911  Chain->merge(NextBB, nullptr);
912  FI = NextFI;
913  BB = NextBB;
914  }
915  }
916 
918  // Find the nearest common dominator of all of F's terminators.
919  MachineBasicBlock *Terminator = nullptr;
920  for (MachineBasicBlock &MBB : F) {
921  if (MBB.succ_size() == 0) {
922  if (Terminator == nullptr)
923  Terminator = &MBB;
924  else
925  Terminator = MDT->findNearestCommonDominator(Terminator, &MBB);
926  }
927  }
928 
929  // MBBs dominating this common dominator are unavoidable.
930  UnavoidableBlocks.clear();
931  for (MachineBasicBlock &MBB : F) {
932  if (MDT->dominates(&MBB, Terminator)) {
933  UnavoidableBlocks.insert(&MBB);
934  }
935  }
936  }
937 
938  // Build any loop-based chains.
939  for (MachineLoop *L : *MLI)
940  buildLoopChains(F, *L);
941 
943 
944  SmallPtrSet<BlockChain *, 4> UpdatedPreds;
945  for (MachineBasicBlock &MBB : F) {
946  BlockChain &Chain = *BlockToChain[&MBB];
947  if (!UpdatedPreds.insert(&Chain).second)
948  continue;
949 
950  assert(Chain.LoopPredecessors == 0);
951  for (MachineBasicBlock *ChainBB : Chain) {
952  assert(BlockToChain[ChainBB] == &Chain);
953  for (MachineBasicBlock *Pred : ChainBB->predecessors()) {
954  if (BlockToChain[Pred] == &Chain)
955  continue;
956  ++Chain.LoopPredecessors;
957  }
958  }
959 
960  if (Chain.LoopPredecessors == 0)
961  BlockWorkList.push_back(*Chain.begin());
962  }
963 
964  BlockChain &FunctionChain = *BlockToChain[&F.front()];
965  buildChain(&F.front(), FunctionChain, BlockWorkList);
966 
967 #ifndef NDEBUG
968  typedef SmallPtrSet<MachineBasicBlock *, 16> FunctionBlockSetType;
969 #endif
970  DEBUG({
971  // Crash at the end so we get all of the debugging output first.
972  bool BadFunc = false;
973  FunctionBlockSetType FunctionBlockSet;
974  for (MachineBasicBlock &MBB : F)
975  FunctionBlockSet.insert(&MBB);
976 
977  for (MachineBasicBlock *ChainBB : FunctionChain)
978  if (!FunctionBlockSet.erase(ChainBB)) {
979  BadFunc = true;
980  dbgs() << "Function chain contains a block not in the function!\n"
981  << " Bad block: " << getBlockName(ChainBB) << "\n";
982  }
983 
984  if (!FunctionBlockSet.empty()) {
985  BadFunc = true;
986  for (MachineBasicBlock *RemainingBB : FunctionBlockSet)
987  dbgs() << "Function contains blocks never placed into a chain!\n"
988  << " Bad block: " << getBlockName(RemainingBB) << "\n";
989  }
990  assert(!BadFunc && "Detected problems with the block placement.");
991  });
992 
993  // Splice the blocks into place.
994  MachineFunction::iterator InsertPos = F.begin();
995  for (MachineBasicBlock *ChainBB : FunctionChain) {
996  DEBUG(dbgs() << (ChainBB == *FunctionChain.begin() ? "Placing chain "
997  : " ... ")
998  << getBlockName(ChainBB) << "\n");
999  if (InsertPos != MachineFunction::iterator(ChainBB))
1000  F.splice(InsertPos, ChainBB);
1001  else
1002  ++InsertPos;
1003 
1004  // Update the terminator of the previous block.
1005  if (ChainBB == *FunctionChain.begin())
1006  continue;
1007  MachineBasicBlock *PrevBB = std::prev(MachineFunction::iterator(ChainBB));
1008 
1009  // FIXME: It would be awesome of updateTerminator would just return rather
1010  // than assert when the branch cannot be analyzed in order to remove this
1011  // boiler plate.
1012  Cond.clear();
1013  MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch.
1014  if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) {
1015  // The "PrevBB" is not yet updated to reflect current code layout, so,
1016  // o. it may fall-through to a block without explict "goto" instruction
1017  // before layout, and no longer fall-through it after layout; or
1018  // o. just opposite.
1019  //
1020  // AnalyzeBranch() may return erroneous value for FBB when these two
1021  // situations take place. For the first scenario FBB is mistakenly set
1022  // NULL; for the 2nd scenario, the FBB, which is expected to be NULL,
1023  // is mistakenly pointing to "*BI".
1024  //
1025  bool needUpdateBr = true;
1026  if (!Cond.empty() && (!FBB || FBB == ChainBB)) {
1027  PrevBB->updateTerminator();
1028  needUpdateBr = false;
1029  Cond.clear();
1030  TBB = FBB = nullptr;
1031  if (TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) {
1032  // FIXME: This should never take place.
1033  TBB = FBB = nullptr;
1034  }
1035  }
1036 
1037  // If PrevBB has a two-way branch, try to re-order the branches
1038  // such that we branch to the successor with higher weight first.
1039  if (TBB && !Cond.empty() && FBB &&
1040  MBPI->getEdgeWeight(PrevBB, FBB) > MBPI->getEdgeWeight(PrevBB, TBB) &&
1041  !TII->ReverseBranchCondition(Cond)) {
1042  DEBUG(dbgs() << "Reverse order of the two branches: "
1043  << getBlockName(PrevBB) << "\n");
1044  DEBUG(dbgs() << " Edge weight: " << MBPI->getEdgeWeight(PrevBB, FBB)
1045  << " vs " << MBPI->getEdgeWeight(PrevBB, TBB) << "\n");
1046  DebugLoc dl; // FIXME: this is nowhere
1047  TII->RemoveBranch(*PrevBB);
1048  TII->InsertBranch(*PrevBB, FBB, TBB, Cond, dl);
1049  needUpdateBr = true;
1050  }
1051  if (needUpdateBr)
1052  PrevBB->updateTerminator();
1053  }
1054  }
1055 
1056  // Fixup the last block.
1057  Cond.clear();
1058  MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch.
1059  if (!TII->AnalyzeBranch(F.back(), TBB, FBB, Cond))
1060  F.back().updateTerminator();
1061 
1062  // Walk through the backedges of the function now that we have fully laid out
1063  // the basic blocks and align the destination of each backedge. We don't rely
1064  // exclusively on the loop info here so that we can align backedges in
1065  // unnatural CFGs and backedges that were introduced purely because of the
1066  // loop rotations done during this layout pass.
1067  if (F.getFunction()->hasFnAttribute(Attribute::OptimizeForSize))
1068  return;
1069  if (FunctionChain.begin() == FunctionChain.end())
1070  return; // Empty chain.
1071 
1072  const BranchProbability ColdProb(1, 5); // 20%
1073  BlockFrequency EntryFreq = MBFI->getBlockFreq(F.begin());
1074  BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb;
1075  for (MachineBasicBlock *ChainBB : FunctionChain) {
1076  if (ChainBB == *FunctionChain.begin())
1077  continue;
1078 
1079  // Don't align non-looping basic blocks. These are unlikely to execute
1080  // enough times to matter in practice. Note that we'll still handle
1081  // unnatural CFGs inside of a natural outer loop (the common case) and
1082  // rotated loops.
1083  MachineLoop *L = MLI->getLoopFor(ChainBB);
1084  if (!L)
1085  continue;
1086 
1087  unsigned Align = TLI->getPrefLoopAlignment(L);
1088  if (!Align)
1089  continue; // Don't care about loop alignment.
1090 
1091  // If the block is cold relative to the function entry don't waste space
1092  // aligning it.
1093  BlockFrequency Freq = MBFI->getBlockFreq(ChainBB);
1094  if (Freq < WeightedEntryFreq)
1095  continue;
1096 
1097  // If the block is cold relative to its loop header, don't align it
1098  // regardless of what edges into the block exist.
1099  MachineBasicBlock *LoopHeader = L->getHeader();
1100  BlockFrequency LoopHeaderFreq = MBFI->getBlockFreq(LoopHeader);
1101  if (Freq < (LoopHeaderFreq * ColdProb))
1102  continue;
1103 
1104  // Check for the existence of a non-layout predecessor which would benefit
1105  // from aligning this block.
1106  MachineBasicBlock *LayoutPred =
1107  &*std::prev(MachineFunction::iterator(ChainBB));
1108 
1109  // Force alignment if all the predecessors are jumps. We already checked
1110  // that the block isn't cold above.
1111  if (!LayoutPred->isSuccessor(ChainBB)) {
1112  ChainBB->setAlignment(Align);
1113  continue;
1114  }
1115 
1116  // Align this block if the layout predecessor's edge into this block is
1117  // cold relative to the block. When this is true, other predecessors make up
1118  // all of the hot entries into the block and thus alignment is likely to be
1119  // important.
1120  BranchProbability LayoutProb =
1121  MBPI->getEdgeProbability(LayoutPred, ChainBB);
1122  BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb;
1123  if (LayoutEdgeFreq <= (Freq * ColdProb))
1124  ChainBB->setAlignment(Align);
1125  }
1126 }
1127 
1128 bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) {
1129  // Check for single-block functions and skip them.
1130  if (std::next(F.begin()) == F.end())
1131  return false;
1132 
1133  if (skipOptnoneFunction(*F.getFunction()))
1134  return false;
1135 
1136  MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
1137  MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
1138  MLI = &getAnalysis<MachineLoopInfo>();
1139  TII = F.getSubtarget().getInstrInfo();
1140  TLI = F.getSubtarget().getTargetLowering();
1141  MDT = &getAnalysis<MachineDominatorTree>();
1142  assert(BlockToChain.empty());
1143 
1144  buildCFGChains(F);
1145 
1146  BlockToChain.clear();
1147  ChainAllocator.DestroyAll();
1148 
1149  if (AlignAllBlock)
1150  // Align all of the blocks in the function to a specific alignment.
1151  for (MachineBasicBlock &MBB : F)
1152  MBB.setAlignment(AlignAllBlock);
1153 
1154  // We always return true as we have no way to track whether the final order
1155  // differs from the original order.
1156  return true;
1157 }
1158 
1159 namespace {
1160 /// \brief A pass to compute block placement statistics.
1161 ///
1162 /// A separate pass to compute interesting statistics for evaluating block
1163 /// placement. This is separate from the actual placement pass so that they can
1164 /// be computed in the absence of any placement transformations or when using
1165 /// alternative placement strategies.
1166 class MachineBlockPlacementStats : public MachineFunctionPass {
1167  /// \brief A handle to the branch probability pass.
1168  const MachineBranchProbabilityInfo *MBPI;
1169 
1170  /// \brief A handle to the function-wide block frequency pass.
1171  const MachineBlockFrequencyInfo *MBFI;
1172 
1173 public:
1174  static char ID; // Pass identification, replacement for typeid
1175  MachineBlockPlacementStats() : MachineFunctionPass(ID) {
1176  initializeMachineBlockPlacementStatsPass(*PassRegistry::getPassRegistry());
1177  }
1178 
1179  bool runOnMachineFunction(MachineFunction &F) override;
1180 
1181  void getAnalysisUsage(AnalysisUsage &AU) const override {
1184  AU.setPreservesAll();
1186  }
1187 };
1188 }
1189 
1192 INITIALIZE_PASS_BEGIN(MachineBlockPlacementStats, "block-placement-stats",
1193  "Basic Block Placement Stats", false, false)
1196 INITIALIZE_PASS_END(MachineBlockPlacementStats, "block-placement-stats",
1197  "Basic Block Placement Stats", false, false)
1198 
1199 bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) {
1200  // Check for single-block functions and skip them.
1201  if (std::next(F.begin()) == F.end())
1202  return false;
1203 
1204  MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
1205  MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
1206 
1207  for (MachineBasicBlock &MBB : F) {
1208  BlockFrequency BlockFreq = MBFI->getBlockFreq(&MBB);
1209  Statistic &NumBranches =
1210  (MBB.succ_size() > 1) ? NumCondBranches : NumUncondBranches;
1211  Statistic &BranchTakenFreq =
1212  (MBB.succ_size() > 1) ? CondBranchTakenFreq : UncondBranchTakenFreq;
1213  for (MachineBasicBlock *Succ : MBB.successors()) {
1214  // Skip if this successor is a fallthrough.
1215  if (MBB.isLayoutSuccessor(Succ))
1216  continue;
1217 
1218  BlockFrequency EdgeFreq =
1219  BlockFreq * MBPI->getEdgeProbability(&MBB, Succ);
1220  ++NumBranches;
1221  BranchTakenFreq += EdgeFreq.getFrequency();
1222  }
1223  }
1224 
1225  return false;
1226 }
1227 
static cl::opt< unsigned > AlignAllBlock("align-all-blocks", cl::desc("Force the alignment of all ""blocks in the function."), cl::init(0), cl::Hidden)
const MachineFunction * getParent() const
getParent - Return the MachineFunction containing this basic block.
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:240
bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
bool LLVM_ATTRIBUTE_UNUSED_RESULT empty() const
Definition: SmallPtrSet.h:78
STATISTIC(NumFunctions,"Total number of functions")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds...
Definition: Compiler.h:344
int getNumber() const
getNumber - MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a M...
size_type count(PtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:276
void initializeMachineBlockPlacementStatsPass(PassRegistry &)
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
const_iterator begin(StringRef path)
Get begin iterator over path.
Definition: Path.cpp:232
A debug info location.
Definition: DebugLoc.h:34
F(f)
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
INITIALIZE_PASS_BEGIN(MachineBlockPlacement,"block-placement","Branch Probability Basic Block Placement", false, false) INITIALIZE_PASS_END(MachineBlockPlacement
static std::string getBlockNum(MachineBasicBlock *BB)
Helper to print the number of a MBB.
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
const std::vector< BlockT * > & getBlocks() const
getBlocks - Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:139
char & MachineBlockPlacementStatsID
MachineBlockPlacementStats - This pass collects statistics about the basic block placement using bran...
BlockT * getHeader() const
Definition: LoopInfo.h:96
This file defines the MallocAllocator and BumpPtrAllocator interfaces.
iterator_range< succ_iterator > successors()
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:70
global merge
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:75
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APInt.h:33
block placement Basic Block Placement Stats
bool LLVM_ATTRIBUTE_UNUSED_RESULT empty() const
Definition: SmallVector.h:57
uint64_t rotate(uint64_t val, size_t shift)
Bitwise right rotate.
Definition: Hashing.h:171
TargetInstrInfo - Interface to description of machine instruction set.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:325
friend const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:240
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
static cl::opt< unsigned > OutlineOptionalThreshold("outline-optional-threshold", cl::desc("Don't outline optional branches that are a single block with an ""instruction count below this threshold"), cl::init(4), cl::Hidden)
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
unsigned RemoveBranch(MachineBasicBlock &MBB) const override
bool ReverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:264
Represent the analysis usage information of a pass.
iterator_range< pred_iterator > predecessors()
iterator erase(iterator I)
Definition: SmallVector.h:455
void initializeMachineBlockPlacementPass(PassRegistry &)
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:299
block Branch Probability Basic Block Placement
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
friend const_iterator begin(StringRef path)
Get begin iterator over path.
Definition: Path.cpp:232
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:861
virtual const TargetLowering * getTargetLowering() const
void updateTerminator()
updateTerminator - Update the terminator instructions in block to account for changes to the layout...
bool isSuccessor(const MachineBasicBlock *MBB) const
isSuccessor - Return true if the specified MBB is a successor of this block.
block Branch Probability Basic Block static false std::string getBlockName(MachineBasicBlock *BB)
Helper to print the name of a MBB.
A BumpPtrAllocator that allows only elements of a specific type to be allocated.
Definition: Allocator.h:349
static cl::opt< AlignMode > Align(cl::desc("Load/store alignment support"), cl::Hidden, cl::init(NoStrictAlign), cl::values(clEnumValN(StrictAlign,"aarch64-strict-align","Disallow all unaligned memory accesses"), clEnumValN(NoStrictAlign,"aarch64-no-strict-align","Allow unaligned memory accesses"), clEnumValEnd))
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:123
StringRef getName() const
getName - Return the name of the corresponding LLVM basic block, or "(null)".
void setPreservesAll()
Set by analyses that do not transform their input at all.
block placement
block placement stats
unsigned getNumBlocks() const
getNumBlocks - Get the number of blocks in this loop in constant time.
Definition: LoopInfo.h:145
#define I(x, y, z)
Definition: MD5.cpp:54
char & MachineBlockPlacementID
MachineBlockPlacement - This pass places basic blocks based on branch probabilities.
unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, DebugLoc DL) const override
static cl::opt< unsigned > ExitBlockBias("block-placement-exit-block-bias", cl::desc("Block frequency percentage a loop exit block needs ""over the original exit to be considered the new exit."), cl::init(0), cl::Hidden)
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:465
virtual const TargetInstrInfo * getInstrInfo() const
block Branch Probability Basic Block false
BasicBlockListType::iterator iterator
#define DEBUG(X)
Definition: Debug.h:92
static cl::opt< bool > OutlineOptionalBranches("outline-optional-branches", cl::desc("Put completely optional branches, i.e. branches with a common ""post dominator, out of line."), cl::init(false), cl::Hidden)
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
unsigned pred_size() const
This file describes how to lower LLVM code to machine code.