LLVM  4.0.0
BranchFolding.cpp
Go to the documentation of this file.
1 //===-- BranchFolding.cpp - Fold machine code branch instructions ---------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass forwards branches to unconditional branches to make them branch
11 // directly to the target block. This pass often results in dead MBB's, which
12 // it then removes.
13 //
14 // Note that this pass must be run after register allocation, it cannot handle
15 // SSA form. It also must handle virtual registers for targets that emit virtual
16 // ISA (e.g. NVPTX).
17 //
18 //===----------------------------------------------------------------------===//
19 
20 #include "BranchFolding.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/SmallSet.h"
23 #include "llvm/ADT/Statistic.h"
24 #include "llvm/CodeGen/Analysis.h"
33 #include "llvm/CodeGen/Passes.h"
35 #include "llvm/IR/Function.h"
37 #include "llvm/Support/Debug.h"
43 #include <algorithm>
44 using namespace llvm;
45 
46 #define DEBUG_TYPE "branchfolding"
47 
48 STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
49 STATISTIC(NumBranchOpts, "Number of branches optimized");
50 STATISTIC(NumTailMerge , "Number of block tails merged");
51 STATISTIC(NumHoist , "Number of times common instructions are hoisted");
52 
53 static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge",
55 
56 // Throttle for huge numbers of predecessors (compile speed problems).
   // TailMergeBlocks stops collecting merge candidates once MergePotentials
   // has reached this many entries.
57 static cl::opt<unsigned>
58 TailMergeThreshold("tail-merge-threshold",
59  cl::desc("Max number of predecessors to consider tail merging"),
60  cl::init(150), cl::Hidden);
61 
62 // Heuristic for tail merging (and, inversely, tail duplication).
   // The BranchFolder constructor falls back to this value as the minimum
   // common tail length when its MinTailLength argument is 0.
63 // TODO: This should be replaced with a target query.
64 static cl::opt<unsigned>
65 TailMergeSize("tail-merge-size",
66  cl::desc("Min number of instructions to consider tail merging"),
67  cl::init(3), cl::Hidden);
68 
69 namespace {
70  /// BranchFolderPass - Wrap branch folder in a machine function pass.
    /// The actual work is done by a BranchFolder object that
    /// runOnMachineFunction constructs for each function.
71  class BranchFolderPass : public MachineFunctionPass {
72  public:
    // Pass identifier, passed to the MachineFunctionPass constructor below.
73  static char ID;
74  explicit BranchFolderPass(): MachineFunctionPass(ID) {}
75 
    // Runs branch folding on MF; defined out of line.
76  bool runOnMachineFunction(MachineFunction &MF) override;
77 
    // NOTE(review): the body of getAnalysisUsage is not visible in this
    // listing. runOnMachineFunction calls getAnalysis<> for TargetPassConfig,
    // MachineBlockFrequencyInfo and MachineBranchProbabilityInfo, so presumably
    // those are declared as required here - confirm.
78  void getAnalysisUsage(AnalysisUsage &AU) const override {
83  }
84  };
85 }
86 
87 char BranchFolderPass::ID = 0;
89 
   // Register the pass under the name "branch-folder" with the description
   // "Control Flow Optimizer".
   // NOTE(review): the two trailing `false` arguments are presumably the
   // CFG-only and is-analysis flags of INITIALIZE_PASS - confirm.
90 INITIALIZE_PASS(BranchFolderPass, "branch-folder",
91  "Control Flow Optimizer", false, false)
92 
93 bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
94  if (skipFunction(*MF.getFunction()))
95  return false;
96 
97  TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
98  // TailMerge can create jump into if branches that make CFG irreducible for
99  // HW that requires structurized CFG.
100  bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() &&
101  PassConfig->getEnableTailMerge();
102  BranchFolder::MBFIWrapper MBBFreqInfo(
103  getAnalysis<MachineBlockFrequencyInfo>());
104  BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true, MBBFreqInfo,
105  getAnalysis<MachineBranchProbabilityInfo>());
106  return Folder.OptimizeFunction(MF, MF.getSubtarget().getInstrInfo(),
107  MF.getSubtarget().getRegisterInfo(),
108  getAnalysisIfAvailable<MachineModuleInfo>());
109 }
110 
111 BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist,
112  MBFIWrapper &FreqInfo,
113  const MachineBranchProbabilityInfo &ProbInfo,
114  unsigned MinTailLength)
115  : EnableHoistCommonCode(CommonHoist), MinCommonTailLength(MinTailLength),
116  MBBFreqInfo(FreqInfo), MBPI(ProbInfo) {
117  if (MinCommonTailLength == 0)
118  MinCommonTailLength = TailMergeSize;
119  switch (FlagEnableTailMerge) {
120  case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break;
121  case cl::BOU_TRUE: EnableTailMerge = true; break;
122  case cl::BOU_FALSE: EnableTailMerge = false; break;
123  }
124 }
125 
126 /// RemoveDeadBlock - Remove the specified dead machine basic block from the
127 /// function, updating the CFG.
128 void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
129  assert(MBB->pred_empty() && "MBB must be dead!");
130  DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);
131 
132  MachineFunction *MF = MBB->getParent();
133  // drop all successors.
134  while (!MBB->succ_empty())
135  MBB->removeSuccessor(MBB->succ_end()-1);
136 
137  // Avoid matching if this pointer gets reused.
138  TriedMerging.erase(MBB);
139 
140  // Remove the block.
141  MF->erase(MBB);
142  FuncletMembership.erase(MBB);
143  if (MLI)
144  MLI->removeBlock(MBB);
145 }
146 
147 /// OptimizeFunction - Perform branch folding, tail merging and other
148 /// CFG optimizations on the given function. Block placement changes the layout
149 /// and may create new tail merging opportunities.
    ///
    /// Returns true if anything was changed; returns false immediately when
    /// \p tii is null.
    // NOTE(review): the first line of the signature (return type, function name
    // and the MF parameter) is not visible in this listing.
151  const TargetInstrInfo *tii,
152  const TargetRegisterInfo *tri,
153  MachineModuleInfo *mmi,
154  MachineLoopInfo *mli, bool AfterPlacement) {
155  if (!tii) return false;
156 
157  TriedMerging.clear();
158 
     // Cache the per-run inputs in members used by the helper methods.
159  AfterBlockPlacement = AfterPlacement;
160  TII = tii;
161  TRI = tri;
162  MMI = mmi;
163  MLI = mli;
164 
     // NOTE(review): the declaration of MRI is not visible in this listing;
     // presumably it is MF's MachineRegisterInfo - confirm.
     // Live-ins are only kept up to date when liveness is tracked and the target
     // asks for tracking after register allocation; otherwise liveness is
     // invalidated up front.
166  UpdateLiveIns = MRI.tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF);
167  if (!UpdateLiveIns)
168  MRI.invalidateLiveness();
169 
170  // Fix CFG. The later algorithms expect it to be right.
171  bool MadeChange = false;
172  for (MachineBasicBlock &MBB : MF) {
173  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
     // NOTE(review): the declaration of Cond is not visible in this listing.
     // Blocks whose branch cannot be analyzed (analyzeBranch returns true) are
     // left alone.
175  if (!TII->analyzeBranch(MBB, TBB, FBB, Cond, true))
176  MadeChange |= MBB.CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
177  }
178 
179  // Recalculate funclet membership.
180  FuncletMembership = getFuncletMembership(MF);
181 
     // Iterate until a full round of tail merging, branch optimization and
     // hoisting makes no change.
182  bool MadeChangeThisIteration = true;
183  while (MadeChangeThisIteration) {
184  MadeChangeThisIteration = TailMergeBlocks(MF);
185  // No need to clean up if tail merging does not change anything after the
186  // block placement.
187  if (!AfterBlockPlacement || MadeChangeThisIteration)
188  MadeChangeThisIteration |= OptimizeBranches(MF);
189  if (EnableHoistCommonCode)
190  MadeChangeThisIteration |= HoistCommonCode(MF);
191  MadeChange |= MadeChangeThisIteration;
192  }
193 
194  // See if any jump tables have become dead as the code generator
195  // did its thing.
196  MachineJumpTableInfo *JTI = MF.getJumpTableInfo();
197  if (!JTI)
198  return MadeChange;
199 
200  // Walk the function to find jump tables that are live.
201  BitVector JTIsLive(JTI->getJumpTables().size());
202  for (const MachineBasicBlock &BB : MF) {
203  for (const MachineInstr &I : BB)
204  for (const MachineOperand &Op : I.operands()) {
205  if (!Op.isJTI()) continue;
206 
207  // Remember that this JT is live.
208  JTIsLive.set(Op.getIndex());
209  }
210  }
211 
212  // Finally, remove dead jump tables. This happens when the
213  // indirect jump was unreachable (and thus deleted).
214  for (unsigned i = 0, e = JTIsLive.size(); i != e; ++i)
215  if (!JTIsLive.test(i)) {
216  JTI->RemoveJumpTable(i);
217  MadeChange = true;
218  }
219 
220  return MadeChange;
221 }
222 
223 //===----------------------------------------------------------------------===//
224 // Tail Merging of Blocks
225 //===----------------------------------------------------------------------===//
226 
227 /// HashMachineInstr - Compute a hash value for MI and its operands.
    /// The hash starts from the opcode and adds one term per operand. Operands
    /// that reach the default case contribute only their operand type.
228 static unsigned HashMachineInstr(const MachineInstr &MI) {
229  unsigned Hash = MI.getOpcode();
230  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
231  const MachineOperand &Op = MI.getOperand(i);
232 
233  // Merge in bits from the operand if easy. We can't use MachineOperand's
234  // hash_code here because it's not deterministic and we sort by hash value
235  // later.
236  unsigned OperandHash = 0;
237  switch (Op.getType()) {
     // NOTE(review): the `case` labels of this switch are not visible in this
     // listing; only the per-kind assignments are shown.
239  OperandHash = Op.getReg();
240  break;
242  OperandHash = Op.getImm();
243  break;
245  OperandHash = Op.getMBB()->getNumber();
246  break;
250  OperandHash = Op.getIndex();
251  break;
254  // Global address / external symbol are too hard, don't bother, but do
255  // pull in the offset.
256  OperandHash = Op.getOffset();
257  break;
258  default:
259  break;
260  }
261 
     // Combine the operand value with its type, then shift by the operand's
     // index (mod 32) before adding it to the running hash.
262  Hash += ((OperandHash << 3) | Op.getType()) << (i & 31);
263  }
264  return Hash;
265 }
266 
267 /// HashEndOfMBB - Hash the last instruction in the MBB.
    /// Returns 0 when the iterator I is MBB.end(), otherwise the
    /// HashMachineInstr value of the instruction I points at.
268 static unsigned HashEndOfMBB(const MachineBasicBlock &MBB) {
     // NOTE(review): the declaration of I is not visible in this listing;
     // presumably it designates the block's last non-debug instruction - confirm.
270  if (I == MBB.end())
271  return 0;
272 
273  return HashMachineInstr(*I);
274 }
275 
276 /// ComputeCommonTailLength - Given two machine basic blocks, compute the number
277 /// of instructions they actually have in common together at their end. Return
278 /// iterators for the first shared instruction in each block.
    /// The iterators are handed back through I1 (for MBB1) and I2 (for MBB2).
    /// Debug values are skipped and not counted; inline asm never matches.
    // NOTE(review): parts of the signature (return type, function name, the MBB1
    // parameter and the I1/I2 parameters) are not visible in this listing.
280  MachineBasicBlock *MBB2,
283  I1 = MBB1->end();
284  I2 = MBB2->end();
285 
     // Walk both blocks backwards in lock step, counting identical instructions.
286  unsigned TailLen = 0;
287  while (I1 != MBB1->begin() && I2 != MBB2->begin()) {
288  --I1; --I2;
289  // Skip debugging pseudos; necessary to avoid changing the code.
290  while (I1->isDebugValue()) {
291  if (I1==MBB1->begin()) {
292  while (I2->isDebugValue()) {
293  if (I2==MBB2->begin())
294  // I1==DBG at begin; I2==DBG at begin
295  return TailLen;
296  --I2;
297  }
298  ++I2;
299  // I1==DBG at begin; I2==non-DBG, or first of DBGs not at begin
300  return TailLen;
301  }
302  --I1;
303  }
304  // I1==first (untested) non-DBG preceding known match
305  while (I2->isDebugValue()) {
306  if (I2==MBB2->begin()) {
307  ++I1;
308  // I1==non-DBG, or first of DBGs not at begin; I2==DBG at begin
309  return TailLen;
310  }
311  --I2;
312  }
313  // I1, I2==first (untested) non-DBGs preceding known match
314  if (!I1->isIdenticalTo(*I2) ||
315  // FIXME: This check is dubious. It's used to get around a problem where
316  // people incorrectly expect inline asm directives to remain in the same
317  // relative order. This is untenable because normal compiler
318  // optimizations (like this one) may reorder and/or merge these
319  // directives.
320  I1->isInlineAsm()) {
     // Mismatch: step both iterators forward again so they point at the first
     // instruction of the common tail.
321  ++I1; ++I2;
322  break;
323  }
324  ++TailLen;
325  }
326  // Back past possible debugging pseudos at beginning of block. This matters
327  // when one block differs from the other only by whether debugging pseudos
328  // are present at the beginning. (This way, the various checks later for
329  // I1==MBB1->begin() work as expected.)
330  if (I1 == MBB1->begin() && I2 != MBB2->begin()) {
331  --I2;
332  while (I2->isDebugValue()) {
333  if (I2 == MBB2->begin())
334  return TailLen;
335  --I2;
336  }
337  ++I2;
338  }
339  if (I2 == MBB2->begin() && I1 != MBB1->begin()) {
340  --I1;
341  while (I1->isDebugValue()) {
342  if (I1 == MBB1->begin())
343  return TailLen;
344  --I1;
345  }
346  ++I1;
347  }
348  return TailLen;
349 }
350 
351 /// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
352 /// after it, replacing it with an unconditional branch to NewDest.
353 void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
354  MachineBasicBlock *NewDest) {
355  TII->ReplaceTailWithBranchTo(OldInst, NewDest);
356 
357  if (UpdateLiveIns) {
358  NewDest->clearLiveIns();
359  computeLiveIns(LiveRegs, *TRI, *NewDest);
360  }
361 
362  ++NumTailMerge;
363 }
364 
365 /// SplitMBBAt - Given a machine basic block and an iterator into it, split the
366 /// MBB so that the part before the iterator falls into the part starting at the
367 /// iterator. This returns the new MBB.
    /// Returns nullptr, leaving CurMBB untouched, when the target reports that
    /// splitting at this position is not legal.
368 MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
     // NOTE(review): the declaration of the iterator parameter BBI1 is not
     // visible in this listing.
370  const BasicBlock *BB) {
371  if (!TII->isLegalToSplitMBBAt(CurMBB, BBI1))
372  return nullptr;
373 
374  MachineFunction &MF = *CurMBB.getParent();
375 
376  // Create the fall-through block.
377  MachineFunction::iterator MBBI = CurMBB.getIterator();
     // NOTE(review): the statement that creates NewMBB is not visible in this
     // listing; presumably it is created from MF and BB - confirm.
     // The new block is inserted directly after CurMBB in layout order.
379  CurMBB.getParent()->insert(++MBBI, NewMBB);
380 
381  // Move all the successors of this block to the specified block.
382  NewMBB->transferSuccessors(&CurMBB);
383 
384  // Add an edge from CurMBB to NewMBB for the fall-through.
385  CurMBB.addSuccessor(NewMBB);
386 
387  // Splice the code over.
388  NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end());
389 
390  // NewMBB belongs to the same loop as CurMBB.
391  if (MLI)
392  if (MachineLoop *ML = MLI->getLoopFor(&CurMBB))
393  ML->addBasicBlockToLoop(NewMBB, MLI->getBase());
394 
395  // NewMBB inherits CurMBB's block frequency.
396  MBBFreqInfo.setBlockFreq(NewMBB, MBBFreqInfo.getBlockFreq(&CurMBB));
397 
398  if (UpdateLiveIns)
399  computeLiveIns(LiveRegs, *TRI, *NewMBB);
400 
401  // Add the new block to the funclet.
402  const auto &FuncletI = FuncletMembership.find(&CurMBB);
403  if (FuncletI != FuncletMembership.end()) {
404  auto n = FuncletI->second;
405  FuncletMembership[NewMBB] = n;
406  }
407 
408  return NewMBB;
409 }
410 
411 /// EstimateRuntime - Make a rough estimate for how long it will take to run
412 /// the specified code.
    /// Cost model: a call counts 10, an instruction that may load or store
    /// counts 2, anything else counts 1; debug values are free.
    // NOTE(review): the signature (iterator parameters I and E) is not visible
    // in this listing.
415  unsigned Time = 0;
416  for (; I != E; ++I) {
417  if (I->isDebugValue())
418  continue;
419  if (I->isCall())
420  Time += 10;
421  else if (I->mayLoad() || I->mayStore())
422  Time += 2;
423  else
424  ++Time;
425  }
426  return Time;
427 }
428 
429 // CurMBB needs to add an unconditional branch to SuccBB (we removed these
430 // branches temporarily for tail merging). In the case where CurMBB ends
431 // with a conditional branch to the next block, optimize by reversing the
432 // test and conditionally branching to SuccBB instead.
433 static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB,
434  const TargetInstrInfo *TII) {
435  MachineFunction *MF = CurMBB->getParent();
     // NOTE(review): the declarations of I and Cond are not visible in this
     // listing. I is compared against MF->end() and dereferenced as the next
     // block, so presumably it is the function iterator following CurMBB.
437  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
439  DebugLoc dl; // FIXME: this is nowhere
440  if (I != MF->end() && !TII->analyzeBranch(*CurMBB, TBB, FBB, Cond, true)) {
441  MachineBasicBlock *NextBB = &*I;
     // Only a lone conditional branch to the next block qualifies.
442  if (TBB == NextBB && !Cond.empty() && !FBB) {
     // reverseBranchCondition returns false on success.
443  if (!TII->reverseBranchCondition(Cond)) {
444  TII->removeBranch(*CurMBB);
445  TII->insertBranch(*CurMBB, SuccBB, nullptr, Cond, dl);
446  return;
447  }
448  }
449  }
     // Fallback: append a branch to SuccBB.
     // NOTE(review): the remaining arguments of this call are not visible in
     // this listing.
450  TII->insertBranch(*CurMBB, SuccBB, nullptr,
452 }
453 
454 bool
455 BranchFolder::MergePotentialsElt::operator<(const MergePotentialsElt &o) const {
456  if (getHash() < o.getHash())
457  return true;
458  if (getHash() > o.getHash())
459  return false;
460  if (getBlock()->getNumber() < o.getBlock()->getNumber())
461  return true;
462  if (getBlock()->getNumber() > o.getBlock()->getNumber())
463  return false;
464  // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing
465  // an object with itself.
466 #ifndef _GLIBCXX_DEBUG
467  llvm_unreachable("Predecessor appears twice");
468 #else
469  return false;
470 #endif
471 }
472 
475  auto I = MergedBBFreq.find(MBB);
     // NOTE(review): the signature of this function is not visible in this
     // listing.
     // A frequency recorded in MergedBBFreq takes precedence over MBFI.
476 
477  if (I != MergedBBFreq.end())
478  return I->second;
479 
     // Nothing recorded for this block: ask the wrapped MBFI.
480  return MBFI.getBlockFreq(MBB);
481 }
482 
484  BlockFrequency F) {
     // NOTE(review): the first line of the signature is not visible in this
     // listing.
     // Record F for MBB in MergedBBFreq, replacing any earlier entry.
485  MergedBBFreq[MBB] = F;
486 }
487 
488 raw_ostream &
490  const MachineBasicBlock *MBB) const {
     // NOTE(review): the line naming the function and its OS parameter is not
     // visible in this listing.
     // Print the frequency getBlockFreq reports for MBB; formatting is left to
     // MBFI.
491  return MBFI.printBlockFreq(OS, getBlockFreq(MBB));
492 }
493 
494 raw_ostream &
496  const BlockFrequency Freq) const {
     // NOTE(review): the line naming the function and its OS parameter is not
     // visible in this listing.
     // Forward an explicit frequency value to MBFI for printing.
497  return MBFI.printBlockFreq(OS, Freq);
498 }
499 
500 /// CountTerminators - Count the number of terminators in the given
501 /// block and set I to the position of the first non-terminator, if there
502 /// is one, or MBB->end() otherwise.
    /// The scan runs backwards from the end of the block, so the non-terminator
    /// that I ends up on is the one directly preceding the trailing terminators.
503 static unsigned CountTerminators(MachineBasicBlock *MBB,
     // NOTE(review): the declaration of the out-parameter I is not visible in
     // this listing.
505  I = MBB->end();
506  unsigned NumTerms = 0;
507  for (;;) {
508  if (I == MBB->begin()) {
     // Ran off the front: every instruction was a terminator (or the block is
     // empty), so report end().
509  I = MBB->end();
510  break;
511  }
512  --I;
513  if (!I->isTerminator()) break;
514  ++NumTerms;
515  }
516  return NumTerms;
517 }
518 
519 /// ProfitableToMerge - Check if two machine basic blocks have a common tail
520 /// and decide if it would be profitable to merge those tails. Return the
521 /// length of the common tail and iterators to the first common instruction
522 /// in each block.
523 /// MBB1, MBB2 The blocks to check
524 /// MinCommonTailLength Minimum size of tail block to be merged.
525 /// CommonTailLen Out parameter to record the size of the shared tail between
526 /// MBB1 and MBB2
527 /// I1, I2 Iterator references that will be changed to point to the first
528 /// instruction in the common tail shared by MBB1,MBB2
529 /// SuccBB A common successor of MBB1, MBB2 which are in a canonical form
530 /// relative to SuccBB
531 /// PredBB The layout predecessor of SuccBB, if any.
532 /// FuncletMembership map from block to funclet #.
533 /// AfterPlacement True if we are merging blocks after layout. Stricter
534 /// thresholds apply to prevent undoing tail-duplication.
    /// The boolean result says whether merging is considered profitable.
535 static bool
    // NOTE(review): the signature lines declaring the function name and the
    // MBB1, MBB2, I1, I2 and SuccBB parameters are not visible in this listing.
537  unsigned MinCommonTailLength, unsigned &CommonTailLen,
540  MachineBasicBlock *PredBB,
541  DenseMap<const MachineBasicBlock *, int> &FuncletMembership,
542  bool AfterPlacement) {
543  // It is never profitable to tail-merge blocks from two different funclets.
544  if (!FuncletMembership.empty()) {
545  auto Funclet1 = FuncletMembership.find(MBB1);
546  assert(Funclet1 != FuncletMembership.end());
547  auto Funclet2 = FuncletMembership.find(MBB2);
548  assert(Funclet2 != FuncletMembership.end());
549  if (Funclet1->second != Funclet2->second)
550  return false;
551  }
552 
553  CommonTailLen = ComputeCommonTailLength(MBB1, MBB2, I1, I2);
554  if (CommonTailLen == 0)
555  return false;
556  DEBUG(dbgs() << "Common tail length of BB#" << MBB1->getNumber()
557  << " and BB#" << MBB2->getNumber() << " is " << CommonTailLen
558  << '\n');
559 
560  // It's almost always profitable to merge any number of non-terminator
561  // instructions with the block that falls through into the common successor.
562  // This is true only for a single successor. For multiple successors, we are
563  // trading a conditional branch for an unconditional one.
564  // TODO: Re-visit successor size for non-layout tail merging.
565  if ((MBB1 == PredBB || MBB2 == PredBB) &&
566  (!AfterPlacement || MBB1->succ_size() == 1)) {
     // NOTE(review): the declaration of the iterator I is not visible in this
     // listing.
     // Terminators are counted in the block that is NOT PredBB.
568  unsigned NumTerms = CountTerminators(MBB1 == PredBB ? MBB2 : MBB1, I);
569  if (CommonTailLen > NumTerms)
570  return true;
571  }
572 
573  // If one of the blocks can be completely merged and happens to be in
574  // a position where the other could fall through into it, merge any number
575  // of instructions, because it can be done without a branch.
576  // TODO: If the blocks are not adjacent, move one of them so that they are?
577  if (MBB1->isLayoutSuccessor(MBB2) && I2 == MBB2->begin())
578  return true;
579  if (MBB2->isLayoutSuccessor(MBB1) && I1 == MBB1->begin())
580  return true;
581 
582  // If both blocks have an unconditional branch temporarily stripped out,
583  // count that as an additional common instruction for the following
584  // heuristics. This heuristic is only accurate for single-succ blocks, so to
585  // make sure that during layout merging and duplicating don't crash, we check
586  // for that when merging during layout.
587  unsigned EffectiveTailLen = CommonTailLen;
588  if (SuccBB && MBB1 != PredBB && MBB2 != PredBB &&
589  (MBB1->succ_size() == 1 || !AfterPlacement) &&
590  !MBB1->back().isBarrier() &&
591  !MBB2->back().isBarrier())
592  ++EffectiveTailLen;
593 
594  // Check if the common tail is long enough to be worthwhile.
595  if (EffectiveTailLen >= MinCommonTailLength)
596  return true;
597 
598  // If we are optimizing for code size, 2 instructions in common is enough if
599  // we don't have to split a block. At worst we will be introducing 1 new
600  // branch instruction, which is likely to be smaller than the 2
601  // instructions that would be deleted in the merge.
602  MachineFunction *MF = MBB1->getParent();
603  return EffectiveTailLen >= 2 && MF->getFunction()->optForSize() &&
604  (I1 == MBB1->begin() || I2 == MBB2->begin());
605 }
606 
607 /// ComputeSameTails - Look through all the blocks in MergePotentials that have
608 /// hash CurHash (guaranteed to match the last element). Build the vector
609 /// SameTails of all those that have the (same) largest number of instructions
610 /// in common of any pair of these blocks. SameTails entries contain an
611 /// iterator into MergePotentials (from which the MachineBasicBlock can be
612 /// found) and a MachineBasicBlock::iterator into that MBB indicating the
613 /// instruction where the matching code sequence begins.
614 /// Order of elements in SameTails is the reverse of the order in which
615 /// those blocks appear in MergePotentials (where they are not necessarily
616 /// consecutive).
617 unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
618  unsigned MinCommonTailLength,
619  MachineBasicBlock *SuccBB,
620  MachineBasicBlock *PredBB) {
621  unsigned maxCommonTailLength = 0U;
622  SameTails.clear();
623  MachineBasicBlock::iterator TrialBBI1, TrialBBI2;
624  MPIterator HighestMPIter = std::prev(MergePotentials.end());
625  for (MPIterator CurMPIter = std::prev(MergePotentials.end()),
626  B = MergePotentials.begin();
627  CurMPIter != B && CurMPIter->getHash() == CurHash; --CurMPIter) {
628  for (MPIterator I = std::prev(CurMPIter); I->getHash() == CurHash; --I) {
629  unsigned CommonTailLen;
630  if (ProfitableToMerge(CurMPIter->getBlock(), I->getBlock(),
631  MinCommonTailLength,
632  CommonTailLen, TrialBBI1, TrialBBI2,
633  SuccBB, PredBB,
634  FuncletMembership,
635  AfterBlockPlacement)) {
636  if (CommonTailLen > maxCommonTailLength) {
637  SameTails.clear();
638  maxCommonTailLength = CommonTailLen;
639  HighestMPIter = CurMPIter;
640  SameTails.push_back(SameTailElt(CurMPIter, TrialBBI1));
641  }
642  if (HighestMPIter == CurMPIter &&
643  CommonTailLen == maxCommonTailLength)
644  SameTails.push_back(SameTailElt(I, TrialBBI2));
645  }
646  if (I == B)
647  break;
648  }
649  }
650  return maxCommonTailLength;
651 }
652 
653 /// RemoveBlocksWithHash - Remove all blocks with hash CurHash from
654 /// MergePotentials, restoring branches at ends of blocks as appropriate.
655 void BranchFolder::RemoveBlocksWithHash(unsigned CurHash,
656  MachineBasicBlock *SuccBB,
657  MachineBasicBlock *PredBB) {
658  MPIterator CurMPIter, B;
659  for (CurMPIter = std::prev(MergePotentials.end()),
660  B = MergePotentials.begin();
661  CurMPIter->getHash() == CurHash; --CurMPIter) {
662  // Put the unconditional branch back, if we need one.
663  MachineBasicBlock *CurMBB = CurMPIter->getBlock();
664  if (SuccBB && CurMBB != PredBB)
665  FixTail(CurMBB, SuccBB, TII);
666  if (CurMPIter == B)
667  break;
668  }
669  if (CurMPIter->getHash() != CurHash)
670  CurMPIter++;
671  MergePotentials.erase(CurMPIter, MergePotentials.end());
672 }
673 
674 /// CreateCommonTailOnlyBlock - None of the blocks to be tail-merged consist
675 /// only of the common tail. Create a block that does by splitting one.
    /// On success the chosen SameTails entry is redirected to the new block,
    /// commonTailIndex holds its index, and PredBB is updated if it was the block
    /// that got split. Returns false if SplitMBBAt could not split the block.
676 bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
677  MachineBasicBlock *SuccBB,
678  unsigned maxCommonTailLength,
679  unsigned &commonTailIndex) {
680  commonTailIndex = 0;
681  unsigned TimeEstimate = ~0U;
682  for (unsigned i = 0, e = SameTails.size(); i != e; ++i) {
683  // Use PredBB if possible; that doesn't require a new branch.
684  if (SameTails[i].getBlock() == PredBB) {
685  commonTailIndex = i;
686  break;
687  }
688  // Otherwise, make a (fairly bogus) choice based on estimate of
689  // how long it will take the various blocks to execute.
690  unsigned t = EstimateRuntime(SameTails[i].getBlock()->begin(),
691  SameTails[i].getTailStartPos());
     // `<=` means that among equal estimates the later entry wins.
692  if (t <= TimeEstimate) {
693  TimeEstimate = t;
694  commonTailIndex = i;
695  }
696  }
697 
     // NOTE(review): the line declaring BBI (initialized from the expression
     // below) is not visible in this listing.
699  SameTails[commonTailIndex].getTailStartPos();
700  MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();
701 
702  DEBUG(dbgs() << "\nSplitting BB#" << MBB->getNumber() << ", size "
703  << maxCommonTailLength);
704 
705  // If the split block unconditionally falls-thru to SuccBB, it will be
706  // merged. In control flow terms it should then take SuccBB's name. e.g. If
707  // SuccBB is an inner loop, the common tail is still part of the inner loop.
708  const BasicBlock *BB = (SuccBB && MBB->succ_size() == 1) ?
709  SuccBB->getBasicBlock() : MBB->getBasicBlock();
710  MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI, BB);
711  if (!newMBB) {
712  DEBUG(dbgs() << "... failed!");
713  return false;
714  }
715 
     // The new block consists of the common tail only.
716  SameTails[commonTailIndex].setBlock(newMBB);
717  SameTails[commonTailIndex].setTailStartPos(newMBB->begin());
718 
719  // If we split PredBB, newMBB is the new predecessor.
720  if (PredBB == MBB)
721  PredBB = newMBB;
722 
723  return true;
724 }
725 
726 static void
728  MachineBasicBlock &MBBCommon) {
     // Fold per-instruction information (memory operands, undef flags) from the
     // tail starting at MBBIStartPos into the matching instructions of MBBCommon.
     // NOTE(review): the signature line naming the function and declaring
     // MBBIStartPos is not visible in this listing.
729  MachineBasicBlock *MBB = MBBIStartPos->getParent();
730  // Note CommonTailLen does not necessarily match the size of
731  // the common BB nor all its instructions because of debug
732  // instructions differences.
733  unsigned CommonTailLen = 0;
734  for (auto E = MBB->end(); MBBIStartPos != E; ++MBBIStartPos)
735  ++CommonTailLen;
736 
     // NOTE(review): the declarations of MBBI and MBBIE are not visible in this
     // listing; presumably they are reverse iterators over MBB, mirroring the two
     // declared for MBBCommon below - confirm.
739  MachineBasicBlock::reverse_iterator MBBICommon = MBBCommon.rbegin();
740  MachineBasicBlock::reverse_iterator MBBIECommon = MBBCommon.rend();
741 
742  while (CommonTailLen--) {
743  assert(MBBI != MBBIE && "Reached BB end within common tail length!");
744  (void)MBBIE;
745 
     // Debug values in either block are skipped; they have no counterpart.
746  if (MBBI->isDebugValue()) {
747  ++MBBI;
748  continue;
749  }
750 
751  while ((MBBICommon != MBBIECommon) && MBBICommon->isDebugValue())
752  ++MBBICommon;
753 
754  assert(MBBICommon != MBBIECommon &&
755  "Reached BB end within common tail length!");
756  assert(MBBICommon->isIdenticalTo(*MBBI) && "Expected matching MIIs!");
757 
758  // Merge MMOs from memory operations in the common block.
759  if (MBBICommon->mayLoad() || MBBICommon->mayStore())
760  MBBICommon->setMemRefs(MBBICommon->mergeMemRefsWith(*MBBI));
761  // Drop undef flags if they aren't present in all merged instructions.
762  for (unsigned I = 0, E = MBBICommon->getNumOperands(); I != E; ++I) {
763  MachineOperand &MO = MBBICommon->getOperand(I);
764  if (MO.isReg() && MO.isUndef()) {
765  const MachineOperand &OtherMO = MBBI->getOperand(I);
766  if (!OtherMO.isUndef())
767  MO.setIsUndef(false);
768  }
769  }
770 
771  ++MBBI;
772  ++MBBICommon;
773  }
774 }
775 
776 // See if any of the blocks in MergePotentials (which all have SuccBB as a
777 // successor, or all have no successor if it is null) can be tail-merged.
778 // If there is a successor, any blocks in MergePotentials that are not
779 // tail-merged and are not immediately before Succ must have an unconditional
780 // branch to Succ added (but the predecessor/successor lists need no
781 // adjustment). The lone predecessor of Succ that falls through into Succ,
782 // if any, is given in PredBB.
783 // MinCommonTailLength - Except for the special cases below, tail-merge if
784 // there are at least this many instructions in common.
    // Returns true if at least one group of tails was merged.
785 bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
786  MachineBasicBlock *PredBB,
787  unsigned MinCommonTailLength) {
788  bool MadeChange = false;
789 
790  DEBUG(dbgs() << "\nTryTailMergeBlocks: ";
791  for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
792  dbgs() << "BB#" << MergePotentials[i].getBlock()->getNumber()
793  << (i == e-1 ? "" : ", ");
794  dbgs() << "\n";
795  if (SuccBB) {
796  dbgs() << " with successor BB#" << SuccBB->getNumber() << '\n';
797  if (PredBB)
798  dbgs() << " which has fall-through from BB#"
799  << PredBB->getNumber() << "\n";
800  }
801  dbgs() << "Looking for common tails of at least "
802  << MinCommonTailLength << " instruction"
803  << (MinCommonTailLength == 1 ? "" : "s") << '\n';
804  );
805 
806  // Sort by hash value so that blocks with identical end sequences sort
807  // together.
808  array_pod_sort(MergePotentials.begin(), MergePotentials.end());
809 
810  // Walk through equivalence sets looking for actual exact matches.
     // Each iteration handles the hash group at the back of MergePotentials and
     // removes at least one entry from the list.
811  while (MergePotentials.size() > 1) {
812  unsigned CurHash = MergePotentials.back().getHash();
813 
814  // Build SameTails, identifying the set of blocks with this hash code
815  // and with the maximum number of instructions in common.
816  unsigned maxCommonTailLength = ComputeSameTails(CurHash,
817  MinCommonTailLength,
818  SuccBB, PredBB);
819 
820  // If we didn't find any pair that has at least MinCommonTailLength
821  // instructions in common, remove all blocks with this hash code and retry.
822  if (SameTails.empty()) {
823  RemoveBlocksWithHash(CurHash, SuccBB, PredBB);
824  continue;
825  }
826 
827  // If one of the blocks is the entire common tail (and not the entry
828  // block, which we can't jump to), we can treat all blocks with this same
829  // tail at once. Use PredBB if that is one of the possibilities, as that
830  // will not introduce any extra branches.
831  MachineBasicBlock *EntryBB =
832  &MergePotentials.front().getBlock()->getParent()->front();
     // SameTails.size() serves as the "no block chosen yet" sentinel.
833  unsigned commonTailIndex = SameTails.size();
834  // If there are two blocks, check to see if one can be made to fall through
835  // into the other.
836  if (SameTails.size() == 2 &&
837  SameTails[0].getBlock()->isLayoutSuccessor(SameTails[1].getBlock()) &&
838  SameTails[1].tailIsWholeBlock())
839  commonTailIndex = 1;
840  else if (SameTails.size() == 2 &&
841  SameTails[1].getBlock()->isLayoutSuccessor(
842  SameTails[0].getBlock()) &&
843  SameTails[0].tailIsWholeBlock())
844  commonTailIndex = 0;
845  else {
846  // Otherwise just pick one, favoring the fall-through predecessor if
847  // there is one.
848  for (unsigned i = 0, e = SameTails.size(); i != e; ++i) {
849  MachineBasicBlock *MBB = SameTails[i].getBlock();
850  if (MBB == EntryBB && SameTails[i].tailIsWholeBlock())
851  continue;
852  if (MBB == PredBB) {
853  commonTailIndex = i;
854  break;
855  }
856  if (SameTails[i].tailIsWholeBlock())
857  commonTailIndex = i;
858  }
859  }
860 
861  if (commonTailIndex == SameTails.size() ||
862  (SameTails[commonTailIndex].getBlock() == PredBB &&
863  !SameTails[commonTailIndex].tailIsWholeBlock())) {
864  // None of the blocks consist entirely of the common tail.
865  // Split a block so that one does.
866  if (!CreateCommonTailOnlyBlock(PredBB, SuccBB,
867  maxCommonTailLength, commonTailIndex)) {
     // Splitting failed: give up on this hash group.
868  RemoveBlocksWithHash(CurHash, SuccBB, PredBB);
869  continue;
870  }
871  }
872 
873  MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();
874 
875  // Recompute common tail MBB's edge weights and block frequency.
876  setCommonTailEdgeWeights(*MBB);
877 
878  // Remove the original debug location from the common tail.
879  for (auto &MI : *MBB)
880  if (!MI.isDebugValue())
881  MI.setDebugLoc(DebugLoc());
882 
883  // MBB is common tail. Adjust all other BB's to jump to this one.
884  // Traversal must be forwards so erases work.
885  DEBUG(dbgs() << "\nUsing common tail in BB#" << MBB->getNumber()
886  << " for ");
887  for (unsigned int i=0, e = SameTails.size(); i != e; ++i) {
888  if (commonTailIndex == i)
889  continue;
890  DEBUG(dbgs() << "BB#" << SameTails[i].getBlock()->getNumber()
891  << (i == e-1 ? "" : ", "));
892  // Merge operations (MMOs, undef flags)
893  mergeOperations(SameTails[i].getTailStartPos(), *MBB);
894  // Hack the end off BB i, making it jump to BB commonTailIndex instead.
895  ReplaceTailWithBranchTo(SameTails[i].getTailStartPos(), MBB);
896  // BB i is no longer a predecessor of SuccBB; remove it from the worklist.
897  MergePotentials.erase(SameTails[i].getMPIter());
898  }
899  DEBUG(dbgs() << "\n");
900  // We leave commonTailIndex in the worklist in case there are other blocks
901  // that match it with a smaller number of instructions.
902  MadeChange = true;
903  }
904  return MadeChange;
905 }
906 
/// TailMergeBlocks - Driver for tail merging over the whole function.
/// Returns false immediately when EnableTailMerge is off. Otherwise it
/// (1) unless AfterBlockPlacement is set, gathers blocks without successors
/// into MergePotentials and hands them to TryTailMergeBlocks, and
/// (2) for every block after the entry block with at least two predecessors,
/// gathers the eligible predecessors, puts their terminators into the
/// canonical form described below, and hands them to TryTailMergeBlocks.
/// The return value is the OR of the TryTailMergeBlocks results.
907 bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
908  bool MadeChange = false;
909  if (!EnableTailMerge) return MadeChange;
910 
911  // First find blocks with no successors.
912  // Block placement does not create new tail merging opportunities for these
913  // blocks.
914  if (!AfterBlockPlacement) {
915  MergePotentials.clear();
916  for (MachineBasicBlock &MBB : MF) {
     // Stop collecting once the candidate list reaches TailMergeThreshold.
917  if (MergePotentials.size() == TailMergeThreshold)
918  break;
919  if (!TriedMerging.count(&MBB) && MBB.succ_empty())
920  MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(MBB), &MBB));
921  }
922 
923  // If this is a large problem, avoid visiting the same basic blocks
924  // multiple times.
925  if (MergePotentials.size() == TailMergeThreshold)
926  for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
927  TriedMerging.insert(MergePotentials[i].getBlock());
928 
929  // See if we can do any tail merging on those.
930  if (MergePotentials.size() >= 2)
931  MadeChange |= TryTailMergeBlocks(nullptr, nullptr, MinCommonTailLength);
932  }
933 
934  // Look at blocks (IBB) with multiple predecessors (PBB).
935  // We change each predecessor to a canonical form, by
936  // (1) temporarily removing any unconditional branch from the predecessor
937  // to IBB, and
938  // (2) alter conditional branches so they branch to the other block
939  // not IBB; this may require adding back an unconditional branch to IBB
940  // later, where there wasn't one coming in. E.g.
941  // Bcc IBB
942  // fallthrough to QBB
943  // here becomes
944  // Bncc QBB
945  // with a conceptual B to IBB after that, which never actually exists.
946  // With those changes, we see whether the predecessors' tails match,
947  // and merge them if so. We change things out of canonical form and
948  // back to the way they were later in the process. (OptimizeBranches
949  // would undo some of this, but we can't use it, because we'd get into
950  // a compile-time infinite loop repeatedly doing and undoing the same
951  // transformations.)
952 
     // The walk starts at the second block, so std::prev(I) below is always
     // valid.
953  for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end();
954  I != E; ++I) {
955  if (I->pred_size() < 2) continue;
     // NOTE(review): listing line 956 is absent here. `UniquePreds` is used
     // below without a visible declaration; presumably it is declared on the
     // missing line -- confirm against upstream.
957  MachineBasicBlock *IBB = &*I;
958  MachineBasicBlock *PredBB = &*std::prev(I);
959  MergePotentials.clear();
     // ML is assigned and read only under `AfterBlockPlacement && MLI`; on
     // every other path it stays uninitialized and is never read.
960  MachineLoop *ML;
961 
962  // Bail if merging after placement and IBB is the loop header because
963  // -- If merging predecessors that belong to the same loop as IBB, the
964  // common tail of merged predecessors may become the loop top if block
965  // placement is called again and the predecessors may branch to this common
966  // tail and require more branches. This can be relaxed if
967  // MachineBlockPlacement::findBestLoopTop is more flexible.
968  // --If merging predecessors that do not belong to the same loop as IBB, the
969  // loop info of IBB's loop and the other loops may be affected. Calling the
970  // block placement again may make big change to the layout and eliminate the
971  // reason to do tail merging here.
972  if (AfterBlockPlacement && MLI) {
973  ML = MLI->getLoopFor(IBB);
974  if (ML && IBB == ML->getHeader())
975  continue;
976  }
977 
978  for (MachineBasicBlock *PBB : I->predecessors()) {
979  if (MergePotentials.size() == TailMergeThreshold)
980  break;
981 
982  if (TriedMerging.count(PBB))
983  continue;
984 
985  // Skip blocks that loop to themselves, can't tail merge these.
986  if (PBB == IBB)
987  continue;
988 
989  // Visit each predecessor only once.
990  if (!UniquePreds.insert(PBB).second)
991  continue;
992 
993  // Skip blocks which may jump to a landing pad. Can't tail merge these.
994  if (PBB->hasEHPadSuccessor())
995  continue;
996 
997  // After block placement, only consider predecessors that belong to the
998  // same loop as IBB. The reason is the same as above when skipping loop
999  // header.
1000  if (AfterBlockPlacement && MLI)
1001  if (ML != MLI->getLoopFor(PBB))
1002  continue;
1003 
1004  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
      // NOTE(review): listing line 1005 is absent here. `Cond` is used below
      // without a visible declaration; presumably it is declared on the
      // missing line -- confirm against upstream.
      // Predecessors whose terminators cannot be analyzed are not added to
      // MergePotentials.
1006  if (!TII->analyzeBranch(*PBB, TBB, FBB, Cond, true)) {
1007  // Failing case: IBB is the target of a cbr, and we cannot reverse the
1008  // branch.
1009  SmallVector<MachineOperand, 4> NewCond(Cond);
1010  if (!Cond.empty() && TBB == IBB) {
1011  if (TII->reverseBranchCondition(NewCond))
1012  continue;
1013  // This is the QBB case described above
1014  if (!FBB) {
1015  auto Next = ++PBB->getIterator();
1016  if (Next != MF.end())
1017  FBB = &*Next;
1018  }
1019  }
1020 
1021  // Failing case: the only way IBB can be reached from PBB is via
1022  // exception handling. Happens for landing pads. Would be nice to have
1023  // a bit in the edge so we didn't have to do all this.
1024  if (IBB->isEHPad()) {
1025  MachineFunction::iterator IP = ++PBB->getIterator();
1026  MachineBasicBlock *PredNextBB = nullptr;
1027  if (IP != MF.end())
1028  PredNextBB = &*IP;
1029  if (!TBB) {
1030  if (IBB != PredNextBB) // fallthrough
1031  continue;
1032  } else if (FBB) {
1033  if (TBB != IBB && FBB != IBB) // cbr then ubr
1034  continue;
1035  } else if (Cond.empty()) {
1036  if (TBB != IBB) // ubr
1037  continue;
1038  } else {
1039  if (TBB != IBB && IBB != PredNextBB) // cbr
1040  continue;
1041  }
1042  }
1043 
1044  // Remove the unconditional branch at the end, if any.
1045  if (TBB && (Cond.empty() || FBB)) {
1046  DebugLoc dl; // FIXME: this is nowhere
1047  TII->removeBranch(*PBB);
1048  if (!Cond.empty())
1049  // reinsert conditional branch only, for now
1050  TII->insertBranch(*PBB, (TBB == IBB) ? FBB : TBB, nullptr,
1051  NewCond, dl);
1052  }
1053 
      // The hash is taken after the terminator has been put into canonical
      // form above.
1054  MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(*PBB), PBB));
1055  }
1056  }
1057 
1058  // If this is a large problem, avoid visiting the same basic blocks multiple
1059  // times.
1060  if (MergePotentials.size() == TailMergeThreshold)
1061  for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
1062  TriedMerging.insert(MergePotentials[i].getBlock());
1063 
1064  if (MergePotentials.size() >= 2)
1065  MadeChange |= TryTailMergeBlocks(IBB, PredBB, MinCommonTailLength);
1066 
1067  // Reinsert an unconditional branch if needed. The 1 below can occur as a
1068  // result of removing blocks in TryTailMergeBlocks.
1069  PredBB = &*std::prev(I); // this may have been changed in TryTailMergeBlocks
1070  if (MergePotentials.size() == 1 &&
1071  MergePotentials.begin()->getBlock() != PredBB)
1072  FixTail(MergePotentials.begin()->getBlock(), IBB, TII);
1073  }
1074 
1075  return MadeChange;
1076 }
1077 
/// setCommonTailEdgeWeights - Recompute profile data for the common tail
/// block TailMBB. Its block frequency becomes the sum of the frequencies of
/// all blocks in SameTails. When TailMBB has two or more successors, each
/// successor probability is reset from that edge's accumulated frequency and
/// the sum over all edges; nothing is reset when that sum is zero.
1078 void BranchFolder::setCommonTailEdgeWeights(MachineBasicBlock &TailMBB) {
      // One accumulator per successor edge of TailMBB, in successor order.
1079  SmallVector<BlockFrequency, 2> EdgeFreqLs(TailMBB.succ_size());
1080  BlockFrequency AccumulatedMBBFreq;
1081 
1082  // Aggregate edge frequency of successor edge j:
1083  // edgeFreq(j) = sum (freq(bb) * edgeProb(bb, j)),
1084  // where bb is a basic block that is in SameTails.
1085  for (const auto &Src : SameTails) {
1086  const MachineBasicBlock *SrcMBB = Src.getBlock();
1087  BlockFrequency BlockFreq = MBBFreqInfo.getBlockFreq(SrcMBB);
1088  AccumulatedMBBFreq += BlockFreq;
1089 
1090  // It is not necessary to recompute edge weights if TailBB has less than two
1091  // successors.
1092  if (TailMBB.succ_size() <= 1)
1093  continue;
1094 
1095  auto EdgeFreq = EdgeFreqLs.begin();
1096 
      // EdgeFreq advances in lock step with TailMBB's successor list.
1097  for (auto SuccI = TailMBB.succ_begin(), SuccE = TailMBB.succ_end();
1098  SuccI != SuccE; ++SuccI, ++EdgeFreq)
1099  *EdgeFreq += BlockFreq * MBPI.getEdgeProbability(SrcMBB, *SuccI);
1100  }
1101 
1102  MBBFreqInfo.setBlockFreq(&TailMBB, AccumulatedMBBFreq);
1103 
1104  if (TailMBB.succ_size() <= 1)
1105  return;
1106 
1107  auto SumEdgeFreq =
1108  std::accumulate(EdgeFreqLs.begin(), EdgeFreqLs.end(), BlockFrequency(0))
1109  .getFrequency();
1110  auto EdgeFreq = EdgeFreqLs.begin();
1111 
      // A zero total leaves the existing successor probabilities untouched.
1112  if (SumEdgeFreq > 0) {
1113  for (auto SuccI = TailMBB.succ_begin(), SuccE = TailMBB.succ_end();
1114  SuccI != SuccE; ++SuccI, ++EdgeFreq) {
      // NOTE(review): listing line 1115 is absent here. The next line is the
      // tail of the statement that defines `Prob`; presumably a branch
      // probability built from (edge frequency, total) -- confirm upstream.
1116  EdgeFreq->getFrequency(), SumEdgeFreq);
1117  TailMBB.setSuccProbability(SuccI, Prob);
1118  }
1119  }
1120 }
1121 
1122 //===----------------------------------------------------------------------===//
1123 // Branch Optimization
1124 //===----------------------------------------------------------------------===//
1125 
/// OptimizeBranches - Renumber the blocks of MF, recompute FuncletMembership,
/// then call OptimizeBlock on every block except the entry block and remove
/// each block that ends up with no predecessors. Returns true if
/// OptimizeBlock reported a change or a dead block was removed.
1126 bool BranchFolder::OptimizeBranches(MachineFunction &MF) {
1127  bool MadeChange = false;
1128 
1129  // Make sure blocks are numbered in order
1130  MF.RenumberBlocks();
1131  // Renumbering blocks alters funclet membership, recalculate it.
1132  FuncletMembership = getFuncletMembership(MF);
1133 
      // The entry block is skipped: OptimizeBlock is never called on it.
1134  for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end();
1135  I != E; ) {
      // Step the iterator before touching MBB: the block may be moved by
      // OptimizeBlock or removed by RemoveDeadBlock below.
1136  MachineBasicBlock *MBB = &*I++;
1137  MadeChange |= OptimizeBlock(MBB);
1138 
1139  // If it is dead, remove it.
1140  if (MBB->pred_empty()) {
1141  RemoveDeadBlock(MBB);
1142  MadeChange = true;
1143  ++NumDeadBlocks;
1144  }
1145  }
1146 
1147  return MadeChange;
1148 }
1149 
1150 // Blocks should be considered empty if they contain only debug info;
1151 // else the debug info would affect codegen.
// Returns true when getFirstNonDebugInstr() finds nothing before MBB->end().
1152 static bool IsEmptyBlock(MachineBasicBlock *MBB) {
1153  return MBB->getFirstNonDebugInstr() == MBB->end();
1154 }
1155 
1156 // Blocks with only debug info and branches should be considered the same
1157 // as blocks with only branches.
// NOTE(review): listing lines 1158-1159 are absent here, so the function
// signature and the definition of `I` are not visible. Presumably this is
// IsBranchOnlyBlock (called from OptimizeBlock) -- confirm against upstream.
// The visible body asserts that `I` is not MBB->end() and returns whether
// the instruction at `I` is a branch.
1160  assert(I != MBB->end() && "empty block!");
1161  return I->isBranch();
1162 }
1163 
1164 /// IsBetterFallthrough - Return true if it would be clearly better to
1165 /// fall-through to MBB1 than to fall through into MBB2. This has to return
1166 /// a strict ordering, returning true for both (MBB1,MBB2) and (MBB2,MBB1) will
1167 /// result in infinite loops.
// NOTE(review): listing line 1168 (the start of the signature) is absent
// here; only the second parameter is visible below.
1169  MachineBasicBlock *MBB2) {
1170  // Right now, we use a simple heuristic. If MBB2 ends with a call, and
1171  // MBB1 doesn't, we prefer to fall through into MBB1. This allows us to
1172  // optimize branches that branch to either a return block or an assert block
1173  // into a fallthrough to the return.
      // NOTE(review): listing lines 1174-1175 are absent here; `MBB1I` and
      // `MBB2I` are used below without a visible definition. Presumably they
      // are iterators to the last non-debug instruction of each block --
      // confirm against upstream.
      // No preference when either iterator is at its block's end.
1176  if (MBB1I == MBB1->end() || MBB2I == MBB2->end())
1177  return false;
1178 
1179  // If there is a clear successor ordering we make sure that one block
1180  // will fall through to the next
1181  if (MBB1->isSuccessor(MBB2)) return true;
1182  if (MBB2->isSuccessor(MBB1)) return false;
1183 
1184  return MBB2I->isCall() && !MBB1I->isCall();
1185 }
1186 
1187 /// getBranchDebugLoc - Find and return, if any, the DebugLoc of the branch
1188 /// instructions on the block.
// NOTE(review): listing lines 1189-1190 are absent here, so the signature
// and the definition of `I` are not visible; confirm against upstream.
// The visible body returns I's DebugLoc when `I` is a branch inside MBB,
// and a default-constructed DebugLoc otherwise.
1191  if (I != MBB.end() && I->isBranch())
1192  return I->getDebugLoc();
1193  return DebugLoc();
1194 }
1195 
1196 /// OptimizeBlock - Analyze and optimize control flow related to the specified
1197 /// block. This is never called on the entry block.
/// Returns true if anything was changed. After most local rewrites control
/// jumps back to the ReoptimizeBlock label so that the fall-through block
/// and all branch analyses are recomputed from scratch.
1198 bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
1199  bool MadeChange = false;
1200  MachineFunction &MF = *MBB->getParent();
1201 ReoptimizeBlock:
1202 
      // FallThrough is the block laid out right after MBB (or MF.end()).
1203  MachineFunction::iterator FallThrough = MBB->getIterator();
1204  ++FallThrough;
1205 
1206  // Make sure MBB and FallThrough belong to the same funclet.
1207  bool SameFunclet = true;
1208  if (!FuncletMembership.empty() && FallThrough != MF.end()) {
1209  auto MBBFunclet = FuncletMembership.find(MBB);
1210  assert(MBBFunclet != FuncletMembership.end());
1211  auto FallThroughFunclet = FuncletMembership.find(&*FallThrough);
1212  assert(FallThroughFunclet != FuncletMembership.end());
1213  SameFunclet = MBBFunclet->second == FallThroughFunclet->second;
1214  }
1215 
1216  // If this block is empty, make everyone use its fall-through, not the block
1217  // explicitly. Landing pads should not do this since the landing-pad table
1218  // points to this block. Blocks with their addresses taken shouldn't be
1219  // optimized away.
1220  if (IsEmptyBlock(MBB) && !MBB->isEHPad() && !MBB->hasAddressTaken() &&
1221  SameFunclet) {
1222  // Dead block? Leave for cleanup later.
1223  if (MBB->pred_empty()) return MadeChange;
1224 
1225  if (FallThrough == MF.end()) {
1226  // TODO: Simplify preds to not branch here if possible!
1227  } else if (FallThrough->isEHPad()) {
1228  // Don't rewrite to a landing pad fallthrough. That could lead to the case
1229  // where a BB jumps to more than one landing pad.
1230  // TODO: Is it ever worth rewriting predecessors which don't already
1231  // jump to a landing pad, and so can safely jump to the fallthrough?
1232  } else if (MBB->isSuccessor(&*FallThrough)) {
1233  // Rewrite all predecessors of the old block to go to the fallthrough
1234  // instead.
1235  while (!MBB->pred_empty()) {
1236  MachineBasicBlock *Pred = *(MBB->pred_end()-1);
1237  Pred->ReplaceUsesOfBlockWith(MBB, &*FallThrough);
1238  }
1239  // If MBB was the target of a jump table, update jump tables to go to the
1240  // fallthrough instead.
1241  if (MachineJumpTableInfo *MJTI = MF.getJumpTableInfo())
1242  MJTI->ReplaceMBBInJumpTables(MBB, &*FallThrough);
1243  MadeChange = true;
1244  }
1245  return MadeChange;
1246  }
1247 
1248  // Check to see if we can simplify the terminator of the block before this
1249  // one.
1250  MachineBasicBlock &PrevBB = *std::prev(MachineFunction::iterator(MBB));
1251 
1252  MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr;
      // NOTE(review): listing line 1253 is absent here. `PriorCond` is used
      // below without a visible declaration; presumably it is declared on the
      // missing line -- confirm against upstream.
1254  bool PriorUnAnalyzable =
1255  TII->analyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, true);
1256  if (!PriorUnAnalyzable) {
1257  // If the CFG for the prior block has extra edges, remove them.
1258  MadeChange |= PrevBB.CorrectExtraCFGEdges(PriorTBB, PriorFBB,
1259  !PriorCond.empty());
1260 
1261  // If the previous branch is conditional and both conditions go to the same
1262  // destination, remove the branch, replacing it with an unconditional one or
1263  // a fall-through.
1264  if (PriorTBB && PriorTBB == PriorFBB) {
1265  DebugLoc dl = getBranchDebugLoc(PrevBB);
1266  TII->removeBranch(PrevBB);
1267  PriorCond.clear();
1268  if (PriorTBB != MBB)
1269  TII->insertBranch(PrevBB, PriorTBB, nullptr, PriorCond, dl);
1270  MadeChange = true;
1271  ++NumBranchOpts;
1272  goto ReoptimizeBlock;
1273  }
1274 
1275  // If the previous block unconditionally falls through to this block and
1276  // this block has no other predecessors, move the contents of this block
1277  // into the prior block. This doesn't usually happen when SimplifyCFG
1278  // has been used, but it can happen if tail merging splits a fall-through
1279  // predecessor of a block.
1280  // This has to check PrevBB->succ_size() because EH edges are ignored by
1281  // AnalyzeBranch.
1282  if (PriorCond.empty() && !PriorTBB && MBB->pred_size() == 1 &&
1283  PrevBB.succ_size() == 1 &&
1284  !MBB->hasAddressTaken() && !MBB->isEHPad()) {
1285  DEBUG(dbgs() << "\nMerging into block: " << PrevBB
1286  << "From MBB: " << *MBB);
1287  // Remove redundant DBG_VALUEs first.
1288  if (PrevBB.begin() != PrevBB.end()) {
1289  MachineBasicBlock::iterator PrevBBIter = PrevBB.end();
1290  --PrevBBIter;
1291  MachineBasicBlock::iterator MBBIter = MBB->begin();
1292  // Check if DBG_VALUE at the end of PrevBB is identical to the
1293  // DBG_VALUE at the beginning of MBB.
1294  while (PrevBBIter != PrevBB.begin() && MBBIter != MBB->end()
1295  && PrevBBIter->isDebugValue() && MBBIter->isDebugValue()) {
1296  if (!MBBIter->isIdenticalTo(*PrevBBIter))
1297  break;
1298  MachineInstr &DuplicateDbg = *MBBIter;
      // Step both iterators before erasing the duplicate from MBB.
1299  ++MBBIter; -- PrevBBIter;
1300  DuplicateDbg.eraseFromParent();
1301  }
1302  }
      // Move MBB's instructions to the end of PrevBB, then replace PrevBB's
      // single successor edge with MBB's successors.
1303  PrevBB.splice(PrevBB.end(), MBB, MBB->begin(), MBB->end());
1304  PrevBB.removeSuccessor(PrevBB.succ_begin());
1305  assert(PrevBB.succ_empty());
1306  PrevBB.transferSuccessors(MBB);
1307  MadeChange = true;
1308  return MadeChange;
1309  }
1310 
1311  // If the previous branch *only* branches to *this* block (conditional or
1312  // not) remove the branch.
1313  if (PriorTBB == MBB && !PriorFBB) {
1314  TII->removeBranch(PrevBB);
1315  MadeChange = true;
1316  ++NumBranchOpts;
1317  goto ReoptimizeBlock;
1318  }
1319 
1320  // If the prior block branches somewhere else on the condition and here if
1321  // the condition is false, remove the uncond second branch.
1322  if (PriorFBB == MBB) {
1323  DebugLoc dl = getBranchDebugLoc(PrevBB);
1324  TII->removeBranch(PrevBB);
1325  TII->insertBranch(PrevBB, PriorTBB, nullptr, PriorCond, dl);
1326  MadeChange = true;
1327  ++NumBranchOpts;
1328  goto ReoptimizeBlock;
1329  }
1330 
1331  // If the prior block branches here on true and somewhere else on false, and
1332  // if the branch condition is reversible, reverse the branch to create a
1333  // fall-through.
1334  if (PriorTBB == MBB) {
1335  SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);
1336  if (!TII->reverseBranchCondition(NewPriorCond)) {
1337  DebugLoc dl = getBranchDebugLoc(PrevBB);
1338  TII->removeBranch(PrevBB);
1339  TII->insertBranch(PrevBB, PriorFBB, nullptr, NewPriorCond, dl);
1340  MadeChange = true;
1341  ++NumBranchOpts;
1342  goto ReoptimizeBlock;
1343  }
1344  }
1345 
1346  // If this block has no successors (e.g. it is a return block or ends with
1347  // a call to a no-return function like abort or __cxa_throw) and if the pred
1348  // falls through into this block, and if it would otherwise fall through
1349  // into the block after this, move this block to the end of the function.
1350  //
1351  // We consider it more likely that execution will stay in the function (e.g.
1352  // due to loops) than it is to exit it. This asserts in loops etc, moving
1353  // the assert condition out of the loop body.
1354  if (MBB->succ_empty() && !PriorCond.empty() && !PriorFBB &&
1355  MachineFunction::iterator(PriorTBB) == FallThrough &&
1356  !MBB->canFallThrough()) {
1357  bool DoTransform = true;
1358 
1359  // We have to be careful that the succs of PrevBB aren't both no-successor
1360  // blocks. If neither have successors and if PrevBB is the second from
1361  // last block in the function, we'd just keep swapping the two blocks for
1362  // last. Only do the swap if one is clearly better to fall through than
1363  // the other.
1364  if (FallThrough == --MF.end() &&
1365  !IsBetterFallthrough(PriorTBB, MBB))
1366  DoTransform = false;
1367 
1368  if (DoTransform) {
1369  // Reverse the branch so we will fall through on the previous true cond.
1370  SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);
1371  if (!TII->reverseBranchCondition(NewPriorCond)) {
1372  DEBUG(dbgs() << "\nMoving MBB: " << *MBB
1373  << "To make fallthrough to: " << *PriorTBB << "\n");
1374 
1375  DebugLoc dl = getBranchDebugLoc(PrevBB);
1376  TII->removeBranch(PrevBB);
1377  TII->insertBranch(PrevBB, MBB, nullptr, NewPriorCond, dl);
1378 
1379  // Move this block to the end of the function.
1380  MBB->moveAfter(&MF.back());
1381  MadeChange = true;
1382  ++NumBranchOpts;
1383  return MadeChange;
1384  }
1385  }
1386  }
1387  }
1388 
1389  // Analyze the branch in the current block.
1390  MachineBasicBlock *CurTBB = nullptr, *CurFBB = nullptr;
      // NOTE(review): listing line 1391 is absent here. `CurCond` is used
      // below without a visible declaration; presumably it is declared on the
      // missing line -- confirm against upstream.
1392  bool CurUnAnalyzable =
1393  TII->analyzeBranch(*MBB, CurTBB, CurFBB, CurCond, true);
1394  if (!CurUnAnalyzable) {
1395  // If the CFG for this block has extra edges, remove them.
1396  MadeChange |= MBB->CorrectExtraCFGEdges(CurTBB, CurFBB, !CurCond.empty());
1397 
1398  // If this is a two-way branch, and the FBB branches to this block, reverse
1399  // the condition so the single-basic-block loop is faster. Instead of:
1400  // Loop: xxx; jcc Out; jmp Loop
1401  // we want:
1402  // Loop: xxx; jncc Loop; jmp Out
1403  if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB) {
1404  SmallVector<MachineOperand, 4> NewCond(CurCond);
1405  if (!TII->reverseBranchCondition(NewCond)) {
1406  DebugLoc dl = getBranchDebugLoc(*MBB);
1407  TII->removeBranch(*MBB);
1408  TII->insertBranch(*MBB, CurFBB, CurTBB, NewCond, dl);
1409  MadeChange = true;
1410  ++NumBranchOpts;
1411  goto ReoptimizeBlock;
1412  }
1413  }
1414 
1415  // If this branch is the only thing in its block, see if we can forward
1416  // other blocks across it.
1417  if (CurTBB && CurCond.empty() && !CurFBB &&
1418  IsBranchOnlyBlock(MBB) && CurTBB != MBB &&
1419  !MBB->hasAddressTaken() && !MBB->isEHPad()) {
1420  DebugLoc dl = getBranchDebugLoc(*MBB);
1421  // This block may contain just an unconditional branch. Because there can
1422  // be 'non-branch terminators' in the block, try removing the branch and
1423  // then seeing if the block is empty.
1424  TII->removeBranch(*MBB);
1425  // If the only things remaining in the block are debug info, remove these
1426  // as well, so this will behave the same as an empty block in non-debug
1427  // mode.
1428  if (IsEmptyBlock(MBB)) {
1429  // Make the block empty, losing the debug info (we could probably
1430  // improve this in some cases.)
1431  MBB->erase(MBB->begin(), MBB->end());
1432  }
1433  // If this block is just an unconditional branch to CurTBB, we can
1434  // usually completely eliminate the block. The only case we cannot
1435  // completely eliminate the block is when the block before this one
1436  // falls through into MBB and we can't understand the prior block's branch
1437  // condition.
1438  if (MBB->empty()) {
1439  bool PredHasNoFallThrough = !PrevBB.canFallThrough();
1440  if (PredHasNoFallThrough || !PriorUnAnalyzable ||
1441  !PrevBB.isSuccessor(MBB)) {
1442  // If the prior block falls through into us, turn it into an
1443  // explicit branch to us to make updates simpler.
1444  if (!PredHasNoFallThrough && PrevBB.isSuccessor(MBB) &&
1445  PriorTBB != MBB && PriorFBB != MBB) {
1446  if (!PriorTBB) {
1447  assert(PriorCond.empty() && !PriorFBB &&
1448  "Bad branch analysis");
1449  PriorTBB = MBB;
1450  } else {
1451  assert(!PriorFBB && "Machine CFG out of date!");
1452  PriorFBB = MBB;
1453  }
1454  DebugLoc pdl = getBranchDebugLoc(PrevBB);
1455  TII->removeBranch(PrevBB);
1456  TII->insertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, pdl);
1457  }
1458 
1459  // Iterate through all the predecessors, revectoring each in-turn.
      // PI only advances past a self-edge; every other predecessor is
      // redirected to CurTBB and the same index is examined again.
1460  size_t PI = 0;
1461  bool DidChange = false;
1462  bool HasBranchToSelf = false;
1463  while(PI != MBB->pred_size()) {
1464  MachineBasicBlock *PMBB = *(MBB->pred_begin() + PI);
1465  if (PMBB == MBB) {
1466  // If this block has an uncond branch to itself, leave it.
1467  ++PI;
1468  HasBranchToSelf = true;
1469  } else {
1470  DidChange = true;
1471  PMBB->ReplaceUsesOfBlockWith(MBB, CurTBB);
1472  // If this change resulted in PMBB ending in a conditional
1473  // branch where both conditions go to the same destination,
1474  // change this to an unconditional branch (and fix the CFG).
1475  MachineBasicBlock *NewCurTBB = nullptr, *NewCurFBB = nullptr;
1476  SmallVector<MachineOperand, 4> NewCurCond;
1477  bool NewCurUnAnalyzable = TII->analyzeBranch(
1478  *PMBB, NewCurTBB, NewCurFBB, NewCurCond, true);
1479  if (!NewCurUnAnalyzable && NewCurTBB && NewCurTBB == NewCurFBB) {
1480  DebugLoc pdl = getBranchDebugLoc(*PMBB);
1481  TII->removeBranch(*PMBB);
1482  NewCurCond.clear();
1483  TII->insertBranch(*PMBB, NewCurTBB, nullptr, NewCurCond, pdl);
1484  MadeChange = true;
1485  ++NumBranchOpts;
1486  PMBB->CorrectExtraCFGEdges(NewCurTBB, nullptr, false);
1487  }
1488  }
1489  }
1490 
1491  // Change any jumptables to go to the new MBB.
1492  if (MachineJumpTableInfo *MJTI = MF.getJumpTableInfo())
1493  MJTI->ReplaceMBBInJumpTables(MBB, CurTBB);
1494  if (DidChange) {
1495  ++NumBranchOpts;
1496  MadeChange = true;
1497  if (!HasBranchToSelf) return MadeChange;
1498  }
1499  }
1500  }
1501 
1502  // Add the branch back if the block is more than just an uncond branch.
1503  TII->insertBranch(*MBB, CurTBB, nullptr, CurCond, dl);
1504  }
1505  }
1506 
1507  // If the prior block doesn't fall through into this block, and if this
1508  // block doesn't fall through into some other block, see if we can find a
1509  // place to move this block where a fall-through will happen.
1510  if (!PrevBB.canFallThrough()) {
1511 
1512  // Now we know that there was no fall-through into this block, check to
1513  // see if it has a fall-through into its successor.
1514  bool CurFallsThru = MBB->canFallThrough();
1515 
1516  if (!MBB->isEHPad()) {
1517  // Check all the predecessors of this block. If one of them has no fall
1518  // throughs, move this block right after it.
1519  for (MachineBasicBlock *PredBB : MBB->predecessors()) {
1520  // Analyze the branch at the end of the pred.
1521  MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
      // NOTE(review): listing line 1522 is absent here. `PredCond` is used
      // below without a visible declaration; presumably it is declared on the
      // missing line -- confirm against upstream.
1523  if (PredBB != MBB && !PredBB->canFallThrough() &&
1524  !TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true) &&
1525  (!CurFallsThru || !CurTBB || !CurFBB) &&
1526  (!CurFallsThru || MBB->getNumber() >= PredBB->getNumber())) {
1527  // If the current block doesn't fall through, just move it.
1528  // If the current block can fall through and does not end with a
1529  // conditional branch, we need to append an unconditional jump to
1530  // the (current) next block. To avoid a possible compile-time
1531  // infinite loop, move blocks only backward in this case.
1532  // Also, if there are already 2 branches here, we cannot add a third;
1533  // this means we have the case
1534  // Bcc next
1535  // B elsewhere
1536  // next:
1537  if (CurFallsThru) {
1538  MachineBasicBlock *NextBB = &*std::next(MBB->getIterator());
1539  CurCond.clear();
1540  TII->insertBranch(*MBB, NextBB, nullptr, CurCond, DebugLoc());
1541  }
1542  MBB->moveAfter(PredBB);
1543  MadeChange = true;
1544  goto ReoptimizeBlock;
1545  }
1546  }
1547  }
1548 
1549  if (!CurFallsThru) {
1550  // Check all successors to see if we can move this block before it.
1551  for (MachineBasicBlock *SuccBB : MBB->successors()) {
1552  // Analyze the branch at the end of the block before the succ.
1553  MachineFunction::iterator SuccPrev = --SuccBB->getIterator();
1554 
1555  // If this block doesn't already fall-through to that successor, and if
1556  // the succ doesn't already have a block that can fall through into it,
1557  // and if the successor isn't an EH destination, we can arrange for the
1558  // fallthrough to happen.
1559  if (SuccBB != MBB && &*SuccPrev != MBB &&
1560  !SuccPrev->canFallThrough() && !CurUnAnalyzable &&
1561  !SuccBB->isEHPad()) {
1562  MBB->moveBefore(SuccBB);
1563  MadeChange = true;
1564  goto ReoptimizeBlock;
1565  }
1566  }
1567 
1568  // Okay, there is no really great place to put this block. If, however,
1569  // the block before this one would be a fall-through if this block were
1570  // removed, move this block to the end of the function. There is no real
1571  // advantage in "falling through" to an EH block, so we don't want to
1572  // perform this transformation for that case.
1573  //
1574  // Also, Windows EH introduced the possibility of an arbitrary number of
1575  // successors to a given block. The analyzeBranch call does not consider
1576  // exception handling and so we can get in a state where a block
1577  // containing a call is followed by multiple EH blocks that would be
1578  // rotated infinitely at the end of the function if the transformation
1579  // below were performed for EH "FallThrough" blocks. Therefore, even if
1580  // that appears not to be happening anymore, we should assume that it is
1581  // possible and not remove the "!FallThrough()->isEHPad" condition below.
1582  MachineBasicBlock *PrevTBB = nullptr, *PrevFBB = nullptr;
      // NOTE(review): listing line 1583 is absent here. `PrevCond` is used
      // below without a visible declaration; presumably it is declared on the
      // missing line -- confirm against upstream.
1584  if (FallThrough != MF.end() &&
1585  !FallThrough->isEHPad() &&
1586  !TII->analyzeBranch(PrevBB, PrevTBB, PrevFBB, PrevCond, true) &&
1587  PrevBB.isSuccessor(&*FallThrough)) {
1588  MBB->moveAfter(&MF.back());
1589  MadeChange = true;
1590  return MadeChange;
1591  }
1592  }
1593  }
1594 
1595  return MadeChange;
1596 }
1597 
1598 //===----------------------------------------------------------------------===//
1599 // Hoist Common Code
1600 //===----------------------------------------------------------------------===//
1601 
1602 /// HoistCommonCode - Hoist common instruction sequences at the start of basic
1603 /// blocks to their common predecessor.
/// Calls HoistCommonCodeInSuccs on every block of MF, the entry block
/// included, and returns true if any call reported a change.
1604 bool BranchFolder::HoistCommonCode(MachineFunction &MF) {
1605  bool MadeChange = false;
1606  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ) {
      // The iterator is stepped before the block is processed.
1607  MachineBasicBlock *MBB = &*I++;
1608  MadeChange |= HoistCommonCodeInSuccs(MBB);
1609  }
1610 
1611  return MadeChange;
1612 }
1613 
1614 /// findFalseBlock - BB has a fallthrough. Find its 'false' successor given
1615 /// its 'true' successor.
/// Returns the first successor of BB that is not TrueBB, or nullptr when
/// every successor is TrueBB.
// NOTE(review): listing line 1616 (the start of the signature, including the
// `BB` parameter) is absent here -- confirm against upstream.
1617  MachineBasicBlock *TrueBB) {
1618  for (MachineBasicBlock *SuccBB : BB->successors())
1619  if (SuccBB != TrueBB)
1620  return SuccBB;
1621  return nullptr;
1622 }
1623 
/// addRegAndItsAliases - Insert Reg into Set. On one branch, every register
/// yielded by MCRegAliasIterator(Reg, TRI, true) is inserted; on the other
/// branch only Reg itself is inserted.
1624 template <class Container>
1625 static void addRegAndItsAliases(unsigned Reg, const TargetRegisterInfo *TRI,
1626  Container &Set) {
      // NOTE(review): listing line 1627 is absent here; it holds the `if`
      // condition that selects between the two branches below. Presumably it
      // tests whether Reg is a physical register -- confirm against upstream.
1628  for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
1629  Set.insert(*AI);
1630  } else {
1631  Set.insert(Reg);
1632  }
1633 }
1634 
1635 /// findHoistingInsertPosAndDeps - Find the location to move common instructions
1636 /// in successors to. The location is usually just before the terminator,
1637 /// however if the terminator is a conditional branch and its previous
1638 /// instruction is the flag setting instruction, the previous instruction is
1639 /// the preferred location. This function also gathers uses and defs of the
1640 /// instructions from the insertion point to the end of the block. The data is
1641 /// used by HoistCommonCodeInSuccs to ensure safety.
/// Returns MBB->end() on every bail-out path; the caller treats that value
/// as "do not hoist". Otherwise returns either `Loc` or the instruction
/// found just before it (`PI`).
1642 static
// NOTE(review): listing line 1643 (return type, function name and the `MBB`
// parameter) is absent here -- confirm against upstream.
1644  const TargetInstrInfo *TII,
1645  const TargetRegisterInfo *TRI,
1646  SmallSet<unsigned,4> &Uses,
1647  SmallSet<unsigned,4> &Defs) {
      // NOTE(review): listing line 1648 is absent here. `Loc` is used below
      // without a visible definition; presumably it is MBB's first terminator
      // -- confirm against upstream.
1649  if (!TII->isUnpredicatedTerminator(*Loc))
1650  return MBB->end();
1651 
      // Record the registers the instruction at Loc reads and writes.
1652  for (const MachineOperand &MO : Loc->operands()) {
1653  if (!MO.isReg())
1654  continue;
1655  unsigned Reg = MO.getReg();
1656  if (!Reg)
1657  continue;
1658  if (MO.isUse()) {
1659  addRegAndItsAliases(Reg, TRI, Uses);
1660  } else {
1661  if (!MO.isDead())
1662  // Don't try to hoist code in the rare case the terminator defines a
1663  // register that is later used.
1664  return MBB->end();
1665 
1666  // If the terminator defines a register, make sure we don't hoist
1667  // the instruction whose def might be clobbered by the terminator.
1668  addRegAndItsAliases(Reg, TRI, Defs);
1669  }
1670  }
1671 
      // With no register uses recorded, insert right at Loc.
1672  if (Uses.empty())
1673  return Loc;
1674  if (Loc == MBB->begin())
1675  return MBB->end();
1676 
1677  // The terminator is probably a conditional branch, try not to separate the
1678  // branch from condition setting instruction.
      // NOTE(review): listing line 1679 is absent here; the next line is the
      // initializer of `PI`, whose declaration is not visible -- confirm
      // against upstream.
1680  skipDebugInstructionsBackward(std::prev(Loc), MBB->begin());
1681 
      // IsDef becomes true when PI defines a register recorded in Uses.
1682  bool IsDef = false;
1683  for (const MachineOperand &MO : PI->operands()) {
1684  // If PI has a regmask operand, it is probably a call. Separate away.
1685  if (MO.isRegMask())
1686  return Loc;
1687  if (!MO.isReg() || MO.isUse())
1688  continue;
1689  unsigned Reg = MO.getReg();
1690  if (!Reg)
1691  continue;
1692  if (Uses.count(Reg)) {
1693  IsDef = true;
1694  break;
1695  }
1696  }
1697  if (!IsDef)
1698  // The condition setting instruction is not just before the conditional
1699  // branch.
1700  return Loc;
1701 
1702  // Be conservative, don't insert instruction above something that may have
1703  // side-effects. And since it's potentially bad to separate flag setting
1704  // instruction from the conditional branch, just abort the optimization
1705  // completely.
1706  // Also avoid moving code above predicated instruction since it's hard to
1707  // reason about register liveness with predicated instruction.
1708  bool DontMoveAcrossStore = true;
1709  if (!PI->isSafeToMove(nullptr, DontMoveAcrossStore) || TII->isPredicated(*PI))
1710  return MBB->end();
1711 
1712 
1713  // Find out what registers are live. Note this routine is ignoring other live
1714  // registers which are only used by instructions in successor blocks.
1715  for (const MachineOperand &MO : PI->operands()) {
1716  if (!MO.isReg())
1717  continue;
1718  unsigned Reg = MO.getReg();
1719  if (!Reg)
1720  continue;
1721  if (MO.isUse()) {
1722  addRegAndItsAliases(Reg, TRI, Uses);
1723  } else {
1724  if (Uses.erase(Reg)) {
      // NOTE(review): listing line 1725 is absent here; it opens a nested
      // block (see the extra closing brace below), presumably a
      // physical-register check -- confirm against upstream.
1726  for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
1727  Uses.erase(*SubRegs); // Use sub-registers to be conservative
1728  }
1729  }
1730  addRegAndItsAliases(Reg, TRI, Defs);
1731  }
1732  }
1733 
1734  return PI;
1735 }
1736 
1737 /// HoistCommonCodeInSuccs - If the successors of MBB has common instruction
1738 /// sequence at the start of the function, move the instructions before MBB
1739 /// terminator if it's legal.
1740 bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
1741  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
1743  if (TII->analyzeBranch(*MBB, TBB, FBB, Cond, true) || !TBB || Cond.empty())
1744  return false;
1745 
1746  if (!FBB) FBB = findFalseBlock(MBB, TBB);
1747  if (!FBB)
1748  // Malformed bcc? True and false blocks are the same?
1749  return false;
1750 
1751  // Restrict the optimization to cases where MBB is the only predecessor,
1752  // it is an obvious win.
1753  if (TBB->pred_size() > 1 || FBB->pred_size() > 1)
1754  return false;
1755 
1756  // Find a suitable position to hoist the common instructions to. Also figure
1757  // out which registers are used or defined by instructions from the insertion
1758  // point to the end of the block.
1759  SmallSet<unsigned, 4> Uses, Defs;
1761  findHoistingInsertPosAndDeps(MBB, TII, TRI, Uses, Defs);
1762  if (Loc == MBB->end())
1763  return false;
1764 
1765  bool HasDups = false;
1766  SmallVector<unsigned, 4> LocalDefs;
1767  SmallSet<unsigned, 4> LocalDefsSet;
1768  MachineBasicBlock::iterator TIB = TBB->begin();
1769  MachineBasicBlock::iterator FIB = FBB->begin();
1770  MachineBasicBlock::iterator TIE = TBB->end();
1771  MachineBasicBlock::iterator FIE = FBB->end();
1772  while (TIB != TIE && FIB != FIE) {
1773  // Skip dbg_value instructions. These do not count.
1774  TIB = skipDebugInstructionsForward(TIB, TIE);
1775  FIB = skipDebugInstructionsForward(FIB, FIE);
1776  if (TIB == TIE || FIB == FIE)
1777  break;
1778 
1779  if (!TIB->isIdenticalTo(*FIB, MachineInstr::CheckKillDead))
1780  break;
1781 
1782  if (TII->isPredicated(*TIB))
1783  // Hard to reason about register liveness with predicated instruction.
1784  break;
1785 
1786  bool IsSafe = true;
1787  for (MachineOperand &MO : TIB->operands()) {
1788  // Don't attempt to hoist instructions with register masks.
1789  if (MO.isRegMask()) {
1790  IsSafe = false;
1791  break;
1792  }
1793  if (!MO.isReg())
1794  continue;
1795  unsigned Reg = MO.getReg();
1796  if (!Reg)
1797  continue;
1798  if (MO.isDef()) {
1799  if (Uses.count(Reg)) {
1800  // Avoid clobbering a register that's used by the instruction at
1801  // the point of insertion.
1802  IsSafe = false;
1803  break;
1804  }
1805 
1806  if (Defs.count(Reg) && !MO.isDead()) {
1807  // Don't hoist the instruction if the def would be clobber by the
1808  // instruction at the point insertion. FIXME: This is overly
1809  // conservative. It should be possible to hoist the instructions
1810  // in BB2 in the following example:
1811  // BB1:
1812  // r1, eflag = op1 r2, r3
1813  // brcc eflag
1814  //
1815  // BB2:
1816  // r1 = op2, ...
1817  // = op3, r1<kill>
1818  IsSafe = false;
1819  break;
1820  }
1821  } else if (!LocalDefsSet.count(Reg)) {
1822  if (Defs.count(Reg)) {
1823  // Use is defined by the instruction at the point of insertion.
1824  IsSafe = false;
1825  break;
1826  }
1827 
1828  if (MO.isKill() && Uses.count(Reg))
1829  // Kills a register that's read by the instruction at the point of
1830  // insertion. Remove the kill marker.
1831  MO.setIsKill(false);
1832  }
1833  }
1834  if (!IsSafe)
1835  break;
1836 
1837  bool DontMoveAcrossStore = true;
1838  if (!TIB->isSafeToMove(nullptr, DontMoveAcrossStore))
1839  break;
1840 
1841  // Remove kills from LocalDefsSet, these registers had short live ranges.
1842  for (const MachineOperand &MO : TIB->operands()) {
1843  if (!MO.isReg() || !MO.isUse() || !MO.isKill())
1844  continue;
1845  unsigned Reg = MO.getReg();
1846  if (!Reg || !LocalDefsSet.count(Reg))
1847  continue;
1849  for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
1850  LocalDefsSet.erase(*AI);
1851  } else {
1852  LocalDefsSet.erase(Reg);
1853  }
1854  }
1855 
1856  // Track local defs so we can update liveins.
1857  for (const MachineOperand &MO : TIB->operands()) {
1858  if (!MO.isReg() || !MO.isDef() || MO.isDead())
1859  continue;
1860  unsigned Reg = MO.getReg();
1861  if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg))
1862  continue;
1863  LocalDefs.push_back(Reg);
1864  addRegAndItsAliases(Reg, TRI, LocalDefsSet);
1865  }
1866 
1867  HasDups = true;
1868  ++TIB;
1869  ++FIB;
1870  }
1871 
1872  if (!HasDups)
1873  return false;
1874 
1875  MBB->splice(Loc, TBB, TBB->begin(), TIB);
1876  FBB->erase(FBB->begin(), FIB);
1877 
1878  // Update livein's.
1879  bool AddedLiveIns = false;
1880  for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
1881  unsigned Def = LocalDefs[i];
1882  if (LocalDefsSet.count(Def)) {
1883  TBB->addLiveIn(Def);
1884  FBB->addLiveIn(Def);
1885  AddedLiveIns = true;
1886  }
1887  }
1888 
1889  if (AddedLiveIns) {
1890  TBB->sortUniqueLiveIns();
1891  FBB->sortUniqueLiveIns();
1892  }
1893 
1894  ++NumHoist;
1895  return true;
1896 }
unsigned succ_size() const
static unsigned EstimateRuntime(MachineBasicBlock::iterator I, MachineBasicBlock::iterator E)
EstimateRuntime - Make a rough estimate for how long it will take to run the specified code...
void push_back(const T &Elt)
Definition: SmallVector.h:211
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
bool isEHPad() const
Returns true if the block is a landing pad.
BitVector & set()
Definition: BitVector.h:219
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const
static cl::opt< unsigned > TailMergeThreshold("tail-merge-threshold", cl::desc("Max number of predecessors to consider tail merging"), cl::init(150), cl::Hidden)
STATISTIC(NumFunctions,"Total number of functions")
size_t i
bool isValid() const
isValid - returns true if this iterator is not yet at the end.
void RenumberBlocks(MachineBasicBlock *MBBFrom=nullptr)
RenumberBlocks - This discards all of the MachineBasicBlock numbers and recomputes them...
MachineBasicBlock * getMBB() const
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
iterator getFirstNonDebugInstr()
Returns an iterator to the first non-debug instruction in the basic block, or end().
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
void setIsUndef(bool Val=true)
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Address of indexed Jump Table for switch.
void transferSuccessors(MachineBasicBlock *FromMBB)
Transfers all the successors from MBB to this machine basic block (i.e., copies all the successors Fr...
static unsigned HashMachineInstr(const MachineInstr &MI)
HashMachineInstr - Compute a hash value for MI and its operands.
void RemoveJumpTable(unsigned Idx)
RemoveJumpTable - Mark the specific index as being dead.
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
void computeLiveIns(LivePhysRegs &LiveRegs, const TargetRegisterInfo &TRI, MachineBasicBlock &MBB)
Compute the live-in list for MBB assuming all of its successors live-in lists are up-to-date...
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e...
const_iterator begin(StringRef path)
Get begin iterator over path.
Definition: Path.cpp:233
MachineBasicBlock reference.
void moveAfter(MachineBasicBlock *NewBefore)
A debug info location.
Definition: DebugLoc.h:34
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
static DebugLoc getBranchDebugLoc(MachineBasicBlock &MBB)
getBranchDebugLoc - Find and return, if any, the DebugLoc of the branch instructions on the block...
bool erase(const T &V)
Definition: SmallSet.h:107
bool optForSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:464
BlockT * getHeader() const
Definition: LoopInfo.h:102
BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const
getblockFreq - Return block frequency.
iterator_range< succ_iterator > successors()
const std::vector< MachineJumpTableEntry > & getJumpTables() const
AnalysisUsage & addRequired()
void ReplaceUsesOfBlockWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Given a machine basic block that branched to 'Old', change the code and CFG so that it branches to 'N...
LLVM_NODISCARD bool empty() const
Definition: SmallSet.h:55
bool OptimizeFunction(MachineFunction &MF, const TargetInstrInfo *tii, const TargetRegisterInfo *tri, MachineModuleInfo *mmi, MachineLoopInfo *mli=nullptr, bool AfterPlacement=false)
OptimizeFunction - Perhaps branch folding, tail merging and other CFG optimizations on the given func...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
static F t[256]
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
Name of external global symbol.
Reg
All possible values of the reg field in the ModR/M byte.
static unsigned HashEndOfMBB(const MachineBasicBlock &MBB)
HashEndOfMBB - Hash the last instruction in the MBB.
bool isUndef() const
static bool IsEmptyBlock(MachineBasicBlock *MBB)
void removeBlock(MachineBasicBlock *BB)
This method completely removes BB from all data structures, including all of the Loop objects it is n...
INITIALIZE_PASS(BranchFolderPass,"branch-folder","Control Flow Optimizer", false, false) bool BranchFolderPass
Target-Independent Code Generator Pass Configuration Options.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:60
unsigned getNumOperands() const
Access to explicit operands of the instruction.
Definition: MachineInstr.h:277
virtual unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const
Insert branch code into the end of the specified MachineBasicBlock.
#define F(x, y, z)
Definition: MD5.cpp:51
static cl::opt< cl::boolOrDefault > FlagEnableTailMerge("enable-tail-merge", cl::init(cl::BOU_UNSET), cl::Hidden)
static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB, const TargetInstrInfo *TII)
MachineBasicBlock * MBB
bool canFallThrough()
Return true if the block can implicitly transfer control to the block after it by falling off the end...
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
virtual bool isUnpredicatedTerminator(const MachineInstr &MI) const
Returns true if the instruction is a terminator instruction that has not been predicated.
iterator getLastNonDebugInstr()
Returns an iterator to the last non-debug instruction in the basic block, or end().
static GCRegistry::Add< OcamlGC > B("ocaml","ocaml 3.10-compatible GC")
static MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, SmallSet< unsigned, 4 > &Uses, SmallSet< unsigned, 4 > &Defs)
findHoistingInsertPosAndDeps - Find the location to move common instructions in successors to...
int64_t getImm() const
void setSuccProbability(succ_iterator I, BranchProbability Prob)
Set successor probability of a given iterator.
reverse_iterator rend()
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
reverse_iterator rbegin()
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:273
static GCRegistry::Add< CoreCLRGC > E("coreclr","CoreCLR-compatible GC")
void setBlockFreq(const MachineBasicBlock *MBB, BlockFrequency F)
TargetInstrInfo - Interface to description of machine instruction set.
static void mergeOperations(MachineBasicBlock::iterator MBBIStartPos, MachineBasicBlock &MBBCommon)
MachineLoop * getLoopFor(const MachineBasicBlock *BB) const
Return the innermost loop that BB lives in.
Address of a global value.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:395
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition: STLExtras.h:689
void addLiveIn(MCPhysReg PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
unsigned const MachineRegisterInfo * MRI
LLVM Basic Block Representation.
Definition: BasicBlock.h:51
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const
Remove the branching code at the end of the specific MBB.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:36
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:279
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:368
bool tracksLiveness() const
tracksLiveness - Returns true when tracking register liveness accurately.
virtual bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const
Reverses the branch condition of the specified condition list, returning false on success and true if...
MCRegAliasIterator enumerates all registers aliasing Reg.
Represent the analysis usage information of a pass.
void clearLiveIns()
Clear live in list.
bool getEnableTailMerge() const
int64_t getOffset() const
Return the offset from the symbol in this operand.
static bool IsBetterFallthrough(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2)
IsBetterFallthrough - Return true if it would be clearly better to fall-through to MBB1 than to fall ...
self_iterator getIterator()
Definition: ilist_node.h:81
iterator_range< pred_iterator > predecessors()
void moveBefore(MachineBasicBlock *NewAfter)
Move 'this' block before or after the specified block.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
MCSubRegIterator enumerates all sub-registers of Reg.
static unsigned CountTerminators(MachineBasicBlock *MBB, MachineBasicBlock::iterator &I)
CountTerminators - Count the number of terminators in the given block and set I to the position of th...
static cl::opt< unsigned > TailMergeSize("tail-merge-size", cl::desc("Min number of instructions to consider tail merging"), cl::init(3), cl::Hidden)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:64
Iterator for intrusive lists based on ilist_node.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:425
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
MachineOperand class - Representation of each machine instruction operand.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:843
DenseMap< const MachineBasicBlock *, int > getFuncletMembership(const MachineFunction &MF)
static void addRegAndItsAliases(unsigned Reg, const TargetRegisterInfo *TRI, Container &Set)
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
virtual bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const
Returns true if the live-ins should be tracked after register allocation.
void invalidateLiveness()
invalidateLiveness - Indicates that register liveness is no longer being tracked accurately.
void sortUniqueLiveIns()
Sorts and uniques the LiveIns vector.
IterT skipDebugInstructionsBackward(IterT It, IterT Begin)
Decrement It until it points to a non-debug instruction or to Begin and return the resulting iterator...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
IterT skipDebugInstructionsForward(IterT It, IterT End)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator...
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
virtual bool isLegalToSplitMBBAt(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const
Return true if it's legal to split the given basic block at the specified instruction (i...
Representation of each machine instruction.
Definition: MachineInstr.h:52
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
bool hasAddressTaken() const
Test whether this block is potentially the target of an indirect branch.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
raw_ostream & printBlockFreq(raw_ostream &OS, const MachineBasicBlock *MBB) const
BranchFolder(bool defaultEnableTailMerge, bool CommonHoist, MBFIWrapper &MBFI, const MachineBranchProbabilityInfo &MBPI, unsigned MinCommonTailLength=0)
virtual bool isPredicated(const MachineInstr &MI) const
Returns true if the instruction is already predicated.
#define I(x, y, z)
Definition: MD5.cpp:54
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:135
static bool IsBranchOnlyBlock(MachineBasicBlock *MBB)
Abstract Stack Frame Index.
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
static MachineBasicBlock * findFalseBlock(MachineBasicBlock *BB, MachineBasicBlock *TrueBB)
findFalseBlock - BB has a fallthrough.
unsigned getReg() const
getReg - Returns the register number.
void erase(iterator MBBI)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void insert(iterator MBBI, MachineBasicBlock *MBB)
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:326
static bool ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2, unsigned MinCommonTailLength, unsigned &CommonTailLen, MachineBasicBlock::iterator &I1, MachineBasicBlock::iterator &I2, MachineBasicBlock *SuccBB, MachineBasicBlock *PredBB, DenseMap< const MachineBasicBlock *, int > &FuncletMembership, bool AfterPlacement)
ProfitableToMerge - Check if two machine basic blocks have a common tail and decide if it would be pr...
BasicBlockListType::iterator iterator
char & BranchFolderPassID
BranchFolding - This pass performs machine code CFG based optimizations to delete branches to branche...
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:44
#define DEBUG(X)
Definition: Debug.h:100
const MachineBasicBlock & back() const
std::string Hash(const Unit &U)
Definition: FuzzerSHA1.cpp:216
This class keeps track of branch frequencies of newly created blocks and tail-merged blocks...
IRTranslator LLVM IR MI
Address of indexed Constant in Constant Pool.
virtual void ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail, MachineBasicBlock *NewDest) const
Delete the instruction OldInst and everything after it, replacing it with an unconditional branch to ...
BranchProbability getEdgeProbability(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
unsigned pred_size() const
bool isBarrier(QueryType Type=AnyInBundle) const
Returns true if the specified instruction stops control flow from executing the instruction immediate...
Definition: MachineInstr.h:431
bool CorrectExtraCFGEdges(MachineBasicBlock *DestA, MachineBasicBlock *DestB, bool IsCond)
Various pieces of code can cause excess edges in the CFG to be inserted.
This class contains meta information specific to a module.
static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2, MachineBasicBlock::iterator &I1, MachineBasicBlock::iterator &I2)
ComputeCommonTailLength - Given two machine basic blocks, compute the number of instructions they act...
LoopInfoBase< MachineBasicBlock, MachineLoop > & getBase()