LLVM  8.0.0svn
EarlyIfConversion.cpp
Go to the documentation of this file.
1 //===-- EarlyIfConversion.cpp - If-conversion on SSA form machine code ----===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Early if-conversion is for out-of-order CPUs that don't have a lot of
11 // predicable instructions. The goal is to eliminate conditional branches that
12 // may mispredict.
13 //
14 // Instructions from both sides of the branch are executed specutatively, and a
15 // cmov instruction selects the result.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "llvm/ADT/BitVector.h"
21 #include "llvm/ADT/SetVector.h"
22 #include "llvm/ADT/SmallPtrSet.h"
23 #include "llvm/ADT/SparseSet.h"
24 #include "llvm/ADT/Statistic.h"
32 #include "llvm/CodeGen/Passes.h"
37 #include "llvm/Support/Debug.h"
39 
40 using namespace llvm;
41 
42 #define DEBUG_TYPE "early-ifcvt"
43 
44 // Absolute maximum number of instructions allowed per speculated block.
45 // This bypasses all other heuristics, so it should be set fairly high.
46 static cl::opt<unsigned>
47 BlockInstrLimit("early-ifcvt-limit", cl::init(30), cl::Hidden,
48  cl::desc("Maximum number of instructions per speculated block."));
49 
50 // Stress testing mode - disable heuristics.
51 static cl::opt<bool> Stress("stress-early-ifcvt", cl::Hidden,
52  cl::desc("Turn all knobs to 11"));
53 
54 STATISTIC(NumDiamondsSeen, "Number of diamonds");
55 STATISTIC(NumDiamondsConv, "Number of diamonds converted");
56 STATISTIC(NumTrianglesSeen, "Number of triangles");
57 STATISTIC(NumTrianglesConv, "Number of triangles converted");
58 
59 //===----------------------------------------------------------------------===//
60 // SSAIfConv
61 //===----------------------------------------------------------------------===//
62 //
63 // The SSAIfConv class performs if-conversion on SSA form machine code after
64 // determining if it is possible. The class contains no heuristics; external
65 // code should be used to determine when if-conversion is a good idea.
66 //
67 // SSAIfConv can convert both triangles and diamonds:
68 //
69 // Triangle: Head Diamond: Head
70 // | \ / \_
71 // | \ / |
72 // | [TF]BB FBB TBB
73 // | / \ /
74 // | / \ /
75 // Tail Tail
76 //
77 // Instructions in the conditional blocks TBB and/or FBB are spliced into the
78 // Head block, and phis in the Tail block are converted to select instructions.
79 //
80 namespace {
81 class SSAIfConv {
82  const TargetInstrInfo *TII;
83  const TargetRegisterInfo *TRI;
85 
86 public:
87  /// The block containing the conditional branch.
88  MachineBasicBlock *Head;
89 
90  /// The block containing phis after the if-then-else.
91  MachineBasicBlock *Tail;
92 
93  /// The 'true' conditional block as determined by AnalyzeBranch.
94  MachineBasicBlock *TBB;
95 
96  /// The 'false' conditional block as determined by AnalyzeBranch.
97  MachineBasicBlock *FBB;
98 
99  /// isTriangle - When there is no 'else' block, either TBB or FBB will be
100  /// equal to Tail.
101  bool isTriangle() const { return TBB == Tail || FBB == Tail; }
102 
103  /// Returns the Tail predecessor for the True side.
104  MachineBasicBlock *getTPred() const { return TBB == Tail ? Head : TBB; }
105 
106  /// Returns the Tail predecessor for the False side.
107  MachineBasicBlock *getFPred() const { return FBB == Tail ? Head : FBB; }
108 
109  /// Information about each phi in the Tail block.
110  struct PHIInfo {
111  MachineInstr *PHI;
112  unsigned TReg, FReg;
113  // Latencies from Cond+Branch, TReg, and FReg to DstReg.
114  int CondCycles, TCycles, FCycles;
115 
116  PHIInfo(MachineInstr *phi)
117  : PHI(phi), TReg(0), FReg(0), CondCycles(0), TCycles(0), FCycles(0) {}
118  };
119 
121 
122 private:
123  /// The branch condition determined by AnalyzeBranch.
125 
126  /// Instructions in Head that define values used by the conditional blocks.
127  /// The hoisted instructions must be inserted after these instructions.
128  SmallPtrSet<MachineInstr*, 8> InsertAfter;
129 
130  /// Register units clobbered by the conditional blocks.
131  BitVector ClobberedRegUnits;
132 
133  // Scratch pad for findInsertionPoint.
135 
136  /// Insertion point in Head for speculatively executed instructions form TBB
137  /// and FBB.
138  MachineBasicBlock::iterator InsertionPoint;
139 
140  /// Return true if all non-terminator instructions in MBB can be safely
141  /// speculated.
142  bool canSpeculateInstrs(MachineBasicBlock *MBB);
143 
144  /// Find a valid insertion point in Head.
145  bool findInsertionPoint();
146 
147  /// Replace PHI instructions in Tail with selects.
148  void replacePHIInstrs();
149 
150  /// Insert selects and rewrite PHI operands to use them.
151  void rewritePHIOperands();
152 
153 public:
154  /// runOnMachineFunction - Initialize per-function data structures.
155  void runOnMachineFunction(MachineFunction &MF) {
156  TII = MF.getSubtarget().getInstrInfo();
157  TRI = MF.getSubtarget().getRegisterInfo();
158  MRI = &MF.getRegInfo();
159  LiveRegUnits.clear();
160  LiveRegUnits.setUniverse(TRI->getNumRegUnits());
161  ClobberedRegUnits.clear();
162  ClobberedRegUnits.resize(TRI->getNumRegUnits());
163  }
164 
165  /// canConvertIf - If the sub-CFG headed by MBB can be if-converted,
166  /// initialize the internal state, and return true.
167  bool canConvertIf(MachineBasicBlock *MBB);
168 
169  /// convertIf - If-convert the last block passed to canConvertIf(), assuming
170  /// it is possible. Add any erased blocks to RemovedBlocks.
171  void convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks);
172 };
173 } // end anonymous namespace
174 
175 
176 /// canSpeculateInstrs - Returns true if all the instructions in MBB can safely
177 /// be speculated. The terminators are not considered.
178 ///
179 /// If instructions use any values that are defined in the head basic block,
180 /// the defining instructions are added to InsertAfter.
181 ///
182 /// Any clobbered regunits are added to ClobberedRegUnits.
183 ///
184 bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) {
185  // Reject any live-in physregs. It's probably CPSR/EFLAGS, and very hard to
186  // get right.
187  if (!MBB->livein_empty()) {
188  LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has live-ins.\n");
189  return false;
190  }
191 
192  unsigned InstrCount = 0;
193 
194  // Check all instructions, except the terminators. It is assumed that
195  // terminators never have side effects or define any used register values.
196  for (MachineBasicBlock::iterator I = MBB->begin(),
197  E = MBB->getFirstTerminator(); I != E; ++I) {
198  if (I->isDebugInstr())
199  continue;
200 
201  if (++InstrCount > BlockInstrLimit && !Stress) {
202  LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has more than "
203  << BlockInstrLimit << " instructions.\n");
204  return false;
205  }
206 
207  // There shouldn't normally be any phis in a single-predecessor block.
208  if (I->isPHI()) {
209  LLVM_DEBUG(dbgs() << "Can't hoist: " << *I);
210  return false;
211  }
212 
213  // Don't speculate loads. Note that it may be possible and desirable to
214  // speculate GOT or constant pool loads that are guaranteed not to trap,
215  // but we don't support that for now.
216  if (I->mayLoad()) {
217  LLVM_DEBUG(dbgs() << "Won't speculate load: " << *I);
218  return false;
219  }
220 
221  // We never speculate stores, so an AA pointer isn't necessary.
222  bool DontMoveAcrossStore = true;
223  if (!I->isSafeToMove(nullptr, DontMoveAcrossStore)) {
224  LLVM_DEBUG(dbgs() << "Can't speculate: " << *I);
225  return false;
226  }
227 
228  // Check for any dependencies on Head instructions.
229  for (const MachineOperand &MO : I->operands()) {
230  if (MO.isRegMask()) {
231  LLVM_DEBUG(dbgs() << "Won't speculate regmask: " << *I);
232  return false;
233  }
234  if (!MO.isReg())
235  continue;
236  unsigned Reg = MO.getReg();
237 
238  // Remember clobbered regunits.
239  if (MO.isDef() && TargetRegisterInfo::isPhysicalRegister(Reg))
240  for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
241  ClobberedRegUnits.set(*Units);
242 
243  if (!MO.readsReg() || !TargetRegisterInfo::isVirtualRegister(Reg))
244  continue;
245  MachineInstr *DefMI = MRI->getVRegDef(Reg);
246  if (!DefMI || DefMI->getParent() != Head)
247  continue;
248  if (InsertAfter.insert(DefMI).second)
249  LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " depends on "
250  << *DefMI);
251  if (DefMI->isTerminator()) {
252  LLVM_DEBUG(dbgs() << "Can't insert instructions below terminator.\n");
253  return false;
254  }
255  }
256  }
257  return true;
258 }
259 
260 
261 /// Find an insertion point in Head for the speculated instructions. The
262 /// insertion point must be:
263 ///
264 /// 1. Before any terminators.
265 /// 2. After any instructions in InsertAfter.
266 /// 3. Not have any clobbered regunits live.
267 ///
268 /// This function sets InsertionPoint and returns true when successful, it
269 /// returns false if no valid insertion point could be found.
270 ///
271 bool SSAIfConv::findInsertionPoint() {
272  // Keep track of live regunits before the current position.
273  // Only track RegUnits that are also in ClobberedRegUnits.
274  LiveRegUnits.clear();
279  while (I != B) {
280  --I;
281  // Some of the conditional code depends in I.
282  if (InsertAfter.count(&*I)) {
283  LLVM_DEBUG(dbgs() << "Can't insert code after " << *I);
284  return false;
285  }
286 
287  // Update live regunits.
288  for (const MachineOperand &MO : I->operands()) {
289  // We're ignoring regmask operands. That is conservatively correct.
290  if (!MO.isReg())
291  continue;
292  unsigned Reg = MO.getReg();
294  continue;
295  // I clobbers Reg, so it isn't live before I.
296  if (MO.isDef())
297  for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
298  LiveRegUnits.erase(*Units);
299  // Unless I reads Reg.
300  if (MO.readsReg())
301  Reads.push_back(Reg);
302  }
303  // Anything read by I is live before I.
304  while (!Reads.empty())
305  for (MCRegUnitIterator Units(Reads.pop_back_val(), TRI); Units.isValid();
306  ++Units)
307  if (ClobberedRegUnits.test(*Units))
308  LiveRegUnits.insert(*Units);
309 
310  // We can't insert before a terminator.
311  if (I != FirstTerm && I->isTerminator())
312  continue;
313 
314  // Some of the clobbered registers are live before I, not a valid insertion
315  // point.
316  if (!LiveRegUnits.empty()) {
317  LLVM_DEBUG({
318  dbgs() << "Would clobber";
320  i = LiveRegUnits.begin(), e = LiveRegUnits.end(); i != e; ++i)
321  dbgs() << ' ' << printRegUnit(*i, TRI);
322  dbgs() << " live before " << *I;
323  });
324  continue;
325  }
326 
327  // This is a valid insertion point.
328  InsertionPoint = I;
329  LLVM_DEBUG(dbgs() << "Can insert before " << *I);
330  return true;
331  }
332  LLVM_DEBUG(dbgs() << "No legal insertion point found.\n");
333  return false;
334 }
335 
336 
337 
338 /// canConvertIf - analyze the sub-cfg rooted in MBB, and return true if it is
339 /// a potential candidate for if-conversion. Fill out the internal state.
340 ///
341 bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) {
342  Head = MBB;
343  TBB = FBB = Tail = nullptr;
344 
345  if (Head->succ_size() != 2)
346  return false;
347  MachineBasicBlock *Succ0 = Head->succ_begin()[0];
348  MachineBasicBlock *Succ1 = Head->succ_begin()[1];
349 
350  // Canonicalize so Succ0 has MBB as its single predecessor.
351  if (Succ0->pred_size() != 1)
352  std::swap(Succ0, Succ1);
353 
354  if (Succ0->pred_size() != 1 || Succ0->succ_size() != 1)
355  return false;
356 
357  Tail = Succ0->succ_begin()[0];
358 
359  // This is not a triangle.
360  if (Tail != Succ1) {
361  // Check for a diamond. We won't deal with any critical edges.
362  if (Succ1->pred_size() != 1 || Succ1->succ_size() != 1 ||
363  Succ1->succ_begin()[0] != Tail)
364  return false;
365  LLVM_DEBUG(dbgs() << "\nDiamond: " << printMBBReference(*Head) << " -> "
366  << printMBBReference(*Succ0) << "/"
367  << printMBBReference(*Succ1) << " -> "
368  << printMBBReference(*Tail) << '\n');
369 
370  // Live-in physregs are tricky to get right when speculating code.
371  if (!Tail->livein_empty()) {
372  LLVM_DEBUG(dbgs() << "Tail has live-ins.\n");
373  return false;
374  }
375  } else {
376  LLVM_DEBUG(dbgs() << "\nTriangle: " << printMBBReference(*Head) << " -> "
377  << printMBBReference(*Succ0) << " -> "
378  << printMBBReference(*Tail) << '\n');
379  }
380 
381  // This is a triangle or a diamond.
382  // If Tail doesn't have any phis, there must be side effects.
383  if (Tail->empty() || !Tail->front().isPHI()) {
384  LLVM_DEBUG(dbgs() << "No phis in tail.\n");
385  return false;
386  }
387 
388  // The branch we're looking to eliminate must be analyzable.
389  Cond.clear();
390  if (TII->analyzeBranch(*Head, TBB, FBB, Cond)) {
391  LLVM_DEBUG(dbgs() << "Branch not analyzable.\n");
392  return false;
393  }
394 
395  // This is weird, probably some sort of degenerate CFG.
396  if (!TBB) {
397  LLVM_DEBUG(dbgs() << "AnalyzeBranch didn't find conditional branch.\n");
398  return false;
399  }
400 
401  // AnalyzeBranch doesn't set FBB on a fall-through branch.
402  // Make sure it is always set.
403  FBB = TBB == Succ0 ? Succ1 : Succ0;
404 
405  // Any phis in the tail block must be convertible to selects.
406  PHIs.clear();
407  MachineBasicBlock *TPred = getTPred();
408  MachineBasicBlock *FPred = getFPred();
409  for (MachineBasicBlock::iterator I = Tail->begin(), E = Tail->end();
410  I != E && I->isPHI(); ++I) {
411  PHIs.push_back(&*I);
412  PHIInfo &PI = PHIs.back();
413  // Find PHI operands corresponding to TPred and FPred.
414  for (unsigned i = 1; i != PI.PHI->getNumOperands(); i += 2) {
415  if (PI.PHI->getOperand(i+1).getMBB() == TPred)
416  PI.TReg = PI.PHI->getOperand(i).getReg();
417  if (PI.PHI->getOperand(i+1).getMBB() == FPred)
418  PI.FReg = PI.PHI->getOperand(i).getReg();
419  }
420  assert(TargetRegisterInfo::isVirtualRegister(PI.TReg) && "Bad PHI");
421  assert(TargetRegisterInfo::isVirtualRegister(PI.FReg) && "Bad PHI");
422 
423  // Get target information.
424  if (!TII->canInsertSelect(*Head, Cond, PI.TReg, PI.FReg,
425  PI.CondCycles, PI.TCycles, PI.FCycles)) {
426  LLVM_DEBUG(dbgs() << "Can't convert: " << *PI.PHI);
427  return false;
428  }
429  }
430 
431  // Check that the conditional instructions can be speculated.
432  InsertAfter.clear();
433  ClobberedRegUnits.reset();
434  if (TBB != Tail && !canSpeculateInstrs(TBB))
435  return false;
436  if (FBB != Tail && !canSpeculateInstrs(FBB))
437  return false;
438 
439  // Try to find a valid insertion point for the speculated instructions in the
440  // head basic block.
441  if (!findInsertionPoint())
442  return false;
443 
444  if (isTriangle())
445  ++NumTrianglesSeen;
446  else
447  ++NumDiamondsSeen;
448  return true;
449 }
450 
451 /// replacePHIInstrs - Completely replace PHI instructions with selects.
452 /// This is possible when the only Tail predecessors are the if-converted
453 /// blocks.
454 void SSAIfConv::replacePHIInstrs() {
455  assert(Tail->pred_size() == 2 && "Cannot replace PHIs");
457  assert(FirstTerm != Head->end() && "No terminators");
458  DebugLoc HeadDL = FirstTerm->getDebugLoc();
459 
460  // Convert all PHIs to select instructions inserted before FirstTerm.
461  for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
462  PHIInfo &PI = PHIs[i];
463  LLVM_DEBUG(dbgs() << "If-converting " << *PI.PHI);
464  unsigned DstReg = PI.PHI->getOperand(0).getReg();
465  TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg);
466  LLVM_DEBUG(dbgs() << " --> " << *std::prev(FirstTerm));
467  PI.PHI->eraseFromParent();
468  PI.PHI = nullptr;
469  }
470 }
471 
472 /// rewritePHIOperands - When there are additional Tail predecessors, insert
473 /// select instructions in Head and rewrite PHI operands to use the selects.
474 /// Keep the PHI instructions in Tail to handle the other predecessors.
475 void SSAIfConv::rewritePHIOperands() {
477  assert(FirstTerm != Head->end() && "No terminators");
478  DebugLoc HeadDL = FirstTerm->getDebugLoc();
479 
480  // Convert all PHIs to select instructions inserted before FirstTerm.
481  for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
482  PHIInfo &PI = PHIs[i];
483  unsigned DstReg = 0;
484 
485  LLVM_DEBUG(dbgs() << "If-converting " << *PI.PHI);
486  if (PI.TReg == PI.FReg) {
487  // We do not need the select instruction if both incoming values are
488  // equal.
489  DstReg = PI.TReg;
490  } else {
491  unsigned PHIDst = PI.PHI->getOperand(0).getReg();
492  DstReg = MRI->createVirtualRegister(MRI->getRegClass(PHIDst));
493  TII->insertSelect(*Head, FirstTerm, HeadDL,
494  DstReg, Cond, PI.TReg, PI.FReg);
495  LLVM_DEBUG(dbgs() << " --> " << *std::prev(FirstTerm));
496  }
497 
498  // Rewrite PHI operands TPred -> (DstReg, Head), remove FPred.
499  for (unsigned i = PI.PHI->getNumOperands(); i != 1; i -= 2) {
500  MachineBasicBlock *MBB = PI.PHI->getOperand(i-1).getMBB();
501  if (MBB == getTPred()) {
502  PI.PHI->getOperand(i-1).setMBB(Head);
503  PI.PHI->getOperand(i-2).setReg(DstReg);
504  } else if (MBB == getFPred()) {
505  PI.PHI->RemoveOperand(i-1);
506  PI.PHI->RemoveOperand(i-2);
507  }
508  }
509  LLVM_DEBUG(dbgs() << " --> " << *PI.PHI);
510  }
511 }
512 
513 /// convertIf - Execute the if conversion after canConvertIf has determined the
514 /// feasibility.
515 ///
516 /// Any basic blocks erased will be added to RemovedBlocks.
517 ///
518 void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) {
519  assert(Head && Tail && TBB && FBB && "Call canConvertIf first.");
520 
521  // Update statistics.
522  if (isTriangle())
523  ++NumTrianglesConv;
524  else
525  ++NumDiamondsConv;
526 
527  // Move all instructions into Head, except for the terminators.
528  if (TBB != Tail)
529  Head->splice(InsertionPoint, TBB, TBB->begin(), TBB->getFirstTerminator());
530  if (FBB != Tail)
531  Head->splice(InsertionPoint, FBB, FBB->begin(), FBB->getFirstTerminator());
532 
533  // Are there extra Tail predecessors?
534  bool ExtraPreds = Tail->pred_size() != 2;
535  if (ExtraPreds)
536  rewritePHIOperands();
537  else
538  replacePHIInstrs();
539 
540  // Fix up the CFG, temporarily leave Head without any successors.
541  Head->removeSuccessor(TBB);
542  Head->removeSuccessor(FBB, true);
543  if (TBB != Tail)
544  TBB->removeSuccessor(Tail, true);
545  if (FBB != Tail)
546  FBB->removeSuccessor(Tail, true);
547 
548  // Fix up Head's terminators.
549  // It should become a single branch or a fallthrough.
550  DebugLoc HeadDL = Head->getFirstTerminator()->getDebugLoc();
551  TII->removeBranch(*Head);
552 
553  // Erase the now empty conditional blocks. It is likely that Head can fall
554  // through to Tail, and we can join the two blocks.
555  if (TBB != Tail) {
556  RemovedBlocks.push_back(TBB);
557  TBB->eraseFromParent();
558  }
559  if (FBB != Tail) {
560  RemovedBlocks.push_back(FBB);
561  FBB->eraseFromParent();
562  }
563 
564  assert(Head->succ_empty() && "Additional head successors?");
565  if (!ExtraPreds && Head->isLayoutSuccessor(Tail)) {
566  // Splice Tail onto the end of Head.
567  LLVM_DEBUG(dbgs() << "Joining tail " << printMBBReference(*Tail)
568  << " into head " << printMBBReference(*Head) << '\n');
569  Head->splice(Head->end(), Tail,
570  Tail->begin(), Tail->end());
572  RemovedBlocks.push_back(Tail);
573  Tail->eraseFromParent();
574  } else {
575  // We need a branch to Tail, let code placement work it out later.
576  LLVM_DEBUG(dbgs() << "Converting to unconditional branch.\n");
578  TII->insertBranch(*Head, Tail, nullptr, EmptyCond, HeadDL);
579  Head->addSuccessor(Tail);
580  }
581  LLVM_DEBUG(dbgs() << *Head);
582 }
583 
584 
585 //===----------------------------------------------------------------------===//
586 // EarlyIfConverter Pass
587 //===----------------------------------------------------------------------===//
588 
589 namespace {
590 class EarlyIfConverter : public MachineFunctionPass {
591  const TargetInstrInfo *TII;
592  const TargetRegisterInfo *TRI;
593  MCSchedModel SchedModel;
595  MachineDominatorTree *DomTree;
597  MachineTraceMetrics *Traces;
599  SSAIfConv IfConv;
600 
601 public:
602  static char ID;
603  EarlyIfConverter() : MachineFunctionPass(ID) {}
604  void getAnalysisUsage(AnalysisUsage &AU) const override;
605  bool runOnMachineFunction(MachineFunction &MF) override;
606  StringRef getPassName() const override { return "Early If-Conversion"; }
607 
608 private:
609  bool tryConvertIf(MachineBasicBlock*);
610  void updateDomTree(ArrayRef<MachineBasicBlock*> Removed);
611  void updateLoops(ArrayRef<MachineBasicBlock*> Removed);
612  void invalidateTraces();
613  bool shouldConvertIf();
614 };
615 } // end anonymous namespace
616 
617 char EarlyIfConverter::ID = 0;
619 
620 INITIALIZE_PASS_BEGIN(EarlyIfConverter, DEBUG_TYPE,
621  "Early If Converter", false, false)
625 INITIALIZE_PASS_END(EarlyIfConverter, DEBUG_TYPE,
626  "Early If Converter", false, false)
627 
628 void EarlyIfConverter::getAnalysisUsage(AnalysisUsage &AU) const {
629  AU.addRequired<MachineBranchProbabilityInfo>();
630  AU.addRequired<MachineDominatorTree>();
631  AU.addPreserved<MachineDominatorTree>();
632  AU.addRequired<MachineLoopInfo>();
633  AU.addPreserved<MachineLoopInfo>();
634  AU.addRequired<MachineTraceMetrics>();
635  AU.addPreserved<MachineTraceMetrics>();
637 }
638 
639 /// Update the dominator tree after if-conversion erased some blocks.
640 void EarlyIfConverter::updateDomTree(ArrayRef<MachineBasicBlock*> Removed) {
641  // convertIf can remove TBB, FBB, and Tail can be merged into Head.
642  // TBB and FBB should not dominate any blocks.
643  // Tail children should be transferred to Head.
644  MachineDomTreeNode *HeadNode = DomTree->getNode(IfConv.Head);
645  for (unsigned i = 0, e = Removed.size(); i != e; ++i) {
646  MachineDomTreeNode *Node = DomTree->getNode(Removed[i]);
647  assert(Node != HeadNode && "Cannot erase the head node");
648  while (Node->getNumChildren()) {
649  assert(Node->getBlock() == IfConv.Tail && "Unexpected children");
650  DomTree->changeImmediateDominator(Node->getChildren().back(), HeadNode);
651  }
652  DomTree->eraseNode(Removed[i]);
653  }
654 }
655 
656 /// Update LoopInfo after if-conversion.
657 void EarlyIfConverter::updateLoops(ArrayRef<MachineBasicBlock*> Removed) {
658  if (!Loops)
659  return;
660  // If-conversion doesn't change loop structure, and it doesn't mess with back
661  // edges, so updating LoopInfo is simply removing the dead blocks.
662  for (unsigned i = 0, e = Removed.size(); i != e; ++i)
663  Loops->removeBlock(Removed[i]);
664 }
665 
666 /// Invalidate MachineTraceMetrics before if-conversion.
667 void EarlyIfConverter::invalidateTraces() {
668  Traces->verifyAnalysis();
669  Traces->invalidate(IfConv.Head);
670  Traces->invalidate(IfConv.Tail);
671  Traces->invalidate(IfConv.TBB);
672  Traces->invalidate(IfConv.FBB);
673  Traces->verifyAnalysis();
674 }
675 
676 // Adjust cycles with downward saturation.
677 static unsigned adjCycles(unsigned Cyc, int Delta) {
678  if (Delta < 0 && Cyc + Delta > Cyc)
679  return 0;
680  return Cyc + Delta;
681 }
682 
683 /// Apply cost model and heuristics to the if-conversion in IfConv.
684 /// Return true if the conversion is a good idea.
685 ///
686 bool EarlyIfConverter::shouldConvertIf() {
687  // Stress testing mode disables all cost considerations.
688  if (Stress)
689  return true;
690 
691  if (!MinInstr)
692  MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
693 
694  MachineTraceMetrics::Trace TBBTrace = MinInstr->getTrace(IfConv.getTPred());
695  MachineTraceMetrics::Trace FBBTrace = MinInstr->getTrace(IfConv.getFPred());
696  LLVM_DEBUG(dbgs() << "TBB: " << TBBTrace << "FBB: " << FBBTrace);
697  unsigned MinCrit = std::min(TBBTrace.getCriticalPath(),
698  FBBTrace.getCriticalPath());
699 
700  // Set a somewhat arbitrary limit on the critical path extension we accept.
701  unsigned CritLimit = SchedModel.MispredictPenalty/2;
702 
703  // If-conversion only makes sense when there is unexploited ILP. Compute the
704  // maximum-ILP resource length of the trace after if-conversion. Compare it
705  // to the shortest critical path.
707  if (IfConv.TBB != IfConv.Tail)
708  ExtraBlocks.push_back(IfConv.TBB);
709  unsigned ResLength = FBBTrace.getResourceLength(ExtraBlocks);
710  LLVM_DEBUG(dbgs() << "Resource length " << ResLength
711  << ", minimal critical path " << MinCrit << '\n');
712  if (ResLength > MinCrit + CritLimit) {
713  LLVM_DEBUG(dbgs() << "Not enough available ILP.\n");
714  return false;
715  }
716 
717  // Assume that the depth of the first head terminator will also be the depth
718  // of the select instruction inserted, as determined by the flag dependency.
719  // TBB / FBB data dependencies may delay the select even more.
720  MachineTraceMetrics::Trace HeadTrace = MinInstr->getTrace(IfConv.Head);
721  unsigned BranchDepth =
722  HeadTrace.getInstrCycles(*IfConv.Head->getFirstTerminator()).Depth;
723  LLVM_DEBUG(dbgs() << "Branch depth: " << BranchDepth << '\n');
724 
725  // Look at all the tail phis, and compute the critical path extension caused
726  // by inserting select instructions.
727  MachineTraceMetrics::Trace TailTrace = MinInstr->getTrace(IfConv.Tail);
728  for (unsigned i = 0, e = IfConv.PHIs.size(); i != e; ++i) {
729  SSAIfConv::PHIInfo &PI = IfConv.PHIs[i];
730  unsigned Slack = TailTrace.getInstrSlack(*PI.PHI);
731  unsigned MaxDepth = Slack + TailTrace.getInstrCycles(*PI.PHI).Depth;
732  LLVM_DEBUG(dbgs() << "Slack " << Slack << ":\t" << *PI.PHI);
733 
734  // The condition is pulled into the critical path.
735  unsigned CondDepth = adjCycles(BranchDepth, PI.CondCycles);
736  if (CondDepth > MaxDepth) {
737  unsigned Extra = CondDepth - MaxDepth;
738  LLVM_DEBUG(dbgs() << "Condition adds " << Extra << " cycles.\n");
739  if (Extra > CritLimit) {
740  LLVM_DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n');
741  return false;
742  }
743  }
744 
745  // The TBB value is pulled into the critical path.
746  unsigned TDepth = adjCycles(TBBTrace.getPHIDepth(*PI.PHI), PI.TCycles);
747  if (TDepth > MaxDepth) {
748  unsigned Extra = TDepth - MaxDepth;
749  LLVM_DEBUG(dbgs() << "TBB data adds " << Extra << " cycles.\n");
750  if (Extra > CritLimit) {
751  LLVM_DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n');
752  return false;
753  }
754  }
755 
756  // The FBB value is pulled into the critical path.
757  unsigned FDepth = adjCycles(FBBTrace.getPHIDepth(*PI.PHI), PI.FCycles);
758  if (FDepth > MaxDepth) {
759  unsigned Extra = FDepth - MaxDepth;
760  LLVM_DEBUG(dbgs() << "FBB data adds " << Extra << " cycles.\n");
761  if (Extra > CritLimit) {
762  LLVM_DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n');
763  return false;
764  }
765  }
766  }
767  return true;
768 }
769 
770 /// Attempt repeated if-conversion on MBB, return true if successful.
771 ///
772 bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) {
773  bool Changed = false;
774  while (IfConv.canConvertIf(MBB) && shouldConvertIf()) {
775  // If-convert MBB and update analyses.
776  invalidateTraces();
778  IfConv.convertIf(RemovedBlocks);
779  Changed = true;
780  updateDomTree(RemovedBlocks);
781  updateLoops(RemovedBlocks);
782  }
783  return Changed;
784 }
785 
786 bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) {
787  LLVM_DEBUG(dbgs() << "********** EARLY IF-CONVERSION **********\n"
788  << "********** Function: " << MF.getName() << '\n');
789  if (skipFunction(MF.getFunction()))
790  return false;
791 
792  // Only run if conversion if the target wants it.
793  const TargetSubtargetInfo &STI = MF.getSubtarget();
794  if (!STI.enableEarlyIfConversion())
795  return false;
796 
797  TII = STI.getInstrInfo();
798  TRI = STI.getRegisterInfo();
799  SchedModel = STI.getSchedModel();
800  MRI = &MF.getRegInfo();
801  DomTree = &getAnalysis<MachineDominatorTree>();
802  Loops = getAnalysisIfAvailable<MachineLoopInfo>();
803  Traces = &getAnalysis<MachineTraceMetrics>();
804  MinInstr = nullptr;
805 
806  bool Changed = false;
807  IfConv.runOnMachineFunction(MF);
808 
809  // Visit blocks in dominator tree post-order. The post-order enables nested
810  // if-conversion in a single pass. The tryConvertIf() function may erase
811  // blocks, but only blocks dominated by the head block. This makes it safe to
812  // update the dominator tree while the post-order iterator is still active.
813  for (auto DomNode : post_order(DomTree))
814  if (tryConvertIf(DomNode->getBlock()))
815  Changed = true;
816 
817  return Changed;
818 }
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
Definition: BitVector.h:372
void push_back(const T &Elt)
Definition: SmallVector.h:218
BitVector & set()
Definition: BitVector.h:398
static cl::opt< bool > Stress("stress-early-ifcvt", cl::Hidden, cl::desc("Turn all knobs to 11"))
virtual bool canInsertSelect(const MachineBasicBlock &MBB, ArrayRef< MachineOperand > Cond, unsigned TrueReg, unsigned FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const
Return true if it is possible to insert a select instruction that chooses between TrueReg and FalseRe...
virtual void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, unsigned DstReg, ArrayRef< MachineOperand > Cond, unsigned TrueReg, unsigned FalseReg) const
Insert a select instruction into MBB before I that will copy TrueReg to DstReg when Cond is true...
const TargetRegisterClass * getRegClass(unsigned Reg) const
Return the register class of the specified virtual register.
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
std::pair< iterator, bool > insert(const ValueT &Val)
insert - Attempts to insert a new element.
Definition: SparseSet.h:250
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
unsigned Depth
Earliest issue cycle as determined by data dependencies and instruction latencies from the beginning ...
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
unsigned Reg
bool test(unsigned Idx) const
Definition: BitVector.h:502
virtual unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const
Insert branch code into the end of the specified MachineBasicBlock.
char & EarlyIfConverterID
EarlyIfConverter - This pass performs if-conversion on SSA form by inserting cmov instructions...
STATISTIC(NumFunctions, "Total number of functions")
unsigned const TargetRegisterInfo * TRI
A debug info location.
Definition: DebugLoc.h:34
A trace ensemble is a collection of traces selected using the same strategy, for example &#39;minimum res...
static unsigned InstrCount
bool isPHI() const
virtual unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const
Remove the branching code at the end of the specific MBB.
unsigned getNumRegUnits() const
Return the number of (native) register units in the target.
virtual bool enableEarlyIfConversion() const
Enable the use of the early if conversion pass.
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
Hexagon Hardware Loops
void eraseFromParent()
This method unlinks &#39;this&#39; from the containing function and deletes it.
void clear()
clear - Removes all bits from the bitvector. Does not change capacity.
Definition: BitVector.h:367
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
bool isTerminator(QueryType Type=AnyInBundle) const
Returns true if this instruction part of the terminator for a basic block.
Definition: MachineInstr.h:649
Printable printRegUnit(unsigned Unit, const TargetRegisterInfo *TRI)
Create Printable object to print register units on a raw_ostream.
MachineInstr * getVRegDef(unsigned Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Select the trace through a block that has the fewest instructions.
INITIALIZE_PASS_BEGIN(EarlyIfConverter, DEBUG_TYPE, "Early If Converter", false, false) INITIALIZE_PASS_END(EarlyIfConverter
Base class for the actual dominator tree node.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
const std::vector< DomTreeNodeBase * > & getChildren() const
virtual const TargetInstrInfo * getInstrInfo() const
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
iterator erase(iterator I)
erase - Erases an existing element identified by a valid iterator.
Definition: SparseSet.h:286
TargetInstrInfo - Interface to description of machine instruction set.
bool empty() const
empty - Returns true if the set is empty.
Definition: SparseSet.h:184
NodeT * getBlock() const
Early If Converter
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:410
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
unsigned const MachineRegisterInfo * MRI
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:149
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
unsigned getCriticalPath() const
Return the length of the (data dependency) critical path through the trace.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
InstrCycles getInstrCycles(const MachineInstr &MI) const
Return the depth and height of MI.
Represent the analysis usage information of a pass.
BitVector & reset()
Definition: BitVector.h:439
iterator_range< po_iterator< T > > post_order(const T &G)
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
void setUniverse(unsigned U)
setUniverse - Set the universe size which determines the largest key the set can hold.
Definition: SparseSet.h:156
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
A trace represents a plausible sequence of executed basic blocks that passes through the current basi...
size_t size() const
Definition: SmallVector.h:53
const_iterator end() const
Definition: SparseSet.h:176
#define DEBUG_TYPE
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
const unsigned MaxDepth
static unsigned adjCycles(unsigned Cyc, int Delta)
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
MachineOperand class - Representation of each machine instruction operand.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
size_t getNumChildren() const
MachineInstrBuilder MachineInstrBuilder & DefMI
const_iterator begin() const
Definition: SparseSet.h:175
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:381
unsigned pred_size() const
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
static cl::opt< unsigned > BlockInstrLimit("early-ifcvt-limit", cl::init(30), cl::Hidden, cl::desc("Maximum number of instructions per speculated block."))
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:941
bool isValid() const
isValid - returns true if this iterator is not yet at the end.
unsigned succ_size() const
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:254
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
TargetSubtargetInfo - Generic base class for all target subtargets.
Representation of each machine instruction.
Definition: MachineInstr.h:64
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB &#39;Other&#39; at the position From, and insert it into this MBB right before &#39;...
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:56
#define I(x, y, z)
Definition: MD5.cpp:58
A set of register units used to track register liveness.
Definition: LiveRegUnits.h:31
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
void clear()
clear - Clears the set.
Definition: SparseSet.h:195
unsigned getPHIDepth(const MachineInstr &PHI) const
Return the Depth of a PHI instruction in a trace center block successor.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
unsigned getResourceLength(ArrayRef< const MachineBasicBlock *> Extrablocks=None, ArrayRef< const MCSchedClassDesc *> ExtraInstrs=None, ArrayRef< const MCSchedClassDesc *> RemoveInstrs=None) const
Return the resource length of the trace.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
unsigned getInstrSlack(const MachineInstr &MI) const
Return the slack of MI.
#define LLVM_DEBUG(X)
Definition: Debug.h:123
Machine model for scheduling, bundling, and heuristics.
Definition: MCSchedule.h:247
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget&#39;s CPU.
unsigned createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...