LLVM  7.0.0svn
AArch64ConditionalCompares.cpp
Go to the documentation of this file.
1 //===-- AArch64ConditionalCompares.cpp --- CCMP formation for AArch64 -----===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the AArch64ConditionalCompares pass which reduces
11 // branching and code size by using the conditional compare instructions CCMP,
12 // CCMN, and FCMP.
13 //
14 // The CFG transformations for forming conditional compares are very similar to
15 // if-conversion, and this pass should run immediately before the early
16 // if-conversion pass.
17 //
18 //===----------------------------------------------------------------------===//
19 
20 #include "AArch64.h"
22 #include "llvm/ADT/SetVector.h"
23 #include "llvm/ADT/SmallPtrSet.h"
24 #include "llvm/ADT/Statistic.h"
33 #include "llvm/CodeGen/Passes.h"
38 #include "llvm/Support/Debug.h"
40 
41 using namespace llvm;
42 
43 #define DEBUG_TYPE "aarch64-ccmp"
44 
45 // Absolute maximum number of instructions allowed per speculated block.
46 // This bypasses all other heuristics, so it should be set fairly high.
48  "aarch64-ccmp-limit", cl::init(30), cl::Hidden,
49  cl::desc("Maximum number of instructions per speculated block."));
50 
51 // Stress testing mode - disable heuristics.
52 static cl::opt<bool> Stress("aarch64-stress-ccmp", cl::Hidden,
53  cl::desc("Turn all knobs to 11"));
54 
55 STATISTIC(NumConsidered, "Number of ccmps considered");
56 STATISTIC(NumPhiRejs, "Number of ccmps rejected (PHI)");
57 STATISTIC(NumPhysRejs, "Number of ccmps rejected (Physregs)");
58 STATISTIC(NumPhi2Rejs, "Number of ccmps rejected (PHI2)");
59 STATISTIC(NumHeadBranchRejs, "Number of ccmps rejected (Head branch)");
60 STATISTIC(NumCmpBranchRejs, "Number of ccmps rejected (CmpBB branch)");
61 STATISTIC(NumCmpTermRejs, "Number of ccmps rejected (CmpBB is cbz...)");
62 STATISTIC(NumImmRangeRejs, "Number of ccmps rejected (Imm out of range)");
63 STATISTIC(NumLiveDstRejs, "Number of ccmps rejected (Cmp dest live)");
64 STATISTIC(NumMultNZCVUses, "Number of ccmps rejected (NZCV used)");
65 STATISTIC(NumUnknNZCVDefs, "Number of ccmps rejected (NZCV def unknown)");
66 
67 STATISTIC(NumSpeculateRejs, "Number of ccmps rejected (Can't speculate)");
68 
69 STATISTIC(NumConverted, "Number of ccmp instructions created");
70 STATISTIC(NumCompBranches, "Number of cbz/cbnz branches converted");
71 
72 //===----------------------------------------------------------------------===//
73 // SSACCmpConv
74 //===----------------------------------------------------------------------===//
75 //
76 // The SSACCmpConv class performs ccmp-conversion on SSA form machine code
77 // after determining if it is possible. The class contains no heuristics;
78 // external code should be used to determine when ccmp-conversion is a good
79 // idea.
80 //
81 // CCmp-formation works on a CFG representing chained conditions, typically
82 // from C's short-circuit || and && operators:
83 //
84 // From: Head To: Head
85 // / | CmpBB
86 // / | / |
87 // | CmpBB / |
88 // | / | Tail |
89 // | / | | |
90 // Tail | | |
91 // | | | |
92 // ... ... ... ...
93 //
94 // The Head block is terminated by a br.cond instruction, and the CmpBB block
95 // contains compare + br.cond. Tail must be a successor of both.
96 //
97 // The cmp-conversion turns the compare instruction in CmpBB into a conditional
98 // compare, and merges CmpBB into Head, speculatively executing its
99 // instructions. The AArch64 conditional compare instructions have an immediate
100 // operand that specifies the NZCV flag values when the condition is false and
101 // the compare isn't executed. This makes it possible to chain compares with
102 // different condition codes.
103 //
104 // Example:
105 //
106 // if (a == 5 || b == 17)
107 // foo();
108 //
109 // Head:
110 // cmp w0, #5
111 // b.eq Tail
112 // CmpBB:
113 // cmp w1, #17
114 // b.eq Tail
115 // ...
116 // Tail:
117 // bl _foo
118 //
119 // Becomes:
120 //
121 // Head:
122 // cmp w0, #5
123 // ccmp w1, #17, 4, ne ; 4 = nZcv
124 // b.eq Tail
125 // ...
126 // Tail:
127 // bl _foo
128 //
129 // The ccmp condition code is the one that would cause the Head terminator to
130 // branch to CmpBB.
131 //
132 // FIXME: It should also be possible to speculate a block on the critical edge
133 // between Head and Tail, just like if-converting a diamond.
134 //
135 // FIXME: Handle PHIs in Tail by turning them into selects (if-conversion).
136 
137 namespace {
138 class SSACCmpConv {
139  MachineFunction *MF;
140  const TargetInstrInfo *TII;
141  const TargetRegisterInfo *TRI;
143  const MachineBranchProbabilityInfo *MBPI;
144 
145 public:
146  /// The first block containing a conditional branch, dominating everything
147  /// else.
148  MachineBasicBlock *Head;
149 
150  /// The block containing cmp+br.cond with a successor shared with Head.
151  MachineBasicBlock *CmpBB;
152 
153  /// The common successor for Head and CmpBB.
154  MachineBasicBlock *Tail;
155 
156  /// The compare instruction in CmpBB that can be converted to a ccmp.
157  MachineInstr *CmpMI;
158 
159 private:
160  /// The branch condition in Head as determined by AnalyzeBranch.
162 
163  /// The condition code that makes Head branch to CmpBB.
164  AArch64CC::CondCode HeadCmpBBCC;
165 
166  /// The branch condition in CmpBB.
168 
169  /// The condition code that makes CmpBB branch to Tail.
170  AArch64CC::CondCode CmpBBTailCC;
171 
172  /// Check if the Tail PHIs are trivially convertible.
173  bool trivialTailPHIs();
174 
175  /// Remove CmpBB from the Tail PHIs.
176  void updateTailPHIs();
177 
178  /// Check if an operand defining DstReg is dead.
179  bool isDeadDef(unsigned DstReg);
180 
181  /// Find the compare instruction in MBB that controls the conditional branch.
182  /// Return NULL if a convertible instruction can't be found.
183  MachineInstr *findConvertibleCompare(MachineBasicBlock *MBB);
184 
185  /// Return true if all non-terminator instructions in MBB can be safely
186  /// speculated.
187  bool canSpeculateInstrs(MachineBasicBlock *MBB, const MachineInstr *CmpMI);
188 
189 public:
190  /// runOnMachineFunction - Initialize per-function data structures.
191  void runOnMachineFunction(MachineFunction &MF,
192  const MachineBranchProbabilityInfo *MBPI) {
193  this->MF = &MF;
194  this->MBPI = MBPI;
195  TII = MF.getSubtarget().getInstrInfo();
196  TRI = MF.getSubtarget().getRegisterInfo();
197  MRI = &MF.getRegInfo();
198  }
199 
200  /// If the sub-CFG headed by MBB can be cmp-converted, initialize the
201  /// internal state, and return true.
202  bool canConvert(MachineBasicBlock *MBB);
203 
204  /// Cmo-convert the last block passed to canConvertCmp(), assuming
205  /// it is possible. Add any erased blocks to RemovedBlocks.
206  void convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks);
207 
208  /// Return the expected code size delta if the conversion into a
209  /// conditional compare is performed.
210  int expectedCodeSizeDelta() const;
211 };
212 } // end anonymous namespace
213 
214 // Check that all PHIs in Tail are selecting the same value from Head and CmpBB.
215 // This means that no if-conversion is required when merging CmpBB into Head.
216 bool SSACCmpConv::trivialTailPHIs() {
217  for (auto &I : *Tail) {
218  if (!I.isPHI())
219  break;
220  unsigned HeadReg = 0, CmpBBReg = 0;
221  // PHI operands come in (VReg, MBB) pairs.
222  for (unsigned oi = 1, oe = I.getNumOperands(); oi != oe; oi += 2) {
223  MachineBasicBlock *MBB = I.getOperand(oi + 1).getMBB();
224  unsigned Reg = I.getOperand(oi).getReg();
225  if (MBB == Head) {
226  assert((!HeadReg || HeadReg == Reg) && "Inconsistent PHI operands");
227  HeadReg = Reg;
228  }
229  if (MBB == CmpBB) {
230  assert((!CmpBBReg || CmpBBReg == Reg) && "Inconsistent PHI operands");
231  CmpBBReg = Reg;
232  }
233  }
234  if (HeadReg != CmpBBReg)
235  return false;
236  }
237  return true;
238 }
239 
240 // Assuming that trivialTailPHIs() is true, update the Tail PHIs by simply
241 // removing the CmpBB operands. The Head operands will be identical.
242 void SSACCmpConv::updateTailPHIs() {
243  for (auto &I : *Tail) {
244  if (!I.isPHI())
245  break;
246  // I is a PHI. It can have multiple entries for CmpBB.
247  for (unsigned oi = I.getNumOperands(); oi > 2; oi -= 2) {
248  // PHI operands are (Reg, MBB) at (oi-2, oi-1).
249  if (I.getOperand(oi - 1).getMBB() == CmpBB) {
250  I.RemoveOperand(oi - 1);
251  I.RemoveOperand(oi - 2);
252  }
253  }
254  }
255 }
256 
257 // This pass runs before the AArch64DeadRegisterDefinitions pass, so compares
258 // are still writing virtual registers without any uses.
259 bool SSACCmpConv::isDeadDef(unsigned DstReg) {
260  // Writes to the zero register are dead.
261  if (DstReg == AArch64::WZR || DstReg == AArch64::XZR)
262  return true;
264  return false;
265  // A virtual register def without any uses will be marked dead later, and
266  // eventually replaced by the zero register.
267  return MRI->use_nodbg_empty(DstReg);
268 }
269 
270 // Parse a condition code returned by AnalyzeBranch, and compute the CondCode
271 // corresponding to TBB.
272 // Return
274  // A normal br.cond simply has the condition code.
275  if (Cond[0].getImm() != -1) {
276  assert(Cond.size() == 1 && "Unknown Cond array format");
277  CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
278  return true;
279  }
280  // For tbz and cbz instruction, the opcode is next.
281  switch (Cond[1].getImm()) {
282  default:
283  // This includes tbz / tbnz branches which can't be converted to
284  // ccmp + br.cond.
285  return false;
286  case AArch64::CBZW:
287  case AArch64::CBZX:
288  assert(Cond.size() == 3 && "Unknown Cond array format");
289  CC = AArch64CC::EQ;
290  return true;
291  case AArch64::CBNZW:
292  case AArch64::CBNZX:
293  assert(Cond.size() == 3 && "Unknown Cond array format");
294  CC = AArch64CC::NE;
295  return true;
296  }
297 }
298 
299 MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) {
301  if (I == MBB->end())
302  return nullptr;
303  // The terminator must be controlled by the flags.
304  if (!I->readsRegister(AArch64::NZCV)) {
305  switch (I->getOpcode()) {
306  case AArch64::CBZW:
307  case AArch64::CBZX:
308  case AArch64::CBNZW:
309  case AArch64::CBNZX:
310  // These can be converted into a ccmp against #0.
311  return &*I;
312  }
313  ++NumCmpTermRejs;
314  DEBUG(dbgs() << "Flags not used by terminator: " << *I);
315  return nullptr;
316  }
317 
318  // Now find the instruction controlling the terminator.
319  for (MachineBasicBlock::iterator B = MBB->begin(); I != B;) {
320  --I;
321  assert(!I->isTerminator() && "Spurious terminator");
322  switch (I->getOpcode()) {
323  // cmp is an alias for subs with a dead destination register.
324  case AArch64::SUBSWri:
325  case AArch64::SUBSXri:
326  // cmn is an alias for adds with a dead destination register.
327  case AArch64::ADDSWri:
328  case AArch64::ADDSXri:
329  // Check that the immediate operand is within range, ccmp wants a uimm5.
330  // Rd = SUBSri Rn, imm, shift
331  if (I->getOperand(3).getImm() || !isUInt<5>(I->getOperand(2).getImm())) {
332  DEBUG(dbgs() << "Immediate out of range for ccmp: " << *I);
333  ++NumImmRangeRejs;
334  return nullptr;
335  }
337  case AArch64::SUBSWrr:
338  case AArch64::SUBSXrr:
339  case AArch64::ADDSWrr:
340  case AArch64::ADDSXrr:
341  if (isDeadDef(I->getOperand(0).getReg()))
342  return &*I;
343  DEBUG(dbgs() << "Can't convert compare with live destination: " << *I);
344  ++NumLiveDstRejs;
345  return nullptr;
346  case AArch64::FCMPSrr:
347  case AArch64::FCMPDrr:
348  case AArch64::FCMPESrr:
349  case AArch64::FCMPEDrr:
350  return &*I;
351  }
352 
353  // Check for flag reads and clobbers.
354  MIOperands::PhysRegInfo PRI =
355  MIOperands(*I).analyzePhysReg(AArch64::NZCV, TRI);
356 
357  if (PRI.Read) {
358  // The ccmp doesn't produce exactly the same flags as the original
359  // compare, so reject the transform if there are uses of the flags
360  // besides the terminators.
361  DEBUG(dbgs() << "Can't create ccmp with multiple uses: " << *I);
362  ++NumMultNZCVUses;
363  return nullptr;
364  }
365 
366  if (PRI.Defined || PRI.Clobbered) {
367  DEBUG(dbgs() << "Not convertible compare: " << *I);
368  ++NumUnknNZCVDefs;
369  return nullptr;
370  }
371  }
372  DEBUG(dbgs() << "Flags not defined in " << printMBBReference(*MBB) << '\n');
373  return nullptr;
374 }
375 
376 /// Determine if all the instructions in MBB can safely
377 /// be speculated. The terminators are not considered.
378 ///
379 /// Only CmpMI is allowed to clobber the flags.
380 ///
381 bool SSACCmpConv::canSpeculateInstrs(MachineBasicBlock *MBB,
382  const MachineInstr *CmpMI) {
383  // Reject any live-in physregs. It's probably NZCV/EFLAGS, and very hard to
384  // get right.
385  if (!MBB->livein_empty()) {
386  DEBUG(dbgs() << printMBBReference(*MBB) << " has live-ins.\n");
387  return false;
388  }
389 
390  unsigned InstrCount = 0;
391 
392  // Check all instructions, except the terminators. It is assumed that
393  // terminators never have side effects or define any used register values.
394  for (auto &I : make_range(MBB->begin(), MBB->getFirstTerminator())) {
395  if (I.isDebugValue())
396  continue;
397 
398  if (++InstrCount > BlockInstrLimit && !Stress) {
399  DEBUG(dbgs() << printMBBReference(*MBB) << " has more than "
400  << BlockInstrLimit << " instructions.\n");
401  return false;
402  }
403 
404  // There shouldn't normally be any phis in a single-predecessor block.
405  if (I.isPHI()) {
406  DEBUG(dbgs() << "Can't hoist: " << I);
407  return false;
408  }
409 
410  // Don't speculate loads. Note that it may be possible and desirable to
411  // speculate GOT or constant pool loads that are guaranteed not to trap,
412  // but we don't support that for now.
413  if (I.mayLoad()) {
414  DEBUG(dbgs() << "Won't speculate load: " << I);
415  return false;
416  }
417 
418  // We never speculate stores, so an AA pointer isn't necessary.
419  bool DontMoveAcrossStore = true;
420  if (!I.isSafeToMove(nullptr, DontMoveAcrossStore)) {
421  DEBUG(dbgs() << "Can't speculate: " << I);
422  return false;
423  }
424 
425  // Only CmpMI is allowed to clobber the flags.
426  if (&I != CmpMI && I.modifiesRegister(AArch64::NZCV, TRI)) {
427  DEBUG(dbgs() << "Clobbers flags: " << I);
428  return false;
429  }
430  }
431  return true;
432 }
433 
434 /// Analyze the sub-cfg rooted in MBB, and return true if it is a potential
435 /// candidate for cmp-conversion. Fill out the internal state.
436 ///
437 bool SSACCmpConv::canConvert(MachineBasicBlock *MBB) {
438  Head = MBB;
439  Tail = CmpBB = nullptr;
440 
441  if (Head->succ_size() != 2)
442  return false;
443  MachineBasicBlock *Succ0 = Head->succ_begin()[0];
444  MachineBasicBlock *Succ1 = Head->succ_begin()[1];
445 
446  // CmpBB can only have a single predecessor. Tail is allowed many.
447  if (Succ0->pred_size() != 1)
448  std::swap(Succ0, Succ1);
449 
450  // Succ0 is our candidate for CmpBB.
451  if (Succ0->pred_size() != 1 || Succ0->succ_size() != 2)
452  return false;
453 
454  CmpBB = Succ0;
455  Tail = Succ1;
456 
457  if (!CmpBB->isSuccessor(Tail))
458  return false;
459 
460  // The CFG topology checks out.
461  DEBUG(dbgs() << "\nTriangle: " << printMBBReference(*Head) << " -> "
462  << printMBBReference(*CmpBB) << " -> "
463  << printMBBReference(*Tail) << '\n');
464  ++NumConsidered;
465 
466  // Tail is allowed to have many predecessors, but we can't handle PHIs yet.
467  //
468  // FIXME: Real PHIs could be if-converted as long as the CmpBB values are
469  // defined before The CmpBB cmp clobbers the flags. Alternatively, it should
470  // always be safe to sink the ccmp down to immediately before the CmpBB
471  // terminators.
472  if (!trivialTailPHIs()) {
473  DEBUG(dbgs() << "Can't handle phis in Tail.\n");
474  ++NumPhiRejs;
475  return false;
476  }
477 
478  if (!Tail->livein_empty()) {
479  DEBUG(dbgs() << "Can't handle live-in physregs in Tail.\n");
480  ++NumPhysRejs;
481  return false;
482  }
483 
484  // CmpBB should never have PHIs since Head is its only predecessor.
485  // FIXME: Clean them up if it happens.
486  if (!CmpBB->empty() && CmpBB->front().isPHI()) {
487  DEBUG(dbgs() << "Can't handle phis in CmpBB.\n");
488  ++NumPhi2Rejs;
489  return false;
490  }
491 
492  if (!CmpBB->livein_empty()) {
493  DEBUG(dbgs() << "Can't handle live-in physregs in CmpBB.\n");
494  ++NumPhysRejs;
495  return false;
496  }
497 
498  // The branch we're looking to eliminate must be analyzable.
499  HeadCond.clear();
500  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
501  if (TII->analyzeBranch(*Head, TBB, FBB, HeadCond)) {
502  DEBUG(dbgs() << "Head branch not analyzable.\n");
503  ++NumHeadBranchRejs;
504  return false;
505  }
506 
507  // This is weird, probably some sort of degenerate CFG, or an edge to a
508  // landing pad.
509  if (!TBB || HeadCond.empty()) {
510  DEBUG(dbgs() << "AnalyzeBranch didn't find conditional branch in Head.\n");
511  ++NumHeadBranchRejs;
512  return false;
513  }
514 
515  if (!parseCond(HeadCond, HeadCmpBBCC)) {
516  DEBUG(dbgs() << "Unsupported branch type on Head\n");
517  ++NumHeadBranchRejs;
518  return false;
519  }
520 
521  // Make sure the branch direction is right.
522  if (TBB != CmpBB) {
523  assert(TBB == Tail && "Unexpected TBB");
524  HeadCmpBBCC = AArch64CC::getInvertedCondCode(HeadCmpBBCC);
525  }
526 
527  CmpBBCond.clear();
528  TBB = FBB = nullptr;
529  if (TII->analyzeBranch(*CmpBB, TBB, FBB, CmpBBCond)) {
530  DEBUG(dbgs() << "CmpBB branch not analyzable.\n");
531  ++NumCmpBranchRejs;
532  return false;
533  }
534 
535  if (!TBB || CmpBBCond.empty()) {
536  DEBUG(dbgs() << "AnalyzeBranch didn't find conditional branch in CmpBB.\n");
537  ++NumCmpBranchRejs;
538  return false;
539  }
540 
541  if (!parseCond(CmpBBCond, CmpBBTailCC)) {
542  DEBUG(dbgs() << "Unsupported branch type on CmpBB\n");
543  ++NumCmpBranchRejs;
544  return false;
545  }
546 
547  if (TBB != Tail)
548  CmpBBTailCC = AArch64CC::getInvertedCondCode(CmpBBTailCC);
549 
550  DEBUG(dbgs() << "Head->CmpBB on " << AArch64CC::getCondCodeName(HeadCmpBBCC)
551  << ", CmpBB->Tail on " << AArch64CC::getCondCodeName(CmpBBTailCC)
552  << '\n');
553 
554  CmpMI = findConvertibleCompare(CmpBB);
555  if (!CmpMI)
556  return false;
557 
558  if (!canSpeculateInstrs(CmpBB, CmpMI)) {
559  ++NumSpeculateRejs;
560  return false;
561  }
562  return true;
563 }
564 
565 void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) {
566  DEBUG(dbgs() << "Merging " << printMBBReference(*CmpBB) << " into "
567  << printMBBReference(*Head) << ":\n"
568  << *CmpBB);
569 
570  // All CmpBB instructions are moved into Head, and CmpBB is deleted.
571  // Update the CFG first.
572  updateTailPHIs();
573 
574  // Save successor probabilties before removing CmpBB and Tail from their
575  // parents.
576  BranchProbability Head2CmpBB = MBPI->getEdgeProbability(Head, CmpBB);
577  BranchProbability CmpBB2Tail = MBPI->getEdgeProbability(CmpBB, Tail);
578 
579  Head->removeSuccessor(CmpBB);
580  CmpBB->removeSuccessor(Tail);
581 
582  // If Head and CmpBB had successor probabilties, udpate the probabilities to
583  // reflect the ccmp-conversion.
584  if (Head->hasSuccessorProbabilities() && CmpBB->hasSuccessorProbabilities()) {
585 
586  // Head is allowed two successors. We've removed CmpBB, so the remaining
587  // successor is Tail. We need to increase the successor probability for
588  // Tail to account for the CmpBB path we removed.
589  //
590  // Pr(Tail|Head) += Pr(CmpBB|Head) * Pr(Tail|CmpBB).
591  assert(*Head->succ_begin() == Tail && "Head successor is not Tail");
592  BranchProbability Head2Tail = MBPI->getEdgeProbability(Head, Tail);
593  Head->setSuccProbability(Head->succ_begin(),
594  Head2Tail + Head2CmpBB * CmpBB2Tail);
595 
596  // We will transfer successors of CmpBB to Head in a moment without
597  // normalizing the successor probabilities. Set the successor probabilites
598  // before doing so.
599  //
600  // Pr(I|Head) = Pr(CmpBB|Head) * Pr(I|CmpBB).
601  for (auto I = CmpBB->succ_begin(), E = CmpBB->succ_end(); I != E; ++I) {
602  BranchProbability CmpBB2I = MBPI->getEdgeProbability(CmpBB, *I);
603  CmpBB->setSuccProbability(I, Head2CmpBB * CmpBB2I);
604  }
605  }
606 
607  Head->transferSuccessorsAndUpdatePHIs(CmpBB);
608  DebugLoc TermDL = Head->getFirstTerminator()->getDebugLoc();
609  TII->removeBranch(*Head);
610 
611  // If the Head terminator was one of the cbz / tbz branches with built-in
612  // compare, we need to insert an explicit compare instruction in its place.
613  if (HeadCond[0].getImm() == -1) {
614  ++NumCompBranches;
615  unsigned Opc = 0;
616  switch (HeadCond[1].getImm()) {
617  case AArch64::CBZW:
618  case AArch64::CBNZW:
619  Opc = AArch64::SUBSWri;
620  break;
621  case AArch64::CBZX:
622  case AArch64::CBNZX:
623  Opc = AArch64::SUBSXri;
624  break;
625  default:
626  llvm_unreachable("Cannot convert Head branch");
627  }
628  const MCInstrDesc &MCID = TII->get(Opc);
629  // Create a dummy virtual register for the SUBS def.
630  unsigned DestReg =
631  MRI->createVirtualRegister(TII->getRegClass(MCID, 0, TRI, *MF));
632  // Insert a SUBS Rn, #0 instruction instead of the cbz / cbnz.
633  BuildMI(*Head, Head->end(), TermDL, MCID)
634  .addReg(DestReg, RegState::Define | RegState::Dead)
635  .add(HeadCond[2])
636  .addImm(0)
637  .addImm(0);
638  // SUBS uses the GPR*sp register classes.
639  MRI->constrainRegClass(HeadCond[2].getReg(),
640  TII->getRegClass(MCID, 1, TRI, *MF));
641  }
642 
643  Head->splice(Head->end(), CmpBB, CmpBB->begin(), CmpBB->end());
644 
645  // Now replace CmpMI with a ccmp instruction that also considers the incoming
646  // flags.
647  unsigned Opc = 0;
648  unsigned FirstOp = 1; // First CmpMI operand to copy.
649  bool isZBranch = false; // CmpMI is a cbz/cbnz instruction.
650  switch (CmpMI->getOpcode()) {
651  default:
652  llvm_unreachable("Unknown compare opcode");
653  case AArch64::SUBSWri: Opc = AArch64::CCMPWi; break;
654  case AArch64::SUBSWrr: Opc = AArch64::CCMPWr; break;
655  case AArch64::SUBSXri: Opc = AArch64::CCMPXi; break;
656  case AArch64::SUBSXrr: Opc = AArch64::CCMPXr; break;
657  case AArch64::ADDSWri: Opc = AArch64::CCMNWi; break;
658  case AArch64::ADDSWrr: Opc = AArch64::CCMNWr; break;
659  case AArch64::ADDSXri: Opc = AArch64::CCMNXi; break;
660  case AArch64::ADDSXrr: Opc = AArch64::CCMNXr; break;
661  case AArch64::FCMPSrr: Opc = AArch64::FCCMPSrr; FirstOp = 0; break;
662  case AArch64::FCMPDrr: Opc = AArch64::FCCMPDrr; FirstOp = 0; break;
663  case AArch64::FCMPESrr: Opc = AArch64::FCCMPESrr; FirstOp = 0; break;
664  case AArch64::FCMPEDrr: Opc = AArch64::FCCMPEDrr; FirstOp = 0; break;
665  case AArch64::CBZW:
666  case AArch64::CBNZW:
667  Opc = AArch64::CCMPWi;
668  FirstOp = 0;
669  isZBranch = true;
670  break;
671  case AArch64::CBZX:
672  case AArch64::CBNZX:
673  Opc = AArch64::CCMPXi;
674  FirstOp = 0;
675  isZBranch = true;
676  break;
677  }
678 
679  // The ccmp instruction should set the flags according to the comparison when
680  // Head would have branched to CmpBB.
681  // The NZCV immediate operand should provide flags for the case where Head
682  // would have branched to Tail. These flags should cause the new Head
683  // terminator to branch to tail.
684  unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(CmpBBTailCC);
685  const MCInstrDesc &MCID = TII->get(Opc);
686  MRI->constrainRegClass(CmpMI->getOperand(FirstOp).getReg(),
687  TII->getRegClass(MCID, 0, TRI, *MF));
688  if (CmpMI->getOperand(FirstOp + 1).isReg())
689  MRI->constrainRegClass(CmpMI->getOperand(FirstOp + 1).getReg(),
690  TII->getRegClass(MCID, 1, TRI, *MF));
691  MachineInstrBuilder MIB = BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), MCID)
692  .add(CmpMI->getOperand(FirstOp)); // Register Rn
693  if (isZBranch)
694  MIB.addImm(0); // cbz/cbnz Rn -> ccmp Rn, #0
695  else
696  MIB.add(CmpMI->getOperand(FirstOp + 1)); // Register Rm / Immediate
697  MIB.addImm(NZCV).addImm(HeadCmpBBCC);
698 
699  // If CmpMI was a terminator, we need a new conditional branch to replace it.
700  // This now becomes a Head terminator.
701  if (isZBranch) {
702  bool isNZ = CmpMI->getOpcode() == AArch64::CBNZW ||
703  CmpMI->getOpcode() == AArch64::CBNZX;
704  BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), TII->get(AArch64::Bcc))
705  .addImm(isNZ ? AArch64CC::NE : AArch64CC::EQ)
706  .add(CmpMI->getOperand(1)); // Branch target.
707  }
708  CmpMI->eraseFromParent();
709  Head->updateTerminator();
710 
711  RemovedBlocks.push_back(CmpBB);
712  CmpBB->eraseFromParent();
713  DEBUG(dbgs() << "Result:\n" << *Head);
714  ++NumConverted;
715 }
716 
717 int SSACCmpConv::expectedCodeSizeDelta() const {
718  int delta = 0;
719  // If the Head terminator was one of the cbz / tbz branches with built-in
720  // compare, we need to insert an explicit compare instruction in its place
721  // plus a branch instruction.
722  if (HeadCond[0].getImm() == -1) {
723  switch (HeadCond[1].getImm()) {
724  case AArch64::CBZW:
725  case AArch64::CBNZW:
726  case AArch64::CBZX:
727  case AArch64::CBNZX:
728  // Therefore delta += 1
729  delta = 1;
730  break;
731  default:
732  llvm_unreachable("Cannot convert Head branch");
733  }
734  }
735  // If the Cmp terminator was one of the cbz / tbz branches with
736  // built-in compare, it will be turned into a compare instruction
737  // into Head, but we do not save any instruction.
738  // Otherwise, we save the branch instruction.
739  switch (CmpMI->getOpcode()) {
740  default:
741  --delta;
742  break;
743  case AArch64::CBZW:
744  case AArch64::CBNZW:
745  case AArch64::CBZX:
746  case AArch64::CBNZX:
747  break;
748  }
749  return delta;
750 }
751 
752 //===----------------------------------------------------------------------===//
753 // AArch64ConditionalCompares Pass
754 //===----------------------------------------------------------------------===//
755 
756 namespace {
757 class AArch64ConditionalCompares : public MachineFunctionPass {
758  const MachineBranchProbabilityInfo *MBPI;
759  const TargetInstrInfo *TII;
760  const TargetRegisterInfo *TRI;
761  MCSchedModel SchedModel;
762  // Does the proceeded function has Oz attribute.
763  bool MinSize;
765  MachineDominatorTree *DomTree;
767  MachineTraceMetrics *Traces;
769  SSACCmpConv CmpConv;
770 
771 public:
772  static char ID;
773  AArch64ConditionalCompares() : MachineFunctionPass(ID) {
775  }
776  void getAnalysisUsage(AnalysisUsage &AU) const override;
777  bool runOnMachineFunction(MachineFunction &MF) override;
778  StringRef getPassName() const override {
779  return "AArch64 Conditional Compares";
780  }
781 
782 private:
783  bool tryConvert(MachineBasicBlock *);
784  void updateDomTree(ArrayRef<MachineBasicBlock *> Removed);
785  void updateLoops(ArrayRef<MachineBasicBlock *> Removed);
786  void invalidateTraces();
787  bool shouldConvert();
788 };
789 } // end anonymous namespace
790 
792 
793 INITIALIZE_PASS_BEGIN(AArch64ConditionalCompares, "aarch64-ccmp",
794  "AArch64 CCMP Pass", false, false)
798 INITIALIZE_PASS_END(AArch64ConditionalCompares, "aarch64-ccmp",
799  "AArch64 CCMP Pass", false, false)
800 
802  return new AArch64ConditionalCompares();
803 }
804 
805 void AArch64ConditionalCompares::getAnalysisUsage(AnalysisUsage &AU) const {
814 }
815 
816 /// Update the dominator tree after if-conversion erased some blocks.
817 void AArch64ConditionalCompares::updateDomTree(
819  // convert() removes CmpBB which was previously dominated by Head.
820  // CmpBB children should be transferred to Head.
821  MachineDomTreeNode *HeadNode = DomTree->getNode(CmpConv.Head);
822  for (MachineBasicBlock *RemovedMBB : Removed) {
823  MachineDomTreeNode *Node = DomTree->getNode(RemovedMBB);
824  assert(Node != HeadNode && "Cannot erase the head node");
825  assert(Node->getIDom() == HeadNode && "CmpBB should be dominated by Head");
826  while (Node->getNumChildren())
827  DomTree->changeImmediateDominator(Node->getChildren().back(), HeadNode);
828  DomTree->eraseNode(RemovedMBB);
829  }
830 }
831 
832 /// Update LoopInfo after if-conversion.
833 void
834 AArch64ConditionalCompares::updateLoops(ArrayRef<MachineBasicBlock *> Removed) {
835  if (!Loops)
836  return;
837  for (MachineBasicBlock *RemovedMBB : Removed)
838  Loops->removeBlock(RemovedMBB);
839 }
840 
841 /// Invalidate MachineTraceMetrics before if-conversion.
842 void AArch64ConditionalCompares::invalidateTraces() {
843  Traces->invalidate(CmpConv.Head);
844  Traces->invalidate(CmpConv.CmpBB);
845 }
846 
847 /// Apply cost model and heuristics to the if-conversion in IfConv.
848 /// Return true if the conversion is a good idea.
849 ///
851  // Stress testing mode disables all cost considerations.
852  if (Stress)
853  return true;
854  if (!MinInstr)
855  MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
856 
857  // Head dominates CmpBB, so it is always included in its trace.
858  MachineTraceMetrics::Trace Trace = MinInstr->getTrace(CmpConv.CmpBB);
859 
860  // If code size is the main concern
861  if (MinSize) {
862  int CodeSizeDelta = CmpConv.expectedCodeSizeDelta();
863  DEBUG(dbgs() << "Code size delta: " << CodeSizeDelta << '\n');
864  // If we are minimizing the code size, do the conversion whatever
865  // the cost is.
866  if (CodeSizeDelta < 0)
867  return true;
868  if (CodeSizeDelta > 0) {
869  DEBUG(dbgs() << "Code size is increasing, give up on this one.\n");
870  return false;
871  }
872  // CodeSizeDelta == 0, continue with the regular heuristics
873  }
874 
875  // Heuristic: The compare conversion delays the execution of the branch
876  // instruction because we must wait for the inputs to the second compare as
877  // well. The branch has no dependent instructions, but delaying it increases
878  // the cost of a misprediction.
879  //
880  // Set a limit on the delay we will accept.
881  unsigned DelayLimit = SchedModel.MispredictPenalty * 3 / 4;
882 
883  // Instruction depths can be computed for all trace instructions above CmpBB.
884  unsigned HeadDepth =
885  Trace.getInstrCycles(*CmpConv.Head->getFirstTerminator()).Depth;
886  unsigned CmpBBDepth =
887  Trace.getInstrCycles(*CmpConv.CmpBB->getFirstTerminator()).Depth;
888  DEBUG(dbgs() << "Head depth: " << HeadDepth
889  << "\nCmpBB depth: " << CmpBBDepth << '\n');
890  if (CmpBBDepth > HeadDepth + DelayLimit) {
891  DEBUG(dbgs() << "Branch delay would be larger than " << DelayLimit
892  << " cycles.\n");
893  return false;
894  }
895 
896  // Check the resource depth at the bottom of CmpBB - these instructions will
897  // be speculated.
898  unsigned ResDepth = Trace.getResourceDepth(true);
899  DEBUG(dbgs() << "Resources: " << ResDepth << '\n');
900 
901  // Heuristic: The speculatively executed instructions must all be able to
902  // merge into the Head block. The Head critical path should dominate the
903  // resource cost of the speculated instructions.
904  if (ResDepth > HeadDepth) {
905  DEBUG(dbgs() << "Too many instructions to speculate.\n");
906  return false;
907  }
908  return true;
909 }
910 
911 bool AArch64ConditionalCompares::tryConvert(MachineBasicBlock *MBB) {
912  bool Changed = false;
913  while (CmpConv.canConvert(MBB) && shouldConvert()) {
914  invalidateTraces();
916  CmpConv.convert(RemovedBlocks);
917  Changed = true;
918  updateDomTree(RemovedBlocks);
919  updateLoops(RemovedBlocks);
920  }
921  return Changed;
922 }
923 
924 bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) {
925  DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n"
926  << "********** Function: " << MF.getName() << '\n');
927  if (skipFunction(MF.getFunction()))
928  return false;
929 
930  TII = MF.getSubtarget().getInstrInfo();
931  TRI = MF.getSubtarget().getRegisterInfo();
932  SchedModel = MF.getSubtarget().getSchedModel();
933  MRI = &MF.getRegInfo();
934  DomTree = &getAnalysis<MachineDominatorTree>();
935  Loops = getAnalysisIfAvailable<MachineLoopInfo>();
936  MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
937  Traces = &getAnalysis<MachineTraceMetrics>();
938  MinInstr = nullptr;
939  MinSize = MF.getFunction().optForMinSize();
940 
941  bool Changed = false;
942  CmpConv.runOnMachineFunction(MF, MBPI);
943 
944  // Visit blocks in dominator tree pre-order. The pre-order enables multiple
945  // cmp-conversions from the same head block.
946  // Note that updateDomTree() modifies the children of the DomTree node
947  // currently being visited. The df_iterator supports that; it doesn't look at
948  // child_begin() / child_end() until after a node has been visited.
949  for (auto *I : depth_first(DomTree))
950  if (tryConvert(I->getBlock()))
951  Changed = true;
952 
953  return Changed;
954 }
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:81
const MachineInstrBuilder & add(const MachineOperand &MO) const
bool hasSuccessorProbabilities() const
Return true if any of the successors have probabilities attached to them.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
bool use_nodbg_empty(unsigned RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register...
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:235
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:271
static CondCode getInvertedCondCode(CondCode Code)
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:163
unsigned getReg() const
getReg - Returns the register number.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
STATISTIC(NumFunctions, "Total number of functions")
A debug info location.
Definition: DebugLoc.h:34
A trace ensemble is a collection of traces selected using the same strategy, for example &#39;minimum res...
FunctionPass * createAArch64ConditionalCompares()
static unsigned InstrCount
bool isPHI() const
Definition: MachineInstr.h:829
virtual unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const
Remove the branching code at the end of the specific MBB.
static cl::opt< unsigned > BlockInstrLimit("aarch64-ccmp-limit", cl::init(30), cl::Hidden, cl::desc("Maximum number of instructions per speculated block."))
AnalysisUsage & addRequired()
static cl::opt< bool > Stress("aarch64-stress-ccmp", cl::Hidden, cl::desc("Turn all knobs to 11"))
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
Hexagon Hardware Loops
void eraseFromParent()
This method unlinks &#39;this&#39; from the containing function and deletes it.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
void eraseFromParent()
Unlink &#39;this&#39; from the containing basic block and delete it.
Reg
All possible values of the reg field in the ModR/M byte.
static bool shouldConvert(Constant &C, AArch64PromoteConstant::PromotionCacheTy &PromotionCache)
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:293
static const char * getCondCodeName(CondCode Code)
PhysRegInfo analyzePhysReg(unsigned Reg, const TargetRegisterInfo *TRI)
analyzePhysReg - Analyze how the current instruction or bundle uses a physical register.
Select the trace through a block that has the fewest instructions.
const TargetRegisterClass * getRegClass(const MCInstrDesc &TID, unsigned OpNum, const TargetRegisterInfo *TRI, const MachineFunction &MF) const
Given a machine instruction descriptor, returns the register class constraint for OpNum...
Base class for the actual dominator tree node.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
const std::vector< DomTreeNodeBase * > & getChildren() const
virtual const TargetInstrInfo * getInstrInfo() const
void setSuccProbability(succ_iterator I, BranchProbability Prob)
Set successor probability of a given iterator.
const TargetRegisterClass * constrainRegClass(unsigned Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
TargetInstrInfo - Interface to description of machine instruction set.
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:406
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
unsigned const MachineRegisterInfo * MRI
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:149
DomTreeNodeBase * getIDom() const
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
InstrCycles getInstrCycles(const MachineInstr &MI) const
Return the depth and height of MI.
Represent the analysis usage information of a pass.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
A trace represents a plausible sequence of executed basic blocks that passes through the current basi...
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:862
size_t getNumChildren() const
void updateTerminator()
Update the terminator instructions in block to account for changes to the layout. ...
static bool parseCond(ArrayRef< MachineOperand > Cond, AArch64CC::CondCode &CC)
MIOperands - Iterate over operands of a single instruction.
unsigned pred_size() const
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:924
static unsigned getNZCVToSatisfyCondCode(CondCode Code)
Given a condition code, return NZCV flags that would satisfy that condition.
static unsigned getReg(const void *D, unsigned RC, unsigned RegNo)
unsigned succ_size() const
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
BranchProbability getEdgeProbability(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const
Representation of each machine instruction.
Definition: MachineInstr.h:60
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB &#39;Other&#39; at the position From, and insert it into this MBB right before &#39;...
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:61
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:45
INITIALIZE_PASS_BEGIN(AArch64ConditionalCompares, "aarch64-ccmp", "AArch64 CCMP Pass", false, false) INITIALIZE_PASS_END(AArch64ConditionalCompares
#define I(x, y, z)
Definition: MD5.cpp:58
bool optForMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:576
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
void initializeAArch64ConditionalComparesPass(PassRegistry &)
unsigned getResourceDepth(bool Bottom) const
Return the resource depth of the top/bottom of the trace center block.
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
iterator_range< df_iterator< T > > depth_first(const T &G)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
#define DEBUG(X)
Definition: Debug.h:118
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:298
Machine model for scheduling, bundling, and heuristics.
Definition: MCSchedule.h:201
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget&#39;s CPU.
unsigned createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...