LLVM  8.0.0svn
AArch64ConditionalCompares.cpp
Go to the documentation of this file.
1 //===-- AArch64ConditionalCompares.cpp --- CCMP formation for AArch64 -----===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the AArch64ConditionalCompares pass which reduces
11 // branching and code size by using the conditional compare instructions CCMP,
12 // CCMN, and FCMP.
13 //
14 // The CFG transformations for forming conditional compares are very similar to
15 // if-conversion, and this pass should run immediately before the early
16 // if-conversion pass.
17 //
18 //===----------------------------------------------------------------------===//
19 
20 #include "AArch64.h"
22 #include "llvm/ADT/SetVector.h"
23 #include "llvm/ADT/SmallPtrSet.h"
24 #include "llvm/ADT/Statistic.h"
33 #include "llvm/CodeGen/Passes.h"
38 #include "llvm/Support/Debug.h"
40 
41 using namespace llvm;
42 
43 #define DEBUG_TYPE "aarch64-ccmp"
44 
45 // Absolute maximum number of instructions allowed per speculated block.
46 // This bypasses all other heuristics, so it should be set fairly high.
48  "aarch64-ccmp-limit", cl::init(30), cl::Hidden,
49  cl::desc("Maximum number of instructions per speculated block."));
50 
51 // Stress testing mode - disable heuristics.
52 static cl::opt<bool> Stress("aarch64-stress-ccmp", cl::Hidden,
53  cl::desc("Turn all knobs to 11"));
54 
55 STATISTIC(NumConsidered, "Number of ccmps considered");
56 STATISTIC(NumPhiRejs, "Number of ccmps rejected (PHI)");
57 STATISTIC(NumPhysRejs, "Number of ccmps rejected (Physregs)");
58 STATISTIC(NumPhi2Rejs, "Number of ccmps rejected (PHI2)");
59 STATISTIC(NumHeadBranchRejs, "Number of ccmps rejected (Head branch)");
60 STATISTIC(NumCmpBranchRejs, "Number of ccmps rejected (CmpBB branch)");
61 STATISTIC(NumCmpTermRejs, "Number of ccmps rejected (CmpBB is cbz...)");
62 STATISTIC(NumImmRangeRejs, "Number of ccmps rejected (Imm out of range)");
63 STATISTIC(NumLiveDstRejs, "Number of ccmps rejected (Cmp dest live)");
64 STATISTIC(NumMultNZCVUses, "Number of ccmps rejected (NZCV used)");
65 STATISTIC(NumUnknNZCVDefs, "Number of ccmps rejected (NZCV def unknown)");
66 
67 STATISTIC(NumSpeculateRejs, "Number of ccmps rejected (Can't speculate)");
68 
69 STATISTIC(NumConverted, "Number of ccmp instructions created");
70 STATISTIC(NumCompBranches, "Number of cbz/cbnz branches converted");
71 
72 //===----------------------------------------------------------------------===//
73 // SSACCmpConv
74 //===----------------------------------------------------------------------===//
75 //
76 // The SSACCmpConv class performs ccmp-conversion on SSA form machine code
77 // after determining if it is possible. The class contains no heuristics;
78 // external code should be used to determine when ccmp-conversion is a good
79 // idea.
80 //
81 // CCmp-formation works on a CFG representing chained conditions, typically
82 // from C's short-circuit || and && operators:
83 //
84 // From: Head To: Head
85 // / | CmpBB
86 // / | / |
87 // | CmpBB / |
88 // | / | Tail |
89 // | / | | |
90 // Tail | | |
91 // | | | |
92 // ... ... ... ...
93 //
94 // The Head block is terminated by a br.cond instruction, and the CmpBB block
95 // contains compare + br.cond. Tail must be a successor of both.
96 //
97 // The cmp-conversion turns the compare instruction in CmpBB into a conditional
98 // compare, and merges CmpBB into Head, speculatively executing its
99 // instructions. The AArch64 conditional compare instructions have an immediate
100 // operand that specifies the NZCV flag values when the condition is false and
101 // the compare isn't executed. This makes it possible to chain compares with
102 // different condition codes.
103 //
104 // Example:
105 //
106 // if (a == 5 || b == 17)
107 // foo();
108 //
109 // Head:
110 // cmp w0, #5
111 // b.eq Tail
112 // CmpBB:
113 // cmp w1, #17
114 // b.eq Tail
115 // ...
116 // Tail:
117 // bl _foo
118 //
119 // Becomes:
120 //
121 // Head:
122 // cmp w0, #5
123 // ccmp w1, #17, 4, ne ; 4 = nZcv
124 // b.eq Tail
125 // ...
126 // Tail:
127 // bl _foo
128 //
129 // The ccmp condition code is the one that would cause the Head terminator to
130 // branch to CmpBB.
131 //
132 // FIXME: It should also be possible to speculate a block on the critical edge
133 // between Head and Tail, just like if-converting a diamond.
134 //
135 // FIXME: Handle PHIs in Tail by turning them into selects (if-conversion).
136 
137 namespace {
138 class SSACCmpConv {
139  MachineFunction *MF;
140  const TargetInstrInfo *TII;
141  const TargetRegisterInfo *TRI;
143  const MachineBranchProbabilityInfo *MBPI;
144 
145 public:
146  /// The first block containing a conditional branch, dominating everything
147  /// else.
148  MachineBasicBlock *Head;
149 
150  /// The block containing cmp+br.cond with a successor shared with Head.
151  MachineBasicBlock *CmpBB;
152 
153  /// The common successor for Head and CmpBB.
154  MachineBasicBlock *Tail;
155 
156  /// The compare instruction in CmpBB that can be converted to a ccmp.
157  MachineInstr *CmpMI;
158 
159 private:
160  /// The branch condition in Head as determined by AnalyzeBranch.
162 
163  /// The condition code that makes Head branch to CmpBB.
164  AArch64CC::CondCode HeadCmpBBCC;
165 
166  /// The branch condition in CmpBB.
168 
169  /// The condition code that makes CmpBB branch to Tail.
170  AArch64CC::CondCode CmpBBTailCC;
171 
172  /// Check if the Tail PHIs are trivially convertible.
173  bool trivialTailPHIs();
174 
175  /// Remove CmpBB from the Tail PHIs.
176  void updateTailPHIs();
177 
178  /// Check if an operand defining DstReg is dead.
179  bool isDeadDef(unsigned DstReg);
180 
181  /// Find the compare instruction in MBB that controls the conditional branch.
182  /// Return NULL if a convertible instruction can't be found.
183  MachineInstr *findConvertibleCompare(MachineBasicBlock *MBB);
184 
185  /// Return true if all non-terminator instructions in MBB can be safely
186  /// speculated.
187  bool canSpeculateInstrs(MachineBasicBlock *MBB, const MachineInstr *CmpMI);
188 
189 public:
190  /// runOnMachineFunction - Initialize per-function data structures.
191  void runOnMachineFunction(MachineFunction &MF,
192  const MachineBranchProbabilityInfo *MBPI) {
193  this->MF = &MF;
194  this->MBPI = MBPI;
195  TII = MF.getSubtarget().getInstrInfo();
196  TRI = MF.getSubtarget().getRegisterInfo();
197  MRI = &MF.getRegInfo();
198  }
199 
200  /// If the sub-CFG headed by MBB can be cmp-converted, initialize the
201  /// internal state, and return true.
202  bool canConvert(MachineBasicBlock *MBB);
203 
204  /// Cmo-convert the last block passed to canConvertCmp(), assuming
205  /// it is possible. Add any erased blocks to RemovedBlocks.
206  void convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks);
207 
208  /// Return the expected code size delta if the conversion into a
209  /// conditional compare is performed.
210  int expectedCodeSizeDelta() const;
211 };
212 } // end anonymous namespace
213 
214 // Check that all PHIs in Tail are selecting the same value from Head and CmpBB.
215 // This means that no if-conversion is required when merging CmpBB into Head.
216 bool SSACCmpConv::trivialTailPHIs() {
217  for (auto &I : *Tail) {
218  if (!I.isPHI())
219  break;
220  unsigned HeadReg = 0, CmpBBReg = 0;
221  // PHI operands come in (VReg, MBB) pairs.
222  for (unsigned oi = 1, oe = I.getNumOperands(); oi != oe; oi += 2) {
223  MachineBasicBlock *MBB = I.getOperand(oi + 1).getMBB();
224  unsigned Reg = I.getOperand(oi).getReg();
225  if (MBB == Head) {
226  assert((!HeadReg || HeadReg == Reg) && "Inconsistent PHI operands");
227  HeadReg = Reg;
228  }
229  if (MBB == CmpBB) {
230  assert((!CmpBBReg || CmpBBReg == Reg) && "Inconsistent PHI operands");
231  CmpBBReg = Reg;
232  }
233  }
234  if (HeadReg != CmpBBReg)
235  return false;
236  }
237  return true;
238 }
239 
240 // Assuming that trivialTailPHIs() is true, update the Tail PHIs by simply
241 // removing the CmpBB operands. The Head operands will be identical.
242 void SSACCmpConv::updateTailPHIs() {
243  for (auto &I : *Tail) {
244  if (!I.isPHI())
245  break;
246  // I is a PHI. It can have multiple entries for CmpBB.
247  for (unsigned oi = I.getNumOperands(); oi > 2; oi -= 2) {
248  // PHI operands are (Reg, MBB) at (oi-2, oi-1).
249  if (I.getOperand(oi - 1).getMBB() == CmpBB) {
250  I.RemoveOperand(oi - 1);
251  I.RemoveOperand(oi - 2);
252  }
253  }
254  }
255 }
256 
257 // This pass runs before the AArch64DeadRegisterDefinitions pass, so compares
258 // are still writing virtual registers without any uses.
259 bool SSACCmpConv::isDeadDef(unsigned DstReg) {
260  // Writes to the zero register are dead.
261  if (DstReg == AArch64::WZR || DstReg == AArch64::XZR)
262  return true;
264  return false;
265  // A virtual register def without any uses will be marked dead later, and
266  // eventually replaced by the zero register.
267  return MRI->use_nodbg_empty(DstReg);
268 }
269 
270 // Parse a condition code returned by AnalyzeBranch, and compute the CondCode
271 // corresponding to TBB.
272 // Return
274  // A normal br.cond simply has the condition code.
275  if (Cond[0].getImm() != -1) {
276  assert(Cond.size() == 1 && "Unknown Cond array format");
277  CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
278  return true;
279  }
280  // For tbz and cbz instruction, the opcode is next.
281  switch (Cond[1].getImm()) {
282  default:
283  // This includes tbz / tbnz branches which can't be converted to
284  // ccmp + br.cond.
285  return false;
286  case AArch64::CBZW:
287  case AArch64::CBZX:
288  assert(Cond.size() == 3 && "Unknown Cond array format");
289  CC = AArch64CC::EQ;
290  return true;
291  case AArch64::CBNZW:
292  case AArch64::CBNZX:
293  assert(Cond.size() == 3 && "Unknown Cond array format");
294  CC = AArch64CC::NE;
295  return true;
296  }
297 }
298 
299 MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) {
301  if (I == MBB->end())
302  return nullptr;
303  // The terminator must be controlled by the flags.
304  if (!I->readsRegister(AArch64::NZCV)) {
305  switch (I->getOpcode()) {
306  case AArch64::CBZW:
307  case AArch64::CBZX:
308  case AArch64::CBNZW:
309  case AArch64::CBNZX:
310  // These can be converted into a ccmp against #0.
311  return &*I;
312  }
313  ++NumCmpTermRejs;
314  LLVM_DEBUG(dbgs() << "Flags not used by terminator: " << *I);
315  return nullptr;
316  }
317 
318  // Now find the instruction controlling the terminator.
319  for (MachineBasicBlock::iterator B = MBB->begin(); I != B;) {
320  --I;
321  assert(!I->isTerminator() && "Spurious terminator");
322  switch (I->getOpcode()) {
323  // cmp is an alias for subs with a dead destination register.
324  case AArch64::SUBSWri:
325  case AArch64::SUBSXri:
326  // cmn is an alias for adds with a dead destination register.
327  case AArch64::ADDSWri:
328  case AArch64::ADDSXri:
329  // Check that the immediate operand is within range, ccmp wants a uimm5.
330  // Rd = SUBSri Rn, imm, shift
331  if (I->getOperand(3).getImm() || !isUInt<5>(I->getOperand(2).getImm())) {
332  LLVM_DEBUG(dbgs() << "Immediate out of range for ccmp: " << *I);
333  ++NumImmRangeRejs;
334  return nullptr;
335  }
337  case AArch64::SUBSWrr:
338  case AArch64::SUBSXrr:
339  case AArch64::ADDSWrr:
340  case AArch64::ADDSXrr:
341  if (isDeadDef(I->getOperand(0).getReg()))
342  return &*I;
343  LLVM_DEBUG(dbgs() << "Can't convert compare with live destination: "
344  << *I);
345  ++NumLiveDstRejs;
346  return nullptr;
347  case AArch64::FCMPSrr:
348  case AArch64::FCMPDrr:
349  case AArch64::FCMPESrr:
350  case AArch64::FCMPEDrr:
351  return &*I;
352  }
353 
354  // Check for flag reads and clobbers.
355  MIOperands::PhysRegInfo PRI =
356  MIOperands(*I).analyzePhysReg(AArch64::NZCV, TRI);
357 
358  if (PRI.Read) {
359  // The ccmp doesn't produce exactly the same flags as the original
360  // compare, so reject the transform if there are uses of the flags
361  // besides the terminators.
362  LLVM_DEBUG(dbgs() << "Can't create ccmp with multiple uses: " << *I);
363  ++NumMultNZCVUses;
364  return nullptr;
365  }
366 
367  if (PRI.Defined || PRI.Clobbered) {
368  LLVM_DEBUG(dbgs() << "Not convertible compare: " << *I);
369  ++NumUnknNZCVDefs;
370  return nullptr;
371  }
372  }
373  LLVM_DEBUG(dbgs() << "Flags not defined in " << printMBBReference(*MBB)
374  << '\n');
375  return nullptr;
376 }
377 
378 /// Determine if all the instructions in MBB can safely
379 /// be speculated. The terminators are not considered.
380 ///
381 /// Only CmpMI is allowed to clobber the flags.
382 ///
383 bool SSACCmpConv::canSpeculateInstrs(MachineBasicBlock *MBB,
384  const MachineInstr *CmpMI) {
385  // Reject any live-in physregs. It's probably NZCV/EFLAGS, and very hard to
386  // get right.
387  if (!MBB->livein_empty()) {
388  LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has live-ins.\n");
389  return false;
390  }
391 
392  unsigned InstrCount = 0;
393 
394  // Check all instructions, except the terminators. It is assumed that
395  // terminators never have side effects or define any used register values.
396  for (auto &I : make_range(MBB->begin(), MBB->getFirstTerminator())) {
397  if (I.isDebugInstr())
398  continue;
399 
400  if (++InstrCount > BlockInstrLimit && !Stress) {
401  LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has more than "
402  << BlockInstrLimit << " instructions.\n");
403  return false;
404  }
405 
406  // There shouldn't normally be any phis in a single-predecessor block.
407  if (I.isPHI()) {
408  LLVM_DEBUG(dbgs() << "Can't hoist: " << I);
409  return false;
410  }
411 
412  // Don't speculate loads. Note that it may be possible and desirable to
413  // speculate GOT or constant pool loads that are guaranteed not to trap,
414  // but we don't support that for now.
415  if (I.mayLoad()) {
416  LLVM_DEBUG(dbgs() << "Won't speculate load: " << I);
417  return false;
418  }
419 
420  // We never speculate stores, so an AA pointer isn't necessary.
421  bool DontMoveAcrossStore = true;
422  if (!I.isSafeToMove(nullptr, DontMoveAcrossStore)) {
423  LLVM_DEBUG(dbgs() << "Can't speculate: " << I);
424  return false;
425  }
426 
427  // Only CmpMI is allowed to clobber the flags.
428  if (&I != CmpMI && I.modifiesRegister(AArch64::NZCV, TRI)) {
429  LLVM_DEBUG(dbgs() << "Clobbers flags: " << I);
430  return false;
431  }
432  }
433  return true;
434 }
435 
436 /// Analyze the sub-cfg rooted in MBB, and return true if it is a potential
437 /// candidate for cmp-conversion. Fill out the internal state.
438 ///
439 bool SSACCmpConv::canConvert(MachineBasicBlock *MBB) {
440  Head = MBB;
441  Tail = CmpBB = nullptr;
442 
443  if (Head->succ_size() != 2)
444  return false;
445  MachineBasicBlock *Succ0 = Head->succ_begin()[0];
446  MachineBasicBlock *Succ1 = Head->succ_begin()[1];
447 
448  // CmpBB can only have a single predecessor. Tail is allowed many.
449  if (Succ0->pred_size() != 1)
450  std::swap(Succ0, Succ1);
451 
452  // Succ0 is our candidate for CmpBB.
453  if (Succ0->pred_size() != 1 || Succ0->succ_size() != 2)
454  return false;
455 
456  CmpBB = Succ0;
457  Tail = Succ1;
458 
459  if (!CmpBB->isSuccessor(Tail))
460  return false;
461 
462  // The CFG topology checks out.
463  LLVM_DEBUG(dbgs() << "\nTriangle: " << printMBBReference(*Head) << " -> "
464  << printMBBReference(*CmpBB) << " -> "
465  << printMBBReference(*Tail) << '\n');
466  ++NumConsidered;
467 
468  // Tail is allowed to have many predecessors, but we can't handle PHIs yet.
469  //
470  // FIXME: Real PHIs could be if-converted as long as the CmpBB values are
471  // defined before The CmpBB cmp clobbers the flags. Alternatively, it should
472  // always be safe to sink the ccmp down to immediately before the CmpBB
473  // terminators.
474  if (!trivialTailPHIs()) {
475  LLVM_DEBUG(dbgs() << "Can't handle phis in Tail.\n");
476  ++NumPhiRejs;
477  return false;
478  }
479 
480  if (!Tail->livein_empty()) {
481  LLVM_DEBUG(dbgs() << "Can't handle live-in physregs in Tail.\n");
482  ++NumPhysRejs;
483  return false;
484  }
485 
486  // CmpBB should never have PHIs since Head is its only predecessor.
487  // FIXME: Clean them up if it happens.
488  if (!CmpBB->empty() && CmpBB->front().isPHI()) {
489  LLVM_DEBUG(dbgs() << "Can't handle phis in CmpBB.\n");
490  ++NumPhi2Rejs;
491  return false;
492  }
493 
494  if (!CmpBB->livein_empty()) {
495  LLVM_DEBUG(dbgs() << "Can't handle live-in physregs in CmpBB.\n");
496  ++NumPhysRejs;
497  return false;
498  }
499 
500  // The branch we're looking to eliminate must be analyzable.
501  HeadCond.clear();
502  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
503  if (TII->analyzeBranch(*Head, TBB, FBB, HeadCond)) {
504  LLVM_DEBUG(dbgs() << "Head branch not analyzable.\n");
505  ++NumHeadBranchRejs;
506  return false;
507  }
508 
509  // This is weird, probably some sort of degenerate CFG, or an edge to a
510  // landing pad.
511  if (!TBB || HeadCond.empty()) {
512  LLVM_DEBUG(
513  dbgs() << "AnalyzeBranch didn't find conditional branch in Head.\n");
514  ++NumHeadBranchRejs;
515  return false;
516  }
517 
518  if (!parseCond(HeadCond, HeadCmpBBCC)) {
519  LLVM_DEBUG(dbgs() << "Unsupported branch type on Head\n");
520  ++NumHeadBranchRejs;
521  return false;
522  }
523 
524  // Make sure the branch direction is right.
525  if (TBB != CmpBB) {
526  assert(TBB == Tail && "Unexpected TBB");
527  HeadCmpBBCC = AArch64CC::getInvertedCondCode(HeadCmpBBCC);
528  }
529 
530  CmpBBCond.clear();
531  TBB = FBB = nullptr;
532  if (TII->analyzeBranch(*CmpBB, TBB, FBB, CmpBBCond)) {
533  LLVM_DEBUG(dbgs() << "CmpBB branch not analyzable.\n");
534  ++NumCmpBranchRejs;
535  return false;
536  }
537 
538  if (!TBB || CmpBBCond.empty()) {
539  LLVM_DEBUG(
540  dbgs() << "AnalyzeBranch didn't find conditional branch in CmpBB.\n");
541  ++NumCmpBranchRejs;
542  return false;
543  }
544 
545  if (!parseCond(CmpBBCond, CmpBBTailCC)) {
546  LLVM_DEBUG(dbgs() << "Unsupported branch type on CmpBB\n");
547  ++NumCmpBranchRejs;
548  return false;
549  }
550 
551  if (TBB != Tail)
552  CmpBBTailCC = AArch64CC::getInvertedCondCode(CmpBBTailCC);
553 
554  LLVM_DEBUG(dbgs() << "Head->CmpBB on "
555  << AArch64CC::getCondCodeName(HeadCmpBBCC)
556  << ", CmpBB->Tail on "
557  << AArch64CC::getCondCodeName(CmpBBTailCC) << '\n');
558 
559  CmpMI = findConvertibleCompare(CmpBB);
560  if (!CmpMI)
561  return false;
562 
563  if (!canSpeculateInstrs(CmpBB, CmpMI)) {
564  ++NumSpeculateRejs;
565  return false;
566  }
567  return true;
568 }
569 
570 void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) {
571  LLVM_DEBUG(dbgs() << "Merging " << printMBBReference(*CmpBB) << " into "
572  << printMBBReference(*Head) << ":\n"
573  << *CmpBB);
574 
575  // All CmpBB instructions are moved into Head, and CmpBB is deleted.
576  // Update the CFG first.
577  updateTailPHIs();
578 
579  // Save successor probabilties before removing CmpBB and Tail from their
580  // parents.
581  BranchProbability Head2CmpBB = MBPI->getEdgeProbability(Head, CmpBB);
582  BranchProbability CmpBB2Tail = MBPI->getEdgeProbability(CmpBB, Tail);
583 
584  Head->removeSuccessor(CmpBB);
585  CmpBB->removeSuccessor(Tail);
586 
587  // If Head and CmpBB had successor probabilties, udpate the probabilities to
588  // reflect the ccmp-conversion.
589  if (Head->hasSuccessorProbabilities() && CmpBB->hasSuccessorProbabilities()) {
590 
591  // Head is allowed two successors. We've removed CmpBB, so the remaining
592  // successor is Tail. We need to increase the successor probability for
593  // Tail to account for the CmpBB path we removed.
594  //
595  // Pr(Tail|Head) += Pr(CmpBB|Head) * Pr(Tail|CmpBB).
596  assert(*Head->succ_begin() == Tail && "Head successor is not Tail");
597  BranchProbability Head2Tail = MBPI->getEdgeProbability(Head, Tail);
598  Head->setSuccProbability(Head->succ_begin(),
599  Head2Tail + Head2CmpBB * CmpBB2Tail);
600 
601  // We will transfer successors of CmpBB to Head in a moment without
602  // normalizing the successor probabilities. Set the successor probabilites
603  // before doing so.
604  //
605  // Pr(I|Head) = Pr(CmpBB|Head) * Pr(I|CmpBB).
606  for (auto I = CmpBB->succ_begin(), E = CmpBB->succ_end(); I != E; ++I) {
607  BranchProbability CmpBB2I = MBPI->getEdgeProbability(CmpBB, *I);
608  CmpBB->setSuccProbability(I, Head2CmpBB * CmpBB2I);
609  }
610  }
611 
612  Head->transferSuccessorsAndUpdatePHIs(CmpBB);
613  DebugLoc TermDL = Head->getFirstTerminator()->getDebugLoc();
614  TII->removeBranch(*Head);
615 
616  // If the Head terminator was one of the cbz / tbz branches with built-in
617  // compare, we need to insert an explicit compare instruction in its place.
618  if (HeadCond[0].getImm() == -1) {
619  ++NumCompBranches;
620  unsigned Opc = 0;
621  switch (HeadCond[1].getImm()) {
622  case AArch64::CBZW:
623  case AArch64::CBNZW:
624  Opc = AArch64::SUBSWri;
625  break;
626  case AArch64::CBZX:
627  case AArch64::CBNZX:
628  Opc = AArch64::SUBSXri;
629  break;
630  default:
631  llvm_unreachable("Cannot convert Head branch");
632  }
633  const MCInstrDesc &MCID = TII->get(Opc);
634  // Create a dummy virtual register for the SUBS def.
635  unsigned DestReg =
636  MRI->createVirtualRegister(TII->getRegClass(MCID, 0, TRI, *MF));
637  // Insert a SUBS Rn, #0 instruction instead of the cbz / cbnz.
638  BuildMI(*Head, Head->end(), TermDL, MCID)
639  .addReg(DestReg, RegState::Define | RegState::Dead)
640  .add(HeadCond[2])
641  .addImm(0)
642  .addImm(0);
643  // SUBS uses the GPR*sp register classes.
644  MRI->constrainRegClass(HeadCond[2].getReg(),
645  TII->getRegClass(MCID, 1, TRI, *MF));
646  }
647 
648  Head->splice(Head->end(), CmpBB, CmpBB->begin(), CmpBB->end());
649 
650  // Now replace CmpMI with a ccmp instruction that also considers the incoming
651  // flags.
652  unsigned Opc = 0;
653  unsigned FirstOp = 1; // First CmpMI operand to copy.
654  bool isZBranch = false; // CmpMI is a cbz/cbnz instruction.
655  switch (CmpMI->getOpcode()) {
656  default:
657  llvm_unreachable("Unknown compare opcode");
658  case AArch64::SUBSWri: Opc = AArch64::CCMPWi; break;
659  case AArch64::SUBSWrr: Opc = AArch64::CCMPWr; break;
660  case AArch64::SUBSXri: Opc = AArch64::CCMPXi; break;
661  case AArch64::SUBSXrr: Opc = AArch64::CCMPXr; break;
662  case AArch64::ADDSWri: Opc = AArch64::CCMNWi; break;
663  case AArch64::ADDSWrr: Opc = AArch64::CCMNWr; break;
664  case AArch64::ADDSXri: Opc = AArch64::CCMNXi; break;
665  case AArch64::ADDSXrr: Opc = AArch64::CCMNXr; break;
666  case AArch64::FCMPSrr: Opc = AArch64::FCCMPSrr; FirstOp = 0; break;
667  case AArch64::FCMPDrr: Opc = AArch64::FCCMPDrr; FirstOp = 0; break;
668  case AArch64::FCMPESrr: Opc = AArch64::FCCMPESrr; FirstOp = 0; break;
669  case AArch64::FCMPEDrr: Opc = AArch64::FCCMPEDrr; FirstOp = 0; break;
670  case AArch64::CBZW:
671  case AArch64::CBNZW:
672  Opc = AArch64::CCMPWi;
673  FirstOp = 0;
674  isZBranch = true;
675  break;
676  case AArch64::CBZX:
677  case AArch64::CBNZX:
678  Opc = AArch64::CCMPXi;
679  FirstOp = 0;
680  isZBranch = true;
681  break;
682  }
683 
684  // The ccmp instruction should set the flags according to the comparison when
685  // Head would have branched to CmpBB.
686  // The NZCV immediate operand should provide flags for the case where Head
687  // would have branched to Tail. These flags should cause the new Head
688  // terminator to branch to tail.
689  unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(CmpBBTailCC);
690  const MCInstrDesc &MCID = TII->get(Opc);
691  MRI->constrainRegClass(CmpMI->getOperand(FirstOp).getReg(),
692  TII->getRegClass(MCID, 0, TRI, *MF));
693  if (CmpMI->getOperand(FirstOp + 1).isReg())
694  MRI->constrainRegClass(CmpMI->getOperand(FirstOp + 1).getReg(),
695  TII->getRegClass(MCID, 1, TRI, *MF));
696  MachineInstrBuilder MIB = BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), MCID)
697  .add(CmpMI->getOperand(FirstOp)); // Register Rn
698  if (isZBranch)
699  MIB.addImm(0); // cbz/cbnz Rn -> ccmp Rn, #0
700  else
701  MIB.add(CmpMI->getOperand(FirstOp + 1)); // Register Rm / Immediate
702  MIB.addImm(NZCV).addImm(HeadCmpBBCC);
703 
704  // If CmpMI was a terminator, we need a new conditional branch to replace it.
705  // This now becomes a Head terminator.
706  if (isZBranch) {
707  bool isNZ = CmpMI->getOpcode() == AArch64::CBNZW ||
708  CmpMI->getOpcode() == AArch64::CBNZX;
709  BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), TII->get(AArch64::Bcc))
710  .addImm(isNZ ? AArch64CC::NE : AArch64CC::EQ)
711  .add(CmpMI->getOperand(1)); // Branch target.
712  }
713  CmpMI->eraseFromParent();
714  Head->updateTerminator();
715 
716  RemovedBlocks.push_back(CmpBB);
717  CmpBB->eraseFromParent();
718  LLVM_DEBUG(dbgs() << "Result:\n" << *Head);
719  ++NumConverted;
720 }
721 
722 int SSACCmpConv::expectedCodeSizeDelta() const {
723  int delta = 0;
724  // If the Head terminator was one of the cbz / tbz branches with built-in
725  // compare, we need to insert an explicit compare instruction in its place
726  // plus a branch instruction.
727  if (HeadCond[0].getImm() == -1) {
728  switch (HeadCond[1].getImm()) {
729  case AArch64::CBZW:
730  case AArch64::CBNZW:
731  case AArch64::CBZX:
732  case AArch64::CBNZX:
733  // Therefore delta += 1
734  delta = 1;
735  break;
736  default:
737  llvm_unreachable("Cannot convert Head branch");
738  }
739  }
740  // If the Cmp terminator was one of the cbz / tbz branches with
741  // built-in compare, it will be turned into a compare instruction
742  // into Head, but we do not save any instruction.
743  // Otherwise, we save the branch instruction.
744  switch (CmpMI->getOpcode()) {
745  default:
746  --delta;
747  break;
748  case AArch64::CBZW:
749  case AArch64::CBNZW:
750  case AArch64::CBZX:
751  case AArch64::CBNZX:
752  break;
753  }
754  return delta;
755 }
756 
757 //===----------------------------------------------------------------------===//
758 // AArch64ConditionalCompares Pass
759 //===----------------------------------------------------------------------===//
760 
761 namespace {
762 class AArch64ConditionalCompares : public MachineFunctionPass {
763  const MachineBranchProbabilityInfo *MBPI;
764  const TargetInstrInfo *TII;
765  const TargetRegisterInfo *TRI;
766  MCSchedModel SchedModel;
767  // Does the proceeded function has Oz attribute.
768  bool MinSize;
770  MachineDominatorTree *DomTree;
772  MachineTraceMetrics *Traces;
774  SSACCmpConv CmpConv;
775 
776 public:
777  static char ID;
778  AArch64ConditionalCompares() : MachineFunctionPass(ID) {
780  }
781  void getAnalysisUsage(AnalysisUsage &AU) const override;
782  bool runOnMachineFunction(MachineFunction &MF) override;
783  StringRef getPassName() const override {
784  return "AArch64 Conditional Compares";
785  }
786 
787 private:
788  bool tryConvert(MachineBasicBlock *);
789  void updateDomTree(ArrayRef<MachineBasicBlock *> Removed);
790  void updateLoops(ArrayRef<MachineBasicBlock *> Removed);
791  void invalidateTraces();
792  bool shouldConvert();
793 };
794 } // end anonymous namespace
795 
797 
798 INITIALIZE_PASS_BEGIN(AArch64ConditionalCompares, "aarch64-ccmp",
799  "AArch64 CCMP Pass", false, false)
803 INITIALIZE_PASS_END(AArch64ConditionalCompares, "aarch64-ccmp",
804  "AArch64 CCMP Pass", false, false)
805 
807  return new AArch64ConditionalCompares();
808 }
809 
810 void AArch64ConditionalCompares::getAnalysisUsage(AnalysisUsage &AU) const {
819 }
820 
821 /// Update the dominator tree after if-conversion erased some blocks.
822 void AArch64ConditionalCompares::updateDomTree(
824  // convert() removes CmpBB which was previously dominated by Head.
825  // CmpBB children should be transferred to Head.
826  MachineDomTreeNode *HeadNode = DomTree->getNode(CmpConv.Head);
827  for (MachineBasicBlock *RemovedMBB : Removed) {
828  MachineDomTreeNode *Node = DomTree->getNode(RemovedMBB);
829  assert(Node != HeadNode && "Cannot erase the head node");
830  assert(Node->getIDom() == HeadNode && "CmpBB should be dominated by Head");
831  while (Node->getNumChildren())
832  DomTree->changeImmediateDominator(Node->getChildren().back(), HeadNode);
833  DomTree->eraseNode(RemovedMBB);
834  }
835 }
836 
837 /// Update LoopInfo after if-conversion.
838 void
839 AArch64ConditionalCompares::updateLoops(ArrayRef<MachineBasicBlock *> Removed) {
840  if (!Loops)
841  return;
842  for (MachineBasicBlock *RemovedMBB : Removed)
843  Loops->removeBlock(RemovedMBB);
844 }
845 
846 /// Invalidate MachineTraceMetrics before if-conversion.
847 void AArch64ConditionalCompares::invalidateTraces() {
848  Traces->invalidate(CmpConv.Head);
849  Traces->invalidate(CmpConv.CmpBB);
850 }
851 
852 /// Apply cost model and heuristics to the if-conversion in IfConv.
853 /// Return true if the conversion is a good idea.
854 ///
856  // Stress testing mode disables all cost considerations.
857  if (Stress)
858  return true;
859  if (!MinInstr)
860  MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
861 
862  // Head dominates CmpBB, so it is always included in its trace.
863  MachineTraceMetrics::Trace Trace = MinInstr->getTrace(CmpConv.CmpBB);
864 
865  // If code size is the main concern
866  if (MinSize) {
867  int CodeSizeDelta = CmpConv.expectedCodeSizeDelta();
868  LLVM_DEBUG(dbgs() << "Code size delta: " << CodeSizeDelta << '\n');
869  // If we are minimizing the code size, do the conversion whatever
870  // the cost is.
871  if (CodeSizeDelta < 0)
872  return true;
873  if (CodeSizeDelta > 0) {
874  LLVM_DEBUG(dbgs() << "Code size is increasing, give up on this one.\n");
875  return false;
876  }
877  // CodeSizeDelta == 0, continue with the regular heuristics
878  }
879 
880  // Heuristic: The compare conversion delays the execution of the branch
881  // instruction because we must wait for the inputs to the second compare as
882  // well. The branch has no dependent instructions, but delaying it increases
883  // the cost of a misprediction.
884  //
885  // Set a limit on the delay we will accept.
886  unsigned DelayLimit = SchedModel.MispredictPenalty * 3 / 4;
887 
888  // Instruction depths can be computed for all trace instructions above CmpBB.
889  unsigned HeadDepth =
890  Trace.getInstrCycles(*CmpConv.Head->getFirstTerminator()).Depth;
891  unsigned CmpBBDepth =
892  Trace.getInstrCycles(*CmpConv.CmpBB->getFirstTerminator()).Depth;
893  LLVM_DEBUG(dbgs() << "Head depth: " << HeadDepth
894  << "\nCmpBB depth: " << CmpBBDepth << '\n');
895  if (CmpBBDepth > HeadDepth + DelayLimit) {
896  LLVM_DEBUG(dbgs() << "Branch delay would be larger than " << DelayLimit
897  << " cycles.\n");
898  return false;
899  }
900 
901  // Check the resource depth at the bottom of CmpBB - these instructions will
902  // be speculated.
903  unsigned ResDepth = Trace.getResourceDepth(true);
904  LLVM_DEBUG(dbgs() << "Resources: " << ResDepth << '\n');
905 
906  // Heuristic: The speculatively executed instructions must all be able to
907  // merge into the Head block. The Head critical path should dominate the
908  // resource cost of the speculated instructions.
909  if (ResDepth > HeadDepth) {
910  LLVM_DEBUG(dbgs() << "Too many instructions to speculate.\n");
911  return false;
912  }
913  return true;
914 }
915 
916 bool AArch64ConditionalCompares::tryConvert(MachineBasicBlock *MBB) {
917  bool Changed = false;
918  while (CmpConv.canConvert(MBB) && shouldConvert()) {
919  invalidateTraces();
921  CmpConv.convert(RemovedBlocks);
922  Changed = true;
923  updateDomTree(RemovedBlocks);
924  updateLoops(RemovedBlocks);
925  }
926  return Changed;
927 }
928 
929 bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) {
930  LLVM_DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n"
931  << "********** Function: " << MF.getName() << '\n');
932  if (skipFunction(MF.getFunction()))
933  return false;
934 
935  TII = MF.getSubtarget().getInstrInfo();
936  TRI = MF.getSubtarget().getRegisterInfo();
937  SchedModel = MF.getSubtarget().getSchedModel();
938  MRI = &MF.getRegInfo();
939  DomTree = &getAnalysis<MachineDominatorTree>();
940  Loops = getAnalysisIfAvailable<MachineLoopInfo>();
941  MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
942  Traces = &getAnalysis<MachineTraceMetrics>();
943  MinInstr = nullptr;
944  MinSize = MF.getFunction().optForMinSize();
945 
946  bool Changed = false;
947  CmpConv.runOnMachineFunction(MF, MBPI);
948 
949  // Visit blocks in dominator tree pre-order. The pre-order enables multiple
950  // cmp-conversions from the same head block.
951  // Note that updateDomTree() modifies the children of the DomTree node
952  // currently being visited. The df_iterator supports that; it doesn't look at
953  // child_begin() / child_end() until after a node has been visited.
954  for (auto *I : depth_first(DomTree))
955  if (tryConvert(I->getBlock()))
956  Changed = true;
957 
958  return Changed;
959 }
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:81
const MachineInstrBuilder & add(const MachineOperand &MO) const
bool hasSuccessorProbabilities() const
Return true if any of the successors have probabilities attached to them.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
bool use_nodbg_empty(unsigned RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register...
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
#define LLVM_FALLTHROUGH
Definition: Compiler.h:86
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:383
static CondCode getInvertedCondCode(CondCode Code)
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:163
unsigned getReg() const
getReg - Returns the register number.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
unsigned Reg
STATISTIC(NumFunctions, "Total number of functions")
unsigned const TargetRegisterInfo * TRI
A debug info location.
Definition: DebugLoc.h:34
A trace ensemble is a collection of traces selected using the same strategy, for example &#39;minimum res...
FunctionPass * createAArch64ConditionalCompares()
static unsigned InstrCount
bool isPHI() const
virtual unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const
Remove the branching code at the end of the specific MBB.
static cl::opt< unsigned > BlockInstrLimit("aarch64-ccmp-limit", cl::init(30), cl::Hidden, cl::desc("Maximum number of instructions per speculated block."))
AnalysisUsage & addRequired()
static cl::opt< bool > Stress("aarch64-stress-ccmp", cl::Hidden, cl::desc("Turn all knobs to 11"))
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
Hexagon Hardware Loops
void eraseFromParent()
This method unlinks &#39;this&#39; from the containing function and deletes it.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
const TargetRegisterClass * getRegClass(const MCInstrDesc &MCID, unsigned OpNum, const TargetRegisterInfo *TRI, const MachineFunction &MF) const
Given a machine instruction descriptor, returns the register class constraint for OpNum...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
void eraseFromParent()
Unlink &#39;this&#39; from the containing basic block and delete it.
static bool shouldConvert(Constant &C, AArch64PromoteConstant::PromotionCacheTy &PromotionCache)
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:409
static const char * getCondCodeName(CondCode Code)
PhysRegInfo analyzePhysReg(unsigned Reg, const TargetRegisterInfo *TRI)
analyzePhysReg - Analyze how the current instruction or bundle uses a physical register.
Select the trace through a block that has the fewest instructions.
Base class for the actual dominator tree node.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
const std::vector< DomTreeNodeBase * > & getChildren() const
virtual const TargetInstrInfo * getInstrInfo() const
void setSuccProbability(succ_iterator I, BranchProbability Prob)
Set successor probability of a given iterator.
const TargetRegisterClass * constrainRegClass(unsigned Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
TargetInstrInfo - Interface to description of machine instruction set.
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:410
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
unsigned const MachineRegisterInfo * MRI
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:149
DomTreeNodeBase * getIDom() const
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
InstrCycles getInstrCycles(const MachineInstr &MI) const
Return the depth and height of MI.
Represent the analysis usage information of a pass.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
A trace represents a plausible sequence of executed basic blocks that passes through the current basi...
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
size_t getNumChildren() const
void updateTerminator()
Update the terminator instructions in block to account for changes to the layout. ...
static bool parseCond(ArrayRef< MachineOperand > Cond, AArch64CC::CondCode &CC)
MIOperands - Iterate over operands of a single instruction.
unsigned pred_size() const
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:941
static unsigned getNZCVToSatisfyCondCode(CondCode Code)
Given a condition code, return NZCV flags that would satisfy that condition.
static unsigned getReg(const void *D, unsigned RC, unsigned RegNo)
unsigned succ_size() const
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
BranchProbability getEdgeProbability(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const
Representation of each machine instruction.
Definition: MachineInstr.h:64
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB &#39;Other&#39; at the position From, and insert it into this MBB right before &#39;...
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:56
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:45
INITIALIZE_PASS_BEGIN(AArch64ConditionalCompares, "aarch64-ccmp", "AArch64 CCMP Pass", false, false) INITIALIZE_PASS_END(AArch64ConditionalCompares
#define I(x, y, z)
Definition: MD5.cpp:58
bool optForMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:595
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
void initializeAArch64ConditionalComparesPass(PassRegistry &)
unsigned getResourceDepth(bool Bottom) const
Return the resource depth of the top/bottom of the trace center block.
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
iterator_range< df_iterator< T > > depth_first(const T &G)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
#define LLVM_DEBUG(X)
Definition: Debug.h:123
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:414
Machine model for scheduling, bundling, and heuristics.
Definition: MCSchedule.h:258
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget&#39;s CPU.
unsigned createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...