LLVM  4.0.0
AArch64ConditionalCompares.cpp
Go to the documentation of this file.
1 //===-- AArch64ConditionalCompares.cpp --- CCMP formation for AArch64 -----===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the AArch64ConditionalCompares pass which reduces
11 // branching and code size by using the conditional compare instructions CCMP,
12 // CCMN, and FCMP.
13 //
14 // The CFG transformations for forming conditional compares are very similar to
15 // if-conversion, and this pass should run immediately before the early
16 // if-conversion pass.
17 //
18 //===----------------------------------------------------------------------===//
19 
20 #include "AArch64.h"
22 #include "llvm/ADT/SetVector.h"
23 #include "llvm/ADT/SmallPtrSet.h"
24 #include "llvm/ADT/Statistic.h"
32 #include "llvm/CodeGen/Passes.h"
34 #include "llvm/Support/Debug.h"
39 
40 using namespace llvm;
41 
42 #define DEBUG_TYPE "aarch64-ccmp"
43 
44 // Absolute maximum number of instructions allowed per speculated block.
45 // This bypasses all other heuristics, so it should be set fairly high.
47  "aarch64-ccmp-limit", cl::init(30), cl::Hidden,
48  cl::desc("Maximum number of instructions per speculated block."));
49 
50 // Stress testing mode - disable heuristics.
51 static cl::opt<bool> Stress("aarch64-stress-ccmp", cl::Hidden,
52  cl::desc("Turn all knobs to 11"));
53 
54 STATISTIC(NumConsidered, "Number of ccmps considered");
55 STATISTIC(NumPhiRejs, "Number of ccmps rejected (PHI)");
56 STATISTIC(NumPhysRejs, "Number of ccmps rejected (Physregs)");
57 STATISTIC(NumPhi2Rejs, "Number of ccmps rejected (PHI2)");
58 STATISTIC(NumHeadBranchRejs, "Number of ccmps rejected (Head branch)");
59 STATISTIC(NumCmpBranchRejs, "Number of ccmps rejected (CmpBB branch)");
60 STATISTIC(NumCmpTermRejs, "Number of ccmps rejected (CmpBB is cbz...)");
61 STATISTIC(NumImmRangeRejs, "Number of ccmps rejected (Imm out of range)");
62 STATISTIC(NumLiveDstRejs, "Number of ccmps rejected (Cmp dest live)");
63 STATISTIC(NumMultNZCVUses, "Number of ccmps rejected (NZCV used)");
64 STATISTIC(NumUnknNZCVDefs, "Number of ccmps rejected (NZCV def unknown)");
65 
66 STATISTIC(NumSpeculateRejs, "Number of ccmps rejected (Can't speculate)");
67 
68 STATISTIC(NumConverted, "Number of ccmp instructions created");
69 STATISTIC(NumCompBranches, "Number of cbz/cbnz branches converted");
70 
71 //===----------------------------------------------------------------------===//
72 // SSACCmpConv
73 //===----------------------------------------------------------------------===//
74 //
75 // The SSACCmpConv class performs ccmp-conversion on SSA form machine code
76 // after determining if it is possible. The class contains no heuristics;
77 // external code should be used to determine when ccmp-conversion is a good
78 // idea.
79 //
80 // CCmp-formation works on a CFG representing chained conditions, typically
81 // from C's short-circuit || and && operators:
82 //
83 // From: Head To: Head
84 // / | CmpBB
85 // / | / |
86 // | CmpBB / |
87 // | / | Tail |
88 // | / | | |
89 // Tail | | |
90 // | | | |
91 // ... ... ... ...
92 //
93 // The Head block is terminated by a br.cond instruction, and the CmpBB block
94 // contains compare + br.cond. Tail must be a successor of both.
95 //
96 // The cmp-conversion turns the compare instruction in CmpBB into a conditional
97 // compare, and merges CmpBB into Head, speculatively executing its
98 // instructions. The AArch64 conditional compare instructions have an immediate
99 // operand that specifies the NZCV flag values when the condition is false and
100 // the compare isn't executed. This makes it possible to chain compares with
101 // different condition codes.
102 //
103 // Example:
104 //
105 // if (a == 5 || b == 17)
106 // foo();
107 //
108 // Head:
109 // cmp w0, #5
110 // b.eq Tail
111 // CmpBB:
112 // cmp w1, #17
113 // b.eq Tail
114 // ...
115 // Tail:
116 // bl _foo
117 //
118 // Becomes:
119 //
120 // Head:
121 // cmp w0, #5
122 // ccmp w1, #17, 4, ne ; 4 = nZcv
123 // b.eq Tail
124 // ...
125 // Tail:
126 // bl _foo
127 //
128 // The ccmp condition code is the one that would cause the Head terminator to
129 // branch to CmpBB.
130 //
131 // FIXME: It should also be possible to speculate a block on the critical edge
132 // between Head and Tail, just like if-converting a diamond.
133 //
134 // FIXME: Handle PHIs in Tail by turning them into selects (if-conversion).
135 
136 namespace {
137 class SSACCmpConv {
138  MachineFunction *MF;
139  const TargetInstrInfo *TII;
140  const TargetRegisterInfo *TRI;
142 
143 public:
144  /// The first block containing a conditional branch, dominating everything
145  /// else.
146  MachineBasicBlock *Head;
147 
148  /// The block containing cmp+br.cond with a successor shared with Head.
149  MachineBasicBlock *CmpBB;
150 
151  /// The common successor for Head and CmpBB.
152  MachineBasicBlock *Tail;
153 
154  /// The compare instruction in CmpBB that can be converted to a ccmp.
155  MachineInstr *CmpMI;
156 
157 private:
158  /// The branch condition in Head as determined by AnalyzeBranch.
160 
161  /// The condition code that makes Head branch to CmpBB.
162  AArch64CC::CondCode HeadCmpBBCC;
163 
164  /// The branch condition in CmpBB.
166 
167  /// The condition code that makes CmpBB branch to Tail.
168  AArch64CC::CondCode CmpBBTailCC;
169 
170  /// Check if the Tail PHIs are trivially convertible.
171  bool trivialTailPHIs();
172 
173  /// Remove CmpBB from the Tail PHIs.
174  void updateTailPHIs();
175 
176  /// Check if an operand defining DstReg is dead.
177  bool isDeadDef(unsigned DstReg);
178 
179  /// Find the compare instruction in MBB that controls the conditional branch.
180  /// Return NULL if a convertible instruction can't be found.
181  MachineInstr *findConvertibleCompare(MachineBasicBlock *MBB);
182 
183  /// Return true if all non-terminator instructions in MBB can be safely
184  /// speculated.
185  bool canSpeculateInstrs(MachineBasicBlock *MBB, const MachineInstr *CmpMI);
186 
187 public:
188  /// runOnMachineFunction - Initialize per-function data structures.
189  void runOnMachineFunction(MachineFunction &MF) {
190  this->MF = &MF;
191  TII = MF.getSubtarget().getInstrInfo();
192  TRI = MF.getSubtarget().getRegisterInfo();
193  MRI = &MF.getRegInfo();
194  }
195 
196  /// If the sub-CFG headed by MBB can be cmp-converted, initialize the
197  /// internal state, and return true.
198  bool canConvert(MachineBasicBlock *MBB);
199 
200  /// Cmo-convert the last block passed to canConvertCmp(), assuming
201  /// it is possible. Add any erased blocks to RemovedBlocks.
202  void convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks);
203 
204  /// Return the expected code size delta if the conversion into a
205  /// conditional compare is performed.
206  int expectedCodeSizeDelta() const;
207 };
208 } // end anonymous namespace
209 
210 // Check that all PHIs in Tail are selecting the same value from Head and CmpBB.
211 // This means that no if-conversion is required when merging CmpBB into Head.
212 bool SSACCmpConv::trivialTailPHIs() {
213  for (auto &I : *Tail) {
214  if (!I.isPHI())
215  break;
216  unsigned HeadReg = 0, CmpBBReg = 0;
217  // PHI operands come in (VReg, MBB) pairs.
218  for (unsigned oi = 1, oe = I.getNumOperands(); oi != oe; oi += 2) {
219  MachineBasicBlock *MBB = I.getOperand(oi + 1).getMBB();
220  unsigned Reg = I.getOperand(oi).getReg();
221  if (MBB == Head) {
222  assert((!HeadReg || HeadReg == Reg) && "Inconsistent PHI operands");
223  HeadReg = Reg;
224  }
225  if (MBB == CmpBB) {
226  assert((!CmpBBReg || CmpBBReg == Reg) && "Inconsistent PHI operands");
227  CmpBBReg = Reg;
228  }
229  }
230  if (HeadReg != CmpBBReg)
231  return false;
232  }
233  return true;
234 }
235 
236 // Assuming that trivialTailPHIs() is true, update the Tail PHIs by simply
237 // removing the CmpBB operands. The Head operands will be identical.
238 void SSACCmpConv::updateTailPHIs() {
239  for (auto &I : *Tail) {
240  if (!I.isPHI())
241  break;
242  // I is a PHI. It can have multiple entries for CmpBB.
243  for (unsigned oi = I.getNumOperands(); oi > 2; oi -= 2) {
244  // PHI operands are (Reg, MBB) at (oi-2, oi-1).
245  if (I.getOperand(oi - 1).getMBB() == CmpBB) {
246  I.RemoveOperand(oi - 1);
247  I.RemoveOperand(oi - 2);
248  }
249  }
250  }
251 }
252 
253 // This pass runs before the AArch64DeadRegisterDefinitions pass, so compares
254 // are still writing virtual registers without any uses.
255 bool SSACCmpConv::isDeadDef(unsigned DstReg) {
256  // Writes to the zero register are dead.
257  if (DstReg == AArch64::WZR || DstReg == AArch64::XZR)
258  return true;
260  return false;
261  // A virtual register def without any uses will be marked dead later, and
262  // eventually replaced by the zero register.
263  return MRI->use_nodbg_empty(DstReg);
264 }
265 
266 // Parse a condition code returned by AnalyzeBranch, and compute the CondCode
267 // corresponding to TBB.
268 // Return
270  // A normal br.cond simply has the condition code.
271  if (Cond[0].getImm() != -1) {
272  assert(Cond.size() == 1 && "Unknown Cond array format");
273  CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
274  return true;
275  }
276  // For tbz and cbz instruction, the opcode is next.
277  switch (Cond[1].getImm()) {
278  default:
279  // This includes tbz / tbnz branches which can't be converted to
280  // ccmp + br.cond.
281  return false;
282  case AArch64::CBZW:
283  case AArch64::CBZX:
284  assert(Cond.size() == 3 && "Unknown Cond array format");
285  CC = AArch64CC::EQ;
286  return true;
287  case AArch64::CBNZW:
288  case AArch64::CBNZX:
289  assert(Cond.size() == 3 && "Unknown Cond array format");
290  CC = AArch64CC::NE;
291  return true;
292  }
293 }
294 
295 MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) {
297  if (I == MBB->end())
298  return nullptr;
299  // The terminator must be controlled by the flags.
300  if (!I->readsRegister(AArch64::NZCV)) {
301  switch (I->getOpcode()) {
302  case AArch64::CBZW:
303  case AArch64::CBZX:
304  case AArch64::CBNZW:
305  case AArch64::CBNZX:
306  // These can be converted into a ccmp against #0.
307  return &*I;
308  }
309  ++NumCmpTermRejs;
310  DEBUG(dbgs() << "Flags not used by terminator: " << *I);
311  return nullptr;
312  }
313 
314  // Now find the instruction controlling the terminator.
315  for (MachineBasicBlock::iterator B = MBB->begin(); I != B;) {
316  --I;
317  assert(!I->isTerminator() && "Spurious terminator");
318  switch (I->getOpcode()) {
319  // cmp is an alias for subs with a dead destination register.
320  case AArch64::SUBSWri:
321  case AArch64::SUBSXri:
322  // cmn is an alias for adds with a dead destination register.
323  case AArch64::ADDSWri:
324  case AArch64::ADDSXri:
325  // Check that the immediate operand is within range, ccmp wants a uimm5.
326  // Rd = SUBSri Rn, imm, shift
327  if (I->getOperand(3).getImm() || !isUInt<5>(I->getOperand(2).getImm())) {
328  DEBUG(dbgs() << "Immediate out of range for ccmp: " << *I);
329  ++NumImmRangeRejs;
330  return nullptr;
331  }
333  case AArch64::SUBSWrr:
334  case AArch64::SUBSXrr:
335  case AArch64::ADDSWrr:
336  case AArch64::ADDSXrr:
337  if (isDeadDef(I->getOperand(0).getReg()))
338  return &*I;
339  DEBUG(dbgs() << "Can't convert compare with live destination: " << *I);
340  ++NumLiveDstRejs;
341  return nullptr;
342  case AArch64::FCMPSrr:
343  case AArch64::FCMPDrr:
344  case AArch64::FCMPESrr:
345  case AArch64::FCMPEDrr:
346  return &*I;
347  }
348 
349  // Check for flag reads and clobbers.
350  MIOperands::PhysRegInfo PRI =
351  MIOperands(*I).analyzePhysReg(AArch64::NZCV, TRI);
352 
353  if (PRI.Read) {
354  // The ccmp doesn't produce exactly the same flags as the original
355  // compare, so reject the transform if there are uses of the flags
356  // besides the terminators.
357  DEBUG(dbgs() << "Can't create ccmp with multiple uses: " << *I);
358  ++NumMultNZCVUses;
359  return nullptr;
360  }
361 
362  if (PRI.Defined || PRI.Clobbered) {
363  DEBUG(dbgs() << "Not convertible compare: " << *I);
364  ++NumUnknNZCVDefs;
365  return nullptr;
366  }
367  }
368  DEBUG(dbgs() << "Flags not defined in BB#" << MBB->getNumber() << '\n');
369  return nullptr;
370 }
371 
372 /// Determine if all the instructions in MBB can safely
373 /// be speculated. The terminators are not considered.
374 ///
375 /// Only CmpMI is allowed to clobber the flags.
376 ///
377 bool SSACCmpConv::canSpeculateInstrs(MachineBasicBlock *MBB,
378  const MachineInstr *CmpMI) {
379  // Reject any live-in physregs. It's probably NZCV/EFLAGS, and very hard to
380  // get right.
381  if (!MBB->livein_empty()) {
382  DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has live-ins.\n");
383  return false;
384  }
385 
386  unsigned InstrCount = 0;
387 
388  // Check all instructions, except the terminators. It is assumed that
389  // terminators never have side effects or define any used register values.
390  for (auto &I : make_range(MBB->begin(), MBB->getFirstTerminator())) {
391  if (I.isDebugValue())
392  continue;
393 
394  if (++InstrCount > BlockInstrLimit && !Stress) {
395  DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has more than "
396  << BlockInstrLimit << " instructions.\n");
397  return false;
398  }
399 
400  // There shouldn't normally be any phis in a single-predecessor block.
401  if (I.isPHI()) {
402  DEBUG(dbgs() << "Can't hoist: " << I);
403  return false;
404  }
405 
406  // Don't speculate loads. Note that it may be possible and desirable to
407  // speculate GOT or constant pool loads that are guaranteed not to trap,
408  // but we don't support that for now.
409  if (I.mayLoad()) {
410  DEBUG(dbgs() << "Won't speculate load: " << I);
411  return false;
412  }
413 
414  // We never speculate stores, so an AA pointer isn't necessary.
415  bool DontMoveAcrossStore = true;
416  if (!I.isSafeToMove(nullptr, DontMoveAcrossStore)) {
417  DEBUG(dbgs() << "Can't speculate: " << I);
418  return false;
419  }
420 
421  // Only CmpMI is allowed to clobber the flags.
422  if (&I != CmpMI && I.modifiesRegister(AArch64::NZCV, TRI)) {
423  DEBUG(dbgs() << "Clobbers flags: " << I);
424  return false;
425  }
426  }
427  return true;
428 }
429 
430 /// Analyze the sub-cfg rooted in MBB, and return true if it is a potential
431 /// candidate for cmp-conversion. Fill out the internal state.
432 ///
433 bool SSACCmpConv::canConvert(MachineBasicBlock *MBB) {
434  Head = MBB;
435  Tail = CmpBB = nullptr;
436 
437  if (Head->succ_size() != 2)
438  return false;
439  MachineBasicBlock *Succ0 = Head->succ_begin()[0];
440  MachineBasicBlock *Succ1 = Head->succ_begin()[1];
441 
442  // CmpBB can only have a single predecessor. Tail is allowed many.
443  if (Succ0->pred_size() != 1)
444  std::swap(Succ0, Succ1);
445 
446  // Succ0 is our candidate for CmpBB.
447  if (Succ0->pred_size() != 1 || Succ0->succ_size() != 2)
448  return false;
449 
450  CmpBB = Succ0;
451  Tail = Succ1;
452 
453  if (!CmpBB->isSuccessor(Tail))
454  return false;
455 
456  // The CFG topology checks out.
457  DEBUG(dbgs() << "\nTriangle: BB#" << Head->getNumber() << " -> BB#"
458  << CmpBB->getNumber() << " -> BB#" << Tail->getNumber() << '\n');
459  ++NumConsidered;
460 
461  // Tail is allowed to have many predecessors, but we can't handle PHIs yet.
462  //
463  // FIXME: Real PHIs could be if-converted as long as the CmpBB values are
464  // defined before The CmpBB cmp clobbers the flags. Alternatively, it should
465  // always be safe to sink the ccmp down to immediately before the CmpBB
466  // terminators.
467  if (!trivialTailPHIs()) {
468  DEBUG(dbgs() << "Can't handle phis in Tail.\n");
469  ++NumPhiRejs;
470  return false;
471  }
472 
473  if (!Tail->livein_empty()) {
474  DEBUG(dbgs() << "Can't handle live-in physregs in Tail.\n");
475  ++NumPhysRejs;
476  return false;
477  }
478 
479  // CmpBB should never have PHIs since Head is its only predecessor.
480  // FIXME: Clean them up if it happens.
481  if (!CmpBB->empty() && CmpBB->front().isPHI()) {
482  DEBUG(dbgs() << "Can't handle phis in CmpBB.\n");
483  ++NumPhi2Rejs;
484  return false;
485  }
486 
487  if (!CmpBB->livein_empty()) {
488  DEBUG(dbgs() << "Can't handle live-in physregs in CmpBB.\n");
489  ++NumPhysRejs;
490  return false;
491  }
492 
493  // The branch we're looking to eliminate must be analyzable.
494  HeadCond.clear();
495  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
496  if (TII->analyzeBranch(*Head, TBB, FBB, HeadCond)) {
497  DEBUG(dbgs() << "Head branch not analyzable.\n");
498  ++NumHeadBranchRejs;
499  return false;
500  }
501 
502  // This is weird, probably some sort of degenerate CFG, or an edge to a
503  // landing pad.
504  if (!TBB || HeadCond.empty()) {
505  DEBUG(dbgs() << "AnalyzeBranch didn't find conditional branch in Head.\n");
506  ++NumHeadBranchRejs;
507  return false;
508  }
509 
510  if (!parseCond(HeadCond, HeadCmpBBCC)) {
511  DEBUG(dbgs() << "Unsupported branch type on Head\n");
512  ++NumHeadBranchRejs;
513  return false;
514  }
515 
516  // Make sure the branch direction is right.
517  if (TBB != CmpBB) {
518  assert(TBB == Tail && "Unexpected TBB");
519  HeadCmpBBCC = AArch64CC::getInvertedCondCode(HeadCmpBBCC);
520  }
521 
522  CmpBBCond.clear();
523  TBB = FBB = nullptr;
524  if (TII->analyzeBranch(*CmpBB, TBB, FBB, CmpBBCond)) {
525  DEBUG(dbgs() << "CmpBB branch not analyzable.\n");
526  ++NumCmpBranchRejs;
527  return false;
528  }
529 
530  if (!TBB || CmpBBCond.empty()) {
531  DEBUG(dbgs() << "AnalyzeBranch didn't find conditional branch in CmpBB.\n");
532  ++NumCmpBranchRejs;
533  return false;
534  }
535 
536  if (!parseCond(CmpBBCond, CmpBBTailCC)) {
537  DEBUG(dbgs() << "Unsupported branch type on CmpBB\n");
538  ++NumCmpBranchRejs;
539  return false;
540  }
541 
542  if (TBB != Tail)
543  CmpBBTailCC = AArch64CC::getInvertedCondCode(CmpBBTailCC);
544 
545  DEBUG(dbgs() << "Head->CmpBB on " << AArch64CC::getCondCodeName(HeadCmpBBCC)
546  << ", CmpBB->Tail on " << AArch64CC::getCondCodeName(CmpBBTailCC)
547  << '\n');
548 
549  CmpMI = findConvertibleCompare(CmpBB);
550  if (!CmpMI)
551  return false;
552 
553  if (!canSpeculateInstrs(CmpBB, CmpMI)) {
554  ++NumSpeculateRejs;
555  return false;
556  }
557  return true;
558 }
559 
560 void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) {
561  DEBUG(dbgs() << "Merging BB#" << CmpBB->getNumber() << " into BB#"
562  << Head->getNumber() << ":\n" << *CmpBB);
563 
564  // All CmpBB instructions are moved into Head, and CmpBB is deleted.
565  // Update the CFG first.
566  updateTailPHIs();
567  Head->removeSuccessor(CmpBB, true);
568  CmpBB->removeSuccessor(Tail, true);
569  Head->transferSuccessorsAndUpdatePHIs(CmpBB);
570  DebugLoc TermDL = Head->getFirstTerminator()->getDebugLoc();
571  TII->removeBranch(*Head);
572 
573  // If the Head terminator was one of the cbz / tbz branches with built-in
574  // compare, we need to insert an explicit compare instruction in its place.
575  if (HeadCond[0].getImm() == -1) {
576  ++NumCompBranches;
577  unsigned Opc = 0;
578  switch (HeadCond[1].getImm()) {
579  case AArch64::CBZW:
580  case AArch64::CBNZW:
581  Opc = AArch64::SUBSWri;
582  break;
583  case AArch64::CBZX:
584  case AArch64::CBNZX:
585  Opc = AArch64::SUBSXri;
586  break;
587  default:
588  llvm_unreachable("Cannot convert Head branch");
589  }
590  const MCInstrDesc &MCID = TII->get(Opc);
591  // Create a dummy virtual register for the SUBS def.
592  unsigned DestReg =
593  MRI->createVirtualRegister(TII->getRegClass(MCID, 0, TRI, *MF));
594  // Insert a SUBS Rn, #0 instruction instead of the cbz / cbnz.
595  BuildMI(*Head, Head->end(), TermDL, MCID)
596  .addReg(DestReg, RegState::Define | RegState::Dead)
597  .addOperand(HeadCond[2])
598  .addImm(0)
599  .addImm(0);
600  // SUBS uses the GPR*sp register classes.
601  MRI->constrainRegClass(HeadCond[2].getReg(),
602  TII->getRegClass(MCID, 1, TRI, *MF));
603  }
604 
605  Head->splice(Head->end(), CmpBB, CmpBB->begin(), CmpBB->end());
606 
607  // Now replace CmpMI with a ccmp instruction that also considers the incoming
608  // flags.
609  unsigned Opc = 0;
610  unsigned FirstOp = 1; // First CmpMI operand to copy.
611  bool isZBranch = false; // CmpMI is a cbz/cbnz instruction.
612  switch (CmpMI->getOpcode()) {
613  default:
614  llvm_unreachable("Unknown compare opcode");
615  case AArch64::SUBSWri: Opc = AArch64::CCMPWi; break;
616  case AArch64::SUBSWrr: Opc = AArch64::CCMPWr; break;
617  case AArch64::SUBSXri: Opc = AArch64::CCMPXi; break;
618  case AArch64::SUBSXrr: Opc = AArch64::CCMPXr; break;
619  case AArch64::ADDSWri: Opc = AArch64::CCMNWi; break;
620  case AArch64::ADDSWrr: Opc = AArch64::CCMNWr; break;
621  case AArch64::ADDSXri: Opc = AArch64::CCMNXi; break;
622  case AArch64::ADDSXrr: Opc = AArch64::CCMNXr; break;
623  case AArch64::FCMPSrr: Opc = AArch64::FCCMPSrr; FirstOp = 0; break;
624  case AArch64::FCMPDrr: Opc = AArch64::FCCMPDrr; FirstOp = 0; break;
625  case AArch64::FCMPESrr: Opc = AArch64::FCCMPESrr; FirstOp = 0; break;
626  case AArch64::FCMPEDrr: Opc = AArch64::FCCMPEDrr; FirstOp = 0; break;
627  case AArch64::CBZW:
628  case AArch64::CBNZW:
629  Opc = AArch64::CCMPWi;
630  FirstOp = 0;
631  isZBranch = true;
632  break;
633  case AArch64::CBZX:
634  case AArch64::CBNZX:
635  Opc = AArch64::CCMPXi;
636  FirstOp = 0;
637  isZBranch = true;
638  break;
639  }
640 
641  // The ccmp instruction should set the flags according to the comparison when
642  // Head would have branched to CmpBB.
643  // The NZCV immediate operand should provide flags for the case where Head
644  // would have branched to Tail. These flags should cause the new Head
645  // terminator to branch to tail.
646  unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(CmpBBTailCC);
647  const MCInstrDesc &MCID = TII->get(Opc);
648  MRI->constrainRegClass(CmpMI->getOperand(FirstOp).getReg(),
649  TII->getRegClass(MCID, 0, TRI, *MF));
650  if (CmpMI->getOperand(FirstOp + 1).isReg())
651  MRI->constrainRegClass(CmpMI->getOperand(FirstOp + 1).getReg(),
652  TII->getRegClass(MCID, 1, TRI, *MF));
653  MachineInstrBuilder MIB =
654  BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), MCID)
655  .addOperand(CmpMI->getOperand(FirstOp)); // Register Rn
656  if (isZBranch)
657  MIB.addImm(0); // cbz/cbnz Rn -> ccmp Rn, #0
658  else
659  MIB.addOperand(CmpMI->getOperand(FirstOp + 1)); // Register Rm / Immediate
660  MIB.addImm(NZCV).addImm(HeadCmpBBCC);
661 
662  // If CmpMI was a terminator, we need a new conditional branch to replace it.
663  // This now becomes a Head terminator.
664  if (isZBranch) {
665  bool isNZ = CmpMI->getOpcode() == AArch64::CBNZW ||
666  CmpMI->getOpcode() == AArch64::CBNZX;
667  BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), TII->get(AArch64::Bcc))
668  .addImm(isNZ ? AArch64CC::NE : AArch64CC::EQ)
669  .addOperand(CmpMI->getOperand(1)); // Branch target.
670  }
671  CmpMI->eraseFromParent();
672  Head->updateTerminator();
673 
674  RemovedBlocks.push_back(CmpBB);
675  CmpBB->eraseFromParent();
676  DEBUG(dbgs() << "Result:\n" << *Head);
677  ++NumConverted;
678 }
679 
680 int SSACCmpConv::expectedCodeSizeDelta() const {
681  int delta = 0;
682  // If the Head terminator was one of the cbz / tbz branches with built-in
683  // compare, we need to insert an explicit compare instruction in its place
684  // plus a branch instruction.
685  if (HeadCond[0].getImm() == -1) {
686  switch (HeadCond[1].getImm()) {
687  case AArch64::CBZW:
688  case AArch64::CBNZW:
689  case AArch64::CBZX:
690  case AArch64::CBNZX:
691  // Therefore delta += 1
692  delta = 1;
693  break;
694  default:
695  llvm_unreachable("Cannot convert Head branch");
696  }
697  }
698  // If the Cmp terminator was one of the cbz / tbz branches with
699  // built-in compare, it will be turned into a compare instruction
700  // into Head, but we do not save any instruction.
701  // Otherwise, we save the branch instruction.
702  switch (CmpMI->getOpcode()) {
703  default:
704  --delta;
705  break;
706  case AArch64::CBZW:
707  case AArch64::CBNZW:
708  case AArch64::CBZX:
709  case AArch64::CBNZX:
710  break;
711  }
712  return delta;
713 }
714 
715 //===----------------------------------------------------------------------===//
716 // AArch64ConditionalCompares Pass
717 //===----------------------------------------------------------------------===//
718 
719 namespace {
720 class AArch64ConditionalCompares : public MachineFunctionPass {
721  const TargetInstrInfo *TII;
722  const TargetRegisterInfo *TRI;
723  MCSchedModel SchedModel;
724  // Does the proceeded function has Oz attribute.
725  bool MinSize;
727  MachineDominatorTree *DomTree;
729  MachineTraceMetrics *Traces;
731  SSACCmpConv CmpConv;
732 
733 public:
734  static char ID;
735  AArch64ConditionalCompares() : MachineFunctionPass(ID) {
737  }
738  void getAnalysisUsage(AnalysisUsage &AU) const override;
739  bool runOnMachineFunction(MachineFunction &MF) override;
740  StringRef getPassName() const override {
741  return "AArch64 Conditional Compares";
742  }
743 
744 private:
745  bool tryConvert(MachineBasicBlock *);
746  void updateDomTree(ArrayRef<MachineBasicBlock *> Removed);
747  void updateLoops(ArrayRef<MachineBasicBlock *> Removed);
748  void invalidateTraces();
749  bool shouldConvert();
750 };
751 } // end anonymous namespace
752 
754 
755 INITIALIZE_PASS_BEGIN(AArch64ConditionalCompares, "aarch64-ccmp",
756  "AArch64 CCMP Pass", false, false)
759 INITIALIZE_PASS_END(AArch64ConditionalCompares, "aarch64-ccmp",
760  "AArch64 CCMP Pass", false, false)
761 
763  return new AArch64ConditionalCompares();
764 }
765 
766 void AArch64ConditionalCompares::getAnalysisUsage(AnalysisUsage &AU) const {
774 }
775 
776 /// Update the dominator tree after if-conversion erased some blocks.
777 void AArch64ConditionalCompares::updateDomTree(
779  // convert() removes CmpBB which was previously dominated by Head.
780  // CmpBB children should be transferred to Head.
781  MachineDomTreeNode *HeadNode = DomTree->getNode(CmpConv.Head);
782  for (MachineBasicBlock *RemovedMBB : Removed) {
783  MachineDomTreeNode *Node = DomTree->getNode(RemovedMBB);
784  assert(Node != HeadNode && "Cannot erase the head node");
785  assert(Node->getIDom() == HeadNode && "CmpBB should be dominated by Head");
786  while (Node->getNumChildren())
787  DomTree->changeImmediateDominator(Node->getChildren().back(), HeadNode);
788  DomTree->eraseNode(RemovedMBB);
789  }
790 }
791 
792 /// Update LoopInfo after if-conversion.
793 void
794 AArch64ConditionalCompares::updateLoops(ArrayRef<MachineBasicBlock *> Removed) {
795  if (!Loops)
796  return;
797  for (MachineBasicBlock *RemovedMBB : Removed)
798  Loops->removeBlock(RemovedMBB);
799 }
800 
801 /// Invalidate MachineTraceMetrics before if-conversion.
802 void AArch64ConditionalCompares::invalidateTraces() {
803  Traces->invalidate(CmpConv.Head);
804  Traces->invalidate(CmpConv.CmpBB);
805 }
806 
807 /// Apply cost model and heuristics to the if-conversion in IfConv.
808 /// Return true if the conversion is a good idea.
809 ///
811  // Stress testing mode disables all cost considerations.
812  if (Stress)
813  return true;
814  if (!MinInstr)
815  MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
816 
817  // Head dominates CmpBB, so it is always included in its trace.
818  MachineTraceMetrics::Trace Trace = MinInstr->getTrace(CmpConv.CmpBB);
819 
820  // If code size is the main concern
821  if (MinSize) {
822  int CodeSizeDelta = CmpConv.expectedCodeSizeDelta();
823  DEBUG(dbgs() << "Code size delta: " << CodeSizeDelta << '\n');
824  // If we are minimizing the code size, do the conversion whatever
825  // the cost is.
826  if (CodeSizeDelta < 0)
827  return true;
828  if (CodeSizeDelta > 0) {
829  DEBUG(dbgs() << "Code size is increasing, give up on this one.\n");
830  return false;
831  }
832  // CodeSizeDelta == 0, continue with the regular heuristics
833  }
834 
835  // Heuristic: The compare conversion delays the execution of the branch
836  // instruction because we must wait for the inputs to the second compare as
837  // well. The branch has no dependent instructions, but delaying it increases
838  // the cost of a misprediction.
839  //
840  // Set a limit on the delay we will accept.
841  unsigned DelayLimit = SchedModel.MispredictPenalty * 3 / 4;
842 
843  // Instruction depths can be computed for all trace instructions above CmpBB.
844  unsigned HeadDepth =
845  Trace.getInstrCycles(*CmpConv.Head->getFirstTerminator()).Depth;
846  unsigned CmpBBDepth =
847  Trace.getInstrCycles(*CmpConv.CmpBB->getFirstTerminator()).Depth;
848  DEBUG(dbgs() << "Head depth: " << HeadDepth
849  << "\nCmpBB depth: " << CmpBBDepth << '\n');
850  if (CmpBBDepth > HeadDepth + DelayLimit) {
851  DEBUG(dbgs() << "Branch delay would be larger than " << DelayLimit
852  << " cycles.\n");
853  return false;
854  }
855 
856  // Check the resource depth at the bottom of CmpBB - these instructions will
857  // be speculated.
858  unsigned ResDepth = Trace.getResourceDepth(true);
859  DEBUG(dbgs() << "Resources: " << ResDepth << '\n');
860 
861  // Heuristic: The speculatively executed instructions must all be able to
862  // merge into the Head block. The Head critical path should dominate the
863  // resource cost of the speculated instructions.
864  if (ResDepth > HeadDepth) {
865  DEBUG(dbgs() << "Too many instructions to speculate.\n");
866  return false;
867  }
868  return true;
869 }
870 
871 bool AArch64ConditionalCompares::tryConvert(MachineBasicBlock *MBB) {
872  bool Changed = false;
873  while (CmpConv.canConvert(MBB) && shouldConvert()) {
874  invalidateTraces();
876  CmpConv.convert(RemovedBlocks);
877  Changed = true;
878  updateDomTree(RemovedBlocks);
879  updateLoops(RemovedBlocks);
880  }
881  return Changed;
882 }
883 
884 bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) {
885  DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n"
886  << "********** Function: " << MF.getName() << '\n');
887  if (skipFunction(*MF.getFunction()))
888  return false;
889 
890  TII = MF.getSubtarget().getInstrInfo();
891  TRI = MF.getSubtarget().getRegisterInfo();
892  SchedModel = MF.getSubtarget().getSchedModel();
893  MRI = &MF.getRegInfo();
894  DomTree = &getAnalysis<MachineDominatorTree>();
895  Loops = getAnalysisIfAvailable<MachineLoopInfo>();
896  Traces = &getAnalysis<MachineTraceMetrics>();
897  MinInstr = nullptr;
898  MinSize = MF.getFunction()->optForMinSize();
899 
900  bool Changed = false;
901  CmpConv.runOnMachineFunction(MF);
902 
903  // Visit blocks in dominator tree pre-order. The pre-order enables multiple
904  // cmp-conversions from the same head block.
905  // Note that updateDomTree() modifies the children of the DomTree node
906  // currently being visited. The df_iterator supports that; it doesn't look at
907  // child_begin() / child_end() until after a node has been visited.
908  for (auto *I : depth_first(DomTree))
909  if (tryConvert(I->getBlock()))
910  Changed = true;
911 
912  return Changed;
913 }
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:81
unsigned succ_size() const
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
const std::vector< DomTreeNodeBase< NodeT > * > & getChildren() const
STATISTIC(NumFunctions,"Total number of functions")
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
aarch64 AArch64 CCMP false
static CondCode getInvertedCondCode(CondCode Code)
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:163
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
INITIALIZE_PASS_BEGIN(AArch64ConditionalCompares,"aarch64-ccmp","AArch64 CCMP Pass", false, false) INITIALIZE_PASS_END(AArch64ConditionalCompares
A debug info location.
Definition: DebugLoc.h:34
A trace ensemble is a collection of traces selected using the same strategy, for example 'minimum res...
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
FunctionPass * createAArch64ConditionalCompares()
static unsigned InstrCount
bool optForMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:461
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
static cl::opt< unsigned > BlockInstrLimit("aarch64-ccmp-limit", cl::init(30), cl::Hidden, cl::desc("Maximum number of instructions per speculated block."))
DomTreeNodeBase< NodeT > * getIDom() const
InstrCycles getInstrCycles(const MachineInstr &MI) const
Return the depth and height of MI.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e...
AnalysisUsage & addRequired()
static cl::opt< bool > Stress("aarch64-stress-ccmp", cl::Hidden, cl::desc("Turn all knobs to 11"))
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:53
Hexagon Hardware Loops
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:32
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
Reg
All possible values of the reg field in the ModR/M byte.
static bool shouldConvert(Constant &C, AArch64PromoteConstant::PromotionCacheTy &PromotionCache)
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
static const char * getCondCodeName(CondCode Code)
PhysRegInfo analyzePhysReg(unsigned Reg, const TargetRegisterInfo *TRI)
analyzePhysReg - Analyze how the current instruction or bundle uses a physical register.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Select the trace through a block that has the fewest instructions.
MachineBasicBlock * MBB
Base class for the actual dominator tree node.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
static GCRegistry::Add< OcamlGC > B("ocaml","ocaml 3.10-compatible GC")
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:141
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:273
TargetInstrInfo - Interface to description of machine instruction set.
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:395
unsigned const MachineRegisterInfo * MRI
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:279
Represent the analysis usage information of a pass.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE,"Assign register bank of generic virtual registers", false, false) RegBankSelect
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
A trace represents a plausible sequence of executed basic blocks that passes through the current basi...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:843
static bool parseCond(ArrayRef< MachineOperand > Cond, AArch64CC::CondCode &CC)
MIOperands - Iterate over operands of a single instruction.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:586
static unsigned getNZCVToSatisfyCondCode(CondCode Code)
Given a condition code, return NZCV flags that would satisfy that condition.
static unsigned getReg(const void *D, unsigned RC, unsigned RegNo)
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
Remove the branching code at the end of the specific MBB.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:250
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
Representation of each machine instruction.
Definition: MachineInstr.h:52
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
#define I(x, y, z)
Definition: MD5.cpp:54
void initializeAArch64ConditionalComparesPass(PassRegistry &)
iterator_range< df_iterator< T > > depth_first(const T &G)
unsigned getReg() const
getReg - Returns the register number.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
virtual const TargetInstrInfo * getInstrInfo() const
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:239
unsigned getResourceDepth(bool Bottom) const
Return the resource depth of the top/bottom of the trace center block.
const MachineInstrBuilder & addOperand(const MachineOperand &MO) const
#define DEBUG(X)
Definition: Debug.h:100
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:47
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Machine model for scheduling, bundling, and heuristics.
Definition: MCSchedule.h:136
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
unsigned pred_size() const
size_t getNumChildren() const