LLVM  6.0.0svn
AArch64ConditionalCompares.cpp
Go to the documentation of this file.
1 //===-- AArch64ConditionalCompares.cpp --- CCMP formation for AArch64 -----===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the AArch64ConditionalCompares pass which reduces
11 // branching and code size by using the conditional compare instructions CCMP,
12 // CCMN, and FCMP.
13 //
14 // The CFG transformations for forming conditional compares are very similar to
15 // if-conversion, and this pass should run immediately before the early
16 // if-conversion pass.
17 //
18 //===----------------------------------------------------------------------===//
19 
20 #include "AArch64.h"
22 #include "llvm/ADT/SetVector.h"
23 #include "llvm/ADT/SmallPtrSet.h"
24 #include "llvm/ADT/Statistic.h"
33 #include "llvm/CodeGen/Passes.h"
38 #include "llvm/Support/Debug.h"
40 
41 using namespace llvm;
42 
43 #define DEBUG_TYPE "aarch64-ccmp"
44 
45 // Absolute maximum number of instructions allowed per speculated block.
46 // This bypasses all other heuristics, so it should be set fairly high.
48  "aarch64-ccmp-limit", cl::init(30), cl::Hidden,
49  cl::desc("Maximum number of instructions per speculated block."));
50 
51 // Stress testing mode - disable heuristics.
52 static cl::opt<bool> Stress("aarch64-stress-ccmp", cl::Hidden,
53  cl::desc("Turn all knobs to 11"));
54 
55 STATISTIC(NumConsidered, "Number of ccmps considered");
56 STATISTIC(NumPhiRejs, "Number of ccmps rejected (PHI)");
57 STATISTIC(NumPhysRejs, "Number of ccmps rejected (Physregs)");
58 STATISTIC(NumPhi2Rejs, "Number of ccmps rejected (PHI2)");
59 STATISTIC(NumHeadBranchRejs, "Number of ccmps rejected (Head branch)");
60 STATISTIC(NumCmpBranchRejs, "Number of ccmps rejected (CmpBB branch)");
61 STATISTIC(NumCmpTermRejs, "Number of ccmps rejected (CmpBB is cbz...)");
62 STATISTIC(NumImmRangeRejs, "Number of ccmps rejected (Imm out of range)");
63 STATISTIC(NumLiveDstRejs, "Number of ccmps rejected (Cmp dest live)");
64 STATISTIC(NumMultNZCVUses, "Number of ccmps rejected (NZCV used)");
65 STATISTIC(NumUnknNZCVDefs, "Number of ccmps rejected (NZCV def unknown)");
66 
67 STATISTIC(NumSpeculateRejs, "Number of ccmps rejected (Can't speculate)");
68 
69 STATISTIC(NumConverted, "Number of ccmp instructions created");
70 STATISTIC(NumCompBranches, "Number of cbz/cbnz branches converted");
71 
72 //===----------------------------------------------------------------------===//
73 // SSACCmpConv
74 //===----------------------------------------------------------------------===//
75 //
76 // The SSACCmpConv class performs ccmp-conversion on SSA form machine code
77 // after determining if it is possible. The class contains no heuristics;
78 // external code should be used to determine when ccmp-conversion is a good
79 // idea.
80 //
81 // CCmp-formation works on a CFG representing chained conditions, typically
82 // from C's short-circuit || and && operators:
83 //
84 // From: Head To: Head
85 // / | CmpBB
86 // / | / |
87 // | CmpBB / |
88 // | / | Tail |
89 // | / | | |
90 // Tail | | |
91 // | | | |
92 // ... ... ... ...
93 //
94 // The Head block is terminated by a br.cond instruction, and the CmpBB block
95 // contains compare + br.cond. Tail must be a successor of both.
96 //
97 // The cmp-conversion turns the compare instruction in CmpBB into a conditional
98 // compare, and merges CmpBB into Head, speculatively executing its
99 // instructions. The AArch64 conditional compare instructions have an immediate
100 // operand that specifies the NZCV flag values when the condition is false and
101 // the compare isn't executed. This makes it possible to chain compares with
102 // different condition codes.
103 //
104 // Example:
105 //
106 // if (a == 5 || b == 17)
107 // foo();
108 //
109 // Head:
110 // cmp w0, #5
111 // b.eq Tail
112 // CmpBB:
113 // cmp w1, #17
114 // b.eq Tail
115 // ...
116 // Tail:
117 // bl _foo
118 //
119 // Becomes:
120 //
121 // Head:
122 // cmp w0, #5
123 // ccmp w1, #17, 4, ne ; 4 = nZcv
124 // b.eq Tail
125 // ...
126 // Tail:
127 // bl _foo
128 //
129 // The ccmp condition code is the one that would cause the Head terminator to
130 // branch to CmpBB.
131 //
132 // FIXME: It should also be possible to speculate a block on the critical edge
133 // between Head and Tail, just like if-converting a diamond.
134 //
135 // FIXME: Handle PHIs in Tail by turning them into selects (if-conversion).
136 
137 namespace {
138 class SSACCmpConv {
139  MachineFunction *MF;
140  const TargetInstrInfo *TII;
141  const TargetRegisterInfo *TRI;
143  const MachineBranchProbabilityInfo *MBPI;
144 
145 public:
146  /// The first block containing a conditional branch, dominating everything
147  /// else.
148  MachineBasicBlock *Head;
149 
150  /// The block containing cmp+br.cond with a successor shared with Head.
151  MachineBasicBlock *CmpBB;
152 
153  /// The common successor for Head and CmpBB.
154  MachineBasicBlock *Tail;
155 
156  /// The compare instruction in CmpBB that can be converted to a ccmp.
157  MachineInstr *CmpMI;
158 
159 private:
160  /// The branch condition in Head as determined by AnalyzeBranch.
162 
163  /// The condition code that makes Head branch to CmpBB.
164  AArch64CC::CondCode HeadCmpBBCC;
165 
166  /// The branch condition in CmpBB.
168 
169  /// The condition code that makes CmpBB branch to Tail.
170  AArch64CC::CondCode CmpBBTailCC;
171 
172  /// Check if the Tail PHIs are trivially convertible.
173  bool trivialTailPHIs();
174 
175  /// Remove CmpBB from the Tail PHIs.
176  void updateTailPHIs();
177 
178  /// Check if an operand defining DstReg is dead.
179  bool isDeadDef(unsigned DstReg);
180 
181  /// Find the compare instruction in MBB that controls the conditional branch.
182  /// Return NULL if a convertible instruction can't be found.
183  MachineInstr *findConvertibleCompare(MachineBasicBlock *MBB);
184 
185  /// Return true if all non-terminator instructions in MBB can be safely
186  /// speculated.
187  bool canSpeculateInstrs(MachineBasicBlock *MBB, const MachineInstr *CmpMI);
188 
189 public:
190  /// runOnMachineFunction - Initialize per-function data structures.
191  void runOnMachineFunction(MachineFunction &MF,
192  const MachineBranchProbabilityInfo *MBPI) {
193  this->MF = &MF;
194  this->MBPI = MBPI;
195  TII = MF.getSubtarget().getInstrInfo();
196  TRI = MF.getSubtarget().getRegisterInfo();
197  MRI = &MF.getRegInfo();
198  }
199 
200  /// If the sub-CFG headed by MBB can be cmp-converted, initialize the
201  /// internal state, and return true.
202  bool canConvert(MachineBasicBlock *MBB);
203 
204  /// Cmo-convert the last block passed to canConvertCmp(), assuming
205  /// it is possible. Add any erased blocks to RemovedBlocks.
206  void convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks);
207 
208  /// Return the expected code size delta if the conversion into a
209  /// conditional compare is performed.
210  int expectedCodeSizeDelta() const;
211 };
212 } // end anonymous namespace
213 
214 // Check that all PHIs in Tail are selecting the same value from Head and CmpBB.
215 // This means that no if-conversion is required when merging CmpBB into Head.
216 bool SSACCmpConv::trivialTailPHIs() {
217  for (auto &I : *Tail) {
218  if (!I.isPHI())
219  break;
220  unsigned HeadReg = 0, CmpBBReg = 0;
221  // PHI operands come in (VReg, MBB) pairs.
222  for (unsigned oi = 1, oe = I.getNumOperands(); oi != oe; oi += 2) {
223  MachineBasicBlock *MBB = I.getOperand(oi + 1).getMBB();
224  unsigned Reg = I.getOperand(oi).getReg();
225  if (MBB == Head) {
226  assert((!HeadReg || HeadReg == Reg) && "Inconsistent PHI operands");
227  HeadReg = Reg;
228  }
229  if (MBB == CmpBB) {
230  assert((!CmpBBReg || CmpBBReg == Reg) && "Inconsistent PHI operands");
231  CmpBBReg = Reg;
232  }
233  }
234  if (HeadReg != CmpBBReg)
235  return false;
236  }
237  return true;
238 }
239 
240 // Assuming that trivialTailPHIs() is true, update the Tail PHIs by simply
241 // removing the CmpBB operands. The Head operands will be identical.
242 void SSACCmpConv::updateTailPHIs() {
243  for (auto &I : *Tail) {
244  if (!I.isPHI())
245  break;
246  // I is a PHI. It can have multiple entries for CmpBB.
247  for (unsigned oi = I.getNumOperands(); oi > 2; oi -= 2) {
248  // PHI operands are (Reg, MBB) at (oi-2, oi-1).
249  if (I.getOperand(oi - 1).getMBB() == CmpBB) {
250  I.RemoveOperand(oi - 1);
251  I.RemoveOperand(oi - 2);
252  }
253  }
254  }
255 }
256 
257 // This pass runs before the AArch64DeadRegisterDefinitions pass, so compares
258 // are still writing virtual registers without any uses.
259 bool SSACCmpConv::isDeadDef(unsigned DstReg) {
260  // Writes to the zero register are dead.
261  if (DstReg == AArch64::WZR || DstReg == AArch64::XZR)
262  return true;
264  return false;
265  // A virtual register def without any uses will be marked dead later, and
266  // eventually replaced by the zero register.
267  return MRI->use_nodbg_empty(DstReg);
268 }
269 
270 // Parse a condition code returned by AnalyzeBranch, and compute the CondCode
271 // corresponding to TBB.
272 // Return
274  // A normal br.cond simply has the condition code.
275  if (Cond[0].getImm() != -1) {
276  assert(Cond.size() == 1 && "Unknown Cond array format");
277  CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
278  return true;
279  }
280  // For tbz and cbz instruction, the opcode is next.
281  switch (Cond[1].getImm()) {
282  default:
283  // This includes tbz / tbnz branches which can't be converted to
284  // ccmp + br.cond.
285  return false;
286  case AArch64::CBZW:
287  case AArch64::CBZX:
288  assert(Cond.size() == 3 && "Unknown Cond array format");
289  CC = AArch64CC::EQ;
290  return true;
291  case AArch64::CBNZW:
292  case AArch64::CBNZX:
293  assert(Cond.size() == 3 && "Unknown Cond array format");
294  CC = AArch64CC::NE;
295  return true;
296  }
297 }
298 
299 MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) {
301  if (I == MBB->end())
302  return nullptr;
303  // The terminator must be controlled by the flags.
304  if (!I->readsRegister(AArch64::NZCV)) {
305  switch (I->getOpcode()) {
306  case AArch64::CBZW:
307  case AArch64::CBZX:
308  case AArch64::CBNZW:
309  case AArch64::CBNZX:
310  // These can be converted into a ccmp against #0.
311  return &*I;
312  }
313  ++NumCmpTermRejs;
314  DEBUG(dbgs() << "Flags not used by terminator: " << *I);
315  return nullptr;
316  }
317 
318  // Now find the instruction controlling the terminator.
319  for (MachineBasicBlock::iterator B = MBB->begin(); I != B;) {
320  --I;
321  assert(!I->isTerminator() && "Spurious terminator");
322  switch (I->getOpcode()) {
323  // cmp is an alias for subs with a dead destination register.
324  case AArch64::SUBSWri:
325  case AArch64::SUBSXri:
326  // cmn is an alias for adds with a dead destination register.
327  case AArch64::ADDSWri:
328  case AArch64::ADDSXri:
329  // Check that the immediate operand is within range, ccmp wants a uimm5.
330  // Rd = SUBSri Rn, imm, shift
331  if (I->getOperand(3).getImm() || !isUInt<5>(I->getOperand(2).getImm())) {
332  DEBUG(dbgs() << "Immediate out of range for ccmp: " << *I);
333  ++NumImmRangeRejs;
334  return nullptr;
335  }
337  case AArch64::SUBSWrr:
338  case AArch64::SUBSXrr:
339  case AArch64::ADDSWrr:
340  case AArch64::ADDSXrr:
341  if (isDeadDef(I->getOperand(0).getReg()))
342  return &*I;
343  DEBUG(dbgs() << "Can't convert compare with live destination: " << *I);
344  ++NumLiveDstRejs;
345  return nullptr;
346  case AArch64::FCMPSrr:
347  case AArch64::FCMPDrr:
348  case AArch64::FCMPESrr:
349  case AArch64::FCMPEDrr:
350  return &*I;
351  }
352 
353  // Check for flag reads and clobbers.
354  MIOperands::PhysRegInfo PRI =
355  MIOperands(*I).analyzePhysReg(AArch64::NZCV, TRI);
356 
357  if (PRI.Read) {
358  // The ccmp doesn't produce exactly the same flags as the original
359  // compare, so reject the transform if there are uses of the flags
360  // besides the terminators.
361  DEBUG(dbgs() << "Can't create ccmp with multiple uses: " << *I);
362  ++NumMultNZCVUses;
363  return nullptr;
364  }
365 
366  if (PRI.Defined || PRI.Clobbered) {
367  DEBUG(dbgs() << "Not convertible compare: " << *I);
368  ++NumUnknNZCVDefs;
369  return nullptr;
370  }
371  }
372  DEBUG(dbgs() << "Flags not defined in BB#" << MBB->getNumber() << '\n');
373  return nullptr;
374 }
375 
376 /// Determine if all the instructions in MBB can safely
377 /// be speculated. The terminators are not considered.
378 ///
379 /// Only CmpMI is allowed to clobber the flags.
380 ///
381 bool SSACCmpConv::canSpeculateInstrs(MachineBasicBlock *MBB,
382  const MachineInstr *CmpMI) {
383  // Reject any live-in physregs. It's probably NZCV/EFLAGS, and very hard to
384  // get right.
385  if (!MBB->livein_empty()) {
386  DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has live-ins.\n");
387  return false;
388  }
389 
390  unsigned InstrCount = 0;
391 
392  // Check all instructions, except the terminators. It is assumed that
393  // terminators never have side effects or define any used register values.
394  for (auto &I : make_range(MBB->begin(), MBB->getFirstTerminator())) {
395  if (I.isDebugValue())
396  continue;
397 
398  if (++InstrCount > BlockInstrLimit && !Stress) {
399  DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has more than "
400  << BlockInstrLimit << " instructions.\n");
401  return false;
402  }
403 
404  // There shouldn't normally be any phis in a single-predecessor block.
405  if (I.isPHI()) {
406  DEBUG(dbgs() << "Can't hoist: " << I);
407  return false;
408  }
409 
410  // Don't speculate loads. Note that it may be possible and desirable to
411  // speculate GOT or constant pool loads that are guaranteed not to trap,
412  // but we don't support that for now.
413  if (I.mayLoad()) {
414  DEBUG(dbgs() << "Won't speculate load: " << I);
415  return false;
416  }
417 
418  // We never speculate stores, so an AA pointer isn't necessary.
419  bool DontMoveAcrossStore = true;
420  if (!I.isSafeToMove(nullptr, DontMoveAcrossStore)) {
421  DEBUG(dbgs() << "Can't speculate: " << I);
422  return false;
423  }
424 
425  // Only CmpMI is allowed to clobber the flags.
426  if (&I != CmpMI && I.modifiesRegister(AArch64::NZCV, TRI)) {
427  DEBUG(dbgs() << "Clobbers flags: " << I);
428  return false;
429  }
430  }
431  return true;
432 }
433 
434 /// Analyze the sub-cfg rooted in MBB, and return true if it is a potential
435 /// candidate for cmp-conversion. Fill out the internal state.
436 ///
437 bool SSACCmpConv::canConvert(MachineBasicBlock *MBB) {
438  Head = MBB;
439  Tail = CmpBB = nullptr;
440 
441  if (Head->succ_size() != 2)
442  return false;
443  MachineBasicBlock *Succ0 = Head->succ_begin()[0];
444  MachineBasicBlock *Succ1 = Head->succ_begin()[1];
445 
446  // CmpBB can only have a single predecessor. Tail is allowed many.
447  if (Succ0->pred_size() != 1)
448  std::swap(Succ0, Succ1);
449 
450  // Succ0 is our candidate for CmpBB.
451  if (Succ0->pred_size() != 1 || Succ0->succ_size() != 2)
452  return false;
453 
454  CmpBB = Succ0;
455  Tail = Succ1;
456 
457  if (!CmpBB->isSuccessor(Tail))
458  return false;
459 
460  // The CFG topology checks out.
461  DEBUG(dbgs() << "\nTriangle: BB#" << Head->getNumber() << " -> BB#"
462  << CmpBB->getNumber() << " -> BB#" << Tail->getNumber() << '\n');
463  ++NumConsidered;
464 
465  // Tail is allowed to have many predecessors, but we can't handle PHIs yet.
466  //
467  // FIXME: Real PHIs could be if-converted as long as the CmpBB values are
468  // defined before The CmpBB cmp clobbers the flags. Alternatively, it should
469  // always be safe to sink the ccmp down to immediately before the CmpBB
470  // terminators.
471  if (!trivialTailPHIs()) {
472  DEBUG(dbgs() << "Can't handle phis in Tail.\n");
473  ++NumPhiRejs;
474  return false;
475  }
476 
477  if (!Tail->livein_empty()) {
478  DEBUG(dbgs() << "Can't handle live-in physregs in Tail.\n");
479  ++NumPhysRejs;
480  return false;
481  }
482 
483  // CmpBB should never have PHIs since Head is its only predecessor.
484  // FIXME: Clean them up if it happens.
485  if (!CmpBB->empty() && CmpBB->front().isPHI()) {
486  DEBUG(dbgs() << "Can't handle phis in CmpBB.\n");
487  ++NumPhi2Rejs;
488  return false;
489  }
490 
491  if (!CmpBB->livein_empty()) {
492  DEBUG(dbgs() << "Can't handle live-in physregs in CmpBB.\n");
493  ++NumPhysRejs;
494  return false;
495  }
496 
497  // The branch we're looking to eliminate must be analyzable.
498  HeadCond.clear();
499  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
500  if (TII->analyzeBranch(*Head, TBB, FBB, HeadCond)) {
501  DEBUG(dbgs() << "Head branch not analyzable.\n");
502  ++NumHeadBranchRejs;
503  return false;
504  }
505 
506  // This is weird, probably some sort of degenerate CFG, or an edge to a
507  // landing pad.
508  if (!TBB || HeadCond.empty()) {
509  DEBUG(dbgs() << "AnalyzeBranch didn't find conditional branch in Head.\n");
510  ++NumHeadBranchRejs;
511  return false;
512  }
513 
514  if (!parseCond(HeadCond, HeadCmpBBCC)) {
515  DEBUG(dbgs() << "Unsupported branch type on Head\n");
516  ++NumHeadBranchRejs;
517  return false;
518  }
519 
520  // Make sure the branch direction is right.
521  if (TBB != CmpBB) {
522  assert(TBB == Tail && "Unexpected TBB");
523  HeadCmpBBCC = AArch64CC::getInvertedCondCode(HeadCmpBBCC);
524  }
525 
526  CmpBBCond.clear();
527  TBB = FBB = nullptr;
528  if (TII->analyzeBranch(*CmpBB, TBB, FBB, CmpBBCond)) {
529  DEBUG(dbgs() << "CmpBB branch not analyzable.\n");
530  ++NumCmpBranchRejs;
531  return false;
532  }
533 
534  if (!TBB || CmpBBCond.empty()) {
535  DEBUG(dbgs() << "AnalyzeBranch didn't find conditional branch in CmpBB.\n");
536  ++NumCmpBranchRejs;
537  return false;
538  }
539 
540  if (!parseCond(CmpBBCond, CmpBBTailCC)) {
541  DEBUG(dbgs() << "Unsupported branch type on CmpBB\n");
542  ++NumCmpBranchRejs;
543  return false;
544  }
545 
546  if (TBB != Tail)
547  CmpBBTailCC = AArch64CC::getInvertedCondCode(CmpBBTailCC);
548 
549  DEBUG(dbgs() << "Head->CmpBB on " << AArch64CC::getCondCodeName(HeadCmpBBCC)
550  << ", CmpBB->Tail on " << AArch64CC::getCondCodeName(CmpBBTailCC)
551  << '\n');
552 
553  CmpMI = findConvertibleCompare(CmpBB);
554  if (!CmpMI)
555  return false;
556 
557  if (!canSpeculateInstrs(CmpBB, CmpMI)) {
558  ++NumSpeculateRejs;
559  return false;
560  }
561  return true;
562 }
563 
564 void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) {
565  DEBUG(dbgs() << "Merging BB#" << CmpBB->getNumber() << " into BB#"
566  << Head->getNumber() << ":\n" << *CmpBB);
567 
568  // All CmpBB instructions are moved into Head, and CmpBB is deleted.
569  // Update the CFG first.
570  updateTailPHIs();
571 
572  // Save successor probabilties before removing CmpBB and Tail from their
573  // parents.
574  BranchProbability Head2CmpBB = MBPI->getEdgeProbability(Head, CmpBB);
575  BranchProbability CmpBB2Tail = MBPI->getEdgeProbability(CmpBB, Tail);
576 
577  Head->removeSuccessor(CmpBB);
578  CmpBB->removeSuccessor(Tail);
579 
580  // If Head and CmpBB had successor probabilties, udpate the probabilities to
581  // reflect the ccmp-conversion.
582  if (Head->hasSuccessorProbabilities() && CmpBB->hasSuccessorProbabilities()) {
583 
584  // Head is allowed two successors. We've removed CmpBB, so the remaining
585  // successor is Tail. We need to increase the successor probability for
586  // Tail to account for the CmpBB path we removed.
587  //
588  // Pr(Tail|Head) += Pr(CmpBB|Head) * Pr(Tail|CmpBB).
589  assert(*Head->succ_begin() == Tail && "Head successor is not Tail");
590  BranchProbability Head2Tail = MBPI->getEdgeProbability(Head, Tail);
591  Head->setSuccProbability(Head->succ_begin(),
592  Head2Tail + Head2CmpBB * CmpBB2Tail);
593 
594  // We will transfer successors of CmpBB to Head in a moment without
595  // normalizing the successor probabilities. Set the successor probabilites
596  // before doing so.
597  //
598  // Pr(I|Head) = Pr(CmpBB|Head) * Pr(I|CmpBB).
599  for (auto I = CmpBB->succ_begin(), E = CmpBB->succ_end(); I != E; ++I) {
600  BranchProbability CmpBB2I = MBPI->getEdgeProbability(CmpBB, *I);
601  CmpBB->setSuccProbability(I, Head2CmpBB * CmpBB2I);
602  }
603  }
604 
605  Head->transferSuccessorsAndUpdatePHIs(CmpBB);
606  DebugLoc TermDL = Head->getFirstTerminator()->getDebugLoc();
607  TII->removeBranch(*Head);
608 
609  // If the Head terminator was one of the cbz / tbz branches with built-in
610  // compare, we need to insert an explicit compare instruction in its place.
611  if (HeadCond[0].getImm() == -1) {
612  ++NumCompBranches;
613  unsigned Opc = 0;
614  switch (HeadCond[1].getImm()) {
615  case AArch64::CBZW:
616  case AArch64::CBNZW:
617  Opc = AArch64::SUBSWri;
618  break;
619  case AArch64::CBZX:
620  case AArch64::CBNZX:
621  Opc = AArch64::SUBSXri;
622  break;
623  default:
624  llvm_unreachable("Cannot convert Head branch");
625  }
626  const MCInstrDesc &MCID = TII->get(Opc);
627  // Create a dummy virtual register for the SUBS def.
628  unsigned DestReg =
629  MRI->createVirtualRegister(TII->getRegClass(MCID, 0, TRI, *MF));
630  // Insert a SUBS Rn, #0 instruction instead of the cbz / cbnz.
631  BuildMI(*Head, Head->end(), TermDL, MCID)
632  .addReg(DestReg, RegState::Define | RegState::Dead)
633  .add(HeadCond[2])
634  .addImm(0)
635  .addImm(0);
636  // SUBS uses the GPR*sp register classes.
637  MRI->constrainRegClass(HeadCond[2].getReg(),
638  TII->getRegClass(MCID, 1, TRI, *MF));
639  }
640 
641  Head->splice(Head->end(), CmpBB, CmpBB->begin(), CmpBB->end());
642 
643  // Now replace CmpMI with a ccmp instruction that also considers the incoming
644  // flags.
645  unsigned Opc = 0;
646  unsigned FirstOp = 1; // First CmpMI operand to copy.
647  bool isZBranch = false; // CmpMI is a cbz/cbnz instruction.
648  switch (CmpMI->getOpcode()) {
649  default:
650  llvm_unreachable("Unknown compare opcode");
651  case AArch64::SUBSWri: Opc = AArch64::CCMPWi; break;
652  case AArch64::SUBSWrr: Opc = AArch64::CCMPWr; break;
653  case AArch64::SUBSXri: Opc = AArch64::CCMPXi; break;
654  case AArch64::SUBSXrr: Opc = AArch64::CCMPXr; break;
655  case AArch64::ADDSWri: Opc = AArch64::CCMNWi; break;
656  case AArch64::ADDSWrr: Opc = AArch64::CCMNWr; break;
657  case AArch64::ADDSXri: Opc = AArch64::CCMNXi; break;
658  case AArch64::ADDSXrr: Opc = AArch64::CCMNXr; break;
659  case AArch64::FCMPSrr: Opc = AArch64::FCCMPSrr; FirstOp = 0; break;
660  case AArch64::FCMPDrr: Opc = AArch64::FCCMPDrr; FirstOp = 0; break;
661  case AArch64::FCMPESrr: Opc = AArch64::FCCMPESrr; FirstOp = 0; break;
662  case AArch64::FCMPEDrr: Opc = AArch64::FCCMPEDrr; FirstOp = 0; break;
663  case AArch64::CBZW:
664  case AArch64::CBNZW:
665  Opc = AArch64::CCMPWi;
666  FirstOp = 0;
667  isZBranch = true;
668  break;
669  case AArch64::CBZX:
670  case AArch64::CBNZX:
671  Opc = AArch64::CCMPXi;
672  FirstOp = 0;
673  isZBranch = true;
674  break;
675  }
676 
677  // The ccmp instruction should set the flags according to the comparison when
678  // Head would have branched to CmpBB.
679  // The NZCV immediate operand should provide flags for the case where Head
680  // would have branched to Tail. These flags should cause the new Head
681  // terminator to branch to tail.
682  unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(CmpBBTailCC);
683  const MCInstrDesc &MCID = TII->get(Opc);
684  MRI->constrainRegClass(CmpMI->getOperand(FirstOp).getReg(),
685  TII->getRegClass(MCID, 0, TRI, *MF));
686  if (CmpMI->getOperand(FirstOp + 1).isReg())
687  MRI->constrainRegClass(CmpMI->getOperand(FirstOp + 1).getReg(),
688  TII->getRegClass(MCID, 1, TRI, *MF));
689  MachineInstrBuilder MIB = BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), MCID)
690  .add(CmpMI->getOperand(FirstOp)); // Register Rn
691  if (isZBranch)
692  MIB.addImm(0); // cbz/cbnz Rn -> ccmp Rn, #0
693  else
694  MIB.add(CmpMI->getOperand(FirstOp + 1)); // Register Rm / Immediate
695  MIB.addImm(NZCV).addImm(HeadCmpBBCC);
696 
697  // If CmpMI was a terminator, we need a new conditional branch to replace it.
698  // This now becomes a Head terminator.
699  if (isZBranch) {
700  bool isNZ = CmpMI->getOpcode() == AArch64::CBNZW ||
701  CmpMI->getOpcode() == AArch64::CBNZX;
702  BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), TII->get(AArch64::Bcc))
703  .addImm(isNZ ? AArch64CC::NE : AArch64CC::EQ)
704  .add(CmpMI->getOperand(1)); // Branch target.
705  }
706  CmpMI->eraseFromParent();
707  Head->updateTerminator();
708 
709  RemovedBlocks.push_back(CmpBB);
710  CmpBB->eraseFromParent();
711  DEBUG(dbgs() << "Result:\n" << *Head);
712  ++NumConverted;
713 }
714 
715 int SSACCmpConv::expectedCodeSizeDelta() const {
716  int delta = 0;
717  // If the Head terminator was one of the cbz / tbz branches with built-in
718  // compare, we need to insert an explicit compare instruction in its place
719  // plus a branch instruction.
720  if (HeadCond[0].getImm() == -1) {
721  switch (HeadCond[1].getImm()) {
722  case AArch64::CBZW:
723  case AArch64::CBNZW:
724  case AArch64::CBZX:
725  case AArch64::CBNZX:
726  // Therefore delta += 1
727  delta = 1;
728  break;
729  default:
730  llvm_unreachable("Cannot convert Head branch");
731  }
732  }
733  // If the Cmp terminator was one of the cbz / tbz branches with
734  // built-in compare, it will be turned into a compare instruction
735  // into Head, but we do not save any instruction.
736  // Otherwise, we save the branch instruction.
737  switch (CmpMI->getOpcode()) {
738  default:
739  --delta;
740  break;
741  case AArch64::CBZW:
742  case AArch64::CBNZW:
743  case AArch64::CBZX:
744  case AArch64::CBNZX:
745  break;
746  }
747  return delta;
748 }
749 
750 //===----------------------------------------------------------------------===//
751 // AArch64ConditionalCompares Pass
752 //===----------------------------------------------------------------------===//
753 
754 namespace {
755 class AArch64ConditionalCompares : public MachineFunctionPass {
756  const MachineBranchProbabilityInfo *MBPI;
757  const TargetInstrInfo *TII;
758  const TargetRegisterInfo *TRI;
759  MCSchedModel SchedModel;
760  // Does the proceeded function has Oz attribute.
761  bool MinSize;
763  MachineDominatorTree *DomTree;
765  MachineTraceMetrics *Traces;
767  SSACCmpConv CmpConv;
768 
769 public:
770  static char ID;
771  AArch64ConditionalCompares() : MachineFunctionPass(ID) {
773  }
774  void getAnalysisUsage(AnalysisUsage &AU) const override;
775  bool runOnMachineFunction(MachineFunction &MF) override;
776  StringRef getPassName() const override {
777  return "AArch64 Conditional Compares";
778  }
779 
780 private:
781  bool tryConvert(MachineBasicBlock *);
782  void updateDomTree(ArrayRef<MachineBasicBlock *> Removed);
783  void updateLoops(ArrayRef<MachineBasicBlock *> Removed);
784  void invalidateTraces();
785  bool shouldConvert();
786 };
787 } // end anonymous namespace
788 
790 
791 INITIALIZE_PASS_BEGIN(AArch64ConditionalCompares, "aarch64-ccmp",
792  "AArch64 CCMP Pass", false, false)
796 INITIALIZE_PASS_END(AArch64ConditionalCompares, "aarch64-ccmp",
797  "AArch64 CCMP Pass", false, false)
798 
800  return new AArch64ConditionalCompares();
801 }
802 
803 void AArch64ConditionalCompares::getAnalysisUsage(AnalysisUsage &AU) const {
812 }
813 
814 /// Update the dominator tree after if-conversion erased some blocks.
815 void AArch64ConditionalCompares::updateDomTree(
817  // convert() removes CmpBB which was previously dominated by Head.
818  // CmpBB children should be transferred to Head.
819  MachineDomTreeNode *HeadNode = DomTree->getNode(CmpConv.Head);
820  for (MachineBasicBlock *RemovedMBB : Removed) {
821  MachineDomTreeNode *Node = DomTree->getNode(RemovedMBB);
822  assert(Node != HeadNode && "Cannot erase the head node");
823  assert(Node->getIDom() == HeadNode && "CmpBB should be dominated by Head");
824  while (Node->getNumChildren())
825  DomTree->changeImmediateDominator(Node->getChildren().back(), HeadNode);
826  DomTree->eraseNode(RemovedMBB);
827  }
828 }
829 
830 /// Update LoopInfo after if-conversion.
831 void
832 AArch64ConditionalCompares::updateLoops(ArrayRef<MachineBasicBlock *> Removed) {
833  if (!Loops)
834  return;
835  for (MachineBasicBlock *RemovedMBB : Removed)
836  Loops->removeBlock(RemovedMBB);
837 }
838 
839 /// Invalidate MachineTraceMetrics before if-conversion.
840 void AArch64ConditionalCompares::invalidateTraces() {
841  Traces->invalidate(CmpConv.Head);
842  Traces->invalidate(CmpConv.CmpBB);
843 }
844 
845 /// Apply cost model and heuristics to the if-conversion in IfConv.
846 /// Return true if the conversion is a good idea.
847 ///
849  // Stress testing mode disables all cost considerations.
850  if (Stress)
851  return true;
852  if (!MinInstr)
853  MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
854 
855  // Head dominates CmpBB, so it is always included in its trace.
856  MachineTraceMetrics::Trace Trace = MinInstr->getTrace(CmpConv.CmpBB);
857 
858  // If code size is the main concern
859  if (MinSize) {
860  int CodeSizeDelta = CmpConv.expectedCodeSizeDelta();
861  DEBUG(dbgs() << "Code size delta: " << CodeSizeDelta << '\n');
862  // If we are minimizing the code size, do the conversion whatever
863  // the cost is.
864  if (CodeSizeDelta < 0)
865  return true;
866  if (CodeSizeDelta > 0) {
867  DEBUG(dbgs() << "Code size is increasing, give up on this one.\n");
868  return false;
869  }
870  // CodeSizeDelta == 0, continue with the regular heuristics
871  }
872 
873  // Heuristic: The compare conversion delays the execution of the branch
874  // instruction because we must wait for the inputs to the second compare as
875  // well. The branch has no dependent instructions, but delaying it increases
876  // the cost of a misprediction.
877  //
878  // Set a limit on the delay we will accept.
879  unsigned DelayLimit = SchedModel.MispredictPenalty * 3 / 4;
880 
881  // Instruction depths can be computed for all trace instructions above CmpBB.
882  unsigned HeadDepth =
883  Trace.getInstrCycles(*CmpConv.Head->getFirstTerminator()).Depth;
884  unsigned CmpBBDepth =
885  Trace.getInstrCycles(*CmpConv.CmpBB->getFirstTerminator()).Depth;
886  DEBUG(dbgs() << "Head depth: " << HeadDepth
887  << "\nCmpBB depth: " << CmpBBDepth << '\n');
888  if (CmpBBDepth > HeadDepth + DelayLimit) {
889  DEBUG(dbgs() << "Branch delay would be larger than " << DelayLimit
890  << " cycles.\n");
891  return false;
892  }
893 
894  // Check the resource depth at the bottom of CmpBB - these instructions will
895  // be speculated.
896  unsigned ResDepth = Trace.getResourceDepth(true);
897  DEBUG(dbgs() << "Resources: " << ResDepth << '\n');
898 
899  // Heuristic: The speculatively executed instructions must all be able to
900  // merge into the Head block. The Head critical path should dominate the
901  // resource cost of the speculated instructions.
902  if (ResDepth > HeadDepth) {
903  DEBUG(dbgs() << "Too many instructions to speculate.\n");
904  return false;
905  }
906  return true;
907 }
908 
909 bool AArch64ConditionalCompares::tryConvert(MachineBasicBlock *MBB) {
910  bool Changed = false;
911  while (CmpConv.canConvert(MBB) && shouldConvert()) {
912  invalidateTraces();
914  CmpConv.convert(RemovedBlocks);
915  Changed = true;
916  updateDomTree(RemovedBlocks);
917  updateLoops(RemovedBlocks);
918  }
919  return Changed;
920 }
921 
922 bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) {
923  DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n"
924  << "********** Function: " << MF.getName() << '\n');
925  if (skipFunction(*MF.getFunction()))
926  return false;
927 
928  TII = MF.getSubtarget().getInstrInfo();
929  TRI = MF.getSubtarget().getRegisterInfo();
930  SchedModel = MF.getSubtarget().getSchedModel();
931  MRI = &MF.getRegInfo();
932  DomTree = &getAnalysis<MachineDominatorTree>();
933  Loops = getAnalysisIfAvailable<MachineLoopInfo>();
934  MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
935  Traces = &getAnalysis<MachineTraceMetrics>();
936  MinInstr = nullptr;
937  MinSize = MF.getFunction()->optForMinSize();
938 
939  bool Changed = false;
940  CmpConv.runOnMachineFunction(MF, MBPI);
941 
942  // Visit blocks in dominator tree pre-order. The pre-order enables multiple
943  // cmp-conversions from the same head block.
944  // Note that updateDomTree() modifies the children of the DomTree node
945  // currently being visited. The df_iterator supports that; it doesn't look at
946  // child_begin() / child_end() until after a node has been visited.
947  for (auto *I : depth_first(DomTree))
948  if (tryConvert(I->getBlock()))
949  Changed = true;
950 
951  return Changed;
952 }
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:81
const MachineInstrBuilder & add(const MachineOperand &MO) const
bool hasSuccessorProbabilities() const
Return true if any of the successors have probabilities attached to them.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
bool use_nodbg_empty(unsigned RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register...
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
unsigned createVirtualRegister(const TargetRegisterClass *RegClass)
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:268
static CondCode getInvertedCondCode(CondCode Code)
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:163
unsigned getReg() const
getReg - Returns the register number.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
STATISTIC(NumFunctions, "Total number of functions")
A debug info location.
Definition: DebugLoc.h:34
A trace ensemble is a collection of traces selected using the same strategy, for example &#39;minimum res...
FunctionPass * createAArch64ConditionalCompares()
static unsigned InstrCount
bool isPHI() const
Definition: MachineInstr.h:826
virtual unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const
Remove the branching code at the end of the specific MBB.
static cl::opt< unsigned > BlockInstrLimit("aarch64-ccmp-limit", cl::init(30), cl::Hidden, cl::desc("Maximum number of instructions per speculated block."))
AnalysisUsage & addRequired()
static cl::opt< bool > Stress("aarch64-stress-ccmp", cl::Hidden, cl::desc("Turn all knobs to 11"))
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
Hexagon Hardware Loops
void eraseFromParent()
This method unlinks &#39;this&#39; from the containing function and deletes it.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
void eraseFromParent()
Unlink &#39;this&#39; from the containing basic block and delete it.
Reg
All possible values of the reg field in the ModR/M byte.
static bool shouldConvert(Constant &C, AArch64PromoteConstant::PromotionCacheTy &PromotionCache)
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:290
static const char * getCondCodeName(CondCode Code)
PhysRegInfo analyzePhysReg(unsigned Reg, const TargetRegisterInfo *TRI)
analyzePhysReg - Analyze how the current instruction or bundle uses a physical register.
Select the trace through a block that has the fewest instructions.
const TargetRegisterClass * getRegClass(const MCInstrDesc &TID, unsigned OpNum, const TargetRegisterInfo *TRI, const MachineFunction &MF) const
Given a machine instruction descriptor, returns the register class constraint for OpNum...
Base class for the actual dominator tree node.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
const std::vector< DomTreeNodeBase * > & getChildren() const
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they&#39;re not in a MachineFuncti...
virtual const TargetInstrInfo * getInstrInfo() const
void setSuccProbability(succ_iterator I, BranchProbability Prob)
Set successor probability of a given iterator.
const TargetRegisterClass * constrainRegClass(unsigned Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
TargetInstrInfo - Interface to description of machine instruction set.
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:406
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
unsigned const MachineRegisterInfo * MRI
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:149
DomTreeNodeBase * getIDom() const
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
InstrCycles getInstrCycles(const MachineInstr &MI) const
Return the depth and height of MI.
Represent the analysis usage information of a pass.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
A trace represents a plausible sequence of executed basic blocks that passes through the current basi...
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:864
size_t getNumChildren() const
void updateTerminator()
Update the terminator instructions in block to account for changes to the layout. ...
static bool parseCond(ArrayRef< MachineOperand > Cond, AArch64CC::CondCode &CC)
MIOperands - Iterate over operands of a single instruction.
unsigned pred_size() const
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:923
static unsigned getNZCVToSatisfyCondCode(CondCode Code)
Given a condition code, return NZCV flags that would satisfy that condition.
static unsigned getReg(const void *D, unsigned RC, unsigned RegNo)
unsigned succ_size() const
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
BranchProbability getEdgeProbability(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const
Representation of each machine instruction.
Definition: MachineInstr.h:59
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB &#39;Other&#39; at the position From, and insert it into this MBB right before &#39;...
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:61
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:45
INITIALIZE_PASS_BEGIN(AArch64ConditionalCompares, "aarch64-ccmp", "AArch64 CCMP Pass", false, false) INITIALIZE_PASS_END(AArch64ConditionalCompares
#define I(x, y, z)
Definition: MD5.cpp:58
bool optForMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:527
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
void initializeAArch64ConditionalComparesPass(PassRegistry &)
unsigned getResourceDepth(bool Bottom) const
Return the resource depth of the top/bottom of the trace center block.
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
iterator_range< df_iterator< T > > depth_first(const T &G)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:235
#define DEBUG(X)
Definition: Debug.h:118
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:295
Machine model for scheduling, bundling, and heuristics.
Definition: MCSchedule.h:136
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget&#39;s CPU.