LLVM  10.0.0svn
AArch64ConditionalCompares.cpp
Go to the documentation of this file.
1 //===-- AArch64ConditionalCompares.cpp --- CCMP formation for AArch64 -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the AArch64ConditionalCompares pass which reduces
10 // branching and code size by using the conditional compare instructions CCMP,
11 // CCMN, and FCMP.
12 //
13 // The CFG transformations for forming conditional compares are very similar to
14 // if-conversion, and this pass should run immediately before the early
15 // if-conversion pass.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "AArch64.h"
21 #include "llvm/ADT/SetVector.h"
22 #include "llvm/ADT/SmallPtrSet.h"
23 #include "llvm/ADT/Statistic.h"
32 #include "llvm/CodeGen/Passes.h"
37 #include "llvm/Support/Debug.h"
39 
40 using namespace llvm;
41 
42 #define DEBUG_TYPE "aarch64-ccmp"
43 
44 // Absolute maximum number of instructions allowed per speculated block.
45 // This bypasses all other heuristics, so it should be set fairly high.
47  "aarch64-ccmp-limit", cl::init(30), cl::Hidden,
48  cl::desc("Maximum number of instructions per speculated block."));
49 
50 // Stress testing mode - disable heuristics.
51 static cl::opt<bool> Stress("aarch64-stress-ccmp", cl::Hidden,
52  cl::desc("Turn all knobs to 11"));
53 
54 STATISTIC(NumConsidered, "Number of ccmps considered");
55 STATISTIC(NumPhiRejs, "Number of ccmps rejected (PHI)");
56 STATISTIC(NumPhysRejs, "Number of ccmps rejected (Physregs)");
57 STATISTIC(NumPhi2Rejs, "Number of ccmps rejected (PHI2)");
58 STATISTIC(NumHeadBranchRejs, "Number of ccmps rejected (Head branch)");
59 STATISTIC(NumCmpBranchRejs, "Number of ccmps rejected (CmpBB branch)");
60 STATISTIC(NumCmpTermRejs, "Number of ccmps rejected (CmpBB is cbz...)");
61 STATISTIC(NumImmRangeRejs, "Number of ccmps rejected (Imm out of range)");
62 STATISTIC(NumLiveDstRejs, "Number of ccmps rejected (Cmp dest live)");
63 STATISTIC(NumMultNZCVUses, "Number of ccmps rejected (NZCV used)");
64 STATISTIC(NumUnknNZCVDefs, "Number of ccmps rejected (NZCV def unknown)");
65 
66 STATISTIC(NumSpeculateRejs, "Number of ccmps rejected (Can't speculate)");
67 
68 STATISTIC(NumConverted, "Number of ccmp instructions created");
69 STATISTIC(NumCompBranches, "Number of cbz/cbnz branches converted");
70 
71 //===----------------------------------------------------------------------===//
72 // SSACCmpConv
73 //===----------------------------------------------------------------------===//
74 //
75 // The SSACCmpConv class performs ccmp-conversion on SSA form machine code
76 // after determining if it is possible. The class contains no heuristics;
77 // external code should be used to determine when ccmp-conversion is a good
78 // idea.
79 //
80 // CCmp-formation works on a CFG representing chained conditions, typically
81 // from C's short-circuit || and && operators:
82 //
83 // From: Head To: Head
84 // / | CmpBB
85 // / | / |
86 // | CmpBB / |
87 // | / | Tail |
88 // | / | | |
89 // Tail | | |
90 // | | | |
91 // ... ... ... ...
92 //
93 // The Head block is terminated by a br.cond instruction, and the CmpBB block
94 // contains compare + br.cond. Tail must be a successor of both.
95 //
96 // The cmp-conversion turns the compare instruction in CmpBB into a conditional
97 // compare, and merges CmpBB into Head, speculatively executing its
98 // instructions. The AArch64 conditional compare instructions have an immediate
99 // operand that specifies the NZCV flag values when the condition is false and
100 // the compare isn't executed. This makes it possible to chain compares with
101 // different condition codes.
102 //
103 // Example:
104 //
105 // if (a == 5 || b == 17)
106 // foo();
107 //
108 // Head:
109 // cmp w0, #5
110 // b.eq Tail
111 // CmpBB:
112 // cmp w1, #17
113 // b.eq Tail
114 // ...
115 // Tail:
116 // bl _foo
117 //
118 // Becomes:
119 //
120 // Head:
121 // cmp w0, #5
122 // ccmp w1, #17, 4, ne ; 4 = nZcv
123 // b.eq Tail
124 // ...
125 // Tail:
126 // bl _foo
127 //
128 // The ccmp condition code is the one that would cause the Head terminator to
129 // branch to CmpBB.
130 //
131 // FIXME: It should also be possible to speculate a block on the critical edge
132 // between Head and Tail, just like if-converting a diamond.
133 //
134 // FIXME: Handle PHIs in Tail by turning them into selects (if-conversion).
135 
136 namespace {
137 class SSACCmpConv {
138  MachineFunction *MF;
139  const TargetInstrInfo *TII;
140  const TargetRegisterInfo *TRI;
142  const MachineBranchProbabilityInfo *MBPI;
143 
144 public:
145  /// The first block containing a conditional branch, dominating everything
146  /// else.
147  MachineBasicBlock *Head;
148 
149  /// The block containing cmp+br.cond with a successor shared with Head.
150  MachineBasicBlock *CmpBB;
151 
152  /// The common successor for Head and CmpBB.
154 
155  /// The compare instruction in CmpBB that can be converted to a ccmp.
156  MachineInstr *CmpMI;
157 
158 private:
159  /// The branch condition in Head as determined by AnalyzeBranch.
161 
162  /// The condition code that makes Head branch to CmpBB.
163  AArch64CC::CondCode HeadCmpBBCC;
164 
165  /// The branch condition in CmpBB.
167 
168  /// The condition code that makes CmpBB branch to Tail.
169  AArch64CC::CondCode CmpBBTailCC;
170 
171  /// Check if the Tail PHIs are trivially convertible.
172  bool trivialTailPHIs();
173 
174  /// Remove CmpBB from the Tail PHIs.
175  void updateTailPHIs();
176 
177  /// Check if an operand defining DstReg is dead.
178  bool isDeadDef(unsigned DstReg);
179 
180  /// Find the compare instruction in MBB that controls the conditional branch.
181  /// Return NULL if a convertible instruction can't be found.
182  MachineInstr *findConvertibleCompare(MachineBasicBlock *MBB);
183 
184  /// Return true if all non-terminator instructions in MBB can be safely
185  /// speculated.
186  bool canSpeculateInstrs(MachineBasicBlock *MBB, const MachineInstr *CmpMI);
187 
188 public:
189  /// runOnMachineFunction - Initialize per-function data structures.
190  void runOnMachineFunction(MachineFunction &MF,
191  const MachineBranchProbabilityInfo *MBPI) {
192  this->MF = &MF;
193  this->MBPI = MBPI;
194  TII = MF.getSubtarget().getInstrInfo();
195  TRI = MF.getSubtarget().getRegisterInfo();
196  MRI = &MF.getRegInfo();
197  }
198 
199  /// If the sub-CFG headed by MBB can be cmp-converted, initialize the
200  /// internal state, and return true.
201  bool canConvert(MachineBasicBlock *MBB);
202 
203  /// Cmo-convert the last block passed to canConvertCmp(), assuming
204  /// it is possible. Add any erased blocks to RemovedBlocks.
205  void convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks);
206 
207  /// Return the expected code size delta if the conversion into a
208  /// conditional compare is performed.
209  int expectedCodeSizeDelta() const;
210 };
211 } // end anonymous namespace
212 
213 // Check that all PHIs in Tail are selecting the same value from Head and CmpBB.
214 // This means that no if-conversion is required when merging CmpBB into Head.
215 bool SSACCmpConv::trivialTailPHIs() {
216  for (auto &I : *Tail) {
217  if (!I.isPHI())
218  break;
219  unsigned HeadReg = 0, CmpBBReg = 0;
220  // PHI operands come in (VReg, MBB) pairs.
221  for (unsigned oi = 1, oe = I.getNumOperands(); oi != oe; oi += 2) {
222  MachineBasicBlock *MBB = I.getOperand(oi + 1).getMBB();
223  Register Reg = I.getOperand(oi).getReg();
224  if (MBB == Head) {
225  assert((!HeadReg || HeadReg == Reg) && "Inconsistent PHI operands");
226  HeadReg = Reg;
227  }
228  if (MBB == CmpBB) {
229  assert((!CmpBBReg || CmpBBReg == Reg) && "Inconsistent PHI operands");
230  CmpBBReg = Reg;
231  }
232  }
233  if (HeadReg != CmpBBReg)
234  return false;
235  }
236  return true;
237 }
238 
239 // Assuming that trivialTailPHIs() is true, update the Tail PHIs by simply
240 // removing the CmpBB operands. The Head operands will be identical.
241 void SSACCmpConv::updateTailPHIs() {
242  for (auto &I : *Tail) {
243  if (!I.isPHI())
244  break;
245  // I is a PHI. It can have multiple entries for CmpBB.
246  for (unsigned oi = I.getNumOperands(); oi > 2; oi -= 2) {
247  // PHI operands are (Reg, MBB) at (oi-2, oi-1).
248  if (I.getOperand(oi - 1).getMBB() == CmpBB) {
249  I.RemoveOperand(oi - 1);
250  I.RemoveOperand(oi - 2);
251  }
252  }
253  }
254 }
255 
256 // This pass runs before the AArch64DeadRegisterDefinitions pass, so compares
257 // are still writing virtual registers without any uses.
258 bool SSACCmpConv::isDeadDef(unsigned DstReg) {
259  // Writes to the zero register are dead.
260  if (DstReg == AArch64::WZR || DstReg == AArch64::XZR)
261  return true;
262  if (!Register::isVirtualRegister(DstReg))
263  return false;
264  // A virtual register def without any uses will be marked dead later, and
265  // eventually replaced by the zero register.
266  return MRI->use_nodbg_empty(DstReg);
267 }
268 
269 // Parse a condition code returned by AnalyzeBranch, and compute the CondCode
270 // corresponding to TBB.
271 // Return
273  // A normal br.cond simply has the condition code.
274  if (Cond[0].getImm() != -1) {
275  assert(Cond.size() == 1 && "Unknown Cond array format");
276  CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
277  return true;
278  }
279  // For tbz and cbz instruction, the opcode is next.
280  switch (Cond[1].getImm()) {
281  default:
282  // This includes tbz / tbnz branches which can't be converted to
283  // ccmp + br.cond.
284  return false;
285  case AArch64::CBZW:
286  case AArch64::CBZX:
287  assert(Cond.size() == 3 && "Unknown Cond array format");
288  CC = AArch64CC::EQ;
289  return true;
290  case AArch64::CBNZW:
291  case AArch64::CBNZX:
292  assert(Cond.size() == 3 && "Unknown Cond array format");
293  CC = AArch64CC::NE;
294  return true;
295  }
296 }
297 
298 MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) {
300  if (I == MBB->end())
301  return nullptr;
302  // The terminator must be controlled by the flags.
303  if (!I->readsRegister(AArch64::NZCV)) {
304  switch (I->getOpcode()) {
305  case AArch64::CBZW:
306  case AArch64::CBZX:
307  case AArch64::CBNZW:
308  case AArch64::CBNZX:
309  // These can be converted into a ccmp against #0.
310  return &*I;
311  }
312  ++NumCmpTermRejs;
313  LLVM_DEBUG(dbgs() << "Flags not used by terminator: " << *I);
314  return nullptr;
315  }
316 
317  // Now find the instruction controlling the terminator.
318  for (MachineBasicBlock::iterator B = MBB->begin(); I != B;) {
319  --I;
320  assert(!I->isTerminator() && "Spurious terminator");
321  switch (I->getOpcode()) {
322  // cmp is an alias for subs with a dead destination register.
323  case AArch64::SUBSWri:
324  case AArch64::SUBSXri:
325  // cmn is an alias for adds with a dead destination register.
326  case AArch64::ADDSWri:
327  case AArch64::ADDSXri:
328  // Check that the immediate operand is within range, ccmp wants a uimm5.
329  // Rd = SUBSri Rn, imm, shift
330  if (I->getOperand(3).getImm() || !isUInt<5>(I->getOperand(2).getImm())) {
331  LLVM_DEBUG(dbgs() << "Immediate out of range for ccmp: " << *I);
332  ++NumImmRangeRejs;
333  return nullptr;
334  }
336  case AArch64::SUBSWrr:
337  case AArch64::SUBSXrr:
338  case AArch64::ADDSWrr:
339  case AArch64::ADDSXrr:
340  if (isDeadDef(I->getOperand(0).getReg()))
341  return &*I;
342  LLVM_DEBUG(dbgs() << "Can't convert compare with live destination: "
343  << *I);
344  ++NumLiveDstRejs;
345  return nullptr;
346  case AArch64::FCMPSrr:
347  case AArch64::FCMPDrr:
348  case AArch64::FCMPESrr:
349  case AArch64::FCMPEDrr:
350  return &*I;
351  }
352 
353  // Check for flag reads and clobbers.
354  MIOperands::PhysRegInfo PRI =
355  MIOperands(*I).analyzePhysReg(AArch64::NZCV, TRI);
356 
357  if (PRI.Read) {
358  // The ccmp doesn't produce exactly the same flags as the original
359  // compare, so reject the transform if there are uses of the flags
360  // besides the terminators.
361  LLVM_DEBUG(dbgs() << "Can't create ccmp with multiple uses: " << *I);
362  ++NumMultNZCVUses;
363  return nullptr;
364  }
365 
366  if (PRI.Defined || PRI.Clobbered) {
367  LLVM_DEBUG(dbgs() << "Not convertible compare: " << *I);
368  ++NumUnknNZCVDefs;
369  return nullptr;
370  }
371  }
372  LLVM_DEBUG(dbgs() << "Flags not defined in " << printMBBReference(*MBB)
373  << '\n');
374  return nullptr;
375 }
376 
377 /// Determine if all the instructions in MBB can safely
378 /// be speculated. The terminators are not considered.
379 ///
380 /// Only CmpMI is allowed to clobber the flags.
381 ///
382 bool SSACCmpConv::canSpeculateInstrs(MachineBasicBlock *MBB,
383  const MachineInstr *CmpMI) {
384  // Reject any live-in physregs. It's probably NZCV/EFLAGS, and very hard to
385  // get right.
386  if (!MBB->livein_empty()) {
387  LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has live-ins.\n");
388  return false;
389  }
390 
391  unsigned InstrCount = 0;
392 
393  // Check all instructions, except the terminators. It is assumed that
394  // terminators never have side effects or define any used register values.
395  for (auto &I : make_range(MBB->begin(), MBB->getFirstTerminator())) {
396  if (I.isDebugInstr())
397  continue;
398 
399  if (++InstrCount > BlockInstrLimit && !Stress) {
400  LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has more than "
401  << BlockInstrLimit << " instructions.\n");
402  return false;
403  }
404 
405  // There shouldn't normally be any phis in a single-predecessor block.
406  if (I.isPHI()) {
407  LLVM_DEBUG(dbgs() << "Can't hoist: " << I);
408  return false;
409  }
410 
411  // Don't speculate loads. Note that it may be possible and desirable to
412  // speculate GOT or constant pool loads that are guaranteed not to trap,
413  // but we don't support that for now.
414  if (I.mayLoad()) {
415  LLVM_DEBUG(dbgs() << "Won't speculate load: " << I);
416  return false;
417  }
418 
419  // We never speculate stores, so an AA pointer isn't necessary.
420  bool DontMoveAcrossStore = true;
421  if (!I.isSafeToMove(nullptr, DontMoveAcrossStore)) {
422  LLVM_DEBUG(dbgs() << "Can't speculate: " << I);
423  return false;
424  }
425 
426  // Only CmpMI is allowed to clobber the flags.
427  if (&I != CmpMI && I.modifiesRegister(AArch64::NZCV, TRI)) {
428  LLVM_DEBUG(dbgs() << "Clobbers flags: " << I);
429  return false;
430  }
431  }
432  return true;
433 }
434 
435 /// Analyze the sub-cfg rooted in MBB, and return true if it is a potential
436 /// candidate for cmp-conversion. Fill out the internal state.
437 ///
438 bool SSACCmpConv::canConvert(MachineBasicBlock *MBB) {
439  Head = MBB;
440  Tail = CmpBB = nullptr;
441 
442  if (Head->succ_size() != 2)
443  return false;
444  MachineBasicBlock *Succ0 = Head->succ_begin()[0];
445  MachineBasicBlock *Succ1 = Head->succ_begin()[1];
446 
447  // CmpBB can only have a single predecessor. Tail is allowed many.
448  if (Succ0->pred_size() != 1)
449  std::swap(Succ0, Succ1);
450 
451  // Succ0 is our candidate for CmpBB.
452  if (Succ0->pred_size() != 1 || Succ0->succ_size() != 2)
453  return false;
454 
455  CmpBB = Succ0;
456  Tail = Succ1;
457 
458  if (!CmpBB->isSuccessor(Tail))
459  return false;
460 
461  // The CFG topology checks out.
462  LLVM_DEBUG(dbgs() << "\nTriangle: " << printMBBReference(*Head) << " -> "
463  << printMBBReference(*CmpBB) << " -> "
464  << printMBBReference(*Tail) << '\n');
465  ++NumConsidered;
466 
467  // Tail is allowed to have many predecessors, but we can't handle PHIs yet.
468  //
469  // FIXME: Real PHIs could be if-converted as long as the CmpBB values are
470  // defined before The CmpBB cmp clobbers the flags. Alternatively, it should
471  // always be safe to sink the ccmp down to immediately before the CmpBB
472  // terminators.
473  if (!trivialTailPHIs()) {
474  LLVM_DEBUG(dbgs() << "Can't handle phis in Tail.\n");
475  ++NumPhiRejs;
476  return false;
477  }
478 
479  if (!Tail->livein_empty()) {
480  LLVM_DEBUG(dbgs() << "Can't handle live-in physregs in Tail.\n");
481  ++NumPhysRejs;
482  return false;
483  }
484 
485  // CmpBB should never have PHIs since Head is its only predecessor.
486  // FIXME: Clean them up if it happens.
487  if (!CmpBB->empty() && CmpBB->front().isPHI()) {
488  LLVM_DEBUG(dbgs() << "Can't handle phis in CmpBB.\n");
489  ++NumPhi2Rejs;
490  return false;
491  }
492 
493  if (!CmpBB->livein_empty()) {
494  LLVM_DEBUG(dbgs() << "Can't handle live-in physregs in CmpBB.\n");
495  ++NumPhysRejs;
496  return false;
497  }
498 
499  // The branch we're looking to eliminate must be analyzable.
500  HeadCond.clear();
501  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
502  if (TII->analyzeBranch(*Head, TBB, FBB, HeadCond)) {
503  LLVM_DEBUG(dbgs() << "Head branch not analyzable.\n");
504  ++NumHeadBranchRejs;
505  return false;
506  }
507 
508  // This is weird, probably some sort of degenerate CFG, or an edge to a
509  // landing pad.
510  if (!TBB || HeadCond.empty()) {
511  LLVM_DEBUG(
512  dbgs() << "AnalyzeBranch didn't find conditional branch in Head.\n");
513  ++NumHeadBranchRejs;
514  return false;
515  }
516 
517  if (!parseCond(HeadCond, HeadCmpBBCC)) {
518  LLVM_DEBUG(dbgs() << "Unsupported branch type on Head\n");
519  ++NumHeadBranchRejs;
520  return false;
521  }
522 
523  // Make sure the branch direction is right.
524  if (TBB != CmpBB) {
525  assert(TBB == Tail && "Unexpected TBB");
526  HeadCmpBBCC = AArch64CC::getInvertedCondCode(HeadCmpBBCC);
527  }
528 
529  CmpBBCond.clear();
530  TBB = FBB = nullptr;
531  if (TII->analyzeBranch(*CmpBB, TBB, FBB, CmpBBCond)) {
532  LLVM_DEBUG(dbgs() << "CmpBB branch not analyzable.\n");
533  ++NumCmpBranchRejs;
534  return false;
535  }
536 
537  if (!TBB || CmpBBCond.empty()) {
538  LLVM_DEBUG(
539  dbgs() << "AnalyzeBranch didn't find conditional branch in CmpBB.\n");
540  ++NumCmpBranchRejs;
541  return false;
542  }
543 
544  if (!parseCond(CmpBBCond, CmpBBTailCC)) {
545  LLVM_DEBUG(dbgs() << "Unsupported branch type on CmpBB\n");
546  ++NumCmpBranchRejs;
547  return false;
548  }
549 
550  if (TBB != Tail)
551  CmpBBTailCC = AArch64CC::getInvertedCondCode(CmpBBTailCC);
552 
553  LLVM_DEBUG(dbgs() << "Head->CmpBB on "
554  << AArch64CC::getCondCodeName(HeadCmpBBCC)
555  << ", CmpBB->Tail on "
556  << AArch64CC::getCondCodeName(CmpBBTailCC) << '\n');
557 
558  CmpMI = findConvertibleCompare(CmpBB);
559  if (!CmpMI)
560  return false;
561 
562  if (!canSpeculateInstrs(CmpBB, CmpMI)) {
563  ++NumSpeculateRejs;
564  return false;
565  }
566  return true;
567 }
568 
569 void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) {
570  LLVM_DEBUG(dbgs() << "Merging " << printMBBReference(*CmpBB) << " into "
571  << printMBBReference(*Head) << ":\n"
572  << *CmpBB);
573 
574  // All CmpBB instructions are moved into Head, and CmpBB is deleted.
575  // Update the CFG first.
576  updateTailPHIs();
577 
578  // Save successor probabilties before removing CmpBB and Tail from their
579  // parents.
580  BranchProbability Head2CmpBB = MBPI->getEdgeProbability(Head, CmpBB);
581  BranchProbability CmpBB2Tail = MBPI->getEdgeProbability(CmpBB, Tail);
582 
583  Head->removeSuccessor(CmpBB);
584  CmpBB->removeSuccessor(Tail);
585 
586  // If Head and CmpBB had successor probabilties, udpate the probabilities to
587  // reflect the ccmp-conversion.
588  if (Head->hasSuccessorProbabilities() && CmpBB->hasSuccessorProbabilities()) {
589 
590  // Head is allowed two successors. We've removed CmpBB, so the remaining
591  // successor is Tail. We need to increase the successor probability for
592  // Tail to account for the CmpBB path we removed.
593  //
594  // Pr(Tail|Head) += Pr(CmpBB|Head) * Pr(Tail|CmpBB).
595  assert(*Head->succ_begin() == Tail && "Head successor is not Tail");
596  BranchProbability Head2Tail = MBPI->getEdgeProbability(Head, Tail);
597  Head->setSuccProbability(Head->succ_begin(),
598  Head2Tail + Head2CmpBB * CmpBB2Tail);
599 
600  // We will transfer successors of CmpBB to Head in a moment without
601  // normalizing the successor probabilities. Set the successor probabilites
602  // before doing so.
603  //
604  // Pr(I|Head) = Pr(CmpBB|Head) * Pr(I|CmpBB).
605  for (auto I = CmpBB->succ_begin(), E = CmpBB->succ_end(); I != E; ++I) {
606  BranchProbability CmpBB2I = MBPI->getEdgeProbability(CmpBB, *I);
607  CmpBB->setSuccProbability(I, Head2CmpBB * CmpBB2I);
608  }
609  }
610 
611  Head->transferSuccessorsAndUpdatePHIs(CmpBB);
612  DebugLoc TermDL = Head->getFirstTerminator()->getDebugLoc();
613  TII->removeBranch(*Head);
614 
615  // If the Head terminator was one of the cbz / tbz branches with built-in
616  // compare, we need to insert an explicit compare instruction in its place.
617  if (HeadCond[0].getImm() == -1) {
618  ++NumCompBranches;
619  unsigned Opc = 0;
620  switch (HeadCond[1].getImm()) {
621  case AArch64::CBZW:
622  case AArch64::CBNZW:
623  Opc = AArch64::SUBSWri;
624  break;
625  case AArch64::CBZX:
626  case AArch64::CBNZX:
627  Opc = AArch64::SUBSXri;
628  break;
629  default:
630  llvm_unreachable("Cannot convert Head branch");
631  }
632  const MCInstrDesc &MCID = TII->get(Opc);
633  // Create a dummy virtual register for the SUBS def.
634  Register DestReg =
635  MRI->createVirtualRegister(TII->getRegClass(MCID, 0, TRI, *MF));
636  // Insert a SUBS Rn, #0 instruction instead of the cbz / cbnz.
637  BuildMI(*Head, Head->end(), TermDL, MCID)
638  .addReg(DestReg, RegState::Define | RegState::Dead)
639  .add(HeadCond[2])
640  .addImm(0)
641  .addImm(0);
642  // SUBS uses the GPR*sp register classes.
643  MRI->constrainRegClass(HeadCond[2].getReg(),
644  TII->getRegClass(MCID, 1, TRI, *MF));
645  }
646 
647  Head->splice(Head->end(), CmpBB, CmpBB->begin(), CmpBB->end());
648 
649  // Now replace CmpMI with a ccmp instruction that also considers the incoming
650  // flags.
651  unsigned Opc = 0;
652  unsigned FirstOp = 1; // First CmpMI operand to copy.
653  bool isZBranch = false; // CmpMI is a cbz/cbnz instruction.
654  switch (CmpMI->getOpcode()) {
655  default:
656  llvm_unreachable("Unknown compare opcode");
657  case AArch64::SUBSWri: Opc = AArch64::CCMPWi; break;
658  case AArch64::SUBSWrr: Opc = AArch64::CCMPWr; break;
659  case AArch64::SUBSXri: Opc = AArch64::CCMPXi; break;
660  case AArch64::SUBSXrr: Opc = AArch64::CCMPXr; break;
661  case AArch64::ADDSWri: Opc = AArch64::CCMNWi; break;
662  case AArch64::ADDSWrr: Opc = AArch64::CCMNWr; break;
663  case AArch64::ADDSXri: Opc = AArch64::CCMNXi; break;
664  case AArch64::ADDSXrr: Opc = AArch64::CCMNXr; break;
665  case AArch64::FCMPSrr: Opc = AArch64::FCCMPSrr; FirstOp = 0; break;
666  case AArch64::FCMPDrr: Opc = AArch64::FCCMPDrr; FirstOp = 0; break;
667  case AArch64::FCMPESrr: Opc = AArch64::FCCMPESrr; FirstOp = 0; break;
668  case AArch64::FCMPEDrr: Opc = AArch64::FCCMPEDrr; FirstOp = 0; break;
669  case AArch64::CBZW:
670  case AArch64::CBNZW:
671  Opc = AArch64::CCMPWi;
672  FirstOp = 0;
673  isZBranch = true;
674  break;
675  case AArch64::CBZX:
676  case AArch64::CBNZX:
677  Opc = AArch64::CCMPXi;
678  FirstOp = 0;
679  isZBranch = true;
680  break;
681  }
682 
683  // The ccmp instruction should set the flags according to the comparison when
684  // Head would have branched to CmpBB.
685  // The NZCV immediate operand should provide flags for the case where Head
686  // would have branched to Tail. These flags should cause the new Head
687  // terminator to branch to tail.
688  unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(CmpBBTailCC);
689  const MCInstrDesc &MCID = TII->get(Opc);
690  MRI->constrainRegClass(CmpMI->getOperand(FirstOp).getReg(),
691  TII->getRegClass(MCID, 0, TRI, *MF));
692  if (CmpMI->getOperand(FirstOp + 1).isReg())
693  MRI->constrainRegClass(CmpMI->getOperand(FirstOp + 1).getReg(),
694  TII->getRegClass(MCID, 1, TRI, *MF));
695  MachineInstrBuilder MIB = BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), MCID)
696  .add(CmpMI->getOperand(FirstOp)); // Register Rn
697  if (isZBranch)
698  MIB.addImm(0); // cbz/cbnz Rn -> ccmp Rn, #0
699  else
700  MIB.add(CmpMI->getOperand(FirstOp + 1)); // Register Rm / Immediate
701  MIB.addImm(NZCV).addImm(HeadCmpBBCC);
702 
703  // If CmpMI was a terminator, we need a new conditional branch to replace it.
704  // This now becomes a Head terminator.
705  if (isZBranch) {
706  bool isNZ = CmpMI->getOpcode() == AArch64::CBNZW ||
707  CmpMI->getOpcode() == AArch64::CBNZX;
708  BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), TII->get(AArch64::Bcc))
709  .addImm(isNZ ? AArch64CC::NE : AArch64CC::EQ)
710  .add(CmpMI->getOperand(1)); // Branch target.
711  }
712  CmpMI->eraseFromParent();
713  Head->updateTerminator();
714 
715  RemovedBlocks.push_back(CmpBB);
716  CmpBB->eraseFromParent();
717  LLVM_DEBUG(dbgs() << "Result:\n" << *Head);
718  ++NumConverted;
719 }
720 
721 int SSACCmpConv::expectedCodeSizeDelta() const {
722  int delta = 0;
723  // If the Head terminator was one of the cbz / tbz branches with built-in
724  // compare, we need to insert an explicit compare instruction in its place
725  // plus a branch instruction.
726  if (HeadCond[0].getImm() == -1) {
727  switch (HeadCond[1].getImm()) {
728  case AArch64::CBZW:
729  case AArch64::CBNZW:
730  case AArch64::CBZX:
731  case AArch64::CBNZX:
732  // Therefore delta += 1
733  delta = 1;
734  break;
735  default:
736  llvm_unreachable("Cannot convert Head branch");
737  }
738  }
739  // If the Cmp terminator was one of the cbz / tbz branches with
740  // built-in compare, it will be turned into a compare instruction
741  // into Head, but we do not save any instruction.
742  // Otherwise, we save the branch instruction.
743  switch (CmpMI->getOpcode()) {
744  default:
745  --delta;
746  break;
747  case AArch64::CBZW:
748  case AArch64::CBNZW:
749  case AArch64::CBZX:
750  case AArch64::CBNZX:
751  break;
752  }
753  return delta;
754 }
755 
756 //===----------------------------------------------------------------------===//
757 // AArch64ConditionalCompares Pass
758 //===----------------------------------------------------------------------===//
759 
760 namespace {
761 class AArch64ConditionalCompares : public MachineFunctionPass {
762  const MachineBranchProbabilityInfo *MBPI;
763  const TargetInstrInfo *TII;
764  const TargetRegisterInfo *TRI;
765  MCSchedModel SchedModel;
766  // Does the proceeded function has Oz attribute.
767  bool MinSize;
769  MachineDominatorTree *DomTree;
771  MachineTraceMetrics *Traces;
773  SSACCmpConv CmpConv;
774 
775 public:
776  static char ID;
777  AArch64ConditionalCompares() : MachineFunctionPass(ID) {
779  }
780  void getAnalysisUsage(AnalysisUsage &AU) const override;
781  bool runOnMachineFunction(MachineFunction &MF) override;
782  StringRef getPassName() const override {
783  return "AArch64 Conditional Compares";
784  }
785 
786 private:
787  bool tryConvert(MachineBasicBlock *);
788  void updateDomTree(ArrayRef<MachineBasicBlock *> Removed);
789  void updateLoops(ArrayRef<MachineBasicBlock *> Removed);
790  void invalidateTraces();
791  bool shouldConvert();
792 };
793 } // end anonymous namespace
794 
796 
797 INITIALIZE_PASS_BEGIN(AArch64ConditionalCompares, "aarch64-ccmp",
798  "AArch64 CCMP Pass", false, false)
802 INITIALIZE_PASS_END(AArch64ConditionalCompares, "aarch64-ccmp",
803  "AArch64 CCMP Pass", false, false)
804 
806  return new AArch64ConditionalCompares();
807 }
808 
809 void AArch64ConditionalCompares::getAnalysisUsage(AnalysisUsage &AU) const {
818 }
819 
820 /// Update the dominator tree after if-conversion erased some blocks.
821 void AArch64ConditionalCompares::updateDomTree(
823  // convert() removes CmpBB which was previously dominated by Head.
824  // CmpBB children should be transferred to Head.
825  MachineDomTreeNode *HeadNode = DomTree->getNode(CmpConv.Head);
826  for (MachineBasicBlock *RemovedMBB : Removed) {
827  MachineDomTreeNode *Node = DomTree->getNode(RemovedMBB);
828  assert(Node != HeadNode && "Cannot erase the head node");
829  assert(Node->getIDom() == HeadNode && "CmpBB should be dominated by Head");
830  while (Node->getNumChildren())
831  DomTree->changeImmediateDominator(Node->getChildren().back(), HeadNode);
832  DomTree->eraseNode(RemovedMBB);
833  }
834 }
835 
836 /// Update LoopInfo after if-conversion.
837 void
838 AArch64ConditionalCompares::updateLoops(ArrayRef<MachineBasicBlock *> Removed) {
839  if (!Loops)
840  return;
841  for (MachineBasicBlock *RemovedMBB : Removed)
842  Loops->removeBlock(RemovedMBB);
843 }
844 
845 /// Invalidate MachineTraceMetrics before if-conversion.
846 void AArch64ConditionalCompares::invalidateTraces() {
847  Traces->invalidate(CmpConv.Head);
848  Traces->invalidate(CmpConv.CmpBB);
849 }
850 
851 /// Apply cost model and heuristics to the if-conversion in IfConv.
852 /// Return true if the conversion is a good idea.
853 ///
855  // Stress testing mode disables all cost considerations.
856  if (Stress)
857  return true;
858  if (!MinInstr)
859  MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
860 
861  // Head dominates CmpBB, so it is always included in its trace.
862  MachineTraceMetrics::Trace Trace = MinInstr->getTrace(CmpConv.CmpBB);
863 
864  // If code size is the main concern
865  if (MinSize) {
866  int CodeSizeDelta = CmpConv.expectedCodeSizeDelta();
867  LLVM_DEBUG(dbgs() << "Code size delta: " << CodeSizeDelta << '\n');
868  // If we are minimizing the code size, do the conversion whatever
869  // the cost is.
870  if (CodeSizeDelta < 0)
871  return true;
872  if (CodeSizeDelta > 0) {
873  LLVM_DEBUG(dbgs() << "Code size is increasing, give up on this one.\n");
874  return false;
875  }
876  // CodeSizeDelta == 0, continue with the regular heuristics
877  }
878 
879  // Heuristic: The compare conversion delays the execution of the branch
880  // instruction because we must wait for the inputs to the second compare as
881  // well. The branch has no dependent instructions, but delaying it increases
882  // the cost of a misprediction.
883  //
884  // Set a limit on the delay we will accept.
885  unsigned DelayLimit = SchedModel.MispredictPenalty * 3 / 4;
886 
887  // Instruction depths can be computed for all trace instructions above CmpBB.
888  unsigned HeadDepth =
889  Trace.getInstrCycles(*CmpConv.Head->getFirstTerminator()).Depth;
890  unsigned CmpBBDepth =
891  Trace.getInstrCycles(*CmpConv.CmpBB->getFirstTerminator()).Depth;
892  LLVM_DEBUG(dbgs() << "Head depth: " << HeadDepth
893  << "\nCmpBB depth: " << CmpBBDepth << '\n');
894  if (CmpBBDepth > HeadDepth + DelayLimit) {
895  LLVM_DEBUG(dbgs() << "Branch delay would be larger than " << DelayLimit
896  << " cycles.\n");
897  return false;
898  }
899 
900  // Check the resource depth at the bottom of CmpBB - these instructions will
901  // be speculated.
902  unsigned ResDepth = Trace.getResourceDepth(true);
903  LLVM_DEBUG(dbgs() << "Resources: " << ResDepth << '\n');
904 
905  // Heuristic: The speculatively executed instructions must all be able to
906  // merge into the Head block. The Head critical path should dominate the
907  // resource cost of the speculated instructions.
908  if (ResDepth > HeadDepth) {
909  LLVM_DEBUG(dbgs() << "Too many instructions to speculate.\n");
910  return false;
911  }
912  return true;
913 }
914 
915 bool AArch64ConditionalCompares::tryConvert(MachineBasicBlock *MBB) {
916  bool Changed = false;
917  while (CmpConv.canConvert(MBB) && shouldConvert()) {
918  invalidateTraces();
920  CmpConv.convert(RemovedBlocks);
921  Changed = true;
922  updateDomTree(RemovedBlocks);
923  updateLoops(RemovedBlocks);
924  }
925  return Changed;
926 }
927 
928 bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) {
929  LLVM_DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n"
930  << "********** Function: " << MF.getName() << '\n');
931  if (skipFunction(MF.getFunction()))
932  return false;
933 
934  TII = MF.getSubtarget().getInstrInfo();
935  TRI = MF.getSubtarget().getRegisterInfo();
936  SchedModel = MF.getSubtarget().getSchedModel();
937  MRI = &MF.getRegInfo();
938  DomTree = &getAnalysis<MachineDominatorTree>();
939  Loops = getAnalysisIfAvailable<MachineLoopInfo>();
940  MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
941  Traces = &getAnalysis<MachineTraceMetrics>();
942  MinInstr = nullptr;
943  MinSize = MF.getFunction().hasMinSize();
944 
945  bool Changed = false;
946  CmpConv.runOnMachineFunction(MF, MBPI);
947 
948  // Visit blocks in dominator tree pre-order. The pre-order enables multiple
949  // cmp-conversions from the same head block.
950  // Note that updateDomTree() modifies the children of the DomTree node
951  // currently being visited. The df_iterator supports that; it doesn't look at
952  // child_begin() / child_end() until after a node has been visited.
953  for (auto *I : depth_first(DomTree))
954  if (tryConvert(I->getBlock()))
955  Changed = true;
956 
957  return Changed;
958 }
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:80
const MachineInstrBuilder & add(const MachineOperand &MO) const
bool hasSuccessorProbabilities() const
Return true if any of the successors have probabilities attached to them.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
bool use_nodbg_empty(unsigned RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register...
This class represents lattice values for constants.
Definition: AllocatorList.h:23
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:384
static CondCode getInvertedCondCode(CondCode Code)
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:179
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
unsigned Reg
STATISTIC(NumFunctions, "Total number of functions")
unsigned const TargetRegisterInfo * TRI
A debug info location.
Definition: DebugLoc.h:33
A trace ensemble is a collection of traces selected using the same strategy, for example &#39;minimum res...
FunctionPass * createAArch64ConditionalCompares()
static unsigned InstrCount
bool isPHI() const
virtual unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const
Remove the branching code at the end of the specific MBB.
static cl::opt< unsigned > BlockInstrLimit("aarch64-ccmp-limit", cl::init(30), cl::Hidden, cl::desc("Maximum number of instructions per speculated block."))
AnalysisUsage & addRequired()
static cl::opt< bool > Stress("aarch64-stress-ccmp", cl::Hidden, cl::desc("Turn all knobs to 11"))
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
Hexagon Hardware Loops
void eraseFromParent()
This method unlinks &#39;this&#39; from the containing function and deletes it.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
virtual const TargetRegisterClass * getRegClass(const MCInstrDesc &MCID, unsigned OpNum, const TargetRegisterInfo *TRI, const MachineFunction &MF) const
Given a machine instruction descriptor, returns the register class constraint for OpNum...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:41
void eraseFromParent()
Unlink &#39;this&#39; from the containing basic block and delete it.
static bool shouldConvert(Constant &C, AArch64PromoteConstant::PromotionCacheTy &PromotionCache)
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:410
static const char * getCondCodeName(CondCode Code)
PhysRegInfo analyzePhysReg(unsigned Reg, const TargetRegisterInfo *TRI)
analyzePhysReg - Analyze how the current instruction or bundle uses a physical register.
Select the trace through a block that has the fewest instructions.
Base class for the actual dominator tree node.
Definition: LiveRangeCalc.h:37
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
const std::vector< DomTreeNodeBase * > & getChildren() const
virtual const TargetInstrInfo * getInstrInfo() const
void setSuccProbability(succ_iterator I, BranchProbability Prob)
Set successor probability of a given iterator.
const TargetRegisterClass * constrainRegClass(unsigned Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
TargetInstrInfo - Interface to description of machine instruction set.
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
unsigned const MachineRegisterInfo * MRI
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:148
DomTreeNodeBase * getIDom() const
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
InstrCycles getInstrCycles(const MachineInstr &MI) const
Return the depth and height of MI.
Represent the analysis usage information of a pass.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:284
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
A trace represents a plausible sequence of executed basic blocks that passes through the current basi...
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
static uint64_t add(uint64_t LeftOp, uint64_t RightOp)
Definition: FileCheck.cpp:215
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
size_t getNumChildren() const
void updateTerminator()
Update the terminator instructions in block to account for changes to the layout. ...
static bool parseCond(ArrayRef< MachineOperand > Cond, AArch64CC::CondCode &CC)
MIOperands - Iterate over operands of a single instruction.
unsigned pred_size() const
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
static unsigned getNZCVToSatisfyCondCode(CondCode Code)
Given a condition code, return NZCV flags that would satisfy that condition.
static unsigned getReg(const void *D, unsigned RC, unsigned RegNo)
unsigned succ_size() const
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
BranchProbability getEdgeProbability(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const
Representation of each machine instruction.
Definition: MachineInstr.h:63
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB &#39;Other&#39; at the position From, and insert it into this MBB right before &#39;...
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:44
INITIALIZE_PASS_BEGIN(AArch64ConditionalCompares, "aarch64-ccmp", "AArch64 CCMP Pass", false, false) INITIALIZE_PASS_END(AArch64ConditionalCompares
#define I(x, y, z)
Definition: MD5.cpp:58
Tail - This calling convention attemps to make calls as fast as possible while guaranteeing that tail...
Definition: CallingConv.h:81
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:624
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
void initializeAArch64ConditionalComparesPass(PassRegistry &)
unsigned getResourceDepth(bool Bottom) const
Return the resource depth of the top/bottom of the trace center block.
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
iterator_range< df_iterator< T > > depth_first(const T &G)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:69
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:273
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
Register getReg() const
getReg - Returns the register number.
#define LLVM_DEBUG(X)
Definition: Debug.h:122
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:415
Machine model for scheduling, bundling, and heuristics.
Definition: MCSchedule.h:244
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget&#39;s CPU.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19