LLVM 17.0.0git
AArch64ConditionalCompares.cpp
Go to the documentation of this file.
1//===-- AArch64ConditionalCompares.cpp --- CCMP formation for AArch64 -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the AArch64ConditionalCompares pass which reduces
10// branching and code size by using the conditional compare instructions CCMP,
11// CCMN, and FCMP.
12//
13// The CFG transformations for forming conditional compares are very similar to
14// if-conversion, and this pass should run immediately before the early
15// if-conversion pass.
16//
17//===----------------------------------------------------------------------===//
18
19#include "AArch64.h"
22#include "llvm/ADT/Statistic.h"
31#include "llvm/CodeGen/Passes.h"
37#include "llvm/Support/Debug.h"
39
40using namespace llvm;
41
42#define DEBUG_TYPE "aarch64-ccmp"
43
44// Absolute maximum number of instructions allowed per speculated block.
45// This bypasses all other heuristics, so it should be set fairly high.
47 "aarch64-ccmp-limit", cl::init(30), cl::Hidden,
48 cl::desc("Maximum number of instructions per speculated block."));
49
50// Stress testing mode - disable heuristics.
51static cl::opt<bool> Stress("aarch64-stress-ccmp", cl::Hidden,
52 cl::desc("Turn all knobs to 11"));
53
54STATISTIC(NumConsidered, "Number of ccmps considered");
55STATISTIC(NumPhiRejs, "Number of ccmps rejected (PHI)");
56STATISTIC(NumPhysRejs, "Number of ccmps rejected (Physregs)");
57STATISTIC(NumPhi2Rejs, "Number of ccmps rejected (PHI2)");
58STATISTIC(NumHeadBranchRejs, "Number of ccmps rejected (Head branch)");
59STATISTIC(NumCmpBranchRejs, "Number of ccmps rejected (CmpBB branch)");
60STATISTIC(NumCmpTermRejs, "Number of ccmps rejected (CmpBB is cbz...)");
61STATISTIC(NumImmRangeRejs, "Number of ccmps rejected (Imm out of range)");
62STATISTIC(NumLiveDstRejs, "Number of ccmps rejected (Cmp dest live)");
63STATISTIC(NumMultNZCVUses, "Number of ccmps rejected (NZCV used)");
64STATISTIC(NumUnknNZCVDefs, "Number of ccmps rejected (NZCV def unknown)");
65
66STATISTIC(NumSpeculateRejs, "Number of ccmps rejected (Can't speculate)");
67
68STATISTIC(NumConverted, "Number of ccmp instructions created");
69STATISTIC(NumCompBranches, "Number of cbz/cbnz branches converted");
70
71//===----------------------------------------------------------------------===//
72// SSACCmpConv
73//===----------------------------------------------------------------------===//
74//
75// The SSACCmpConv class performs ccmp-conversion on SSA form machine code
76// after determining if it is possible. The class contains no heuristics;
77// external code should be used to determine when ccmp-conversion is a good
78// idea.
79//
80// CCmp-formation works on a CFG representing chained conditions, typically
81// from C's short-circuit || and && operators:
82//
83// From: Head To: Head
84// / | CmpBB
85// / | / |
86// | CmpBB / |
87// | / | Tail |
88// | / | | |
89// Tail | | |
90// | | | |
91// ... ... ... ...
92//
93// The Head block is terminated by a br.cond instruction, and the CmpBB block
94// contains compare + br.cond. Tail must be a successor of both.
95//
96// The cmp-conversion turns the compare instruction in CmpBB into a conditional
97// compare, and merges CmpBB into Head, speculatively executing its
98// instructions. The AArch64 conditional compare instructions have an immediate
99// operand that specifies the NZCV flag values when the condition is false and
100// the compare isn't executed. This makes it possible to chain compares with
101// different condition codes.
102//
103// Example:
104//
105// if (a == 5 || b == 17)
106// foo();
107//
108// Head:
109// cmp w0, #5
110// b.eq Tail
111// CmpBB:
112// cmp w1, #17
113// b.eq Tail
114// ...
115// Tail:
116// bl _foo
117//
118// Becomes:
119//
120// Head:
121// cmp w0, #5
122// ccmp w1, #17, 4, ne ; 4 = nZcv
123// b.eq Tail
124// ...
125// Tail:
126// bl _foo
127//
128// The ccmp condition code is the one that would cause the Head terminator to
129// branch to CmpBB.
130//
131// FIXME: It should also be possible to speculate a block on the critical edge
132// between Head and Tail, just like if-converting a diamond.
133//
134// FIXME: Handle PHIs in Tail by turning them into selects (if-conversion).
135
136namespace {
137class SSACCmpConv {
138 MachineFunction *MF;
139 const TargetInstrInfo *TII;
140 const TargetRegisterInfo *TRI;
143
144public:
145 /// The first block containing a conditional branch, dominating everything
146 /// else.
147 MachineBasicBlock *Head;
148
149 /// The block containing cmp+br.cond with a successor shared with Head.
150 MachineBasicBlock *CmpBB;
151
152 /// The common successor for Head and CmpBB.
154
155 /// The compare instruction in CmpBB that can be converted to a ccmp.
156 MachineInstr *CmpMI;
157
158private:
159 /// The branch condition in Head as determined by analyzeBranch.
161
162 /// The condition code that makes Head branch to CmpBB.
163 AArch64CC::CondCode HeadCmpBBCC;
164
165 /// The branch condition in CmpBB.
167
168 /// The condition code that makes CmpBB branch to Tail.
169 AArch64CC::CondCode CmpBBTailCC;
170
171 /// Check if the Tail PHIs are trivially convertible.
172 bool trivialTailPHIs();
173
174 /// Remove CmpBB from the Tail PHIs.
175 void updateTailPHIs();
176
177 /// Check if an operand defining DstReg is dead.
178 bool isDeadDef(unsigned DstReg);
179
180 /// Find the compare instruction in MBB that controls the conditional branch.
181 /// Return NULL if a convertible instruction can't be found.
182 MachineInstr *findConvertibleCompare(MachineBasicBlock *MBB);
183
184 /// Return true if all non-terminator instructions in MBB can be safely
185 /// speculated.
186 bool canSpeculateInstrs(MachineBasicBlock *MBB, const MachineInstr *CmpMI);
187
188public:
189 /// runOnMachineFunction - Initialize per-function data structures.
190 void runOnMachineFunction(MachineFunction &MF,
191 const MachineBranchProbabilityInfo *MBPI) {
192 this->MF = &MF;
193 this->MBPI = MBPI;
196 MRI = &MF.getRegInfo();
197 }
198
199 /// If the sub-CFG headed by MBB can be cmp-converted, initialize the
200 /// internal state, and return true.
201 bool canConvert(MachineBasicBlock *MBB);
202
203 /// Cmo-convert the last block passed to canConvertCmp(), assuming
204 /// it is possible. Add any erased blocks to RemovedBlocks.
206
207 /// Return the expected code size delta if the conversion into a
208 /// conditional compare is performed.
209 int expectedCodeSizeDelta() const;
210};
211} // end anonymous namespace
212
213// Check that all PHIs in Tail are selecting the same value from Head and CmpBB.
214// This means that no if-conversion is required when merging CmpBB into Head.
215bool SSACCmpConv::trivialTailPHIs() {
216 for (auto &I : *Tail) {
217 if (!I.isPHI())
218 break;
219 unsigned HeadReg = 0, CmpBBReg = 0;
220 // PHI operands come in (VReg, MBB) pairs.
221 for (unsigned oi = 1, oe = I.getNumOperands(); oi != oe; oi += 2) {
222 MachineBasicBlock *MBB = I.getOperand(oi + 1).getMBB();
223 Register Reg = I.getOperand(oi).getReg();
224 if (MBB == Head) {
225 assert((!HeadReg || HeadReg == Reg) && "Inconsistent PHI operands");
226 HeadReg = Reg;
227 }
228 if (MBB == CmpBB) {
229 assert((!CmpBBReg || CmpBBReg == Reg) && "Inconsistent PHI operands");
230 CmpBBReg = Reg;
231 }
232 }
233 if (HeadReg != CmpBBReg)
234 return false;
235 }
236 return true;
237}
238
239// Assuming that trivialTailPHIs() is true, update the Tail PHIs by simply
240// removing the CmpBB operands. The Head operands will be identical.
241void SSACCmpConv::updateTailPHIs() {
242 for (auto &I : *Tail) {
243 if (!I.isPHI())
244 break;
245 // I is a PHI. It can have multiple entries for CmpBB.
246 for (unsigned oi = I.getNumOperands(); oi > 2; oi -= 2) {
247 // PHI operands are (Reg, MBB) at (oi-2, oi-1).
248 if (I.getOperand(oi - 1).getMBB() == CmpBB) {
249 I.removeOperand(oi - 1);
250 I.removeOperand(oi - 2);
251 }
252 }
253 }
254}
255
256// This pass runs before the AArch64DeadRegisterDefinitions pass, so compares
257// are still writing virtual registers without any uses.
258bool SSACCmpConv::isDeadDef(unsigned DstReg) {
259 // Writes to the zero register are dead.
260 if (DstReg == AArch64::WZR || DstReg == AArch64::XZR)
261 return true;
262 if (!Register::isVirtualRegister(DstReg))
263 return false;
264 // A virtual register def without any uses will be marked dead later, and
265 // eventually replaced by the zero register.
266 return MRI->use_nodbg_empty(DstReg);
267}
268
269// Parse a condition code returned by analyzeBranch, and compute the CondCode
270// corresponding to TBB.
271// Return
273 // A normal br.cond simply has the condition code.
274 if (Cond[0].getImm() != -1) {
275 assert(Cond.size() == 1 && "Unknown Cond array format");
276 CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
277 return true;
278 }
279 // For tbz and cbz instruction, the opcode is next.
280 switch (Cond[1].getImm()) {
281 default:
282 // This includes tbz / tbnz branches which can't be converted to
283 // ccmp + br.cond.
284 return false;
285 case AArch64::CBZW:
286 case AArch64::CBZX:
287 assert(Cond.size() == 3 && "Unknown Cond array format");
289 return true;
290 case AArch64::CBNZW:
291 case AArch64::CBNZX:
292 assert(Cond.size() == 3 && "Unknown Cond array format");
294 return true;
295 }
296}
297
298MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) {
300 if (I == MBB->end())
301 return nullptr;
302 // The terminator must be controlled by the flags.
303 if (!I->readsRegister(AArch64::NZCV)) {
304 switch (I->getOpcode()) {
305 case AArch64::CBZW:
306 case AArch64::CBZX:
307 case AArch64::CBNZW:
308 case AArch64::CBNZX:
309 // These can be converted into a ccmp against #0.
310 return &*I;
311 }
312 ++NumCmpTermRejs;
313 LLVM_DEBUG(dbgs() << "Flags not used by terminator: " << *I);
314 return nullptr;
315 }
316
317 // Now find the instruction controlling the terminator.
318 for (MachineBasicBlock::iterator B = MBB->begin(); I != B;) {
319 I = prev_nodbg(I, MBB->begin());
320 assert(!I->isTerminator() && "Spurious terminator");
321 switch (I->getOpcode()) {
322 // cmp is an alias for subs with a dead destination register.
323 case AArch64::SUBSWri:
324 case AArch64::SUBSXri:
325 // cmn is an alias for adds with a dead destination register.
326 case AArch64::ADDSWri:
327 case AArch64::ADDSXri:
328 // Check that the immediate operand is within range, ccmp wants a uimm5.
329 // Rd = SUBSri Rn, imm, shift
330 if (I->getOperand(3).getImm() || !isUInt<5>(I->getOperand(2).getImm())) {
331 LLVM_DEBUG(dbgs() << "Immediate out of range for ccmp: " << *I);
332 ++NumImmRangeRejs;
333 return nullptr;
334 }
335 [[fallthrough]];
336 case AArch64::SUBSWrr:
337 case AArch64::SUBSXrr:
338 case AArch64::ADDSWrr:
339 case AArch64::ADDSXrr:
340 if (isDeadDef(I->getOperand(0).getReg()))
341 return &*I;
342 LLVM_DEBUG(dbgs() << "Can't convert compare with live destination: "
343 << *I);
344 ++NumLiveDstRejs;
345 return nullptr;
346 case AArch64::FCMPSrr:
347 case AArch64::FCMPDrr:
348 case AArch64::FCMPESrr:
349 case AArch64::FCMPEDrr:
350 return &*I;
351 }
352
353 // Check for flag reads and clobbers.
354 PhysRegInfo PRI = AnalyzePhysRegInBundle(*I, AArch64::NZCV, TRI);
355
356 if (PRI.Read) {
357 // The ccmp doesn't produce exactly the same flags as the original
358 // compare, so reject the transform if there are uses of the flags
359 // besides the terminators.
360 LLVM_DEBUG(dbgs() << "Can't create ccmp with multiple uses: " << *I);
361 ++NumMultNZCVUses;
362 return nullptr;
363 }
364
365 if (PRI.Defined || PRI.Clobbered) {
366 LLVM_DEBUG(dbgs() << "Not convertible compare: " << *I);
367 ++NumUnknNZCVDefs;
368 return nullptr;
369 }
370 }
371 LLVM_DEBUG(dbgs() << "Flags not defined in " << printMBBReference(*MBB)
372 << '\n');
373 return nullptr;
374}
375
376/// Determine if all the instructions in MBB can safely
377/// be speculated. The terminators are not considered.
378///
379/// Only CmpMI is allowed to clobber the flags.
380///
381bool SSACCmpConv::canSpeculateInstrs(MachineBasicBlock *MBB,
382 const MachineInstr *CmpMI) {
383 // Reject any live-in physregs. It's probably NZCV/EFLAGS, and very hard to
384 // get right.
385 if (!MBB->livein_empty()) {
386 LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has live-ins.\n");
387 return false;
388 }
389
390 unsigned InstrCount = 0;
391
392 // Check all instructions, except the terminators. It is assumed that
393 // terminators never have side effects or define any used register values.
394 for (auto &I : make_range(MBB->begin(), MBB->getFirstTerminator())) {
395 if (I.isDebugInstr())
396 continue;
397
398 if (++InstrCount > BlockInstrLimit && !Stress) {
399 LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has more than "
400 << BlockInstrLimit << " instructions.\n");
401 return false;
402 }
403
404 // There shouldn't normally be any phis in a single-predecessor block.
405 if (I.isPHI()) {
406 LLVM_DEBUG(dbgs() << "Can't hoist: " << I);
407 return false;
408 }
409
410 // Don't speculate loads. Note that it may be possible and desirable to
411 // speculate GOT or constant pool loads that are guaranteed not to trap,
412 // but we don't support that for now.
413 if (I.mayLoad()) {
414 LLVM_DEBUG(dbgs() << "Won't speculate load: " << I);
415 return false;
416 }
417
418 // We never speculate stores, so an AA pointer isn't necessary.
419 bool DontMoveAcrossStore = true;
420 if (!I.isSafeToMove(nullptr, DontMoveAcrossStore)) {
421 LLVM_DEBUG(dbgs() << "Can't speculate: " << I);
422 return false;
423 }
424
425 // Only CmpMI is allowed to clobber the flags.
426 if (&I != CmpMI && I.modifiesRegister(AArch64::NZCV, TRI)) {
427 LLVM_DEBUG(dbgs() << "Clobbers flags: " << I);
428 return false;
429 }
430 }
431 return true;
432}
433
434/// Analyze the sub-cfg rooted in MBB, and return true if it is a potential
435/// candidate for cmp-conversion. Fill out the internal state.
436///
437bool SSACCmpConv::canConvert(MachineBasicBlock *MBB) {
438 Head = MBB;
439 Tail = CmpBB = nullptr;
440
441 if (Head->succ_size() != 2)
442 return false;
443 MachineBasicBlock *Succ0 = Head->succ_begin()[0];
444 MachineBasicBlock *Succ1 = Head->succ_begin()[1];
445
446 // CmpBB can only have a single predecessor. Tail is allowed many.
447 if (Succ0->pred_size() != 1)
448 std::swap(Succ0, Succ1);
449
450 // Succ0 is our candidate for CmpBB.
451 if (Succ0->pred_size() != 1 || Succ0->succ_size() != 2)
452 return false;
453
454 CmpBB = Succ0;
455 Tail = Succ1;
456
457 if (!CmpBB->isSuccessor(Tail))
458 return false;
459
460 // The CFG topology checks out.
461 LLVM_DEBUG(dbgs() << "\nTriangle: " << printMBBReference(*Head) << " -> "
462 << printMBBReference(*CmpBB) << " -> "
463 << printMBBReference(*Tail) << '\n');
464 ++NumConsidered;
465
466 // Tail is allowed to have many predecessors, but we can't handle PHIs yet.
467 //
468 // FIXME: Real PHIs could be if-converted as long as the CmpBB values are
469 // defined before The CmpBB cmp clobbers the flags. Alternatively, it should
470 // always be safe to sink the ccmp down to immediately before the CmpBB
471 // terminators.
472 if (!trivialTailPHIs()) {
473 LLVM_DEBUG(dbgs() << "Can't handle phis in Tail.\n");
474 ++NumPhiRejs;
475 return false;
476 }
477
478 if (!Tail->livein_empty()) {
479 LLVM_DEBUG(dbgs() << "Can't handle live-in physregs in Tail.\n");
480 ++NumPhysRejs;
481 return false;
482 }
483
484 // CmpBB should never have PHIs since Head is its only predecessor.
485 // FIXME: Clean them up if it happens.
486 if (!CmpBB->empty() && CmpBB->front().isPHI()) {
487 LLVM_DEBUG(dbgs() << "Can't handle phis in CmpBB.\n");
488 ++NumPhi2Rejs;
489 return false;
490 }
491
492 if (!CmpBB->livein_empty()) {
493 LLVM_DEBUG(dbgs() << "Can't handle live-in physregs in CmpBB.\n");
494 ++NumPhysRejs;
495 return false;
496 }
497
498 // The branch we're looking to eliminate must be analyzable.
499 HeadCond.clear();
500 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
501 if (TII->analyzeBranch(*Head, TBB, FBB, HeadCond)) {
502 LLVM_DEBUG(dbgs() << "Head branch not analyzable.\n");
503 ++NumHeadBranchRejs;
504 return false;
505 }
506
507 // This is weird, probably some sort of degenerate CFG, or an edge to a
508 // landing pad.
509 if (!TBB || HeadCond.empty()) {
511 dbgs() << "analyzeBranch didn't find conditional branch in Head.\n");
512 ++NumHeadBranchRejs;
513 return false;
514 }
515
516 if (!parseCond(HeadCond, HeadCmpBBCC)) {
517 LLVM_DEBUG(dbgs() << "Unsupported branch type on Head\n");
518 ++NumHeadBranchRejs;
519 return false;
520 }
521
522 // Make sure the branch direction is right.
523 if (TBB != CmpBB) {
524 assert(TBB == Tail && "Unexpected TBB");
525 HeadCmpBBCC = AArch64CC::getInvertedCondCode(HeadCmpBBCC);
526 }
527
528 CmpBBCond.clear();
529 TBB = FBB = nullptr;
530 if (TII->analyzeBranch(*CmpBB, TBB, FBB, CmpBBCond)) {
531 LLVM_DEBUG(dbgs() << "CmpBB branch not analyzable.\n");
532 ++NumCmpBranchRejs;
533 return false;
534 }
535
536 if (!TBB || CmpBBCond.empty()) {
538 dbgs() << "analyzeBranch didn't find conditional branch in CmpBB.\n");
539 ++NumCmpBranchRejs;
540 return false;
541 }
542
543 if (!parseCond(CmpBBCond, CmpBBTailCC)) {
544 LLVM_DEBUG(dbgs() << "Unsupported branch type on CmpBB\n");
545 ++NumCmpBranchRejs;
546 return false;
547 }
548
549 if (TBB != Tail)
550 CmpBBTailCC = AArch64CC::getInvertedCondCode(CmpBBTailCC);
551
552 LLVM_DEBUG(dbgs() << "Head->CmpBB on "
553 << AArch64CC::getCondCodeName(HeadCmpBBCC)
554 << ", CmpBB->Tail on "
555 << AArch64CC::getCondCodeName(CmpBBTailCC) << '\n');
556
557 CmpMI = findConvertibleCompare(CmpBB);
558 if (!CmpMI)
559 return false;
560
561 if (!canSpeculateInstrs(CmpBB, CmpMI)) {
562 ++NumSpeculateRejs;
563 return false;
564 }
565 return true;
566}
567
568void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) {
569 LLVM_DEBUG(dbgs() << "Merging " << printMBBReference(*CmpBB) << " into "
570 << printMBBReference(*Head) << ":\n"
571 << *CmpBB);
572
573 // All CmpBB instructions are moved into Head, and CmpBB is deleted.
574 // Update the CFG first.
575 updateTailPHIs();
576
577 // Save successor probabilties before removing CmpBB and Tail from their
578 // parents.
579 BranchProbability Head2CmpBB = MBPI->getEdgeProbability(Head, CmpBB);
580 BranchProbability CmpBB2Tail = MBPI->getEdgeProbability(CmpBB, Tail);
581
582 Head->removeSuccessor(CmpBB);
583 CmpBB->removeSuccessor(Tail);
584
585 // If Head and CmpBB had successor probabilties, udpate the probabilities to
586 // reflect the ccmp-conversion.
588
589 // Head is allowed two successors. We've removed CmpBB, so the remaining
590 // successor is Tail. We need to increase the successor probability for
591 // Tail to account for the CmpBB path we removed.
592 //
593 // Pr(Tail|Head) += Pr(CmpBB|Head) * Pr(Tail|CmpBB).
594 assert(*Head->succ_begin() == Tail && "Head successor is not Tail");
595 BranchProbability Head2Tail = MBPI->getEdgeProbability(Head, Tail);
596 Head->setSuccProbability(Head->succ_begin(),
597 Head2Tail + Head2CmpBB * CmpBB2Tail);
598
599 // We will transfer successors of CmpBB to Head in a moment without
600 // normalizing the successor probabilities. Set the successor probabilites
601 // before doing so.
602 //
603 // Pr(I|Head) = Pr(CmpBB|Head) * Pr(I|CmpBB).
604 for (auto I = CmpBB->succ_begin(), E = CmpBB->succ_end(); I != E; ++I) {
605 BranchProbability CmpBB2I = MBPI->getEdgeProbability(CmpBB, *I);
606 CmpBB->setSuccProbability(I, Head2CmpBB * CmpBB2I);
607 }
608 }
609
611 DebugLoc TermDL = Head->getFirstTerminator()->getDebugLoc();
612 TII->removeBranch(*Head);
613
614 // If the Head terminator was one of the cbz / tbz branches with built-in
615 // compare, we need to insert an explicit compare instruction in its place.
616 if (HeadCond[0].getImm() == -1) {
617 ++NumCompBranches;
618 unsigned Opc = 0;
619 switch (HeadCond[1].getImm()) {
620 case AArch64::CBZW:
621 case AArch64::CBNZW:
622 Opc = AArch64::SUBSWri;
623 break;
624 case AArch64::CBZX:
625 case AArch64::CBNZX:
626 Opc = AArch64::SUBSXri;
627 break;
628 default:
629 llvm_unreachable("Cannot convert Head branch");
630 }
631 const MCInstrDesc &MCID = TII->get(Opc);
632 // Create a dummy virtual register for the SUBS def.
633 Register DestReg =
634 MRI->createVirtualRegister(TII->getRegClass(MCID, 0, TRI, *MF));
635 // Insert a SUBS Rn, #0 instruction instead of the cbz / cbnz.
636 BuildMI(*Head, Head->end(), TermDL, MCID)
638 .add(HeadCond[2])
639 .addImm(0)
640 .addImm(0);
641 // SUBS uses the GPR*sp register classes.
642 MRI->constrainRegClass(HeadCond[2].getReg(),
643 TII->getRegClass(MCID, 1, TRI, *MF));
644 }
645
646 Head->splice(Head->end(), CmpBB, CmpBB->begin(), CmpBB->end());
647
648 // Now replace CmpMI with a ccmp instruction that also considers the incoming
649 // flags.
650 unsigned Opc = 0;
651 unsigned FirstOp = 1; // First CmpMI operand to copy.
652 bool isZBranch = false; // CmpMI is a cbz/cbnz instruction.
653 switch (CmpMI->getOpcode()) {
654 default:
655 llvm_unreachable("Unknown compare opcode");
656 case AArch64::SUBSWri: Opc = AArch64::CCMPWi; break;
657 case AArch64::SUBSWrr: Opc = AArch64::CCMPWr; break;
658 case AArch64::SUBSXri: Opc = AArch64::CCMPXi; break;
659 case AArch64::SUBSXrr: Opc = AArch64::CCMPXr; break;
660 case AArch64::ADDSWri: Opc = AArch64::CCMNWi; break;
661 case AArch64::ADDSWrr: Opc = AArch64::CCMNWr; break;
662 case AArch64::ADDSXri: Opc = AArch64::CCMNXi; break;
663 case AArch64::ADDSXrr: Opc = AArch64::CCMNXr; break;
664 case AArch64::FCMPSrr: Opc = AArch64::FCCMPSrr; FirstOp = 0; break;
665 case AArch64::FCMPDrr: Opc = AArch64::FCCMPDrr; FirstOp = 0; break;
666 case AArch64::FCMPESrr: Opc = AArch64::FCCMPESrr; FirstOp = 0; break;
667 case AArch64::FCMPEDrr: Opc = AArch64::FCCMPEDrr; FirstOp = 0; break;
668 case AArch64::CBZW:
669 case AArch64::CBNZW:
670 Opc = AArch64::CCMPWi;
671 FirstOp = 0;
672 isZBranch = true;
673 break;
674 case AArch64::CBZX:
675 case AArch64::CBNZX:
676 Opc = AArch64::CCMPXi;
677 FirstOp = 0;
678 isZBranch = true;
679 break;
680 }
681
682 // The ccmp instruction should set the flags according to the comparison when
683 // Head would have branched to CmpBB.
684 // The NZCV immediate operand should provide flags for the case where Head
685 // would have branched to Tail. These flags should cause the new Head
686 // terminator to branch to tail.
687 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(CmpBBTailCC);
688 const MCInstrDesc &MCID = TII->get(Opc);
689 MRI->constrainRegClass(CmpMI->getOperand(FirstOp).getReg(),
690 TII->getRegClass(MCID, 0, TRI, *MF));
691 if (CmpMI->getOperand(FirstOp + 1).isReg())
692 MRI->constrainRegClass(CmpMI->getOperand(FirstOp + 1).getReg(),
693 TII->getRegClass(MCID, 1, TRI, *MF));
694 MachineInstrBuilder MIB = BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), MCID)
695 .add(CmpMI->getOperand(FirstOp)); // Register Rn
696 if (isZBranch)
697 MIB.addImm(0); // cbz/cbnz Rn -> ccmp Rn, #0
698 else
699 MIB.add(CmpMI->getOperand(FirstOp + 1)); // Register Rm / Immediate
700 MIB.addImm(NZCV).addImm(HeadCmpBBCC);
701
702 // If CmpMI was a terminator, we need a new conditional branch to replace it.
703 // This now becomes a Head terminator.
704 if (isZBranch) {
705 bool isNZ = CmpMI->getOpcode() == AArch64::CBNZW ||
706 CmpMI->getOpcode() == AArch64::CBNZX;
707 BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), TII->get(AArch64::Bcc))
709 .add(CmpMI->getOperand(1)); // Branch target.
710 }
711 CmpMI->eraseFromParent();
712 Head->updateTerminator(CmpBB->getNextNode());
713
714 RemovedBlocks.push_back(CmpBB);
715 CmpBB->eraseFromParent();
716 LLVM_DEBUG(dbgs() << "Result:\n" << *Head);
717 ++NumConverted;
718}
719
720int SSACCmpConv::expectedCodeSizeDelta() const {
721 int delta = 0;
722 // If the Head terminator was one of the cbz / tbz branches with built-in
723 // compare, we need to insert an explicit compare instruction in its place
724 // plus a branch instruction.
725 if (HeadCond[0].getImm() == -1) {
726 switch (HeadCond[1].getImm()) {
727 case AArch64::CBZW:
728 case AArch64::CBNZW:
729 case AArch64::CBZX:
730 case AArch64::CBNZX:
731 // Therefore delta += 1
732 delta = 1;
733 break;
734 default:
735 llvm_unreachable("Cannot convert Head branch");
736 }
737 }
738 // If the Cmp terminator was one of the cbz / tbz branches with
739 // built-in compare, it will be turned into a compare instruction
740 // into Head, but we do not save any instruction.
741 // Otherwise, we save the branch instruction.
742 switch (CmpMI->getOpcode()) {
743 default:
744 --delta;
745 break;
746 case AArch64::CBZW:
747 case AArch64::CBNZW:
748 case AArch64::CBZX:
749 case AArch64::CBNZX:
750 break;
751 }
752 return delta;
753}
754
755//===----------------------------------------------------------------------===//
756// AArch64ConditionalCompares Pass
757//===----------------------------------------------------------------------===//
758
759namespace {
760class AArch64ConditionalCompares : public MachineFunctionPass {
762 const TargetInstrInfo *TII;
763 const TargetRegisterInfo *TRI;
764 MCSchedModel SchedModel;
765 // Does the proceeded function has Oz attribute.
766 bool MinSize;
768 MachineDominatorTree *DomTree;
770 MachineTraceMetrics *Traces;
772 SSACCmpConv CmpConv;
773
774public:
775 static char ID;
776 AArch64ConditionalCompares() : MachineFunctionPass(ID) {
778 }
779 void getAnalysisUsage(AnalysisUsage &AU) const override;
780 bool runOnMachineFunction(MachineFunction &MF) override;
781 StringRef getPassName() const override {
782 return "AArch64 Conditional Compares";
783 }
784
785private:
786 bool tryConvert(MachineBasicBlock *);
787 void updateDomTree(ArrayRef<MachineBasicBlock *> Removed);
788 void updateLoops(ArrayRef<MachineBasicBlock *> Removed);
789 void invalidateTraces();
790 bool shouldConvert();
791};
792} // end anonymous namespace
793
794char AArch64ConditionalCompares::ID = 0;
795
796INITIALIZE_PASS_BEGIN(AArch64ConditionalCompares, "aarch64-ccmp",
797 "AArch64 CCMP Pass", false, false)
801INITIALIZE_PASS_END(AArch64ConditionalCompares, "aarch64-ccmp",
802 "AArch64 CCMP Pass", false, false)
803
805 return new AArch64ConditionalCompares();
806}
807
808void AArch64ConditionalCompares::getAnalysisUsage(AnalysisUsage &AU) const {
817}
818
819/// Update the dominator tree after if-conversion erased some blocks.
820void AArch64ConditionalCompares::updateDomTree(
822 // convert() removes CmpBB which was previously dominated by Head.
823 // CmpBB children should be transferred to Head.
824 MachineDomTreeNode *HeadNode = DomTree->getNode(CmpConv.Head);
825 for (MachineBasicBlock *RemovedMBB : Removed) {
826 MachineDomTreeNode *Node = DomTree->getNode(RemovedMBB);
827 assert(Node != HeadNode && "Cannot erase the head node");
828 assert(Node->getIDom() == HeadNode && "CmpBB should be dominated by Head");
829 while (Node->getNumChildren())
830 DomTree->changeImmediateDominator(Node->back(), HeadNode);
831 DomTree->eraseNode(RemovedMBB);
832 }
833}
834
835/// Update LoopInfo after if-conversion.
836void
837AArch64ConditionalCompares::updateLoops(ArrayRef<MachineBasicBlock *> Removed) {
838 if (!Loops)
839 return;
840 for (MachineBasicBlock *RemovedMBB : Removed)
841 Loops->removeBlock(RemovedMBB);
842}
843
844/// Invalidate MachineTraceMetrics before if-conversion.
845void AArch64ConditionalCompares::invalidateTraces() {
846 Traces->invalidate(CmpConv.Head);
847 Traces->invalidate(CmpConv.CmpBB);
848}
849
850/// Apply cost model and heuristics to the if-conversion in IfConv.
851/// Return true if the conversion is a good idea.
852///
853bool AArch64ConditionalCompares::shouldConvert() {
854 // Stress testing mode disables all cost considerations.
855 if (Stress)
856 return true;
857 if (!MinInstr)
858 MinInstr = Traces->getEnsemble(MachineTraceStrategy::TS_MinInstrCount);
859
860 // Head dominates CmpBB, so it is always included in its trace.
861 MachineTraceMetrics::Trace Trace = MinInstr->getTrace(CmpConv.CmpBB);
862
863 // If code size is the main concern
864 if (MinSize) {
865 int CodeSizeDelta = CmpConv.expectedCodeSizeDelta();
866 LLVM_DEBUG(dbgs() << "Code size delta: " << CodeSizeDelta << '\n');
867 // If we are minimizing the code size, do the conversion whatever
868 // the cost is.
869 if (CodeSizeDelta < 0)
870 return true;
871 if (CodeSizeDelta > 0) {
872 LLVM_DEBUG(dbgs() << "Code size is increasing, give up on this one.\n");
873 return false;
874 }
875 // CodeSizeDelta == 0, continue with the regular heuristics
876 }
877
878 // Heuristic: The compare conversion delays the execution of the branch
879 // instruction because we must wait for the inputs to the second compare as
880 // well. The branch has no dependent instructions, but delaying it increases
881 // the cost of a misprediction.
882 //
883 // Set a limit on the delay we will accept.
884 unsigned DelayLimit = SchedModel.MispredictPenalty * 3 / 4;
885
886 // Instruction depths can be computed for all trace instructions above CmpBB.
887 unsigned HeadDepth =
888 Trace.getInstrCycles(*CmpConv.Head->getFirstTerminator()).Depth;
889 unsigned CmpBBDepth =
890 Trace.getInstrCycles(*CmpConv.CmpBB->getFirstTerminator()).Depth;
891 LLVM_DEBUG(dbgs() << "Head depth: " << HeadDepth
892 << "\nCmpBB depth: " << CmpBBDepth << '\n');
893 if (CmpBBDepth > HeadDepth + DelayLimit) {
894 LLVM_DEBUG(dbgs() << "Branch delay would be larger than " << DelayLimit
895 << " cycles.\n");
896 return false;
897 }
898
899 // Check the resource depth at the bottom of CmpBB - these instructions will
900 // be speculated.
901 unsigned ResDepth = Trace.getResourceDepth(true);
902 LLVM_DEBUG(dbgs() << "Resources: " << ResDepth << '\n');
903
904 // Heuristic: The speculatively executed instructions must all be able to
905 // merge into the Head block. The Head critical path should dominate the
906 // resource cost of the speculated instructions.
907 if (ResDepth > HeadDepth) {
908 LLVM_DEBUG(dbgs() << "Too many instructions to speculate.\n");
909 return false;
910 }
911 return true;
912}
913
914bool AArch64ConditionalCompares::tryConvert(MachineBasicBlock *MBB) {
915 bool Changed = false;
916 while (CmpConv.canConvert(MBB) && shouldConvert()) {
917 invalidateTraces();
919 CmpConv.convert(RemovedBlocks);
920 Changed = true;
921 updateDomTree(RemovedBlocks);
922 updateLoops(RemovedBlocks);
923 }
924 return Changed;
925}
926
927bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) {
928 LLVM_DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n"
929 << "********** Function: " << MF.getName() << '\n');
930 if (skipFunction(MF.getFunction()))
931 return false;
932
935 SchedModel = MF.getSubtarget().getSchedModel();
936 MRI = &MF.getRegInfo();
937 DomTree = &getAnalysis<MachineDominatorTree>();
938 Loops = getAnalysisIfAvailable<MachineLoopInfo>();
939 MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
940 Traces = &getAnalysis<MachineTraceMetrics>();
941 MinInstr = nullptr;
942 MinSize = MF.getFunction().hasMinSize();
943
944 bool Changed = false;
945 CmpConv.runOnMachineFunction(MF, MBPI);
946
947 // Visit blocks in dominator tree pre-order. The pre-order enables multiple
948 // cmp-conversions from the same head block.
949 // Note that updateDomTree() modifies the children of the DomTree node
950 // currently being visited. The df_iterator supports that; it doesn't look at
951 // child_begin() / child_end() until after a node has been visited.
952 for (auto *I : depth_first(DomTree))
953 if (tryConvert(I->getBlock()))
954 Changed = true;
955
956 return Changed;
957}
unsigned const MachineRegisterInfo * MRI
static cl::opt< bool > Stress("aarch64-stress-ccmp", cl::Hidden, cl::desc("Turn all knobs to 11"))
static cl::opt< unsigned > BlockInstrLimit("aarch64-ccmp-limit", cl::init(30), cl::Hidden, cl::desc("Maximum number of instructions per speculated block."))
static bool parseCond(ArrayRef< MachineOperand > Cond, AArch64CC::CondCode &CC)
static bool shouldConvert(Constant &C, AArch64PromoteConstant::PromotionCacheTy &PromotionCache)
MachineBasicBlock & MBB
SmallVector< MachineOperand, 4 > Cond
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static unsigned InstrCount
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
expand large fp convert
const HexagonInstrInfo * TII
Hexagon Hardware Loops
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallPtrSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
A debug info location.
Definition: DebugLoc.h:33
Base class for the actual dominator tree node.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:646
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
Remove the branching code at the end of the specific MBB.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
unsigned pred_size() const
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void updateTerminator(MachineBasicBlock *PreviousLayoutSuccessor)
Update the terminator instructions in block to account for changes to block layout which may have bee...
void setSuccProbability(succ_iterator I, BranchProbability Prob)
Set successor probability of a given iterator.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned succ_size() const
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
bool hasSuccessorProbabilities() const
Return true if any of the successors have probabilities attached to them.
void eraseFromParent()
This method unlinks 'this' from the containing function and deletes it.
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
BranchProbability getEdgeProbability(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
Definition: MachineInstr.h:68
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:516
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:445
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
bool isPHI() const
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:526
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
A trace ensemble is a collection of traces selected using the same strategy, for example 'minimum res...
A trace represents a plausible sequence of executed basic blocks that passes through the current basi...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:94
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
bool empty() const
Definition: SmallVector.h:94
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:289
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static const char * getCondCodeName(CondCode Code)
static CondCode getInvertedCondCode(CondCode Code)
static unsigned getNZCVToSatisfyCondCode(CondCode Code)
Given a condition code, return NZCV flags that would satisfy that condition.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ Tail
Attemps to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ Dead
Unused definition.
@ Define
Register definition.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
PhysRegInfo AnalyzePhysRegInBundle(const MachineInstr &MI, Register Reg, const TargetRegisterInfo *TRI)
AnalyzePhysRegInBundle - Analyze how the current instruction or bundle uses a physical register.
FunctionPass * createAArch64ConditionalCompares()
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
iterator_range< df_iterator< T > > depth_first(const T &G)
void initializeAArch64ConditionalComparesPass(PassRegistry &)
IterT prev_nodbg(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It, then continue decrementing it while it points to a debug instruction.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
Machine model for scheduling, bundling, and heuristics.
Definition: MCSchedule.h:249
Information about how a physical register Reg is used by a set of operands.
bool Read
Reg or one of its aliases is read.
bool Defined
Reg or one of its aliases is defined.
bool Clobbered
There is a regmask operand indicating Reg is clobbered.