LLVM 23.0.0git
AArch64ConditionalCompares.cpp
Go to the documentation of this file.
1//===-- AArch64ConditionalCompares.cpp --- CCMP formation for AArch64 -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the AArch64ConditionalCompares pass which reduces
10// branching and code size by using the conditional compare instructions CCMP,
11// CCMN, and FCMP.
12//
13// The CFG transformations for forming conditional compares are very similar to
14// if-conversion, and this pass should run immediately before the early
15// if-conversion pass.
16//
17//===----------------------------------------------------------------------===//
18
19#include "AArch64.h"
21#include "llvm/ADT/Statistic.h"
30#include "llvm/CodeGen/Passes.h"
36#include "llvm/Support/Debug.h"
38
39using namespace llvm;
40
41#define DEBUG_TYPE "aarch64-ccmp"
42
43// Absolute maximum number of instructions allowed per speculated block.
44// This bypasses all other heuristics, so it should be set fairly high.
46 "aarch64-ccmp-limit", cl::init(30), cl::Hidden,
47 cl::desc("Maximum number of instructions per speculated block."));
48
49// Stress testing mode - disable heuristics.
50static cl::opt<bool> Stress("aarch64-stress-ccmp", cl::Hidden,
51 cl::desc("Turn all knobs to 11"));
52
53STATISTIC(NumConsidered, "Number of ccmps considered");
54STATISTIC(NumPhiRejs, "Number of ccmps rejected (PHI)");
55STATISTIC(NumPhysRejs, "Number of ccmps rejected (Physregs)");
56STATISTIC(NumPhi2Rejs, "Number of ccmps rejected (PHI2)");
57STATISTIC(NumHeadBranchRejs, "Number of ccmps rejected (Head branch)");
58STATISTIC(NumCmpBranchRejs, "Number of ccmps rejected (CmpBB branch)");
59STATISTIC(NumCmpTermRejs, "Number of ccmps rejected (CmpBB is cbz...)");
60STATISTIC(NumImmRangeRejs, "Number of ccmps rejected (Imm out of range)");
61STATISTIC(NumLiveDstRejs, "Number of ccmps rejected (Cmp dest live)");
62STATISTIC(NumMultNZCVUses, "Number of ccmps rejected (NZCV used)");
63STATISTIC(NumUnknNZCVDefs, "Number of ccmps rejected (NZCV def unknown)");
64
65STATISTIC(NumSpeculateRejs, "Number of ccmps rejected (Can't speculate)");
66
67STATISTIC(NumConverted, "Number of ccmp instructions created");
68STATISTIC(NumCompBranches, "Number of cbz/cbnz branches converted");
69
70//===----------------------------------------------------------------------===//
71// SSACCmpConv
72//===----------------------------------------------------------------------===//
73//
74// The SSACCmpConv class performs ccmp-conversion on SSA form machine code
75// after determining if it is possible. The class contains no heuristics;
76// external code should be used to determine when ccmp-conversion is a good
77// idea.
78//
79// CCmp-formation works on a CFG representing chained conditions, typically
80// from C's short-circuit || and && operators:
81//
82// From: Head To: Head
83// / | CmpBB
84// / | / |
85// | CmpBB / |
86// | / | Tail |
87// | / | | |
88// Tail | | |
89// | | | |
90// ... ... ... ...
91//
92// The Head block is terminated by a br.cond instruction, and the CmpBB block
93// contains compare + br.cond. Tail must be a successor of both.
94//
95// The cmp-conversion turns the compare instruction in CmpBB into a conditional
96// compare, and merges CmpBB into Head, speculatively executing its
97// instructions. The AArch64 conditional compare instructions have an immediate
98// operand that specifies the NZCV flag values when the condition is false and
99// the compare isn't executed. This makes it possible to chain compares with
100// different condition codes.
101//
102// Example:
103//
104// if (a == 5 || b == 17)
105// foo();
106//
107// Head:
108// cmp w0, #5
109// b.eq Tail
110// CmpBB:
111// cmp w1, #17
112// b.eq Tail
113// ...
114// Tail:
115// bl _foo
116//
117// Becomes:
118//
119// Head:
120// cmp w0, #5
121// ccmp w1, #17, 4, ne ; 4 = nZcv
122// b.eq Tail
123// ...
124// Tail:
125// bl _foo
126//
127// The ccmp condition code is the one that would cause the Head terminator to
128// branch to CmpBB.
129//
130// FIXME: It should also be possible to speculate a block on the critical edge
131// between Head and Tail, just like if-converting a diamond.
132//
133// FIXME: Handle PHIs in Tail by turning them into selects (if-conversion).
134
135namespace {
136class SSACCmpConv {
137 MachineFunction *MF;
138 const TargetInstrInfo *TII;
139 const TargetRegisterInfo *TRI;
142
143public:
144 /// The first block containing a conditional branch, dominating everything
145 /// else.
146 MachineBasicBlock *Head;
147
148 /// The block containing cmp+br.cond with a successor shared with Head.
149 MachineBasicBlock *CmpBB;
150
151 /// The common successor for Head and CmpBB.
153
154 /// The compare instruction in CmpBB that can be converted to a ccmp.
155 MachineInstr *CmpMI;
156
157private:
158 /// The branch condition in Head as determined by analyzeBranch.
160
161 /// The condition code that makes Head branch to CmpBB.
162 AArch64CC::CondCode HeadCmpBBCC;
163
164 /// The branch condition in CmpBB.
166
167 /// The condition code that makes CmpBB branch to Tail.
168 AArch64CC::CondCode CmpBBTailCC;
169
170 /// Check if the Tail PHIs are trivially convertible.
171 bool trivialTailPHIs();
172
173 /// Remove CmpBB from the Tail PHIs.
174 void updateTailPHIs();
175
176 /// Check if an operand defining DstReg is dead.
177 bool isDeadDef(unsigned DstReg);
178
179 /// Find the compare instruction in MBB that controls the conditional branch.
180 /// Return NULL if a convertible instruction can't be found.
181 MachineInstr *findConvertibleCompare(MachineBasicBlock *MBB);
182
183 /// Return true if all non-terminator instructions in MBB can be safely
184 /// speculated.
185 bool canSpeculateInstrs(MachineBasicBlock *MBB, const MachineInstr *CmpMI);
186
187public:
188 /// runOnMachineFunction - Initialize per-function data structures.
189 void runOnMachineFunction(MachineFunction &MF,
190 const MachineBranchProbabilityInfo *MBPI) {
191 this->MF = &MF;
192 this->MBPI = MBPI;
195 MRI = &MF.getRegInfo();
196 }
197
198 /// If the sub-CFG headed by MBB can be cmp-converted, initialize the
199 /// internal state, and return true.
200 bool canConvert(MachineBasicBlock *MBB);
201
202 /// Cmo-convert the last block passed to canConvertCmp(), assuming
203 /// it is possible. Add any erased blocks to RemovedBlocks.
204 void convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks);
205
206 /// Return the expected code size delta if the conversion into a
207 /// conditional compare is performed.
208 int expectedCodeSizeDelta() const;
209};
210} // end anonymous namespace
211
214 while ((MI = MRI->getUniqueVRegDef(Reg)) &&
215 MI->getOpcode() == TargetOpcode::COPY) {
216 if (MI->getOperand(1).getReg().isPhysical())
217 break;
218 Reg = MI->getOperand(1).getReg();
219 }
220 return Reg;
221}
222
223// Check that all PHIs in Tail are selecting the same value from Head and CmpBB.
224// This means that no if-conversion is required when merging CmpBB into Head.
225bool SSACCmpConv::trivialTailPHIs() {
226 for (auto &I : *Tail) {
227 if (!I.isPHI())
228 break;
229 unsigned HeadReg = 0, CmpBBReg = 0;
230 // PHI operands come in (VReg, MBB) pairs.
231 for (unsigned oi = 1, oe = I.getNumOperands(); oi != oe; oi += 2) {
232 MachineBasicBlock *MBB = I.getOperand(oi + 1).getMBB();
233 Register Reg = lookThroughCopies(I.getOperand(oi).getReg(), MRI);
234 if (MBB == Head) {
235 assert((!HeadReg || HeadReg == Reg) && "Inconsistent PHI operands");
236 HeadReg = Reg;
237 }
238 if (MBB == CmpBB) {
239 assert((!CmpBBReg || CmpBBReg == Reg) && "Inconsistent PHI operands");
240 CmpBBReg = Reg;
241 }
242 }
243 if (HeadReg != CmpBBReg)
244 return false;
245 }
246 return true;
247}
248
249// Assuming that trivialTailPHIs() is true, update the Tail PHIs by simply
250// removing the CmpBB operands. The Head operands will be identical.
251void SSACCmpConv::updateTailPHIs() {
252 for (auto &I : *Tail) {
253 if (!I.isPHI())
254 break;
255 // I is a PHI. It can have multiple entries for CmpBB.
256 for (unsigned oi = I.getNumOperands(); oi > 2; oi -= 2) {
257 // PHI operands are (Reg, MBB) at (oi-2, oi-1).
258 if (I.getOperand(oi - 1).getMBB() == CmpBB) {
259 I.removeOperand(oi - 1);
260 I.removeOperand(oi - 2);
261 }
262 }
263 }
264}
265
266// This pass runs before the AArch64DeadRegisterDefinitions pass, so compares
267// are still writing virtual registers without any uses.
268bool SSACCmpConv::isDeadDef(unsigned DstReg) {
269 // Writes to the zero register are dead.
270 if (DstReg == AArch64::WZR || DstReg == AArch64::XZR)
271 return true;
272 if (!Register::isVirtualRegister(DstReg))
273 return false;
274 // A virtual register def without any uses will be marked dead later, and
275 // eventually replaced by the zero register.
276 return MRI->use_nodbg_empty(DstReg);
277}
278
279// Parse a condition code returned by analyzeBranch, and compute the CondCode
280// corresponding to TBB.
281// Return
283 // A normal br.cond simply has the condition code.
284 if (Cond[0].getImm() != -1) {
285 assert(Cond.size() == 1 && "Unknown Cond array format");
286 CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
287 return true;
288 }
289 // For tbz and cbz instruction, the opcode is next.
290 switch (Cond[1].getImm()) {
291 default:
292 // This includes tbz / tbnz branches which can't be converted to
293 // ccmp + br.cond.
294 return false;
295 case AArch64::CBZW:
296 case AArch64::CBZX:
297 assert(Cond.size() == 3 && "Unknown Cond array format");
298 CC = AArch64CC::EQ;
299 return true;
300 case AArch64::CBNZW:
301 case AArch64::CBNZX:
302 assert(Cond.size() == 3 && "Unknown Cond array format");
303 CC = AArch64CC::NE;
304 return true;
305 }
306}
307
308MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) {
310 if (I == MBB->end())
311 return nullptr;
312 // The terminator must be controlled by the flags.
313 if (!I->readsRegister(AArch64::NZCV, /*TRI=*/nullptr)) {
314 switch (I->getOpcode()) {
315 case AArch64::CBZW:
316 case AArch64::CBZX:
317 case AArch64::CBNZW:
318 case AArch64::CBNZX:
319 // These can be converted into a ccmp against #0.
320 return &*I;
321 }
322 ++NumCmpTermRejs;
323 LLVM_DEBUG(dbgs() << "Flags not used by terminator: " << *I);
324 return nullptr;
325 }
326
327 // Now find the instruction controlling the terminator.
328 for (MachineBasicBlock::iterator B = MBB->begin(); I != B;) {
329 I = prev_nodbg(I, MBB->begin());
330 assert(!I->isTerminator() && "Spurious terminator");
331 switch (I->getOpcode()) {
332 // cmp is an alias for subs with a dead destination register.
333 case AArch64::SUBSWri:
334 case AArch64::SUBSXri:
335 // cmn is an alias for adds with a dead destination register.
336 case AArch64::ADDSWri:
337 case AArch64::ADDSXri:
338 // Check that the immediate operand is within range, ccmp wants a uimm5.
339 // Rd = SUBSri Rn, imm, shift
340 if (I->getOperand(3).getImm() || !isUInt<5>(I->getOperand(2).getImm())) {
341 LLVM_DEBUG(dbgs() << "Immediate out of range for ccmp: " << *I);
342 ++NumImmRangeRejs;
343 return nullptr;
344 }
345 [[fallthrough]];
346 case AArch64::SUBSWrr:
347 case AArch64::SUBSXrr:
348 case AArch64::ADDSWrr:
349 case AArch64::ADDSXrr:
350 if (isDeadDef(I->getOperand(0).getReg()))
351 return &*I;
352 LLVM_DEBUG(dbgs() << "Can't convert compare with live destination: "
353 << *I);
354 ++NumLiveDstRejs;
355 return nullptr;
356 case AArch64::FCMPSrr:
357 case AArch64::FCMPDrr:
358 case AArch64::FCMPESrr:
359 case AArch64::FCMPEDrr:
360 return &*I;
361 }
362
363 // Check for flag reads and clobbers.
364 PhysRegInfo PRI = AnalyzePhysRegInBundle(*I, AArch64::NZCV, TRI);
365
366 if (PRI.Read) {
367 // The ccmp doesn't produce exactly the same flags as the original
368 // compare, so reject the transform if there are uses of the flags
369 // besides the terminators.
370 LLVM_DEBUG(dbgs() << "Can't create ccmp with multiple uses: " << *I);
371 ++NumMultNZCVUses;
372 return nullptr;
373 }
374
375 if (PRI.Defined || PRI.Clobbered) {
376 LLVM_DEBUG(dbgs() << "Not convertible compare: " << *I);
377 ++NumUnknNZCVDefs;
378 return nullptr;
379 }
380 }
381 LLVM_DEBUG(dbgs() << "Flags not defined in " << printMBBReference(*MBB)
382 << '\n');
383 return nullptr;
384}
385
386/// Determine if all the instructions in MBB can safely
387/// be speculated. The terminators are not considered.
388///
389/// Only CmpMI is allowed to clobber the flags.
390///
391bool SSACCmpConv::canSpeculateInstrs(MachineBasicBlock *MBB,
392 const MachineInstr *CmpMI) {
393 // Reject any live-in physregs. It's probably NZCV/EFLAGS, and very hard to
394 // get right.
395 if (!MBB->livein_empty()) {
396 LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has live-ins.\n");
397 return false;
398 }
399
400 unsigned InstrCount = 0;
401
402 // Check all instructions, except the terminators. It is assumed that
403 // terminators never have side effects or define any used register values.
404 for (auto &I : make_range(MBB->begin(), MBB->getFirstTerminator())) {
405 if (I.isDebugInstr())
406 continue;
407
408 if (++InstrCount > BlockInstrLimit && !Stress) {
409 LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has more than "
410 << BlockInstrLimit << " instructions.\n");
411 return false;
412 }
413
414 // There shouldn't normally be any phis in a single-predecessor block.
415 if (I.isPHI()) {
416 LLVM_DEBUG(dbgs() << "Can't hoist: " << I);
417 return false;
418 }
419
420 // Don't speculate loads. Note that it may be possible and desirable to
421 // speculate GOT or constant pool loads that are guaranteed not to trap,
422 // but we don't support that for now.
423 if (I.mayLoad()) {
424 LLVM_DEBUG(dbgs() << "Won't speculate load: " << I);
425 return false;
426 }
427
428 // We never speculate stores, so an AA pointer isn't necessary.
429 bool DontMoveAcrossStore = true;
430 if (!I.isSafeToMove(DontMoveAcrossStore)) {
431 LLVM_DEBUG(dbgs() << "Can't speculate: " << I);
432 return false;
433 }
434
435 // Only CmpMI is allowed to clobber the flags.
436 if (&I != CmpMI && I.modifiesRegister(AArch64::NZCV, TRI)) {
437 LLVM_DEBUG(dbgs() << "Clobbers flags: " << I);
438 return false;
439 }
440 }
441 return true;
442}
443
444/// Analyze the sub-cfg rooted in MBB, and return true if it is a potential
445/// candidate for cmp-conversion. Fill out the internal state.
446///
447bool SSACCmpConv::canConvert(MachineBasicBlock *MBB) {
448 Head = MBB;
449 Tail = CmpBB = nullptr;
450
451 if (Head->succ_size() != 2)
452 return false;
453 MachineBasicBlock *Succ0 = Head->succ_begin()[0];
454 MachineBasicBlock *Succ1 = Head->succ_begin()[1];
455
456 // CmpBB can only have a single predecessor. Tail is allowed many.
457 if (Succ0->pred_size() != 1)
458 std::swap(Succ0, Succ1);
459
460 // Succ0 is our candidate for CmpBB.
461 if (Succ0->pred_size() != 1 || Succ0->succ_size() != 2)
462 return false;
463
464 CmpBB = Succ0;
465 Tail = Succ1;
466
467 if (!CmpBB->isSuccessor(Tail))
468 return false;
469
470 // The CFG topology checks out.
471 LLVM_DEBUG(dbgs() << "\nTriangle: " << printMBBReference(*Head) << " -> "
472 << printMBBReference(*CmpBB) << " -> "
473 << printMBBReference(*Tail) << '\n');
474 ++NumConsidered;
475
476 // Tail is allowed to have many predecessors, but we can't handle PHIs yet.
477 //
478 // FIXME: Real PHIs could be if-converted as long as the CmpBB values are
479 // defined before The CmpBB cmp clobbers the flags. Alternatively, it should
480 // always be safe to sink the ccmp down to immediately before the CmpBB
481 // terminators.
482 if (!trivialTailPHIs()) {
483 LLVM_DEBUG(dbgs() << "Can't handle phis in Tail.\n");
484 ++NumPhiRejs;
485 return false;
486 }
487
488 if (!Tail->livein_empty()) {
489 LLVM_DEBUG(dbgs() << "Can't handle live-in physregs in Tail.\n");
490 ++NumPhysRejs;
491 return false;
492 }
493
494 // CmpBB should never have PHIs since Head is its only predecessor.
495 // FIXME: Clean them up if it happens.
496 if (!CmpBB->empty() && CmpBB->front().isPHI()) {
497 LLVM_DEBUG(dbgs() << "Can't handle phis in CmpBB.\n");
498 ++NumPhi2Rejs;
499 return false;
500 }
501
502 if (!CmpBB->livein_empty()) {
503 LLVM_DEBUG(dbgs() << "Can't handle live-in physregs in CmpBB.\n");
504 ++NumPhysRejs;
505 return false;
506 }
507
508 // The branch we're looking to eliminate must be analyzable.
509 HeadCond.clear();
510 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
511 if (TII->analyzeBranch(*Head, TBB, FBB, HeadCond)) {
512 LLVM_DEBUG(dbgs() << "Head branch not analyzable.\n");
513 ++NumHeadBranchRejs;
514 return false;
515 }
516
517 // This is weird, probably some sort of degenerate CFG, or an edge to a
518 // landing pad.
519 if (!TBB || HeadCond.empty()) {
521 dbgs() << "analyzeBranch didn't find conditional branch in Head.\n");
522 ++NumHeadBranchRejs;
523 return false;
524 }
525
526 if (!parseCond(HeadCond, HeadCmpBBCC)) {
527 LLVM_DEBUG(dbgs() << "Unsupported branch type on Head\n");
528 ++NumHeadBranchRejs;
529 return false;
530 }
531
532 // Make sure the branch direction is right.
533 if (TBB != CmpBB) {
534 assert(TBB == Tail && "Unexpected TBB");
535 HeadCmpBBCC = AArch64CC::getInvertedCondCode(HeadCmpBBCC);
536 }
537
538 CmpBBCond.clear();
539 TBB = FBB = nullptr;
540 if (TII->analyzeBranch(*CmpBB, TBB, FBB, CmpBBCond)) {
541 LLVM_DEBUG(dbgs() << "CmpBB branch not analyzable.\n");
542 ++NumCmpBranchRejs;
543 return false;
544 }
545
546 if (!TBB || CmpBBCond.empty()) {
548 dbgs() << "analyzeBranch didn't find conditional branch in CmpBB.\n");
549 ++NumCmpBranchRejs;
550 return false;
551 }
552
553 if (!parseCond(CmpBBCond, CmpBBTailCC)) {
554 LLVM_DEBUG(dbgs() << "Unsupported branch type on CmpBB\n");
555 ++NumCmpBranchRejs;
556 return false;
557 }
558
559 if (TBB != Tail)
560 CmpBBTailCC = AArch64CC::getInvertedCondCode(CmpBBTailCC);
561
562 LLVM_DEBUG(dbgs() << "Head->CmpBB on "
563 << AArch64CC::getCondCodeName(HeadCmpBBCC)
564 << ", CmpBB->Tail on "
565 << AArch64CC::getCondCodeName(CmpBBTailCC) << '\n');
566
567 CmpMI = findConvertibleCompare(CmpBB);
568 if (!CmpMI)
569 return false;
570
571 if (!canSpeculateInstrs(CmpBB, CmpMI)) {
572 ++NumSpeculateRejs;
573 return false;
574 }
575 return true;
576}
577
578void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) {
579 LLVM_DEBUG(dbgs() << "Merging " << printMBBReference(*CmpBB) << " into "
580 << printMBBReference(*Head) << ":\n"
581 << *CmpBB);
582
583 // All CmpBB instructions are moved into Head, and CmpBB is deleted.
584 // Update the CFG first.
585 updateTailPHIs();
586
587 // Save successor probabilities before removing CmpBB and Tail from their
588 // parents.
589 BranchProbability Head2CmpBB = MBPI->getEdgeProbability(Head, CmpBB);
590 BranchProbability CmpBB2Tail = MBPI->getEdgeProbability(CmpBB, Tail);
591
592 Head->removeSuccessor(CmpBB);
593 CmpBB->removeSuccessor(Tail);
594
595 // If Head and CmpBB had successor probabilities, update the probabilities to
596 // reflect the ccmp-conversion.
598
599 // Head is allowed two successors. We've removed CmpBB, so the remaining
600 // successor is Tail. We need to increase the successor probability for
601 // Tail to account for the CmpBB path we removed.
602 //
603 // Pr(Tail|Head) += Pr(CmpBB|Head) * Pr(Tail|CmpBB).
604 assert(*Head->succ_begin() == Tail && "Head successor is not Tail");
605 BranchProbability Head2Tail = MBPI->getEdgeProbability(Head, Tail);
606 Head->setSuccProbability(Head->succ_begin(),
607 Head2Tail + Head2CmpBB * CmpBB2Tail);
608
609 // We will transfer successors of CmpBB to Head in a moment without
610 // normalizing the successor probabilities. Set the successor probabilities
611 // before doing so.
612 //
613 // Pr(I|Head) = Pr(CmpBB|Head) * Pr(I|CmpBB).
614 for (auto I = CmpBB->succ_begin(), E = CmpBB->succ_end(); I != E; ++I) {
615 BranchProbability CmpBB2I = MBPI->getEdgeProbability(CmpBB, *I);
616 CmpBB->setSuccProbability(I, Head2CmpBB * CmpBB2I);
617 }
618 }
619
621 DebugLoc TermDL = Head->getFirstTerminator()->getDebugLoc();
622 TII->removeBranch(*Head);
623
624 // If the Head terminator was one of the cbz / tbz branches with built-in
625 // compare, we need to insert an explicit compare instruction in its place.
626 if (HeadCond[0].getImm() == -1) {
627 ++NumCompBranches;
628 unsigned Opc = 0;
629 switch (HeadCond[1].getImm()) {
630 case AArch64::CBZW:
631 case AArch64::CBNZW:
632 Opc = AArch64::SUBSWri;
633 break;
634 case AArch64::CBZX:
635 case AArch64::CBNZX:
636 Opc = AArch64::SUBSXri;
637 break;
638 default:
639 llvm_unreachable("Cannot convert Head branch");
640 }
641 const MCInstrDesc &MCID = TII->get(Opc);
642 // Create a dummy virtual register for the SUBS def.
643 Register DestReg = MRI->createVirtualRegister(TII->getRegClass(MCID, 0));
644 // Insert a SUBS Rn, #0 instruction instead of the cbz / cbnz.
645 BuildMI(*Head, Head->end(), TermDL, MCID)
646 .addReg(DestReg, RegState::Define | RegState::Dead)
647 .add(HeadCond[2])
648 .addImm(0)
649 .addImm(0);
650 // SUBS uses the GPR*sp register classes.
651 MRI->constrainRegClass(HeadCond[2].getReg(), TII->getRegClass(MCID, 1));
652 }
653
654 Head->splice(Head->end(), CmpBB, CmpBB->begin(), CmpBB->end());
655
656 // Now replace CmpMI with a ccmp instruction that also considers the incoming
657 // flags.
658 unsigned Opc = 0;
659 unsigned FirstOp = 1; // First CmpMI operand to copy.
660 bool isZBranch = false; // CmpMI is a cbz/cbnz instruction.
661 switch (CmpMI->getOpcode()) {
662 default:
663 llvm_unreachable("Unknown compare opcode");
664 case AArch64::SUBSWri: Opc = AArch64::CCMPWi; break;
665 case AArch64::SUBSWrr: Opc = AArch64::CCMPWr; break;
666 case AArch64::SUBSXri: Opc = AArch64::CCMPXi; break;
667 case AArch64::SUBSXrr: Opc = AArch64::CCMPXr; break;
668 case AArch64::ADDSWri: Opc = AArch64::CCMNWi; break;
669 case AArch64::ADDSWrr: Opc = AArch64::CCMNWr; break;
670 case AArch64::ADDSXri: Opc = AArch64::CCMNXi; break;
671 case AArch64::ADDSXrr: Opc = AArch64::CCMNXr; break;
672 case AArch64::FCMPSrr: Opc = AArch64::FCCMPSrr; FirstOp = 0; break;
673 case AArch64::FCMPDrr: Opc = AArch64::FCCMPDrr; FirstOp = 0; break;
674 case AArch64::FCMPESrr: Opc = AArch64::FCCMPESrr; FirstOp = 0; break;
675 case AArch64::FCMPEDrr: Opc = AArch64::FCCMPEDrr; FirstOp = 0; break;
676 case AArch64::CBZW:
677 case AArch64::CBNZW:
678 Opc = AArch64::CCMPWi;
679 FirstOp = 0;
680 isZBranch = true;
681 break;
682 case AArch64::CBZX:
683 case AArch64::CBNZX:
684 Opc = AArch64::CCMPXi;
685 FirstOp = 0;
686 isZBranch = true;
687 break;
688 }
689
690 // The ccmp instruction should set the flags according to the comparison when
691 // Head would have branched to CmpBB.
692 // The NZCV immediate operand should provide flags for the case where Head
693 // would have branched to Tail. These flags should cause the new Head
694 // terminator to branch to tail.
695 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(CmpBBTailCC);
696 const MCInstrDesc &MCID = TII->get(Opc);
697 MRI->constrainRegClass(CmpMI->getOperand(FirstOp).getReg(),
698 TII->getRegClass(MCID, 0));
699 if (CmpMI->getOperand(FirstOp + 1).isReg())
700 MRI->constrainRegClass(CmpMI->getOperand(FirstOp + 1).getReg(),
701 TII->getRegClass(MCID, 1));
702 MachineInstrBuilder MIB = BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), MCID)
703 .add(CmpMI->getOperand(FirstOp)); // Register Rn
704 if (isZBranch)
705 MIB.addImm(0); // cbz/cbnz Rn -> ccmp Rn, #0
706 else
707 MIB.add(CmpMI->getOperand(FirstOp + 1)); // Register Rm / Immediate
708 MIB.addImm(NZCV).addImm(HeadCmpBBCC);
709
710 // If CmpMI was a terminator, we need a new conditional branch to replace it.
711 // This now becomes a Head terminator.
712 if (isZBranch) {
713 bool isNZ = CmpMI->getOpcode() == AArch64::CBNZW ||
714 CmpMI->getOpcode() == AArch64::CBNZX;
715 BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), TII->get(AArch64::Bcc))
717 .add(CmpMI->getOperand(1)); // Branch target.
718 }
719 CmpMI->eraseFromParent();
720 Head->updateTerminator(CmpBB->getNextNode());
721
722 RemovedBlocks.push_back(CmpBB);
723 LLVM_DEBUG(dbgs() << "Result:\n" << *Head);
724 ++NumConverted;
725}
726
727int SSACCmpConv::expectedCodeSizeDelta() const {
728 int delta = 0;
729 // If the Head terminator was one of the cbz / tbz branches with built-in
730 // compare, we need to insert an explicit compare instruction in its place
731 // plus a branch instruction.
732 if (HeadCond[0].getImm() == -1) {
733 switch (HeadCond[1].getImm()) {
734 case AArch64::CBZW:
735 case AArch64::CBNZW:
736 case AArch64::CBZX:
737 case AArch64::CBNZX:
738 // Therefore delta += 1
739 delta = 1;
740 break;
741 default:
742 llvm_unreachable("Cannot convert Head branch");
743 }
744 }
745 // If the Cmp terminator was one of the cbz / tbz branches with
746 // built-in compare, it will be turned into a compare instruction
747 // into Head, but we do not save any instruction.
748 // Otherwise, we save the branch instruction.
749 switch (CmpMI->getOpcode()) {
750 default:
751 --delta;
752 break;
753 case AArch64::CBZW:
754 case AArch64::CBNZW:
755 case AArch64::CBZX:
756 case AArch64::CBNZX:
757 break;
758 }
759 return delta;
760}
761
762//===----------------------------------------------------------------------===//
763// AArch64ConditionalCompares Pass
764//===----------------------------------------------------------------------===//
765
766namespace {
767class AArch64ConditionalCompares : public MachineFunctionPass {
768 const MachineBranchProbabilityInfo *MBPI;
769 const TargetInstrInfo *TII;
770 const TargetRegisterInfo *TRI;
771 MCSchedModel SchedModel;
772 // Does the proceeded function has Oz attribute.
773 bool MinSize;
774 MachineRegisterInfo *MRI;
775 MachineDominatorTree *DomTree;
776 MachineLoopInfo *Loops;
777 MachineTraceMetrics *Traces;
779 SSACCmpConv CmpConv;
780
781public:
782 static char ID;
783 AArch64ConditionalCompares() : MachineFunctionPass(ID) {}
784 void getAnalysisUsage(AnalysisUsage &AU) const override;
785 bool runOnMachineFunction(MachineFunction &MF) override;
786 StringRef getPassName() const override {
787 return "AArch64 Conditional Compares";
788 }
789
790private:
791 bool tryConvert(MachineBasicBlock *);
792 void updateDomTree(ArrayRef<MachineBasicBlock *> Removed);
793 void updateLoops(ArrayRef<MachineBasicBlock *> Removed);
794 void invalidateTraces();
795 bool shouldConvert();
796};
797} // end anonymous namespace
798
799char AArch64ConditionalCompares::ID = 0;
800
801INITIALIZE_PASS_BEGIN(AArch64ConditionalCompares, "aarch64-ccmp",
802 "AArch64 CCMP Pass", false, false)
806INITIALIZE_PASS_END(AArch64ConditionalCompares, "aarch64-ccmp",
807 "AArch64 CCMP Pass", false, false)
808
810 return new AArch64ConditionalCompares();
811}
812
813void AArch64ConditionalCompares::getAnalysisUsage(AnalysisUsage &AU) const {
822}
823
824/// Update the dominator tree after if-conversion erased some blocks.
825void AArch64ConditionalCompares::updateDomTree(
827 // convert() removes CmpBB which was previously dominated by Head.
828 // CmpBB children should be transferred to Head.
829 MachineDomTreeNode *HeadNode = DomTree->getNode(CmpConv.Head);
830 for (MachineBasicBlock *RemovedMBB : Removed) {
831 MachineDomTreeNode *Node = DomTree->getNode(RemovedMBB);
832 assert(Node != HeadNode && "Cannot erase the head node");
833 assert(Node->getIDom() == HeadNode && "CmpBB should be dominated by Head");
834 while (!Node->isLeaf())
835 DomTree->changeImmediateDominator(*Node->begin(), HeadNode);
836 DomTree->eraseNode(RemovedMBB);
837 }
838}
839
840/// Update LoopInfo after if-conversion.
841void
842AArch64ConditionalCompares::updateLoops(ArrayRef<MachineBasicBlock *> Removed) {
843 if (!Loops)
844 return;
845 for (MachineBasicBlock *RemovedMBB : Removed)
846 Loops->removeBlock(RemovedMBB);
847}
848
849/// Invalidate MachineTraceMetrics before if-conversion.
850void AArch64ConditionalCompares::invalidateTraces() {
851 Traces->invalidate(CmpConv.Head);
852 Traces->invalidate(CmpConv.CmpBB);
853}
854
855/// Apply cost model and heuristics to the if-conversion in IfConv.
856/// Return true if the conversion is a good idea.
857///
858bool AArch64ConditionalCompares::shouldConvert() {
859 // Stress testing mode disables all cost considerations.
860 if (Stress)
861 return true;
862 if (!MinInstr)
863 MinInstr = Traces->getEnsemble(MachineTraceStrategy::TS_MinInstrCount);
864
865 // Head dominates CmpBB, so it is always included in its trace.
866 MachineTraceMetrics::Trace Trace = MinInstr->getTrace(CmpConv.CmpBB);
867
868 // If code size is the main concern
869 if (MinSize) {
870 int CodeSizeDelta = CmpConv.expectedCodeSizeDelta();
871 LLVM_DEBUG(dbgs() << "Code size delta: " << CodeSizeDelta << '\n');
872 // If we are minimizing the code size, do the conversion whatever
873 // the cost is.
874 if (CodeSizeDelta < 0)
875 return true;
876 if (CodeSizeDelta > 0) {
877 LLVM_DEBUG(dbgs() << "Code size is increasing, give up on this one.\n");
878 return false;
879 }
880 // CodeSizeDelta == 0, continue with the regular heuristics
881 }
882
883 // Heuristic: The compare conversion delays the execution of the branch
884 // instruction because we must wait for the inputs to the second compare as
885 // well. The branch has no dependent instructions, but delaying it increases
886 // the cost of a misprediction.
887 //
888 // Set a limit on the delay we will accept.
889 unsigned DelayLimit = SchedModel.MispredictPenalty * 3 / 4;
890
891 // Instruction depths can be computed for all trace instructions above CmpBB.
892 unsigned HeadDepth =
893 Trace.getInstrCycles(*CmpConv.Head->getFirstTerminator()).Depth;
894 unsigned CmpBBDepth =
895 Trace.getInstrCycles(*CmpConv.CmpBB->getFirstTerminator()).Depth;
896 LLVM_DEBUG(dbgs() << "Head depth: " << HeadDepth
897 << "\nCmpBB depth: " << CmpBBDepth << '\n');
898 if (CmpBBDepth > HeadDepth + DelayLimit) {
899 LLVM_DEBUG(dbgs() << "Branch delay would be larger than " << DelayLimit
900 << " cycles.\n");
901 return false;
902 }
903
904 // Check the resource depth at the bottom of CmpBB - these instructions will
905 // be speculated.
906 unsigned ResDepth = Trace.getResourceDepth(true);
907 LLVM_DEBUG(dbgs() << "Resources: " << ResDepth << '\n');
908
909 // Heuristic: The speculatively executed instructions must all be able to
910 // merge into the Head block. The Head critical path should dominate the
911 // resource cost of the speculated instructions.
912 if (ResDepth > HeadDepth) {
913 LLVM_DEBUG(dbgs() << "Too many instructions to speculate.\n");
914 return false;
915 }
916 return true;
917}
918
919bool AArch64ConditionalCompares::tryConvert(MachineBasicBlock *MBB) {
920 bool Changed = false;
921 while (CmpConv.canConvert(MBB) && shouldConvert()) {
922 invalidateTraces();
923 SmallVector<MachineBasicBlock *, 4> RemovedBlocks;
924 CmpConv.convert(RemovedBlocks);
925 Changed = true;
926 updateDomTree(RemovedBlocks);
927 updateLoops(RemovedBlocks);
928 for (MachineBasicBlock *MBB : RemovedBlocks)
930 }
931 return Changed;
932}
933
934bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) {
935 LLVM_DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n"
936 << "********** Function: " << MF.getName() << '\n');
937 if (skipFunction(MF.getFunction()))
938 return false;
939
942 SchedModel = MF.getSubtarget().getSchedModel();
943 MRI = &MF.getRegInfo();
944 DomTree = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
945 Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
946 MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
947 Traces = &getAnalysis<MachineTraceMetricsWrapperPass>().getMTM();
948 MinInstr = nullptr;
949 MinSize = MF.getFunction().hasMinSize();
950
951 bool Changed = false;
952 CmpConv.runOnMachineFunction(MF, MBPI);
953
954 // Visit blocks in dominator tree pre-order. The pre-order enables multiple
955 // cmp-conversions from the same head block.
956 // Note that updateDomTree() modifies the children of the DomTree node
957 // currently being visited. The df_iterator supports that; it doesn't look at
958 // child_begin() / child_end() until after a node has been visited.
959 for (auto *I : depth_first(DomTree))
960 if (tryConvert(I->getBlock()))
961 Changed = true;
962
963 return Changed;
964}
static Register lookThroughCopies(Register Reg, MachineRegisterInfo *MRI)
static cl::opt< bool > Stress("aarch64-stress-ccmp", cl::Hidden, cl::desc("Turn all knobs to 11"))
static cl::opt< unsigned > BlockInstrLimit("aarch64-ccmp-limit", cl::init(30), cl::Hidden, cl::desc("Maximum number of instructions per speculated block."))
static bool parseCond(ArrayRef< MachineOperand > Cond, AArch64CC::CondCode &CC)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool shouldConvert(Constant &C, AArch64PromoteConstant::PromotionCacheTy &PromotionCache)
MachineBasicBlock & MBB
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static unsigned InstrCount
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
static cl::opt< bool > Stress("stress-early-ifcvt", cl::Hidden, cl::desc("Turn all knobs to 11"))
static cl::opt< unsigned > BlockInstrLimit("early-ifcvt-limit", cl::init(30), cl::Hidden, cl::desc("Maximum number of instructions per speculated block."))
const HexagonInstrInfo * TII
Hexagon Hardware Loops
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
void eraseNode(NodeT *BB)
eraseNode - Removes a node from the dominator tree.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:711
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
Remove the branching code at the end of the specific MBB.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI void updateTerminator(MachineBasicBlock *PreviousLayoutSuccessor)
Update the terminator instructions in block to account for changes to block layout which may have bee...
LLVM_ABI void setSuccProbability(succ_iterator I, BranchProbability Prob)
Set successor probability of a given iterator.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
bool hasSuccessorProbabilities() const
Return true if any of the successors have probabilities attached to them.
LLVM_ABI void eraseFromParent()
This method unlinks 'this' from the containing function and deletes it.
LLVM_ABI bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
BranchProbability getEdgeProbability(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const
Analysis pass which computes a MachineDominatorTree.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
LLVM_ABI const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
LLVM_ABI MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
Trace getTrace(const MachineBasicBlock *MBB)
Get the trace that passes through MBB.
InstrCycles getInstrCycles(const MachineInstr &MI) const
Return the depth and height of MI.
unsigned getResourceDepth(bool Bottom) const
Return the resource depth of the top/bottom of the trace center block.
Ensemble * getEnsemble(MachineTraceStrategy)
Get the trace ensemble representing the given trace selection strategy.
void invalidate(const MachineBasicBlock *MBB)
Invalidate cached information about MBB.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:348
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static const char * getCondCodeName(CondCode Code)
static CondCode getInvertedCondCode(CondCode Code)
static unsigned getNZCVToSatisfyCondCode(CondCode Code)
Given a condition code, return NZCV flags that would satisfy that condition.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ Tail
Attemps to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition CallingConv.h:76
initializer< Ty > init(const Ty &Val)
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI PhysRegInfo AnalyzePhysRegInBundle(const MachineInstr &MI, Register Reg, const TargetRegisterInfo *TRI)
AnalyzePhysRegInBundle - Analyze how the current instruction or bundle uses a physical register.
FunctionPass * createAArch64ConditionalCompares()
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
DomTreeNodeBase< MachineBasicBlock > MachineDomTreeNode
ArrayRef(const T &OneElt) -> ArrayRef< T >
iterator_range< df_iterator< T > > depth_first(const T &G)
IterT prev_nodbg(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It, then continue decrementing it while it points to a debug instruction.
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
unsigned MispredictPenalty
Definition MCSchedule.h:311
unsigned Depth
Earliest issue cycle as determined by data dependencies and instruction latencies from the beginning ...
bool Read
Reg or one of its aliases is read.
bool Defined
Reg or one of its aliases is defined.
bool Clobbered
There is a regmask operand indicating Reg is clobbered.