LLVM 23.0.0git
AArch64ConditionOptimizer.cpp
Go to the documentation of this file.
1//=- AArch64ConditionOptimizer.cpp - Remove useless comparisons for AArch64 -=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9//
10// This pass tries to make consecutive comparisons of values use the same
11// operands to allow the CSE pass to remove duplicate instructions. It adjusts
12// comparisons with immediate values by converting between inclusive and
13// exclusive forms (GE <-> GT, LE <-> LT) and correcting immediate values to
14// make them equal.
15//
16// The pass handles:
17// * Cross-block: SUBS/ADDS followed by conditional branches
18// * Intra-block: CSINC conditional instructions
19//
20//
21// Consider the following example in C:
22//
23// if ((a < 5 && ...) || (a > 5 && ...)) {
24// ~~~~~ ~~~~~
25// ^ ^
26// x y
27//
28// Here both "x" and "y" expressions compare "a" with "5". When "x" evaluates
29// to "false", "y" can just check flags set by the first comparison. As a
30// result of the canonicalization employed by
31// SelectionDAGBuilder::visitSwitchCase, DAGCombine, and other target-specific
32// code, assembly ends up in the form that is not CSE friendly:
33//
34// ...
35// cmp w8, #4
36// b.gt .LBB0_3
37// ...
38// .LBB0_3:
39// cmp w8, #6
40// b.lt .LBB0_6
41// ...
42//
43// Same assembly after the pass:
44//
45// ...
46// cmp w8, #5
47// b.ge .LBB0_3
48// ...
49// .LBB0_3:
50// cmp w8, #5 // <-- CSE pass removes this instruction
51// b.le .LBB0_6
52// ...
53//
54// See optimizeCrossBlock() and optimizeIntraBlock() for implementation details.
55//
56// TODO: maybe handle TBNZ/TBZ the same way as CMP when used instead for "a < 0"
57// TODO: For cross-block:
58// - handle other conditional instructions (e.g. CSET)
59// - allow second branching to be anything if it doesn't require adjusting
60// TODO: For intra-block:
61// - handle CINC and CSET (CSINC aliases) as their conditions are inverted
62// compared to CSINC.
63// - handle other non-CSINC conditional instructions
64//
65//===----------------------------------------------------------------------===//
66
67#include "AArch64.h"
70#include "llvm/ADT/ArrayRef.h"
73#include "llvm/ADT/Statistic.h"
85#include "llvm/Pass.h"
86#include "llvm/Support/Debug.h"
89#include <cassert>
90#include <cstdlib>
91
92using namespace llvm;
93
94#define DEBUG_TYPE "aarch64-condopt"
95
96STATISTIC(NumConditionsAdjusted, "Number of conditions adjusted");
97
98namespace {
99
100/// Bundles the parameters needed to adjust a comparison instruction.
101struct CmpInfo {
102 int Imm;
103 unsigned Opc;
105};
106
107class AArch64ConditionOptimizerImpl {
108 /// Represents a comparison instruction paired with its consuming
109 /// conditional instruction
110 struct CmpCondPair {
111 MachineInstr *CmpMI;
112 MachineInstr *CondMI;
114
115 int getImm() const { return CmpMI->getOperand(2).getImm(); }
116 unsigned getOpc() const { return CmpMI->getOpcode(); }
117 };
118
119 const TargetInstrInfo *TII;
120 const TargetRegisterInfo *TRI;
121 MachineDominatorTree *DomTree;
122 const MachineRegisterInfo *MRI;
123
124public:
125 bool run(MachineFunction &MF, MachineDominatorTree &MDT);
126
127private:
128 bool canAdjustCmp(MachineInstr &CmpMI);
129 bool registersMatch(MachineInstr *FirstMI, MachineInstr *SecondMI);
130 bool nzcvLivesOut(MachineBasicBlock *MBB);
131 MachineInstr *getBccTerminator(MachineBasicBlock *MBB);
132 MachineInstr *findAdjustableCmp(MachineInstr *CondMI);
133 CmpInfo getAdjustedCmpInfo(MachineInstr *CmpMI, AArch64CC::CondCode Cmp);
134 void updateCmpInstr(MachineInstr *CmpMI, int NewImm, unsigned NewOpc);
135 void updateCondInstr(MachineInstr *CondMI, AArch64CC::CondCode NewCC);
136 void applyCmpAdjustment(CmpCondPair &Pair, const CmpInfo &Info);
137 bool tryOptimizePair(CmpCondPair &First, CmpCondPair &Second);
138 bool optimizeIntraBlock(MachineBasicBlock &MBB);
139 bool optimizeCrossBlock(MachineBasicBlock &HBB);
140};
141
142class AArch64ConditionOptimizerLegacy : public MachineFunctionPass {
143public:
144 static char ID;
145 AArch64ConditionOptimizerLegacy() : MachineFunctionPass(ID) {}
146
147 void getAnalysisUsage(AnalysisUsage &AU) const override;
148 bool runOnMachineFunction(MachineFunction &MF) override;
149
150 StringRef getPassName() const override {
151 return "AArch64 Condition Optimizer";
152 }
153};
154
155} // end anonymous namespace
156
157char AArch64ConditionOptimizerLegacy::ID = 0;
158
159INITIALIZE_PASS_BEGIN(AArch64ConditionOptimizerLegacy, "aarch64-condopt",
160 "AArch64 CondOpt Pass", false, false)
162INITIALIZE_PASS_END(AArch64ConditionOptimizerLegacy, "aarch64-condopt",
163 "AArch64 CondOpt Pass", false, false)
164
166 return new AArch64ConditionOptimizerLegacy();
167}
168
169void AArch64ConditionOptimizerLegacy::getAnalysisUsage(
170 AnalysisUsage &AU) const {
174}
175
176// Verify that the MI's immediate is adjustable and it only sets flags (pure
177// cmp)
178bool AArch64ConditionOptimizerImpl::canAdjustCmp(MachineInstr &CmpMI) {
179 unsigned ShiftAmt = AArch64_AM::getShiftValue(CmpMI.getOperand(3).getImm());
180 if (!CmpMI.getOperand(2).isImm()) {
181 LLVM_DEBUG(dbgs() << "Immediate of cmp is symbolic, " << CmpMI << '\n');
182 return false;
183 } else if (CmpMI.getOperand(2).getImm() << ShiftAmt >= 0xfff) {
184 LLVM_DEBUG(dbgs() << "Immediate of cmp may be out of range, " << CmpMI
185 << '\n');
186 return false;
187 } else if (!MRI->use_nodbg_empty(CmpMI.getOperand(0).getReg())) {
188 LLVM_DEBUG(dbgs() << "Destination of cmp is not dead, " << CmpMI << '\n');
189 return false;
190 }
191
192 return true;
193}
194
195// Ensure both compare MIs use the same register, tracing through copies.
196bool AArch64ConditionOptimizerImpl::registersMatch(MachineInstr *FirstMI,
197 MachineInstr *SecondMI) {
198 Register FirstReg = FirstMI->getOperand(1).getReg();
199 Register SecondReg = SecondMI->getOperand(1).getReg();
200 Register FirstCmpReg =
201 FirstReg.isVirtual() ? TRI->lookThruCopyLike(FirstReg, MRI) : FirstReg;
202 Register SecondCmpReg =
203 SecondReg.isVirtual() ? TRI->lookThruCopyLike(SecondReg, MRI) : SecondReg;
204 if (FirstCmpReg != SecondCmpReg) {
205 LLVM_DEBUG(dbgs() << "CMPs compare different registers\n");
206 return false;
207 }
208
209 return true;
210}
211
212// Check if NZCV lives out to any successor block.
213bool AArch64ConditionOptimizerImpl::nzcvLivesOut(MachineBasicBlock *MBB) {
214 for (auto *SuccBB : MBB->successors()) {
215 if (SuccBB->isLiveIn(AArch64::NZCV)) {
216 LLVM_DEBUG(dbgs() << "NZCV live into successor "
217 << printMBBReference(*SuccBB) << " from "
218 << printMBBReference(*MBB) << '\n');
219 return true;
220 }
221 }
222 return false;
223}
224
225// Returns true if the opcode is a comparison instruction (CMP/CMN).
226static bool isCmpInstruction(unsigned Opc) {
227 switch (Opc) {
228 // cmp is an alias for SUBS with a dead destination register.
229 case AArch64::SUBSWri:
230 case AArch64::SUBSXri:
231 // cmp is an alias for ADDS with a dead destination register.
232 case AArch64::ADDSWri:
233 case AArch64::ADDSXri:
234 return true;
235 default:
236 return false;
237 }
238}
239
240static bool isCSINCInstruction(unsigned Opc) {
241 return Opc == AArch64::CSINCWr || Opc == AArch64::CSINCXr;
242}
243
244// Returns the Bcc terminator if present, otherwise nullptr.
245MachineInstr *
246AArch64ConditionOptimizerImpl::getBccTerminator(MachineBasicBlock *MBB) {
248 if (Term == MBB->end()) {
249 LLVM_DEBUG(dbgs() << "No terminator in " << printMBBReference(*MBB)
250 << '\n');
251 return nullptr;
252 }
253
254 if (Term->getOpcode() != AArch64::Bcc) {
255 LLVM_DEBUG(dbgs() << "Non-Bcc terminator in " << printMBBReference(*MBB)
256 << ": " << *Term);
257 return nullptr;
258 }
259
260 return &*Term;
261}
262
263// Find the CMP instruction controlling the given conditional instruction and
264// ensure it can be adjusted for CSE optimization. Searches backward from
265// CondMI, ensuring no NZCV interference. Returns nullptr if no suitable CMP
266// is found or if adjustments are not safe.
267MachineInstr *
268AArch64ConditionOptimizerImpl::findAdjustableCmp(MachineInstr *CondMI) {
269 assert(CondMI && "CondMI cannot be null");
270 MachineBasicBlock *MBB = CondMI->getParent();
271
272 // Search backward from the conditional to find the instruction controlling
273 // it.
275 It = MachineBasicBlock::iterator(CondMI);
276 It != B;) {
277 It = prev_nodbg(It, B);
278 MachineInstr &I = *It;
279 assert(!I.isTerminator() && "Spurious terminator");
280 // Ensure there is no use of NZCV between CMP and conditional.
281 if (I.readsRegister(AArch64::NZCV, /*TRI=*/nullptr))
282 return nullptr;
283
284 if (isCmpInstruction(I.getOpcode())) {
285 if (!canAdjustCmp(I)) {
286 return nullptr;
287 }
288 return &I;
289 }
290
291 if (I.modifiesRegister(AArch64::NZCV, /*TRI=*/nullptr))
292 return nullptr;
293 }
294 LLVM_DEBUG(dbgs() << "Flags not defined in " << printMBBReference(*MBB)
295 << '\n');
296 return nullptr;
297}
298
299// Changes opcode adds <-> subs considering register operand width.
300static int getComplementOpc(int Opc) {
301 switch (Opc) {
302 case AArch64::ADDSWri: return AArch64::SUBSWri;
303 case AArch64::ADDSXri: return AArch64::SUBSXri;
304 case AArch64::SUBSWri: return AArch64::ADDSWri;
305 case AArch64::SUBSXri: return AArch64::ADDSXri;
306 default:
307 llvm_unreachable("Unexpected opcode");
308 }
309}
310
311// Changes form of comparison inclusive <-> exclusive.
313 switch (Cmp) {
314 case AArch64CC::GT:
315 return AArch64CC::GE;
316 case AArch64CC::GE:
317 return AArch64CC::GT;
318 case AArch64CC::LT:
319 return AArch64CC::LE;
320 case AArch64CC::LE:
321 return AArch64CC::LT;
322 case AArch64CC::HI:
323 return AArch64CC::HS;
324 case AArch64CC::HS:
325 return AArch64CC::HI;
326 case AArch64CC::LO:
327 return AArch64CC::LS;
328 case AArch64CC::LS:
329 return AArch64CC::LO;
330 default:
331 llvm_unreachable("Unexpected condition code");
332 }
333}
334
335// Returns the adjusted immediate, opcode, and condition code for switching
336// between inclusive/exclusive forms (GT <-> GE, LT <-> LE).
337CmpInfo
338AArch64ConditionOptimizerImpl::getAdjustedCmpInfo(MachineInstr *CmpMI,
340 unsigned Opc = CmpMI->getOpcode();
341
342 bool IsSigned = Cmp == AArch64CC::GT || Cmp == AArch64CC::GE ||
344
345 // CMN (compare with negative immediate) is an alias to ADDS (as
346 // "operand - negative" == "operand + positive")
347 bool Negative = (Opc == AArch64::ADDSWri || Opc == AArch64::ADDSXri);
348
349 int Correction = (Cmp == AArch64CC::GT || Cmp == AArch64CC::HI) ? 1 : -1;
350 // Negate Correction value for comparison with negative immediate (CMN).
351 if (Negative) {
352 Correction = -Correction;
353 }
354
355 const int OldImm = (int)CmpMI->getOperand(2).getImm();
356 const int NewImm = std::abs(OldImm + Correction);
357
358 // Bail out on cmn 0 (ADDS with immediate 0). It is a valid instruction but
359 // doesn't set flags in a way we can safely transform, so skip optimization.
360 if (OldImm == 0 && Negative)
361 return {OldImm, Opc, Cmp};
362
363 if ((OldImm == 1 && Negative && Correction == -1) ||
364 (OldImm == 0 && Correction == -1)) {
365 // If we change opcodes for unsigned comparisons, this means we did an
366 // unsigned wrap (e.g., 0 wrapping to 0xFFFFFFFF), so return the old cmp.
367 // Note: For signed comparisons, opcode changes (cmn 1 ↔ cmp 0) are valid.
368 if (!IsSigned)
369 return {OldImm, Opc, Cmp};
371 }
372
373 return {NewImm, Opc, getAdjustedCmp(Cmp)};
374}
375
376// Modifies a comparison instruction's immediate and opcode.
377void AArch64ConditionOptimizerImpl::updateCmpInstr(MachineInstr *CmpMI,
378 int NewImm,
379 unsigned NewOpc) {
380 CmpMI->getOperand(2).setImm(NewImm);
381 CmpMI->setDesc(TII->get(NewOpc));
382}
383
384// Modifies the condition code of a conditional instruction.
385void AArch64ConditionOptimizerImpl::updateCondInstr(MachineInstr *CondMI,
386 AArch64CC::CondCode NewCC) {
387 // Get the correct operand index for the conditional instruction
388 unsigned CondOpIdx;
389 switch (CondMI->getOpcode()) {
390 case AArch64::Bcc:
391 CondOpIdx = 0;
392 break;
393 case AArch64::CSINCWr:
394 case AArch64::CSINCXr:
395 CondOpIdx = 3;
396 break;
397 default:
398 llvm_unreachable("Unsupported conditional instruction");
399 }
400 CondMI->getOperand(CondOpIdx).setImm(NewCC);
401 ++NumConditionsAdjusted;
402}
403
404// Applies a comparison adjustment to a cmp/cond instruction pair.
405void AArch64ConditionOptimizerImpl::applyCmpAdjustment(CmpCondPair &Pair,
406 const CmpInfo &Info) {
407 updateCmpInstr(Pair.CmpMI, Info.Imm, Info.Opc);
408 updateCondInstr(Pair.CondMI, Info.CC);
409}
410
411// Extracts the condition code from the result of analyzeBranch.
412// Returns the CondCode or Invalid if the format is not a simple br.cond.
414 assert(!Cond.empty() && "Expected non-empty condition from analyzeBranch");
415 // A normal br.cond simply has the condition code.
416 if (Cond[0].getImm() != -1) {
417 assert(Cond.size() == 1 && "Unknown Cond array format");
418 return (AArch64CC::CondCode)(int)Cond[0].getImm();
419 }
421}
422
424 return Cmp == AArch64CC::GT || Cmp == AArch64CC::HI;
425}
426
428 return Cmp == AArch64CC::LT || Cmp == AArch64CC::LO;
429}
430
431bool AArch64ConditionOptimizerImpl::tryOptimizePair(CmpCondPair &First,
432 CmpCondPair &Second) {
433 if (!((isGreaterThan(First.CC) || isLessThan(First.CC)) &&
434 (isGreaterThan(Second.CC) || isLessThan(Second.CC))))
435 return false;
436
437 int FirstImmTrueValue = First.getImm();
438 int SecondImmTrueValue = Second.getImm();
439
440 // Normalize immediate of CMN (ADDS) instructions
441 if (First.getOpc() == AArch64::ADDSWri || First.getOpc() == AArch64::ADDSXri)
442 FirstImmTrueValue = -FirstImmTrueValue;
443 if (Second.getOpc() == AArch64::ADDSWri ||
444 Second.getOpc() == AArch64::ADDSXri)
445 SecondImmTrueValue = -SecondImmTrueValue;
446
447 CmpInfo FirstAdj = getAdjustedCmpInfo(First.CmpMI, First.CC);
448 CmpInfo SecondAdj = getAdjustedCmpInfo(Second.CmpMI, Second.CC);
449
450 if (((isGreaterThan(First.CC) && isLessThan(Second.CC)) ||
451 (isLessThan(First.CC) && isGreaterThan(Second.CC))) &&
452 std::abs(SecondImmTrueValue - FirstImmTrueValue) == 2) {
453 // This branch transforms machine instructions that correspond to
454 //
455 // 1) (a > {SecondImm} && ...) || (a < {FirstImm} && ...)
456 // 2) (a < {SecondImm} && ...) || (a > {FirstImm} && ...)
457 //
458 // into
459 //
460 // 1) (a >= {NewImm} && ...) || (a <= {NewImm} && ...)
461 // 2) (a <= {NewImm} && ...) || (a >= {NewImm} && ...)
462
463 // Verify both adjustments converge to identical comparisons (same
464 // immediate and opcode). This ensures CSE can eliminate the duplicate.
465 if (FirstAdj.Imm != SecondAdj.Imm || FirstAdj.Opc != SecondAdj.Opc)
466 return false;
467
468 LLVM_DEBUG(dbgs() << "Optimized (opposite): "
469 << AArch64CC::getCondCodeName(First.CC) << " #"
470 << First.getImm() << ", "
471 << AArch64CC::getCondCodeName(Second.CC) << " #"
472 << Second.getImm() << " -> "
473 << AArch64CC::getCondCodeName(FirstAdj.CC) << " #"
474 << FirstAdj.Imm << ", "
475 << AArch64CC::getCondCodeName(SecondAdj.CC) << " #"
476 << SecondAdj.Imm << '\n');
477 applyCmpAdjustment(First, FirstAdj);
478 applyCmpAdjustment(Second, SecondAdj);
479 return true;
480
481 } else if (((isGreaterThan(First.CC) && isGreaterThan(Second.CC)) ||
482 (isLessThan(First.CC) && isLessThan(Second.CC))) &&
483 std::abs(SecondImmTrueValue - FirstImmTrueValue) == 1) {
484 // This branch transforms machine instructions that correspond to
485 //
486 // 1) (a > {SecondImm} && ...) || (a > {FirstImm} && ...)
487 // 2) (a < {SecondImm} && ...) || (a < {FirstImm} && ...)
488 //
489 // into
490 //
491 // 1) (a <= {NewImm} && ...) || (a > {NewImm} && ...)
492 // 2) (a < {NewImm} && ...) || (a >= {NewImm} && ...)
493
494 // GT -> GE transformation increases immediate value, so picking the
495 // smaller one; LT -> LE decreases immediate value so invert the choice.
496 bool AdjustFirst = (FirstImmTrueValue < SecondImmTrueValue);
497 if (isLessThan(First.CC))
498 AdjustFirst = !AdjustFirst;
499
500 CmpCondPair &Target = AdjustFirst ? Second : First;
501 CmpCondPair &ToChange = AdjustFirst ? First : Second;
502 CmpInfo &Adj = AdjustFirst ? FirstAdj : SecondAdj;
503
504 // Verify the adjustment converges to the target's comparison (same
505 // immediate and opcode). This ensures CSE can eliminate the duplicate.
506 if (Adj.Imm != Target.getImm() || Adj.Opc != Target.getOpc())
507 return false;
508
509 LLVM_DEBUG(dbgs() << "Optimized (same-direction): "
510 << AArch64CC::getCondCodeName(ToChange.CC) << " #"
511 << ToChange.getImm() << " -> "
512 << AArch64CC::getCondCodeName(Adj.CC) << " #" << Adj.Imm
513 << '\n');
514 applyCmpAdjustment(ToChange, Adj);
515 return true;
516 }
517
518 // Other transformation cases almost never occur due to generation of < or >
519 // comparisons instead of <= and >=.
520 return false;
521}
522
523// This function transforms two CMP+CSINC pairs within the same basic block
524// when both conditions are the same (GT/GT or LT/LT) and immediates differ
525// by 1.
526//
527// Example transformation:
528// cmp w8, #10
529// csinc w9, w0, w1, gt ; w9 = (w8 > 10) ? w0 : w1+1
530// cmp w8, #9
531// csinc w10, w0, w1, gt ; w10 = (w8 > 9) ? w0 : w1+1
532//
533// Into:
534// cmp w8, #10
535// csinc w9, w0, w1, gt ; w9 = (w8 > 10) ? w0 : w1+1
536// csinc w10, w0, w1, ge ; w10 = (w8 >= 10) ? w0 : w1+1
537//
538// The second CMP is eliminated, enabling CSE to remove the redundant
539// comparison.
540bool AArch64ConditionOptimizerImpl::optimizeIntraBlock(MachineBasicBlock &MBB) {
541 MachineInstr *FirstCSINC = nullptr;
542 MachineInstr *SecondCSINC = nullptr;
543
544 // Find two CSINC instructions
545 for (MachineInstr &MI : MBB) {
546 if (isCSINCInstruction(MI.getOpcode())) {
547 if (!FirstCSINC) {
548 FirstCSINC = &MI;
549 } else if (!SecondCSINC) {
550 SecondCSINC = &MI;
551 break; // Found both
552 }
553 }
554 }
555
556 if (!FirstCSINC || !SecondCSINC) {
557 return false;
558 }
559
560 // Since we may modify cmps in this MBB, make sure NZCV does not live out.
561 if (nzcvLivesOut(&MBB))
562 return false;
563
564 // Find the CMPs controlling each CSINC
565 MachineInstr *FirstCmpMI = findAdjustableCmp(FirstCSINC);
566 MachineInstr *SecondCmpMI = findAdjustableCmp(SecondCSINC);
567 if (!FirstCmpMI || !SecondCmpMI)
568 return false;
569
570 // Ensure we have two distinct CMPs
571 if (FirstCmpMI == SecondCmpMI) {
572 LLVM_DEBUG(dbgs() << "Both CSINCs already controlled by same CMP\n");
573 return false;
574 }
575
576 if (!registersMatch(FirstCmpMI, SecondCmpMI))
577 return false;
578
579 // Check that nothing else modifies the flags between the first CMP and second
580 // conditional
581 for (auto It = std::next(MachineBasicBlock::iterator(FirstCmpMI));
582 It != std::next(MachineBasicBlock::iterator(SecondCSINC)); ++It) {
583 if (&*It != SecondCmpMI &&
584 It->modifiesRegister(AArch64::NZCV, /*TRI=*/nullptr)) {
585 LLVM_DEBUG(dbgs() << "Flags modified between CMPs by: " << *It << '\n');
586 return false;
587 }
588 }
589
590 // Check flags aren't read after second conditional within the same block
591 for (auto It = std::next(MachineBasicBlock::iterator(SecondCSINC));
592 It != MBB.end(); ++It) {
593 if (It->readsRegister(AArch64::NZCV, /*TRI=*/nullptr)) {
594 LLVM_DEBUG(dbgs() << "Flags read after second CSINC by: " << *It << '\n');
595 return false;
596 }
597 }
598
599 // Extract condition codes from both CSINCs (operand 3)
600 AArch64CC::CondCode FirstCond =
601 (AArch64CC::CondCode)(int)FirstCSINC->getOperand(3).getImm();
602 AArch64CC::CondCode SecondCond =
603 (AArch64CC::CondCode)(int)SecondCSINC->getOperand(3).getImm();
604
605 const int FirstImm = (int)FirstCmpMI->getOperand(2).getImm();
606 const int SecondImm = (int)SecondCmpMI->getOperand(2).getImm();
607
608 LLVM_DEBUG(dbgs() << "Comparing intra-block CSINCs: "
609 << AArch64CC::getCondCodeName(FirstCond) << " #" << FirstImm
610 << " and " << AArch64CC::getCondCodeName(SecondCond) << " #"
611 << SecondImm << '\n');
612
613 // Check if both conditions are the same (GT/GT, LT/LT, HI/HI, LO/LO)
614 // and immediates differ by 1.
615 if (FirstCond == SecondCond &&
616 (isGreaterThan(FirstCond) || isLessThan(FirstCond)) &&
617 std::abs(SecondImm - FirstImm) == 1) {
618 // Pick which comparison to adjust to match the other
619 // For GT/HI: adjust the one with smaller immediate
620 // For LT/LO: adjust the one with larger immediate
621 bool adjustFirst = (FirstImm < SecondImm);
622 if (isLessThan(FirstCond)) {
623 adjustFirst = !adjustFirst;
624 }
625
626 MachineInstr *CmpToAdjust = adjustFirst ? FirstCmpMI : SecondCmpMI;
627 MachineInstr *CSINCToAdjust = adjustFirst ? FirstCSINC : SecondCSINC;
628 AArch64CC::CondCode CondToAdjust = adjustFirst ? FirstCond : SecondCond;
629 int TargetImm = adjustFirst ? SecondImm : FirstImm;
630
631 CmpInfo Adj = getAdjustedCmpInfo(CmpToAdjust, CondToAdjust);
632
633 if (Adj.Imm == TargetImm &&
634 Adj.Opc == (adjustFirst ? SecondCmpMI : FirstCmpMI)->getOpcode()) {
635 LLVM_DEBUG(dbgs() << "Successfully optimizing intra-block CSINC pair\n");
636
637 // Modify the selected CMP and CSINC
638 CmpCondPair ToChange{CmpToAdjust, CSINCToAdjust, CondToAdjust};
639 applyCmpAdjustment(ToChange, Adj);
640
641 return true;
642 }
643 }
644
645 return false;
646}
647
648// Optimizes CMP+Bcc pairs across two basic blocks in the dominator tree.
649bool AArch64ConditionOptimizerImpl::optimizeCrossBlock(MachineBasicBlock &HBB) {
650 SmallVector<MachineOperand, 4> HeadCondOperands;
651 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
652 if (TII->analyzeBranch(HBB, TBB, FBB, HeadCondOperands)) {
653 return false;
654 }
655
656 // Equivalence check is to skip loops.
657 if (!TBB || TBB == &HBB) {
658 return false;
659 }
660
661 SmallVector<MachineOperand, 4> TrueCondOperands;
662 MachineBasicBlock *TBB_TBB = nullptr, *TBB_FBB = nullptr;
663 if (TII->analyzeBranch(*TBB, TBB_TBB, TBB_FBB, TrueCondOperands)) {
664 return false;
665 }
666
667 MachineInstr *HeadBrMI = getBccTerminator(&HBB);
668 MachineInstr *TrueBrMI = getBccTerminator(TBB);
669 if (!HeadBrMI || !TrueBrMI)
670 return false;
671
672 // Since we may modify cmps in these blocks, make sure NZCV does not live out.
673 if (nzcvLivesOut(&HBB) || nzcvLivesOut(TBB))
674 return false;
675
676 // Find the CMPs controlling each branch
677 MachineInstr *HeadCmpMI = findAdjustableCmp(HeadBrMI);
678 MachineInstr *TrueCmpMI = findAdjustableCmp(TrueBrMI);
679 if (!HeadCmpMI || !TrueCmpMI)
680 return false;
681
682 if (!registersMatch(HeadCmpMI, TrueCmpMI))
683 return false;
684
685 AArch64CC::CondCode HeadCondCode = parseCondCode(HeadCondOperands);
686 AArch64CC::CondCode TrueCondCode = parseCondCode(TrueCondOperands);
687 if (HeadCondCode == AArch64CC::CondCode::Invalid ||
688 TrueCondCode == AArch64CC::CondCode::Invalid) {
689 return false;
690 }
691
692 LLVM_DEBUG(dbgs() << "Checking cross-block pair: "
693 << AArch64CC::getCondCodeName(HeadCondCode) << " #"
694 << HeadCmpMI->getOperand(2).getImm() << ", "
695 << AArch64CC::getCondCodeName(TrueCondCode) << " #"
696 << TrueCmpMI->getOperand(2).getImm() << '\n');
697
698 CmpCondPair Head{HeadCmpMI, HeadBrMI, HeadCondCode};
699 CmpCondPair True{TrueCmpMI, TrueBrMI, TrueCondCode};
700
701 return tryOptimizePair(Head, True);
702}
703
704bool AArch64ConditionOptimizerLegacy::runOnMachineFunction(
705 MachineFunction &MF) {
706 if (skipFunction(MF.getFunction()))
707 return false;
708 MachineDominatorTree &MDT =
709 getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
710 return AArch64ConditionOptimizerImpl().run(MF, MDT);
711}
712
713bool AArch64ConditionOptimizerImpl::run(MachineFunction &MF,
714 MachineDominatorTree &MDT) {
715 LLVM_DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n"
716 << "********** Function: " << MF.getName() << '\n');
717
720 DomTree = &MDT;
721 MRI = &MF.getRegInfo();
722
723 bool Changed = false;
724
725 // Visit blocks in dominator tree pre-order. The pre-order enables multiple
726 // cmp-conversions from the same head block.
727 // Note that updateDomTree() modifies the children of the DomTree node
728 // currently being visited. The df_iterator supports that; it doesn't look at
729 // child_begin() / child_end() until after a node has been visited.
730 for (MachineDomTreeNode *I : depth_first(DomTree)) {
731 MachineBasicBlock *HBB = I->getBlock();
732 Changed |= optimizeIntraBlock(*HBB);
733 Changed |= optimizeCrossBlock(*HBB);
734 }
735
736 return Changed;
737}
738
739PreservedAnalyses
742 auto &MDT = MFAM.getResult<MachineDominatorTreeAnalysis>(MF);
743 bool Changed = AArch64ConditionOptimizerImpl().run(MF, MDT);
744 if (!Changed)
745 return PreservedAnalyses::all();
748 return PA;
749}
static AArch64CC::CondCode parseCondCode(ArrayRef< MachineOperand > Cond)
static int getComplementOpc(int Opc)
static bool isGreaterThan(AArch64CC::CondCode Cmp)
static AArch64CC::CondCode getAdjustedCmp(AArch64CC::CondCode Cmp)
static bool isCSINCInstruction(unsigned Opc)
static bool isLessThan(AArch64CC::CondCode Cmp)
static bool isCmpInstruction(unsigned Opc)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition MD5.cpp:57
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
iterator_range< succ_iterator > successors()
MachineInstrBundleIterator< MachineInstr > iterator
Analysis pass which computes a MachineDominatorTree.
Analysis pass which computes a MachineDominatorTree.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
const MachineOperand & getOperand(unsigned i) const
void setImm(int64_t immVal)
int64_t getImm() const
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static const char * getCondCodeName(CondCode Code)
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
FunctionPass * createAArch64ConditionOptimizerLegacyPass()
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
DomTreeNodeBase< MachineBasicBlock > MachineDomTreeNode
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
iterator_range< df_iterator< T > > depth_first(const T &G)
IterT prev_nodbg(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It, then continue decrementing it while it points to a debug instruction.
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.