LLVM 23.0.0git
AArch64ConditionOptimizer.cpp
Go to the documentation of this file.
1//=- AArch64ConditionOptimizer.cpp - Remove useless comparisons for AArch64 -=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9//
10// This pass tries to make consecutive comparisons of values use the same
11// operands to allow the CSE pass to remove duplicate instructions. It adjusts
12// comparisons with immediate values by converting between inclusive and
13// exclusive forms (GE <-> GT, LE <-> LT) and correcting immediate values to
14// make them equal.
15//
16// The pass handles:
17// * Cross-block: SUBS/ADDS followed by conditional branches
18// * Intra-block: CSINC conditional instructions
19//
20//
21// Consider the following example in C:
22//
23// if ((a < 5 && ...) || (a > 5 && ...)) {
24// ~~~~~ ~~~~~
25// ^ ^
26// x y
27//
28// Here both "x" and "y" expressions compare "a" with "5". When "x" evaluates
29// to "false", "y" can just check flags set by the first comparison. As a
30// result of the canonicalization employed by
31// SelectionDAGBuilder::visitSwitchCase, DAGCombine, and other target-specific
32// code, assembly ends up in the form that is not CSE friendly:
33//
34// ...
35// cmp w8, #4
36// b.gt .LBB0_3
37// ...
38// .LBB0_3:
39// cmp w8, #6
40// b.lt .LBB0_6
41// ...
42//
43// Same assembly after the pass:
44//
45// ...
46// cmp w8, #5
47// b.ge .LBB0_3
48// ...
49// .LBB0_3:
50// cmp w8, #5 // <-- CSE pass removes this instruction
51// b.le .LBB0_6
52// ...
53//
54// See optimizeCrossBlock() and optimizeIntraBlock() for implementation details.
55//
56// TODO: maybe handle TBNZ/TBZ the same way as CMP when used instead for "a < 0"
57// TODO: For cross-block:
58// - handle other conditional instructions (e.g. CSET)
59// - allow second branching to be anything if it doesn't require adjusting
60// TODO: For intra-block:
61// - handle CINC and CSET (CSINC aliases) as their conditions are inverted
62// compared to CSINC.
63// - handle other non-CSINC conditional instructions
64//
65//===----------------------------------------------------------------------===//
66
67#include "AArch64.h"
68#include "AArch64Subtarget.h"
71#include "llvm/ADT/ArrayRef.h"
74#include "llvm/ADT/Statistic.h"
86#include "llvm/Pass.h"
87#include "llvm/Support/Debug.h"
90#include <cassert>
91#include <cstdlib>
92
93using namespace llvm;
94
95#define DEBUG_TYPE "aarch64-condopt"
96
97STATISTIC(NumConditionsAdjusted, "Number of conditions adjusted");
98
99namespace {
100
/// Bundles the parameters needed to adjust a comparison instruction.
/// NOTE(review): the rendered dump omits a member here — applyCmpAdjustment
/// reads Info.CC, so an `AArch64CC::CondCode CC;` field is expected. Confirm
/// against the full source.
struct CmpInfo {
  int Imm;      // Adjusted comparison immediate (operand 2 of SUBS/ADDS-imm).
  unsigned Opc; // Adjusted compare opcode (SUBSWri/SUBSXri/ADDSWri/ADDSXri).
};
107
/// Implementation shared by the legacy and new-PM passes. Adjusts CMP/CMN
/// immediates and their consuming conditional instructions so consecutive
/// comparisons become identical and CSE can remove the duplicates.
class AArch64ConditionOptimizerImpl {
  /// Represents a comparison instruction paired with its consuming
  /// conditional instruction
  struct CmpCondPair {
    MachineInstr *CmpMI;  // Flag-setting SUBS/ADDS-immediate (cmp/cmn alias).
    MachineInstr *CondMI; // Consumer of the flags (Bcc or CSINC).
    // NOTE(review): the rendered dump omits a member here — tryOptimizePair
    // reads First.CC and applyCmpAdjustment writes Pair.CC, so an
    // `AArch64CC::CondCode CC;` field is expected. Confirm against source.

    // Immediate operand of the compare (operand 2 of SUBS/ADDS-immediate).
    int getImm() const { return CmpMI->getOperand(2).getImm(); }
    unsigned getOpc() const { return CmpMI->getOpcode(); }
  };

  // Target hooks and analyses; initialized in run().
  const AArch64InstrInfo *TII;
  const TargetRegisterInfo *TRI;
  MachineDominatorTree *DomTree;
  const MachineRegisterInfo *MRI;

public:
  /// Entry point: visits blocks in dominator-tree pre-order and applies
  /// intra-block and cross-block adjustments. Returns true on any change.
  bool run(MachineFunction &MF, MachineDominatorTree &MDT);

private:
  bool canAdjustCmp(MachineInstr &CmpMI);
  bool registersMatch(MachineInstr *FirstMI, MachineInstr *SecondMI);
  bool nzcvLivesOut(MachineBasicBlock *MBB);
  MachineInstr *getBccTerminator(MachineBasicBlock *MBB);
  MachineInstr *findAdjustableCmp(MachineInstr *CondMI);
  CmpInfo getAdjustedCmpInfo(MachineInstr *CmpMI, AArch64CC::CondCode Cmp);
  void updateCmpInstr(MachineInstr *CmpMI, int NewImm, unsigned NewOpc);
  void updateCondInstr(MachineInstr *CondMI, AArch64CC::CondCode NewCC);
  void applyCmpAdjustment(CmpCondPair &Pair, const CmpInfo &Info);
  bool commitPendingPair(std::optional<CmpCondPair> &PendingPair,
                         SmallDenseMap<Register, CmpCondPair> &PairsByReg);
  bool tryOptimizePair(CmpCondPair &First, CmpCondPair &Second);
  bool optimizeIntraBlock(MachineBasicBlock &MBB);
  bool optimizeCrossBlock(MachineBasicBlock &HBB);
};
144
/// Legacy pass-manager wrapper around AArch64ConditionOptimizerImpl.
class AArch64ConditionOptimizerLegacy : public MachineFunctionPass {
public:
  static char ID; // Pass identification anchor; address is the identity.
  AArch64ConditionOptimizerLegacy() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override;
  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return "AArch64 Condition Optimizer";
  }
};
157
158} // end anonymous namespace
159
160char AArch64ConditionOptimizerLegacy::ID = 0;
161
162INITIALIZE_PASS_BEGIN(AArch64ConditionOptimizerLegacy, "aarch64-condopt",
163 "AArch64 CondOpt Pass", false, false)
165INITIALIZE_PASS_END(AArch64ConditionOptimizerLegacy, "aarch64-condopt",
166 "AArch64 CondOpt Pass", false, false)
167
169 return new AArch64ConditionOptimizerLegacy();
170}
171
// Declare required/preserved analyses for the legacy pass manager.
// NOTE(review): the rendered dump drops the body lines here; the index shows
// `AnalysisUsage::addRequired()` / `addPreserved()` usage (presumably for
// MachineDominatorTreeWrapperPass) — confirm against the full source.
void AArch64ConditionOptimizerLegacy::getAnalysisUsage(
    AnalysisUsage &AU) const {
}
178
179// Verify that the MI's immediate is adjustable and it only sets flags (pure
180// cmp)
181bool AArch64ConditionOptimizerImpl::canAdjustCmp(MachineInstr &CmpMI) {
182 unsigned ShiftAmt = AArch64_AM::getShiftValue(CmpMI.getOperand(3).getImm());
183 if (!CmpMI.getOperand(2).isImm()) {
184 LLVM_DEBUG(dbgs() << "Immediate of cmp is symbolic, " << CmpMI << '\n');
185 return false;
186 } else if (CmpMI.getOperand(2).getImm() << ShiftAmt >= 0xfff) {
187 LLVM_DEBUG(dbgs() << "Immediate of cmp may be out of range, " << CmpMI
188 << '\n');
189 return false;
190 } else if (!MRI->use_nodbg_empty(CmpMI.getOperand(0).getReg())) {
191 LLVM_DEBUG(dbgs() << "Destination of cmp is not dead, " << CmpMI << '\n');
192 return false;
193 }
194
195 return true;
196}
197
198// Ensure both compare MIs use the same register, tracing through copies.
199bool AArch64ConditionOptimizerImpl::registersMatch(MachineInstr *FirstMI,
200 MachineInstr *SecondMI) {
201 Register FirstReg = FirstMI->getOperand(1).getReg();
202 Register SecondReg = SecondMI->getOperand(1).getReg();
203 Register FirstCmpReg =
204 FirstReg.isVirtual() ? TRI->lookThruCopyLike(FirstReg, MRI) : FirstReg;
205 Register SecondCmpReg =
206 SecondReg.isVirtual() ? TRI->lookThruCopyLike(SecondReg, MRI) : SecondReg;
207 if (FirstCmpReg != SecondCmpReg) {
208 LLVM_DEBUG(dbgs() << "CMPs compare different registers\n");
209 return false;
210 }
211
212 return true;
213}
214
215// Check if NZCV lives out to any successor block.
216bool AArch64ConditionOptimizerImpl::nzcvLivesOut(MachineBasicBlock *MBB) {
217 for (auto *SuccBB : MBB->successors()) {
218 if (SuccBB->isLiveIn(AArch64::NZCV)) {
219 LLVM_DEBUG(dbgs() << "NZCV live into successor "
220 << printMBBReference(*SuccBB) << " from "
221 << printMBBReference(*MBB) << '\n');
222 return true;
223 }
224 }
225 return false;
226}
227
228// Returns true if the opcode is a comparison instruction (CMP/CMN).
229static bool isCmpInstruction(unsigned Opc) {
230 switch (Opc) {
231 // cmp is an alias for SUBS with a dead destination register.
232 case AArch64::SUBSWri:
233 case AArch64::SUBSXri:
234 // cmp is an alias for ADDS with a dead destination register.
235 case AArch64::ADDSWri:
236 case AArch64::ADDSXri:
237 return true;
238 default:
239 return false;
240 }
241}
242
243static bool isCSINCInstruction(unsigned Opc) {
244 return Opc == AArch64::CSINCWr || Opc == AArch64::CSINCXr;
245}
246
// Returns the Bcc terminator if present, otherwise nullptr.
// NOTE(review): the rendered dump drops the declaration of Term here
// (presumably `MachineBasicBlock::iterator Term = MBB->getFirstTerminator();`
// — getFirstTerminator appears in the index). Confirm against the source.
MachineInstr *
AArch64ConditionOptimizerImpl::getBccTerminator(MachineBasicBlock *MBB) {
  if (Term == MBB->end()) {
    // Block has no terminator at all; nothing to pair a cmp with.
    LLVM_DEBUG(dbgs() << "No terminator in " << printMBBReference(*MBB)
                      << '\n');
    return nullptr;
  }

  // Only a plain conditional branch is handled; reject CBZ/TBZ/B/ret etc.
  if (Term->getOpcode() != AArch64::Bcc) {
    LLVM_DEBUG(dbgs() << "Non-Bcc terminator in " << printMBBReference(*MBB)
                      << ": " << *Term);
    return nullptr;
  }

  return &*Term;
}
265
// Find the CMP instruction controlling the given conditional instruction and
// ensure it can be adjusted for CSE optimization. Searches backward from
// CondMI, ensuring no NZCV interference. Returns nullptr if no suitable CMP
// is found or if adjustments are not safe.
MachineInstr *
AArch64ConditionOptimizerImpl::findAdjustableCmp(MachineInstr *CondMI) {
  assert(CondMI && "CondMI cannot be null");
  MachineBasicBlock *MBB = CondMI->getParent();

  // Search backward from the conditional to find the instruction controlling
  // it.
  // NOTE(review): the rendered dump drops the `for (` header line here
  // (presumably `for (MachineBasicBlock::iterator B = MBB->begin(),` given
  // the `It != B` bound below). Confirm against the full source.
       It = MachineBasicBlock::iterator(CondMI);
       It != B;) {
    // Decrement past debug instructions so they never terminate the scan.
    It = prev_nodbg(It, B);
    MachineInstr &I = *It;
    assert(!I.isTerminator() && "Spurious terminator");
    // Ensure there is no use of NZCV between CMP and conditional.
    if (I.readsRegister(AArch64::NZCV, /*TRI=*/nullptr))
      return nullptr;

    if (isCmpInstruction(I.getOpcode())) {
      // Found the flag-setter: usable only if its immediate is adjustable.
      if (!canAdjustCmp(I)) {
        return nullptr;
      }
      return &I;
    }

    // Any other NZCV writer between the cmp and the conditional means the
    // conditional is not controlled by a plain cmp we can adjust.
    if (I.modifiesRegister(AArch64::NZCV, /*TRI=*/nullptr))
      return nullptr;
  }
  // Reached the block start without finding a flag-setting instruction.
  LLVM_DEBUG(dbgs() << "Flags not defined in " << printMBBReference(*MBB)
                    << '\n');
  return nullptr;
}
301
302// Changes opcode adds <-> subs considering register operand width.
303static int getComplementOpc(int Opc) {
304 switch (Opc) {
305 case AArch64::ADDSWri: return AArch64::SUBSWri;
306 case AArch64::ADDSXri: return AArch64::SUBSXri;
307 case AArch64::SUBSWri: return AArch64::ADDSWri;
308 case AArch64::SUBSXri: return AArch64::ADDSXri;
309 default:
310 llvm_unreachable("Unexpected opcode");
311 }
312}
313
// Changes form of comparison inclusive <-> exclusive.
// NOTE(review): the rendered dump drops the signature line here; per the
// index this is
// `static AArch64CC::CondCode getAdjustedCmp(AArch64CC::CondCode Cmp)`.
  switch (Cmp) {
  // Signed pairs: GT <-> GE, LT <-> LE.
  case AArch64CC::GT:
    return AArch64CC::GE;
  case AArch64CC::GE:
    return AArch64CC::GT;
  case AArch64CC::LT:
    return AArch64CC::LE;
  case AArch64CC::LE:
    return AArch64CC::LT;
  // Unsigned pairs: HI <-> HS, LO <-> LS.
  case AArch64CC::HI:
    return AArch64CC::HS;
  case AArch64CC::HS:
    return AArch64CC::HI;
  case AArch64CC::LO:
    return AArch64CC::LS;
  case AArch64CC::LS:
    return AArch64CC::LO;
  default:
    llvm_unreachable("Unexpected condition code");
  }
}
337
// Returns the adjusted immediate, opcode, and condition code for switching
// between inclusive/exclusive forms (GT <-> GE, LT <-> LE).
// NOTE(review): the rendered dump drops three lines in this function: the
// `AArch64CC::CondCode Cmp) {` parameter line, the continuation of the
// IsSigned expression (presumably `Cmp == AArch64CC::LT || LE`), and the
// opcode flip inside the wrap check (presumably `Opc = getComplementOpc(Opc);`
// — getComplementOpc appears in the index but is otherwise unreferenced).
// Confirm all three against the full source.
CmpInfo
AArch64ConditionOptimizerImpl::getAdjustedCmpInfo(MachineInstr *CmpMI,
  unsigned Opc = CmpMI->getOpcode();

  bool IsSigned = Cmp == AArch64CC::GT || Cmp == AArch64CC::GE ||

  // CMN (compare with negative immediate) is an alias to ADDS (as
  // "operand - negative" == "operand + positive")
  bool Negative = (Opc == AArch64::ADDSWri || Opc == AArch64::ADDSXri);

  // GT/HI tighten by +1 toward GE/HS; LT/LO (and LE/LS) move by -1.
  int Correction = (Cmp == AArch64CC::GT || Cmp == AArch64CC::HI) ? 1 : -1;
  // Negate Correction value for comparison with negative immediate (CMN).
  if (Negative) {
    Correction = -Correction;
  }

  const int OldImm = (int)CmpMI->getOperand(2).getImm();
  const int NewImm = std::abs(OldImm + Correction);

  // Bail out on cmn 0 (ADDS with immediate 0). It is a valid instruction but
  // doesn't set flags in a way we can safely transform, so skip optimization.
  if (OldImm == 0 && Negative)
    return {OldImm, Opc, Cmp};

  if ((OldImm == 1 && Negative && Correction == -1) ||
      (OldImm == 0 && Correction == -1)) {
    // If we change opcodes for unsigned comparisons, this means we did an
    // unsigned wrap (e.g., 0 wrapping to 0xFFFFFFFF), so return the old cmp.
    // Note: For signed comparisons, opcode changes (cmn 1 ↔ cmp 0) are valid.
    if (!IsSigned)
      return {OldImm, Opc, Cmp};
  }

  return {NewImm, Opc, getAdjustedCmp(Cmp)};
}
378
379// Modifies a comparison instruction's immediate and opcode.
380void AArch64ConditionOptimizerImpl::updateCmpInstr(MachineInstr *CmpMI,
381 int NewImm,
382 unsigned NewOpc) {
383 CmpMI->getOperand(2).setImm(NewImm);
384 CmpMI->setDesc(TII->get(NewOpc));
385}
386
387// Modifies the condition code of a conditional instruction.
388void AArch64ConditionOptimizerImpl::updateCondInstr(MachineInstr *CondMI,
389 AArch64CC::CondCode NewCC) {
390 int CCOpIdx =
391 AArch64InstrInfo::findCondCodeUseOperandIdxForBranchOrSelect(*CondMI);
392 assert(CCOpIdx >= 0 && "Unsupported conditional instruction");
393 CondMI->getOperand(CCOpIdx).setImm(NewCC);
394 ++NumConditionsAdjusted;
395}
396
397// Applies a comparison adjustment to a cmp/cond instruction pair.
398void AArch64ConditionOptimizerImpl::applyCmpAdjustment(CmpCondPair &Pair,
399 const CmpInfo &Info) {
400 updateCmpInstr(Pair.CmpMI, Info.Imm, Info.Opc);
401 updateCondInstr(Pair.CondMI, Info.CC);
402 Pair.CC = Info.CC;
403}
404
// Extracts the condition code from the result of analyzeBranch.
// Returns the CondCode or Invalid if the format is not a simple br.cond.
// NOTE(review): the rendered dump drops the signature line (per the index:
// `static AArch64CC::CondCode parseCondCode(ArrayRef<MachineOperand> Cond)`)
// and the final fallthrough return (presumably `return AArch64CC::Invalid;`).
// Confirm against the full source.
  assert(!Cond.empty() && "Expected non-empty condition from analyzeBranch");
  // A normal br.cond simply has the condition code.
  if (Cond[0].getImm() != -1) {
    assert(Cond.size() == 1 && "Unknown Cond array format");
    return (AArch64CC::CondCode)(int)Cond[0].getImm();
  }
}
416
// Strict "greater" conditions: signed GT or unsigned HI.
// NOTE(review): the rendered dump drops the signature lines of these two
// helpers; per the index they are `static bool isGreaterThan(CondCode)` and
// `static bool isLessThan(CondCode)`. Confirm against the full source.
  return Cmp == AArch64CC::GT || Cmp == AArch64CC::HI;
}

// Strict "less" conditions: signed LT or unsigned LO.
  return Cmp == AArch64CC::LT || Cmp == AArch64CC::LO;
}
424
// Attempts to adjust one or both of a pair of cmp/cond pairs (comparing the
// same register) so their compares become identical and CSE can remove the
// second one. Returns true if any instruction was rewritten.
bool AArch64ConditionOptimizerImpl::tryOptimizePair(CmpCondPair &First,
                                                    CmpCondPair &Second) {
  // Only strict inequalities (GT/GE candidates via GT/HI, LT/LO) are
  // adjustable; anything else bails immediately.
  if (!((isGreaterThan(First.CC) || isLessThan(First.CC)) &&
        (isGreaterThan(Second.CC) || isLessThan(Second.CC))))
    return false;

  int FirstImmTrueValue = First.getImm();
  int SecondImmTrueValue = Second.getImm();

  // Normalize immediate of CMN (ADDS) instructions
  if (First.getOpc() == AArch64::ADDSWri || First.getOpc() == AArch64::ADDSXri)
    FirstImmTrueValue = -FirstImmTrueValue;
  if (Second.getOpc() == AArch64::ADDSWri ||
      Second.getOpc() == AArch64::ADDSXri)
    SecondImmTrueValue = -SecondImmTrueValue;

  // Compute what each cmp would look like after an inclusive<->exclusive
  // adjustment (may return the input unchanged when adjustment is unsafe).
  CmpInfo FirstAdj = getAdjustedCmpInfo(First.CmpMI, First.CC);
  CmpInfo SecondAdj = getAdjustedCmpInfo(Second.CmpMI, Second.CC);

  if (((isGreaterThan(First.CC) && isLessThan(Second.CC)) ||
       (isLessThan(First.CC) && isGreaterThan(Second.CC))) &&
      std::abs(SecondImmTrueValue - FirstImmTrueValue) == 2) {
    // This branch transforms machine instructions that correspond to
    //
    // 1) (a > {SecondImm} && ...) || (a < {FirstImm} && ...)
    // 2) (a < {SecondImm} && ...) || (a > {FirstImm} && ...)
    //
    // into
    //
    // 1) (a >= {NewImm} && ...) || (a <= {NewImm} && ...)
    // 2) (a <= {NewImm} && ...) || (a >= {NewImm} && ...)

    // Verify both adjustments converge to identical comparisons (same
    // immediate and opcode). This ensures CSE can eliminate the duplicate.
    if (FirstAdj.Imm != SecondAdj.Imm || FirstAdj.Opc != SecondAdj.Opc)
      return false;

    LLVM_DEBUG(dbgs() << "Optimized (opposite): "
                      << AArch64CC::getCondCodeName(First.CC) << " #"
                      << First.getImm() << ", "
                      << AArch64CC::getCondCodeName(Second.CC) << " #"
                      << Second.getImm() << " -> "
                      << AArch64CC::getCondCodeName(FirstAdj.CC) << " #"
                      << FirstAdj.Imm << ", "
                      << AArch64CC::getCondCodeName(SecondAdj.CC) << " #"
                      << SecondAdj.Imm << '\n');
    // Both sides move toward the common middle immediate.
    applyCmpAdjustment(First, FirstAdj);
    applyCmpAdjustment(Second, SecondAdj);
    return true;

  } else if (((isGreaterThan(First.CC) && isGreaterThan(Second.CC)) ||
              (isLessThan(First.CC) && isLessThan(Second.CC))) &&
             std::abs(SecondImmTrueValue - FirstImmTrueValue) == 1) {
    // This branch transforms machine instructions that correspond to
    //
    // 1) (a > {SecondImm} && ...) || (a > {FirstImm} && ...)
    // 2) (a < {SecondImm} && ...) || (a < {FirstImm} && ...)
    //
    // into
    //
    // 1) (a <= {NewImm} && ...) || (a > {NewImm} && ...)
    // 2) (a < {NewImm} && ...) || (a >= {NewImm} && ...)

    // GT -> GE transformation increases immediate value, so picking the
    // smaller one; LT -> LE decreases immediate value so invert the choice.
    bool AdjustFirst = (FirstImmTrueValue < SecondImmTrueValue);
    if (isLessThan(First.CC))
      AdjustFirst = !AdjustFirst;

    // Only one side is rewritten; the other is the (unchanged) target.
    CmpCondPair &Target = AdjustFirst ? Second : First;
    CmpCondPair &ToChange = AdjustFirst ? First : Second;
    CmpInfo &Adj = AdjustFirst ? FirstAdj : SecondAdj;

    // Verify the adjustment converges to the target's comparison (same
    // immediate and opcode). This ensures CSE can eliminate the duplicate.
    if (Adj.Imm != Target.getImm() || Adj.Opc != Target.getOpc())
      return false;

    LLVM_DEBUG(dbgs() << "Optimized (same-direction): "
                      << AArch64CC::getCondCodeName(ToChange.CC) << " #"
                      << ToChange.getImm() << " -> "
                      << AArch64CC::getCondCodeName(Adj.CC) << " #" << Adj.Imm
                      << '\n');
    applyCmpAdjustment(ToChange, Adj);
    return true;
  }

  // Other transformation cases almost never occur due to generation of < or >
  // comparisons instead of <= and >=.
  return false;
}
516
517bool AArch64ConditionOptimizerImpl::commitPendingPair(
518 std::optional<CmpCondPair> &PendingPair,
519 SmallDenseMap<Register, CmpCondPair> &PairsByReg) {
520 if (!PendingPair)
521 return false;
522
523 Register Reg = PendingPair->CmpMI->getOperand(1).getReg();
524 Register Key = Reg.isVirtual() ? TRI->lookThruCopyLike(Reg, MRI) : Reg;
525
526 auto MatchingPair = PairsByReg.find(Key);
527 bool Changed = MatchingPair != PairsByReg.end() &&
528 tryOptimizePair(MatchingPair->second, *PendingPair);
529
530 PairsByReg[Key] = *PendingPair;
531 PendingPair = std::nullopt;
532 return Changed;
533}
534
// This function transforms cmps and their consuming conditionals (CmpCondPairs)
// 1. Same direction: when both conditions are the same (e.g. GT/GT or LT/LT)
//    and immediates differ by 1
// 2. Opposite direction: when both conditions are adjustable to a common middle
//    (e.g., GT/LT) and immediates differ by 2.
// The compare instructions are made to match to enable CSE.
// All cmp/cond pairs within a basic block are examined
//
// Example transformation:
//   cmp      w8, #10
//   csinc    w9, w0, w1, gt   ; w9  = (w8 > 10) ? w0 : w1+1
//   cmp      w8, #9
//   csinc    w10, w0, w1, gt  ; w10 = (w8 > 9)  ? w0 : w1+1
//
// Into:
//   cmp      w8, #10
//   csinc    w9, w0, w1, gt   ; w9  = (w8 > 10)  ? w0 : w1+1
//   cmp      w8, #10          ; <- CSE can remove the redundant cmp
//   csinc    w10, w0, w1, ge  ; w10 = (w8 >= 10) ? w0 : w1+1
//
bool AArch64ConditionOptimizerImpl::optimizeIntraBlock(MachineBasicBlock &MBB) {
  // Last committed pair per compared register; candidates for matching.
  SmallDenseMap<Register, CmpCondPair> PairsByReg;
  // A cmp+CSINC pair seen but not yet known to be safe to commit.
  std::optional<CmpCondPair> PendingPair;
  // The most recent adjustable cmp with no consumer paired yet.
  MachineInstr *ActiveCmp = nullptr;
  bool Changed = false;

  for (MachineInstr &MI : MBB) {
    if (MI.isDebugInstr())
      continue;

    if (isCmpInstruction(MI.getOpcode()) && canAdjustCmp(MI)) {
      // A new cmp supersedes the previous flags; commit what we had.
      Changed |= commitPendingPair(PendingPair, PairsByReg);
      ActiveCmp = &MI;
      continue;
    }

    if (MI.modifiesRegister(AArch64::NZCV, /*TRI=*/nullptr)) {
      // Non-CMP clobber: commit any pending pair and reset all state, since
      // unknown flag state at this point invalidates all prior pairs
      Changed |= commitPendingPair(PendingPair, PairsByReg);
      ActiveCmp = nullptr;
      PairsByReg.clear();
      continue;
    }

    if (isCSINCInstruction(MI.getOpcode())) {
      if (PendingPair) {
        // A second conditional consuming the same CMP would invalidate any
        // optimization: modifying the CMP would silently change what both
        // consumers compare against. Mark the CMP spent.
        PendingPair = std::nullopt;
        ActiveCmp = nullptr;
      } else if (ActiveCmp) {
        int CCOpIdx =
            AArch64InstrInfo::findCondCodeUseOperandIdxForBranchOrSelect(MI);
        assert(CCOpIdx >= 0 && "Unsupported conditional instruction");
        // NOTE(review): the rendered dump drops a line here (presumably
        // `AArch64CC::CondCode CC =` starting this initializer). Confirm
        // against the full source.
            (AArch64CC::CondCode)(int)MI.getOperand(CCOpIdx).getImm();
        PendingPair = CmpCondPair{ActiveCmp, &MI, CC};
      }
      continue;
    }

    if (MI.readsRegister(AArch64::NZCV, /*TRI=*/nullptr)) {
      // Some other flag consumer (not CSINC): the cmp can no longer be
      // rewritten safely, so drop it.
      ActiveCmp = nullptr;
      PendingPair = std::nullopt;
      continue;
    }
  }

  // Only commit the final pending pair if NZCV doesn't live out: a cross-block
  // consumer would be affected by any CMP adjustment we make.
  if (!nzcvLivesOut(&MBB))
    Changed |= commitPendingPair(PendingPair, PairsByReg);

  return Changed;
}
612
613// Optimizes CMP+Bcc pairs across two basic blocks in the dominator tree.
614bool AArch64ConditionOptimizerImpl::optimizeCrossBlock(MachineBasicBlock &HBB) {
615 SmallVector<MachineOperand, 4> HeadCondOperands;
616 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
617 if (TII->analyzeBranch(HBB, TBB, FBB, HeadCondOperands)) {
618 return false;
619 }
620
621 // Equivalence check is to skip loops.
622 if (!TBB || TBB == &HBB) {
623 return false;
624 }
625
626 SmallVector<MachineOperand, 4> TrueCondOperands;
627 MachineBasicBlock *TBB_TBB = nullptr, *TBB_FBB = nullptr;
628 if (TII->analyzeBranch(*TBB, TBB_TBB, TBB_FBB, TrueCondOperands)) {
629 return false;
630 }
631
632 MachineInstr *HeadBrMI = getBccTerminator(&HBB);
633 MachineInstr *TrueBrMI = getBccTerminator(TBB);
634 if (!HeadBrMI || !TrueBrMI)
635 return false;
636
637 // Since we may modify cmps in these blocks, make sure NZCV does not live out.
638 if (nzcvLivesOut(&HBB) || nzcvLivesOut(TBB))
639 return false;
640
641 // Find the CMPs controlling each branch
642 MachineInstr *HeadCmpMI = findAdjustableCmp(HeadBrMI);
643 MachineInstr *TrueCmpMI = findAdjustableCmp(TrueBrMI);
644 if (!HeadCmpMI || !TrueCmpMI)
645 return false;
646
647 if (!registersMatch(HeadCmpMI, TrueCmpMI))
648 return false;
649
650 AArch64CC::CondCode HeadCondCode = parseCondCode(HeadCondOperands);
651 AArch64CC::CondCode TrueCondCode = parseCondCode(TrueCondOperands);
652 if (HeadCondCode == AArch64CC::CondCode::Invalid ||
653 TrueCondCode == AArch64CC::CondCode::Invalid) {
654 return false;
655 }
656
657 LLVM_DEBUG(dbgs() << "Checking cross-block pair: "
658 << AArch64CC::getCondCodeName(HeadCondCode) << " #"
659 << HeadCmpMI->getOperand(2).getImm() << ", "
660 << AArch64CC::getCondCodeName(TrueCondCode) << " #"
661 << TrueCmpMI->getOperand(2).getImm() << '\n');
662
663 CmpCondPair Head{HeadCmpMI, HeadBrMI, HeadCondCode};
664 CmpCondPair True{TrueCmpMI, TrueBrMI, TrueCondCode};
665
666 return tryOptimizePair(Head, True);
667}
668
669bool AArch64ConditionOptimizerLegacy::runOnMachineFunction(
670 MachineFunction &MF) {
671 if (skipFunction(MF.getFunction()))
672 return false;
673 MachineDominatorTree &MDT =
674 getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
675 return AArch64ConditionOptimizerImpl().run(MF, MDT);
676}
677
// Shared driver: caches target info, then visits every block applying the
// intra-block and cross-block optimizations. Returns true on any change.
bool AArch64ConditionOptimizerImpl::run(MachineFunction &MF,
                                        MachineDominatorTree &MDT) {
  LLVM_DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n"
                    << "********** Function: " << MF.getName() << '\n');

  TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
  // NOTE(review): the rendered dump drops a line here (presumably the TRI
  // initialization, e.g. `TRI = MF.getSubtarget().getRegisterInfo();` — TRI
  // is used by registersMatch/commitPendingPair). Confirm against source.
  DomTree = &MDT;
  MRI = &MF.getRegInfo();

  bool Changed = false;

  // Visit blocks in dominator tree pre-order. The pre-order enables multiple
  // cmp-conversions from the same head block.
  // Note that updateDomTree() modifies the children of the DomTree node
  // currently being visited. The df_iterator supports that; it doesn't look at
  // child_begin() / child_end() until after a node has been visited.
  for (MachineDomTreeNode *I : depth_first(DomTree)) {
    MachineBasicBlock *HBB = I->getBlock();
    Changed |= optimizeIntraBlock(*HBB);
    Changed |= optimizeCrossBlock(*HBB);
  }

  return Changed;
}
703
// New pass-manager entry point.
// NOTE(review): the rendered dump drops the signature continuation here (per
// the index: `run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)`)
// and the construction of PA before the final return (presumably
// `auto PA = getMachineFunctionPassPreservedAnalyses();` plus a
// `PA.preserveSet<CFGAnalyses>();` — both names appear in the index).
// Confirm against the full source.
PreservedAnalyses
  auto &MDT = MFAM.getResult<MachineDominatorTreeAnalysis>(MF);
  bool Changed = AArch64ConditionOptimizerImpl().run(MF, MDT);
  if (!Changed)
    return PreservedAnalyses::all();
  return PA;
}
static AArch64CC::CondCode parseCondCode(ArrayRef< MachineOperand > Cond)
static int getComplementOpc(int Opc)
static bool isGreaterThan(AArch64CC::CondCode Cmp)
static AArch64CC::CondCode getAdjustedCmp(AArch64CC::CondCode Cmp)
static bool isCSINCInstruction(unsigned Opc)
static bool isLessThan(AArch64CC::CondCode Cmp)
static bool isCmpInstruction(unsigned Opc)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
iterator end()
Definition DenseMap.h:81
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
iterator_range< succ_iterator > successors()
MachineInstrBundleIterator< MachineInstr > iterator
Analysis pass which computes a MachineDominatorTree.
Analysis pass which computes a MachineDominatorTree.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
const MachineOperand & getOperand(unsigned i) const
void setImm(int64_t immVal)
int64_t getImm() const
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static const char * getCondCodeName(CondCode Code)
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
DXILDebugInfoMap run(Module &M)
This is an optimization pass for GlobalISel generic memory operations.
FunctionPass * createAArch64ConditionOptimizerLegacyPass()
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
DomTreeNodeBase< MachineBasicBlock > MachineDomTreeNode
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
iterator_range< df_iterator< T > > depth_first(const T &G)
IterT prev_nodbg(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It, then continue decrementing it while it points to a debug instruction.
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.