Line data Source code
1 : //===-- AArch64CondBrTuning.cpp --- Conditional branch tuning for AArch64 -===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : /// \file
10 : /// This file contains a pass that transforms CBZ/CBNZ/TBZ/TBNZ instructions
11 : /// into a conditional branch (B.cond), when the NZCV flags can be set for
12 : /// "free". This is preferred on targets that have more flexibility when
13 : /// scheduling B.cond instructions as compared to CBZ/CBNZ/TBZ/TBNZ (assuming
14 : /// all other variables are equal). This can also reduce register pressure.
15 : ///
16 : /// A few examples:
17 : ///
18 : /// 1) add w8, w0, w1 -> cmn w0, w1 ; CMN is an alias of ADDS.
19 : /// cbz w8, .LBB0_2 -> b.eq .LBB0_2
20 : ///
21 : /// 2) add w8, w0, w1 -> adds w8, w0, w1 ; w8 has multiple uses.
22 : /// cbz w8, .LBB1_2 -> b.eq .LBB1_2
23 : ///
24 : /// 3) sub w8, w0, w1 -> subs w8, w0, w1 ; w8 has multiple uses.
25 : /// tbz w8, #31, .LBB6_2 -> b.pl .LBB6_2
26 : ///
27 : //===----------------------------------------------------------------------===//
28 :
29 : #include "AArch64.h"
30 : #include "AArch64Subtarget.h"
31 : #include "llvm/CodeGen/MachineFunction.h"
32 : #include "llvm/CodeGen/MachineFunctionPass.h"
33 : #include "llvm/CodeGen/MachineInstrBuilder.h"
34 : #include "llvm/CodeGen/MachineRegisterInfo.h"
35 : #include "llvm/CodeGen/Passes.h"
36 : #include "llvm/CodeGen/TargetInstrInfo.h"
37 : #include "llvm/CodeGen/TargetRegisterInfo.h"
38 : #include "llvm/CodeGen/TargetSubtargetInfo.h"
39 : #include "llvm/Support/Debug.h"
40 : #include "llvm/Support/raw_ostream.h"
41 :
42 : using namespace llvm;
43 :
44 : #define DEBUG_TYPE "aarch64-cond-br-tuning"
45 : #define AARCH64_CONDBR_TUNING_NAME "AArch64 Conditional Branch Tuning"
46 :
47 : namespace {
/// Machine-function pass that looks at CBZ/CBNZ/TBZ/TBNZ terminators and, when
/// the tested register is produced by an ADD/SUB/AND/BIC (or its flag-setting
/// form) in the same block, replaces the branch with a B.cond that consumes the
/// NZCV flags produced by that instruction.
48 : class AArch64CondBrTuning : public MachineFunctionPass {
// Target hooks and register info; all three pointers are (re)assigned at the
// start of every runOnMachineFunction() call and are not valid before it.
49 : const AArch64InstrInfo *TII;
50 : const TargetRegisterInfo *TRI;
51 :
52 : MachineRegisterInfo *MRI;
53 :
54 : public:
// Pass identifier handed to the MachineFunctionPass base constructor.
55 : static char ID;
// Constructing the pass also runs its registry initializer.
56 1116 : AArch64CondBrTuning() : MachineFunctionPass(ID) {
57 1116 : initializeAArch64CondBrTuningPass(*PassRegistry::getPassRegistry());
58 1116 : }
59 : void getAnalysisUsage(AnalysisUsage &AU) const override;
60 : bool runOnMachineFunction(MachineFunction &MF) override;
61 1110 : StringRef getPassName() const override { return AARCH64_CONDBR_TUNING_NAME; }
62 :
63 : private:
// Returns the unique def of a virtual-register operand, or nullptr.
64 : MachineInstr *getOperandDef(const MachineOperand &MO);
// Returns an instruction that defines NZCV for the same computation as MI.
65 : MachineInstr *convertToFlagSetting(MachineInstr &MI, bool IsFlagSetting);
// Builds the B.cond equivalent of a CBZ/CBNZ/TBZ/TBNZ instruction.
66 : MachineInstr *convertToCondBr(MachineInstr &MI);
// Attempts the whole rewrite for branch MI whose operand is defined by DefMI.
67 : bool tryToTuneBranch(MachineInstr &MI, MachineInstr &DefMI);
68 : };
69 : } // end anonymous namespace
70 :
// Storage for the static pass identifier declared in the class.
71 : char AArch64CondBrTuning::ID = 0;
72 :
// Registers the pass under the argument string "aarch64-cond-br-tuning" with
// the human-readable name AARCH64_CONDBR_TUNING_NAME.
// NOTE(review): the two trailing `false` arguments are presumably the
// "CFG-only" and "is-analysis" flags of INITIALIZE_PASS — confirm against the
// macro definition.
73 2232 : INITIALIZE_PASS(AArch64CondBrTuning, "aarch64-cond-br-tuning",
74 : AARCH64_CONDBR_TUNING_NAME, false, false)
75 :
/// Declares that this pass preserves the CFG, then lets the
/// MachineFunctionPass base class add its own requirements to \p AU.
76 1103 : void AArch64CondBrTuning::getAnalysisUsage(AnalysisUsage &AU) const {
77 1103 : AU.setPreservesCFG();
78 1103 : MachineFunctionPass::getAnalysisUsage(AU);
79 1103 : }
80 :
/// Looks up the instruction that defines the register held by \p MO.
///
/// \param MO operand to inspect; getReg() is called on it unconditionally, so
///        the caller must pass a register operand (the only caller passes
///        operand 0 of a CBZ/CBNZ/TBZ/TBNZ).
/// \returns nullptr when the register is not a virtual register; otherwise
///          whatever MachineRegisterInfo::getUniqueVRegDef() reports, which the
///          caller also null-checks.
81 0 : MachineInstr *AArch64CondBrTuning::getOperandDef(const MachineOperand &MO) {
82 512 : if (!TargetRegisterInfo::isVirtualRegister(MO.getReg()))
83 0 : return nullptr;
84 512 : return MRI->getUniqueVRegDef(MO.getReg());
85 : }
86 :
/// Produces an instruction that computes the same value as \p MI and also
/// defines NZCV.
///
/// \param MI            the instruction defining the branch's tested register.
/// \param IsFlagSetting true when \p MI is already a flag-setting opcode; in
///                      that case \p MI is patched in place and returned.
/// \returns \p MI itself when \p IsFlagSetting is true; otherwise a newly built
///          instruction placed at \p MI's position in its parent block. The
///          original \p MI is NOT erased here; tryToTuneBranch() does that.
87 80 : MachineInstr *AArch64CondBrTuning::convertToFlagSetting(MachineInstr &MI,
88 : bool IsFlagSetting) {
89 : // If this is already the flag setting version of the instruction (e.g., SUBS)
90 : // just make sure the implicit-def of NZCV isn't marked dead.
91 80 : if (IsFlagSetting) {
// Only the implicit operands (those after the explicit ones) are scanned.
92 114 : for (unsigned I = MI.getNumExplicitOperands(), E = MI.getNumOperands();
93 114 : I != E; ++I) {
94 57 : MachineOperand &MO = MI.getOperand(I);
95 57 : if (MO.isReg() && MO.isDead() && MO.getReg() == AArch64::NZCV)
96 : MO.setIsDead(false);
97 : }
98 : return &MI;
99 : }
// Is64Bit is declared uninitialized and read below, so it is presumably an
// out-parameter filled in by convertToFlagSettingOpc — confirm in
// AArch64InstrInfo.
100 : bool Is64Bit;
101 46 : unsigned NewOpc = TII->convertToFlagSettingOpc(MI.getOpcode(), Is64Bit);
102 23 : unsigned NewDestReg = MI.getOperand(0).getReg();
// When the result has exactly one non-debug use, the destination is replaced
// by the zero register of matching width, turning the instruction into a pure
// compare (e.g. ADDS -> CMN as in the file header example).
// NOTE(review): debug uses of the old destination register are not updated
// here — confirm they are handled elsewhere or are acceptable to leave.
103 23 : if (MRI->hasOneNonDBGUse(MI.getOperand(0).getReg()))
104 6 : NewDestReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
105 :
106 23 : MachineInstrBuilder MIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
107 46 : TII->get(NewOpc), NewDestReg);
// Copy every operand after the destination (index 0) from the original.
108 82 : for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
109 59 : MIB.add(MI.getOperand(I));
110 :
111 : return MIB;
112 : }
113 :
/// Builds a Bcc that branches to the same destination block as \p MI.
///
/// Opcode to condition-code mapping used below:
///   CBZ  -> EQ,  CBNZ -> NE,  TBZ -> PL,  TBNZ -> MI.
/// The TBZ/TBNZ mapping is only meaningful when the tested bit is the sign
/// bit; this function does not check that, tryToTuneBranch() does before
/// calling it.
///
/// \param MI a CBZ/CBNZ/TBZ/TBNZ (W or X form); any other opcode hits
///           llvm_unreachable.
/// \returns the new Bcc, built at \p MI's position in its parent block.
///          \p MI itself is left in place for the caller to erase.
114 0 : MachineInstr *AArch64CondBrTuning::convertToCondBr(MachineInstr &MI) {
115 : AArch64CC::CondCode CC;
116 0 : MachineBasicBlock *TargetMBB = TII->getBranchDestBlock(MI);
117 0 : switch (MI.getOpcode()) {
118 0 : default:
119 0 : llvm_unreachable("Unexpected opcode!");
120 :
121 : case AArch64::CBZW:
122 : case AArch64::CBZX:
123 : CC = AArch64CC::EQ;
124 : break;
125 0 : case AArch64::CBNZW:
126 : case AArch64::CBNZX:
127 : CC = AArch64CC::NE;
128 0 : break;
129 0 : case AArch64::TBZW:
130 : case AArch64::TBZX:
131 : CC = AArch64CC::PL;
132 0 : break;
133 0 : case AArch64::TBNZW:
134 : case AArch64::TBNZX:
135 : CC = AArch64CC::MI;
136 0 : break;
137 : }
138 0 : return BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(AArch64::Bcc))
139 0 : .addImm(CC)
140 0 : .addMBB(TargetMBB);
141 : }
142 :
/// Tries to replace the compare-and-branch / test-and-branch \p MI with a
/// B.cond fed by the NZCV flags of \p DefMI.
///
/// The rewrite is refused (returns false, nothing modified) when:
///   - \p DefMI and \p MI are in different basic blocks;
///   - \p DefMI is not one of the listed ADD/AND/BIC/SUB opcodes (plain or
///     flag-setting form);
///   - \p MI is a TBZ/TBNZ that tests a bit other than the sign bit (31 for
///     the W forms, 63 for the X forms);
///   - any instruction strictly between \p DefMI and \p MI reads or modifies
///     NZCV.
///
/// On success \p MI is erased, and \p DefMI is erased too unless it was
/// already a flag-setting instruction (in which case it is reused in place).
///
/// \param MI    the CBZ/CBNZ/TBZ/TBNZ terminator.
/// \param DefMI the unique def of \p MI's operand 0.
/// \returns true when the branch was rewritten.
143 512 : bool AArch64CondBrTuning::tryToTuneBranch(MachineInstr &MI,
144 : MachineInstr &DefMI) {
145 : // We don't want NZCV bits live across blocks.
146 512 : if (MI.getParent() != DefMI.getParent())
147 : return false;
148 :
// Assume a flag-setting def; the non-flag-setting case groups clear this
// before falling through into the shared handling.
149 : bool IsFlagSetting = true;
150 481 : unsigned MIOpc = MI.getOpcode();
151 : MachineInstr *NewCmp = nullptr, *NewBr = nullptr;
152 962 : switch (DefMI.getOpcode()) {
153 : default:
154 : return false;
// 32-bit, non-flag-setting defs.
155 14 : case AArch64::ADDWri:
156 : case AArch64::ADDWrr:
157 : case AArch64::ADDWrs:
158 : case AArch64::ADDWrx:
159 : case AArch64::ANDWri:
160 : case AArch64::ANDWrr:
161 : case AArch64::ANDWrs:
162 : case AArch64::BICWrr:
163 : case AArch64::BICWrs:
164 : case AArch64::SUBWri:
165 : case AArch64::SUBWrr:
166 : case AArch64::SUBWrs:
167 : case AArch64::SUBWrx:
168 : IsFlagSetting = false;
169 : LLVM_FALLTHROUGH;
// 32-bit, flag-setting defs.
170 34 : case AArch64::ADDSWri:
171 : case AArch64::ADDSWrr:
172 : case AArch64::ADDSWrs:
173 : case AArch64::ADDSWrx:
174 : case AArch64::ANDSWri:
175 : case AArch64::ANDSWrr:
176 : case AArch64::ANDSWrs:
177 : case AArch64::BICSWrr:
178 : case AArch64::BICSWrs:
179 : case AArch64::SUBSWri:
180 : case AArch64::SUBSWrr:
181 : case AArch64::SUBSWrs:
182 : case AArch64::SUBSWrx:
// NOTE(review): a 32-bit def paired with an X-form branch opcode reaches the
// llvm_unreachable below; presumably operand register classes rule that
// combination out — confirm.
183 : switch (MIOpc) {
184 0 : default:
185 0 : llvm_unreachable("Unexpected opcode!");
186 :
187 34 : case AArch64::CBZW:
188 : case AArch64::CBNZW:
189 : case AArch64::TBZW:
190 : case AArch64::TBNZW:
191 : // Check to see if the TBZ/TBNZ is checking the sign bit.
192 34 : if ((MIOpc == AArch64::TBZW || MIOpc == AArch64::TBNZW) &&
193 9 : MI.getOperand(1).getImm() != 31)
194 : return false;
195 :
196 : // There must not be any instruction between DefMI and MI that clobbers or
197 : // reads NZCV.
198 : MachineBasicBlock::iterator I(DefMI), E(MI);
199 81 : for (I = std::next(I); I != E; ++I) {
200 103 : if (I->modifiesRegister(AArch64::NZCV, TRI) ||
201 49 : I->readsRegister(AArch64::NZCV, TRI))
202 5 : return false;
203 : }
204 : LLVM_DEBUG(dbgs() << " Replacing instructions:\n ");
205 : LLVM_DEBUG(DefMI.print(dbgs()));
206 : LLVM_DEBUG(dbgs() << " ");
207 : LLVM_DEBUG(MI.print(dbgs()));
208 :
209 27 : NewCmp = convertToFlagSetting(DefMI, IsFlagSetting);
210 27 : NewBr = convertToCondBr(MI);
211 : break;
212 : }
213 : break;
214 :
// 64-bit, non-flag-setting defs.
215 12 : case AArch64::ADDXri:
216 : case AArch64::ADDXrr:
217 : case AArch64::ADDXrs:
218 : case AArch64::ADDXrx:
219 : case AArch64::ANDXri:
220 : case AArch64::ANDXrr:
221 : case AArch64::ANDXrs:
222 : case AArch64::BICXrr:
223 : case AArch64::BICXrs:
224 : case AArch64::SUBXri:
225 : case AArch64::SUBXrr:
226 : case AArch64::SUBXrs:
227 : case AArch64::SUBXrx:
228 : IsFlagSetting = false;
229 : LLVM_FALLTHROUGH;
// 64-bit, flag-setting defs.
230 53 : case AArch64::ADDSXri:
231 : case AArch64::ADDSXrr:
232 : case AArch64::ADDSXrs:
233 : case AArch64::ADDSXrx:
234 : case AArch64::ANDSXri:
235 : case AArch64::ANDSXrr:
236 : case AArch64::ANDSXrs:
237 : case AArch64::BICSXrr:
238 : case AArch64::BICSXrs:
239 : case AArch64::SUBSXri:
240 : case AArch64::SUBSXrr:
241 : case AArch64::SUBSXrs:
242 : case AArch64::SUBSXrx:
// NOTE(review): a 64-bit def paired with a W-form branch opcode reaches the
// llvm_unreachable below; presumably that pairing cannot occur — confirm.
243 : switch (MIOpc) {
244 0 : default:
245 0 : llvm_unreachable("Unexpected opcode!");
246 :
247 53 : case AArch64::CBZX:
248 : case AArch64::CBNZX:
249 : case AArch64::TBZX:
250 : case AArch64::TBNZX: {
251 : // Check to see if the TBZ/TBNZ is checking the sign bit.
252 53 : if ((MIOpc == AArch64::TBZX || MIOpc == AArch64::TBNZX) &&
253 10 : MI.getOperand(1).getImm() != 63)
254 : return false;
255 : // There must not be any instruction between DefMI and MI that clobbers or
256 : // reads NZCV.
257 : MachineBasicBlock::iterator I(DefMI), E(MI);
258 159 : for (I = std::next(I); I != E; ++I) {
259 212 : if (I->modifiesRegister(AArch64::NZCV, TRI) ||
260 106 : I->readsRegister(AArch64::NZCV, TRI))
261 0 : return false;
262 : }
263 : LLVM_DEBUG(dbgs() << " Replacing instructions:\n ");
264 : LLVM_DEBUG(DefMI.print(dbgs()));
265 : LLVM_DEBUG(dbgs() << " ");
266 : LLVM_DEBUG(MI.print(dbgs()));
267 :
268 53 : NewCmp = convertToFlagSetting(DefMI, IsFlagSetting);
269 53 : NewBr = convertToCondBr(MI);
270 : break;
271 : }
272 : }
273 : break;
274 : }
// NewCmp/NewBr are otherwise referenced only inside assert and LLVM_DEBUG;
// the void casts keep them "used" when those expand to nothing.
275 : (void)NewCmp; (void)NewBr;
276 : assert(NewCmp && NewBr && "Expected new instructions.");
277 :
278 : LLVM_DEBUG(dbgs() << " with instruction:\n ");
279 : LLVM_DEBUG(NewCmp->print(dbgs()));
280 : LLVM_DEBUG(dbgs() << " ");
281 : LLVM_DEBUG(NewBr->print(dbgs()));
282 :
283 : // If this was a flag setting version of the instruction, we use the original
284 : // instruction by just clearing the dead marked on the implicit-def of NZCV.
285 : // Therefore, we should not erase this instruction.
286 80 : if (!IsFlagSetting)
287 23 : DefMI.eraseFromParent();
288 80 : MI.eraseFromParent();
289 80 : return true;
290 : }
291 :
/// Pass entry point. For every basic block of \p MF, scans the instructions
/// from the first terminator to the end of the block and tries to tune each
/// CBZ/CBNZ/TBZ/TBNZ it finds. At most one branch is rewritten per block: the
/// scan of a block stops as soon as one rewrite succeeds.
///
/// \returns true if any branch in the function was rewritten; false otherwise,
///          including when skipFunction() asks for the function to be skipped.
292 14064 : bool AArch64CondBrTuning::runOnMachineFunction(MachineFunction &MF) {
293 14064 : if (skipFunction(MF.getFunction()))
294 : return false;
295 :
296 : LLVM_DEBUG(
297 : dbgs() << "********** AArch64 Conditional Branch Tuning **********\n"
298 : << "********** Function: " << MF.getName() << '\n');
299 :
// Cache per-function target info in the members used by the helper methods.
300 14058 : TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
301 14058 : TRI = MF.getSubtarget().getRegisterInfo();
302 14058 : MRI = &MF.getRegInfo();
303 :
304 : bool Changed = false;
305 30295 : for (MachineBasicBlock &MBB : MF) {
306 : bool LocalChange = false;
// Only terminators are candidates, so start at the first one.
307 16237 : for (MachineBasicBlock::iterator I = MBB.getFirstTerminator(),
308 : E = MBB.end();
309 32598 : I != E; ++I) {
310 : MachineInstr &MI = *I;
311 32882 : switch (MI.getOpcode()) {
312 : default:
313 : break;
314 512 : case AArch64::CBZW:
315 : case AArch64::CBZX:
316 : case AArch64::CBNZW:
317 : case AArch64::CBNZX:
318 : case AArch64::TBZW:
319 : case AArch64::TBZX:
320 : case AArch64::TBNZW:
321 : case AArch64::TBNZX:
// Operand 0 is the tested register; skip when it has no unique virtual def.
322 512 : MachineInstr *DefMI = getOperandDef(MI.getOperand(0));
323 512 : LocalChange = (DefMI && tryToTuneBranch(MI, *DefMI));
324 : break;
325 : }
326 : // If the optimization was successful, we can't optimize any other
327 : // branches because doing so would clobber the NZCV flags.
// On success MI has been erased, so leaving the loop here also avoids
// advancing an iterator that pointed at it.
328 : if (LocalChange) {
329 : Changed = true;
330 : break;
331 : }
332 : }
333 : }
334 : return Changed;
335 : }
336 :
/// Factory for the pass: returns a heap-allocated AArch64CondBrTuning.
/// NOTE(review): ownership presumably transfers to the caller (typically a
/// pass manager) — confirm at the call sites.
337 1116 : FunctionPass *llvm::createAArch64CondBrTuning() {
338 1116 : return new AArch64CondBrTuning();
339 : }
|