File: | llvm/lib/Target/X86/X86FixupLEAs.cpp |
Warning: | line 884, column 3 Value stored to 'NewMI' is never read |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===-- X86FixupLEAs.cpp - use or replace LEA instructions -----------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the pass that finds instructions that can be |
10 | // re-written as LEA instructions in order to reduce pipeline delays. |
11 | // It replaces LEAs with ADD/INC/DEC when that is better for size/speed. |
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "X86.h" |
16 | #include "X86InstrInfo.h" |
17 | #include "X86Subtarget.h" |
18 | #include "llvm/ADT/Statistic.h" |
19 | #include "llvm/Analysis/ProfileSummaryInfo.h" |
20 | #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h" |
21 | #include "llvm/CodeGen/MachineFunctionPass.h" |
22 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
23 | #include "llvm/CodeGen/MachineSizeOpts.h" |
24 | #include "llvm/CodeGen/Passes.h" |
25 | #include "llvm/CodeGen/TargetSchedule.h" |
26 | #include "llvm/Support/Debug.h" |
27 | #include "llvm/Support/raw_ostream.h" |
28 | using namespace llvm; |
29 | |
30 | #define FIXUPLEA_DESC"X86 LEA Fixup" "X86 LEA Fixup" |
31 | #define FIXUPLEA_NAME"x86-fixup-LEAs" "x86-fixup-LEAs" |
32 | |
33 | #define DEBUG_TYPE"x86-fixup-LEAs" FIXUPLEA_NAME"x86-fixup-LEAs" |
34 | |
35 | STATISTIC(NumLEAs, "Number of LEA instructions created")static llvm::Statistic NumLEAs = {"x86-fixup-LEAs", "NumLEAs" , "Number of LEA instructions created"}; |
36 | |
37 | namespace { |
38 | class FixupLEAPass : public MachineFunctionPass { |
39 | enum RegUsageState { RU_NotUsed, RU_Write, RU_Read }; |
40 | |
41 | /// Given a machine register, look for the instruction |
42 | /// which writes it in the current basic block. If found, |
43 | /// try to replace it with an equivalent LEA instruction. |
44 | /// If replacement succeeds, then also process the newly created |
45 | /// instruction. |
46 | void seekLEAFixup(MachineOperand &p, MachineBasicBlock::iterator &I, |
47 | MachineBasicBlock &MBB); |
48 | |
49 | /// Given a memory access or LEA instruction |
50 | /// whose address mode uses a base and/or index register, look for |
51 | /// an opportunity to replace the instruction which sets the base or index |
52 | /// register with an equivalent LEA instruction. |
53 | void processInstruction(MachineBasicBlock::iterator &I, |
54 | MachineBasicBlock &MBB); |
55 | |
56 | /// Given a LEA instruction which is unprofitable |
57 | /// on SlowLEA targets try to replace it with an equivalent ADD instruction. |
58 | void processInstructionForSlowLEA(MachineBasicBlock::iterator &I, |
59 | MachineBasicBlock &MBB); |
60 | |
61 | /// Given a LEA instruction which is unprofitable |
62 | /// on SNB+ try to replace it with other instructions. |
63 | /// According to Intel's Optimization Reference Manual: |
64 | /// " For LEA instructions with three source operands and some specific |
65 | /// situations, instruction latency has increased to 3 cycles, and must |
66 | /// dispatch via port 1: |
67 | /// - LEA that has all three source operands: base, index, and offset |
68 | /// - LEA that uses base and index registers where the base is EBP, RBP, |
69 | /// or R13 |
70 | /// - LEA that uses RIP relative addressing mode |
71 | /// - LEA that uses 16-bit addressing mode " |
72 | /// This function currently handles the first 2 cases only. |
73 | void processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I, |
74 | MachineBasicBlock &MBB, bool OptIncDec); |
75 | |
76 | /// Look for LEAs that are really two address LEAs that we might be able to |
77 | /// turn into regular ADD instructions. |
78 | bool optTwoAddrLEA(MachineBasicBlock::iterator &I, |
79 | MachineBasicBlock &MBB, bool OptIncDec, |
80 | bool UseLEAForSP) const; |
81 | |
82 | /// Look for and transform the sequence |
83 | /// lea (reg1, reg2), reg3 |
84 | /// sub reg3, reg4 |
85 | /// to |
86 | /// sub reg1, reg4 |
87 | /// sub reg2, reg4 |
88 | /// It can also optimize the sequence lea/add similarly. |
89 | bool optLEAALU(MachineBasicBlock::iterator &I, MachineBasicBlock &MBB) const; |
90 | |
91 | /// Step forwards in MBB, looking for an ADD/SUB instruction which uses |
92 | /// the dest register of LEA instruction I. |
93 | MachineBasicBlock::iterator searchALUInst(MachineBasicBlock::iterator &I, |
94 | MachineBasicBlock &MBB) const; |
95 | |
96 | /// Check instructions between LeaI and AluI (exclusively). |
97 | /// Set BaseIndexDef to true if base or index register from LeaI is defined. |
98 | /// Set AluDestRef to true if the dest register of AluI is used or defined. |
99 | /// *KilledBase is set to the killed base register usage. |
100 | /// *KilledIndex is set to the killed index register usage. |
101 | void checkRegUsage(MachineBasicBlock::iterator &LeaI, |
102 | MachineBasicBlock::iterator &AluI, bool &BaseIndexDef, |
103 | bool &AluDestRef, MachineOperand **KilledBase, |
104 | MachineOperand **KilledIndex) const; |
105 | |
106 | /// Determine if an instruction references a machine register |
107 | /// and, if so, whether it reads or writes the register. |
108 | RegUsageState usesRegister(MachineOperand &p, MachineBasicBlock::iterator I); |
109 | |
110 | /// Step backwards through a basic block, looking |
111 | /// for an instruction which writes a register within |
112 | /// a maximum of INSTR_DISTANCE_THRESHOLD instruction latency cycles. |
113 | MachineBasicBlock::iterator searchBackwards(MachineOperand &p, |
114 | MachineBasicBlock::iterator &I, |
115 | MachineBasicBlock &MBB); |
116 | |
117 | /// if an instruction can be converted to an |
118 | /// equivalent LEA, insert the new instruction into the basic block |
119 | /// and return a pointer to it. Otherwise, return zero. |
120 | MachineInstr *postRAConvertToLEA(MachineBasicBlock &MBB, |
121 | MachineBasicBlock::iterator &MBBI) const; |
122 | |
123 | public: |
124 | static char ID; |
125 | |
126 | StringRef getPassName() const override { return FIXUPLEA_DESC"X86 LEA Fixup"; } |
127 | |
128 | FixupLEAPass() : MachineFunctionPass(ID) { } |
129 | |
130 | /// Loop over all of the basic blocks, |
131 | /// replacing instructions by equivalent LEA instructions |
132 | /// if needed and when possible. |
133 | bool runOnMachineFunction(MachineFunction &MF) override; |
134 | |
135 | // This pass runs after regalloc and doesn't support VReg operands. |
136 | MachineFunctionProperties getRequiredProperties() const override { |
137 | return MachineFunctionProperties().set( |
138 | MachineFunctionProperties::Property::NoVRegs); |
139 | } |
140 | |
141 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
142 | AU.addRequired<ProfileSummaryInfoWrapperPass>(); |
143 | AU.addRequired<LazyMachineBlockFrequencyInfoPass>(); |
144 | MachineFunctionPass::getAnalysisUsage(AU); |
145 | } |
146 | |
147 | private: |
148 | TargetSchedModel TSM; |
149 | const X86InstrInfo *TII = nullptr; |
150 | const X86RegisterInfo *TRI = nullptr; |
151 | }; |
152 | } |
153 | |
154 | char FixupLEAPass::ID = 0; |
155 | |
156 | INITIALIZE_PASS(FixupLEAPass, FIXUPLEA_NAME, FIXUPLEA_DESC, false, false)static void *initializeFixupLEAPassPassOnce(PassRegistry & Registry) { PassInfo *PI = new PassInfo( "X86 LEA Fixup", "x86-fixup-LEAs" , &FixupLEAPass::ID, PassInfo::NormalCtor_t(callDefaultCtor <FixupLEAPass>), false, false); Registry.registerPass(* PI, true); return PI; } static llvm::once_flag InitializeFixupLEAPassPassFlag ; void llvm::initializeFixupLEAPassPass(PassRegistry &Registry ) { llvm::call_once(InitializeFixupLEAPassPassFlag, initializeFixupLEAPassPassOnce , std::ref(Registry)); } |
157 | |
158 | MachineInstr * |
159 | FixupLEAPass::postRAConvertToLEA(MachineBasicBlock &MBB, |
160 | MachineBasicBlock::iterator &MBBI) const { |
161 | MachineInstr &MI = *MBBI; |
162 | switch (MI.getOpcode()) { |
163 | case X86::MOV32rr: |
164 | case X86::MOV64rr: { |
165 | const MachineOperand &Src = MI.getOperand(1); |
166 | const MachineOperand &Dest = MI.getOperand(0); |
167 | MachineInstr *NewMI = |
168 | BuildMI(MBB, MBBI, MI.getDebugLoc(), |
169 | TII->get(MI.getOpcode() == X86::MOV32rr ? X86::LEA32r |
170 | : X86::LEA64r)) |
171 | .add(Dest) |
172 | .add(Src) |
173 | .addImm(1) |
174 | .addReg(0) |
175 | .addImm(0) |
176 | .addReg(0); |
177 | return NewMI; |
178 | } |
179 | } |
180 | |
181 | if (!MI.isConvertibleTo3Addr()) |
182 | return nullptr; |
183 | |
184 | switch (MI.getOpcode()) { |
185 | default: |
186 | // Only convert instructions that we've verified are safe. |
187 | return nullptr; |
188 | case X86::ADD64ri32: |
189 | case X86::ADD64ri8: |
190 | case X86::ADD64ri32_DB: |
191 | case X86::ADD64ri8_DB: |
192 | case X86::ADD32ri: |
193 | case X86::ADD32ri8: |
194 | case X86::ADD32ri_DB: |
195 | case X86::ADD32ri8_DB: |
196 | if (!MI.getOperand(2).isImm()) { |
197 | // convertToThreeAddress will call getImm() |
198 | // which requires isImm() to be true |
199 | return nullptr; |
200 | } |
201 | break; |
202 | case X86::SHL64ri: |
203 | case X86::SHL32ri: |
204 | case X86::INC64r: |
205 | case X86::INC32r: |
206 | case X86::DEC64r: |
207 | case X86::DEC32r: |
208 | case X86::ADD64rr: |
209 | case X86::ADD64rr_DB: |
210 | case X86::ADD32rr: |
211 | case X86::ADD32rr_DB: |
212 | // These instructions are all fine to convert. |
213 | break; |
214 | } |
215 | MachineFunction::iterator MFI = MBB.getIterator(); |
216 | return TII->convertToThreeAddress(MFI, MI, nullptr); |
217 | } |
218 | |
219 | FunctionPass *llvm::createX86FixupLEAs() { return new FixupLEAPass(); } |
220 | |
221 | static bool isLEA(unsigned Opcode) { |
222 | return Opcode == X86::LEA32r || Opcode == X86::LEA64r || |
223 | Opcode == X86::LEA64_32r; |
224 | } |
225 | |
226 | bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) { |
227 | if (skipFunction(MF.getFunction())) |
228 | return false; |
229 | |
230 | const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>(); |
231 | bool IsSlowLEA = ST.slowLEA(); |
232 | bool IsSlow3OpsLEA = ST.slow3OpsLEA(); |
233 | bool LEAUsesAG = ST.LEAusesAG(); |
234 | |
235 | bool OptIncDec = !ST.slowIncDec() || MF.getFunction().hasOptSize(); |
236 | bool UseLEAForSP = ST.useLeaForSP(); |
237 | |
238 | TSM.init(&ST); |
239 | TII = ST.getInstrInfo(); |
240 | TRI = ST.getRegisterInfo(); |
241 | auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); |
242 | auto *MBFI = (PSI && PSI->hasProfileSummary()) |
243 | ? &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() |
244 | : nullptr; |
245 | |
246 | LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";)do { } while (false); |
247 | for (MachineBasicBlock &MBB : MF) { |
248 | // First pass. Try to remove or optimize existing LEAs. |
249 | bool OptIncDecPerBB = |
250 | OptIncDec || llvm::shouldOptimizeForSize(&MBB, PSI, MBFI); |
251 | for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) { |
252 | if (!isLEA(I->getOpcode())) |
253 | continue; |
254 | |
255 | if (optTwoAddrLEA(I, MBB, OptIncDecPerBB, UseLEAForSP)) |
256 | continue; |
257 | |
258 | if (IsSlowLEA) |
259 | processInstructionForSlowLEA(I, MBB); |
260 | else if (IsSlow3OpsLEA) |
261 | processInstrForSlow3OpLEA(I, MBB, OptIncDecPerBB); |
262 | } |
263 | |
264 | // Second pass for creating LEAs. This may reverse some of the |
265 | // transformations above. |
266 | if (LEAUsesAG) { |
267 | for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) |
268 | processInstruction(I, MBB); |
269 | } |
270 | } |
271 | |
272 | LLVM_DEBUG(dbgs() << "End X86FixupLEAs\n";)do { } while (false); |
273 | |
274 | return true; |
275 | } |
276 | |
277 | FixupLEAPass::RegUsageState |
278 | FixupLEAPass::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) { |
279 | RegUsageState RegUsage = RU_NotUsed; |
280 | MachineInstr &MI = *I; |
281 | |
282 | for (unsigned i = 0; i < MI.getNumOperands(); ++i) { |
283 | MachineOperand &opnd = MI.getOperand(i); |
284 | if (opnd.isReg() && opnd.getReg() == p.getReg()) { |
285 | if (opnd.isDef()) |
286 | return RU_Write; |
287 | RegUsage = RU_Read; |
288 | } |
289 | } |
290 | return RegUsage; |
291 | } |
292 | |
293 | /// getPreviousInstr - Given a reference to an instruction in a basic |
294 | /// block, return a reference to the previous instruction in the block, |
295 | /// wrapping around to the last instruction of the block if the block |
296 | /// branches to itself. |
297 | static inline bool getPreviousInstr(MachineBasicBlock::iterator &I, |
298 | MachineBasicBlock &MBB) { |
299 | if (I == MBB.begin()) { |
300 | if (MBB.isPredecessor(&MBB)) { |
301 | I = --MBB.end(); |
302 | return true; |
303 | } else |
304 | return false; |
305 | } |
306 | --I; |
307 | return true; |
308 | } |
309 | |
310 | MachineBasicBlock::iterator |
311 | FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I, |
312 | MachineBasicBlock &MBB) { |
313 | int InstrDistance = 1; |
314 | MachineBasicBlock::iterator CurInst; |
315 | static const int INSTR_DISTANCE_THRESHOLD = 5; |
316 | |
317 | CurInst = I; |
318 | bool Found; |
319 | Found = getPreviousInstr(CurInst, MBB); |
320 | while (Found && I != CurInst) { |
321 | if (CurInst->isCall() || CurInst->isInlineAsm()) |
322 | break; |
323 | if (InstrDistance > INSTR_DISTANCE_THRESHOLD) |
324 | break; // too far back to make a difference |
325 | if (usesRegister(p, CurInst) == RU_Write) { |
326 | return CurInst; |
327 | } |
328 | InstrDistance += TSM.computeInstrLatency(&*CurInst); |
329 | Found = getPreviousInstr(CurInst, MBB); |
330 | } |
331 | return MachineBasicBlock::iterator(); |
332 | } |
333 | |
334 | static inline bool isInefficientLEAReg(unsigned Reg) { |
335 | return Reg == X86::EBP || Reg == X86::RBP || |
336 | Reg == X86::R13D || Reg == X86::R13; |
337 | } |
338 | |
339 | /// Returns true if this LEA uses base an index registers, and the base register |
340 | /// is known to be inefficient for the subtarget. |
341 | // TODO: use a variant scheduling class to model the latency profile |
342 | // of LEA instructions, and implement this logic as a scheduling predicate. |
343 | static inline bool hasInefficientLEABaseReg(const MachineOperand &Base, |
344 | const MachineOperand &Index) { |
345 | return Base.isReg() && isInefficientLEAReg(Base.getReg()) && Index.isReg() && |
346 | Index.getReg() != X86::NoRegister; |
347 | } |
348 | |
349 | static inline bool hasLEAOffset(const MachineOperand &Offset) { |
350 | return (Offset.isImm() && Offset.getImm() != 0) || Offset.isGlobal(); |
351 | } |
352 | |
353 | static inline unsigned getADDrrFromLEA(unsigned LEAOpcode) { |
354 | switch (LEAOpcode) { |
355 | default: |
356 | llvm_unreachable("Unexpected LEA instruction")__builtin_unreachable(); |
357 | case X86::LEA32r: |
358 | case X86::LEA64_32r: |
359 | return X86::ADD32rr; |
360 | case X86::LEA64r: |
361 | return X86::ADD64rr; |
362 | } |
363 | } |
364 | |
365 | static inline unsigned getSUBrrFromLEA(unsigned LEAOpcode) { |
366 | switch (LEAOpcode) { |
367 | default: |
368 | llvm_unreachable("Unexpected LEA instruction")__builtin_unreachable(); |
369 | case X86::LEA32r: |
370 | case X86::LEA64_32r: |
371 | return X86::SUB32rr; |
372 | case X86::LEA64r: |
373 | return X86::SUB64rr; |
374 | } |
375 | } |
376 | |
377 | static inline unsigned getADDriFromLEA(unsigned LEAOpcode, |
378 | const MachineOperand &Offset) { |
379 | bool IsInt8 = Offset.isImm() && isInt<8>(Offset.getImm()); |
380 | switch (LEAOpcode) { |
381 | default: |
382 | llvm_unreachable("Unexpected LEA instruction")__builtin_unreachable(); |
383 | case X86::LEA32r: |
384 | case X86::LEA64_32r: |
385 | return IsInt8 ? X86::ADD32ri8 : X86::ADD32ri; |
386 | case X86::LEA64r: |
387 | return IsInt8 ? X86::ADD64ri8 : X86::ADD64ri32; |
388 | } |
389 | } |
390 | |
391 | static inline unsigned getINCDECFromLEA(unsigned LEAOpcode, bool IsINC) { |
392 | switch (LEAOpcode) { |
393 | default: |
394 | llvm_unreachable("Unexpected LEA instruction")__builtin_unreachable(); |
395 | case X86::LEA32r: |
396 | case X86::LEA64_32r: |
397 | return IsINC ? X86::INC32r : X86::DEC32r; |
398 | case X86::LEA64r: |
399 | return IsINC ? X86::INC64r : X86::DEC64r; |
400 | } |
401 | } |
402 | |
403 | MachineBasicBlock::iterator |
404 | FixupLEAPass::searchALUInst(MachineBasicBlock::iterator &I, |
405 | MachineBasicBlock &MBB) const { |
406 | const int InstrDistanceThreshold = 5; |
407 | int InstrDistance = 1; |
408 | MachineBasicBlock::iterator CurInst = std::next(I); |
409 | |
410 | unsigned LEAOpcode = I->getOpcode(); |
411 | unsigned AddOpcode = getADDrrFromLEA(LEAOpcode); |
412 | unsigned SubOpcode = getSUBrrFromLEA(LEAOpcode); |
413 | Register DestReg = I->getOperand(0).getReg(); |
414 | |
415 | while (CurInst != MBB.end()) { |
416 | if (CurInst->isCall() || CurInst->isInlineAsm()) |
417 | break; |
418 | if (InstrDistance > InstrDistanceThreshold) |
419 | break; |
420 | |
421 | // Check if the lea dest register is used in an add/sub instruction only. |
422 | for (unsigned I = 0, E = CurInst->getNumOperands(); I != E; ++I) { |
423 | MachineOperand &Opnd = CurInst->getOperand(I); |
424 | if (Opnd.isReg()) { |
425 | if (Opnd.getReg() == DestReg) { |
426 | if (Opnd.isDef() || !Opnd.isKill()) |
427 | return MachineBasicBlock::iterator(); |
428 | |
429 | unsigned AluOpcode = CurInst->getOpcode(); |
430 | if (AluOpcode != AddOpcode && AluOpcode != SubOpcode) |
431 | return MachineBasicBlock::iterator(); |
432 | |
433 | MachineOperand &Opnd2 = CurInst->getOperand(3 - I); |
434 | MachineOperand AluDest = CurInst->getOperand(0); |
435 | if (Opnd2.getReg() != AluDest.getReg()) |
436 | return MachineBasicBlock::iterator(); |
437 | |
438 | // X - (Y + Z) may generate different flags than (X - Y) - Z when |
439 | // there is overflow. So we can't change the alu instruction if the |
440 | // flags register is live. |
441 | if (!CurInst->registerDefIsDead(X86::EFLAGS, TRI)) |
442 | return MachineBasicBlock::iterator(); |
443 | |
444 | return CurInst; |
445 | } |
446 | if (TRI->regsOverlap(DestReg, Opnd.getReg())) |
447 | return MachineBasicBlock::iterator(); |
448 | } |
449 | } |
450 | |
451 | InstrDistance++; |
452 | ++CurInst; |
453 | } |
454 | return MachineBasicBlock::iterator(); |
455 | } |
456 | |
457 | void FixupLEAPass::checkRegUsage(MachineBasicBlock::iterator &LeaI, |
458 | MachineBasicBlock::iterator &AluI, |
459 | bool &BaseIndexDef, bool &AluDestRef, |
460 | MachineOperand **KilledBase, |
461 | MachineOperand **KilledIndex) const { |
462 | BaseIndexDef = AluDestRef = false; |
463 | *KilledBase = *KilledIndex = nullptr; |
464 | Register BaseReg = LeaI->getOperand(1 + X86::AddrBaseReg).getReg(); |
465 | Register IndexReg = LeaI->getOperand(1 + X86::AddrIndexReg).getReg(); |
466 | Register AluDestReg = AluI->getOperand(0).getReg(); |
467 | |
468 | MachineBasicBlock::iterator CurInst = std::next(LeaI); |
469 | while (CurInst != AluI) { |
470 | for (unsigned I = 0, E = CurInst->getNumOperands(); I != E; ++I) { |
471 | MachineOperand &Opnd = CurInst->getOperand(I); |
472 | if (!Opnd.isReg()) |
473 | continue; |
474 | Register Reg = Opnd.getReg(); |
475 | if (TRI->regsOverlap(Reg, AluDestReg)) |
476 | AluDestRef = true; |
477 | if (TRI->regsOverlap(Reg, BaseReg)) { |
478 | if (Opnd.isDef()) |
479 | BaseIndexDef = true; |
480 | else if (Opnd.isKill()) |
481 | *KilledBase = &Opnd; |
482 | } |
483 | if (TRI->regsOverlap(Reg, IndexReg)) { |
484 | if (Opnd.isDef()) |
485 | BaseIndexDef = true; |
486 | else if (Opnd.isKill()) |
487 | *KilledIndex = &Opnd; |
488 | } |
489 | } |
490 | ++CurInst; |
491 | } |
492 | } |
493 | |
494 | bool FixupLEAPass::optLEAALU(MachineBasicBlock::iterator &I, |
495 | MachineBasicBlock &MBB) const { |
496 | // Look for an add/sub instruction which uses the result of lea. |
497 | MachineBasicBlock::iterator AluI = searchALUInst(I, MBB); |
498 | if (AluI == MachineBasicBlock::iterator()) |
499 | return false; |
500 | |
501 | // Check if there are any related register usage between lea and alu. |
502 | bool BaseIndexDef, AluDestRef; |
503 | MachineOperand *KilledBase, *KilledIndex; |
504 | checkRegUsage(I, AluI, BaseIndexDef, AluDestRef, &KilledBase, &KilledIndex); |
505 | |
506 | MachineBasicBlock::iterator InsertPos = AluI; |
507 | if (BaseIndexDef) { |
508 | if (AluDestRef) |
509 | return false; |
510 | InsertPos = I; |
511 | KilledBase = KilledIndex = nullptr; |
512 | } |
513 | |
514 | // Check if there are same registers. |
515 | Register AluDestReg = AluI->getOperand(0).getReg(); |
516 | Register BaseReg = I->getOperand(1 + X86::AddrBaseReg).getReg(); |
517 | Register IndexReg = I->getOperand(1 + X86::AddrIndexReg).getReg(); |
518 | if (I->getOpcode() == X86::LEA64_32r) { |
519 | BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit); |
520 | IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit); |
521 | } |
522 | if (AluDestReg == IndexReg) { |
523 | if (BaseReg == IndexReg) |
524 | return false; |
525 | std::swap(BaseReg, IndexReg); |
526 | std::swap(KilledBase, KilledIndex); |
527 | } |
528 | if (BaseReg == IndexReg) |
529 | KilledBase = nullptr; |
530 | |
531 | // Now it's safe to change instructions. |
532 | MachineInstr *NewMI1, *NewMI2; |
533 | unsigned NewOpcode = AluI->getOpcode(); |
534 | NewMI1 = BuildMI(MBB, InsertPos, AluI->getDebugLoc(), TII->get(NewOpcode), |
535 | AluDestReg) |
536 | .addReg(AluDestReg, RegState::Kill) |
537 | .addReg(BaseReg, KilledBase ? RegState::Kill : 0); |
538 | NewMI1->addRegisterDead(X86::EFLAGS, TRI); |
539 | NewMI2 = BuildMI(MBB, InsertPos, AluI->getDebugLoc(), TII->get(NewOpcode), |
540 | AluDestReg) |
541 | .addReg(AluDestReg, RegState::Kill) |
542 | .addReg(IndexReg, KilledIndex ? RegState::Kill : 0); |
543 | NewMI2->addRegisterDead(X86::EFLAGS, TRI); |
544 | |
545 | // Clear the old Kill flags. |
546 | if (KilledBase) |
547 | KilledBase->setIsKill(false); |
548 | if (KilledIndex) |
549 | KilledIndex->setIsKill(false); |
550 | |
551 | MBB.getParent()->substituteDebugValuesForInst(*AluI, *NewMI1, 1); |
552 | MBB.getParent()->substituteDebugValuesForInst(*AluI, *NewMI2, 1); |
553 | MBB.erase(I); |
554 | MBB.erase(AluI); |
555 | I = NewMI1; |
556 | return true; |
557 | } |
558 | |
559 | bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I, |
560 | MachineBasicBlock &MBB, bool OptIncDec, |
561 | bool UseLEAForSP) const { |
562 | MachineInstr &MI = *I; |
563 | |
564 | const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg); |
565 | const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt); |
566 | const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg); |
567 | const MachineOperand &Disp = MI.getOperand(1 + X86::AddrDisp); |
568 | const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg); |
569 | |
570 | if (Segment.getReg() != 0 || !Disp.isImm() || Scale.getImm() > 1 || |
571 | MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I) != |
572 | MachineBasicBlock::LQR_Dead) |
573 | return false; |
574 | |
575 | Register DestReg = MI.getOperand(0).getReg(); |
576 | Register BaseReg = Base.getReg(); |
577 | Register IndexReg = Index.getReg(); |
578 | |
579 | // Don't change stack adjustment LEAs. |
580 | if (UseLEAForSP && (DestReg == X86::ESP || DestReg == X86::RSP)) |
581 | return false; |
582 | |
583 | // LEA64_32 has 64-bit operands but 32-bit result. |
584 | if (MI.getOpcode() == X86::LEA64_32r) { |
585 | if (BaseReg != 0) |
586 | BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit); |
587 | if (IndexReg != 0) |
588 | IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit); |
589 | } |
590 | |
591 | MachineInstr *NewMI = nullptr; |
592 | |
593 | // Case 1. |
594 | // Look for lea(%reg1, %reg2), %reg1 or lea(%reg2, %reg1), %reg1 |
595 | // which can be turned into add %reg2, %reg1 |
596 | if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0 && |
597 | (DestReg == BaseReg || DestReg == IndexReg)) { |
598 | unsigned NewOpcode = getADDrrFromLEA(MI.getOpcode()); |
599 | if (DestReg != BaseReg) |
600 | std::swap(BaseReg, IndexReg); |
601 | |
602 | if (MI.getOpcode() == X86::LEA64_32r) { |
603 | // TODO: Do we need the super register implicit use? |
604 | NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg) |
605 | .addReg(BaseReg).addReg(IndexReg) |
606 | .addReg(Base.getReg(), RegState::Implicit) |
607 | .addReg(Index.getReg(), RegState::Implicit); |
608 | } else { |
609 | NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg) |
610 | .addReg(BaseReg).addReg(IndexReg); |
611 | } |
612 | } else if (DestReg == BaseReg && IndexReg == 0) { |
613 | // Case 2. |
614 | // This is an LEA with only a base register and a displacement, |
615 | // We can use ADDri or INC/DEC. |
616 | |
617 | // Does this LEA have one these forms: |
618 | // lea %reg, 1(%reg) |
619 | // lea %reg, -1(%reg) |
620 | if (OptIncDec && (Disp.getImm() == 1 || Disp.getImm() == -1)) { |
621 | bool IsINC = Disp.getImm() == 1; |
622 | unsigned NewOpcode = getINCDECFromLEA(MI.getOpcode(), IsINC); |
623 | |
624 | if (MI.getOpcode() == X86::LEA64_32r) { |
625 | // TODO: Do we need the super register implicit use? |
626 | NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg) |
627 | .addReg(BaseReg).addReg(Base.getReg(), RegState::Implicit); |
628 | } else { |
629 | NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg) |
630 | .addReg(BaseReg); |
631 | } |
632 | } else { |
633 | unsigned NewOpcode = getADDriFromLEA(MI.getOpcode(), Disp); |
634 | if (MI.getOpcode() == X86::LEA64_32r) { |
635 | // TODO: Do we need the super register implicit use? |
636 | NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg) |
637 | .addReg(BaseReg).addImm(Disp.getImm()) |
638 | .addReg(Base.getReg(), RegState::Implicit); |
639 | } else { |
640 | NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg) |
641 | .addReg(BaseReg).addImm(Disp.getImm()); |
642 | } |
643 | } |
644 | } else if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0) { |
645 | // Case 3. |
646 | // Look for and transform the sequence |
647 | // lea (reg1, reg2), reg3 |
648 | // sub reg3, reg4 |
649 | return optLEAALU(I, MBB); |
650 | } else |
651 | return false; |
652 | |
653 | MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1); |
654 | MBB.erase(I); |
655 | I = NewMI; |
656 | return true; |
657 | } |
658 | |
659 | void FixupLEAPass::processInstruction(MachineBasicBlock::iterator &I, |
660 | MachineBasicBlock &MBB) { |
661 | // Process a load, store, or LEA instruction. |
662 | MachineInstr &MI = *I; |
663 | const MCInstrDesc &Desc = MI.getDesc(); |
664 | int AddrOffset = X86II::getMemoryOperandNo(Desc.TSFlags); |
665 | if (AddrOffset >= 0) { |
666 | AddrOffset += X86II::getOperandBias(Desc); |
667 | MachineOperand &p = MI.getOperand(AddrOffset + X86::AddrBaseReg); |
668 | if (p.isReg() && p.getReg() != X86::ESP) { |
669 | seekLEAFixup(p, I, MBB); |
670 | } |
671 | MachineOperand &q = MI.getOperand(AddrOffset + X86::AddrIndexReg); |
672 | if (q.isReg() && q.getReg() != X86::ESP) { |
673 | seekLEAFixup(q, I, MBB); |
674 | } |
675 | } |
676 | } |
677 | |
678 | void FixupLEAPass::seekLEAFixup(MachineOperand &p, |
679 | MachineBasicBlock::iterator &I, |
680 | MachineBasicBlock &MBB) { |
681 | MachineBasicBlock::iterator MBI = searchBackwards(p, I, MBB); |
682 | if (MBI != MachineBasicBlock::iterator()) { |
683 | MachineInstr *NewMI = postRAConvertToLEA(MBB, MBI); |
684 | if (NewMI) { |
685 | ++NumLEAs; |
686 | LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MBI->dump();)do { } while (false); |
687 | // now to replace with an equivalent LEA... |
688 | LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: "; NewMI->dump();)do { } while (false); |
689 | MBB.getParent()->substituteDebugValuesForInst(*MBI, *NewMI, 1); |
690 | MBB.erase(MBI); |
691 | MachineBasicBlock::iterator J = |
692 | static_cast<MachineBasicBlock::iterator>(NewMI); |
693 | processInstruction(J, MBB); |
694 | } |
695 | } |
696 | } |
697 | |
698 | void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I, |
699 | MachineBasicBlock &MBB) { |
700 | MachineInstr &MI = *I; |
701 | const unsigned Opcode = MI.getOpcode(); |
702 | |
703 | const MachineOperand &Dst = MI.getOperand(0); |
704 | const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg); |
705 | const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt); |
706 | const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg); |
707 | const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp); |
708 | const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg); |
709 | |
710 | if (Segment.getReg() != 0 || !Offset.isImm() || |
711 | MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I, 4) != |
712 | MachineBasicBlock::LQR_Dead) |
713 | return; |
714 | const Register DstR = Dst.getReg(); |
715 | const Register SrcR1 = Base.getReg(); |
716 | const Register SrcR2 = Index.getReg(); |
717 | if ((SrcR1 == 0 || SrcR1 != DstR) && (SrcR2 == 0 || SrcR2 != DstR)) |
718 | return; |
719 | if (Scale.getImm() > 1) |
720 | return; |
721 | LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; I->dump();)do { } while (false); |
722 | LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";)do { } while (false); |
723 | MachineInstr *NewMI = nullptr; |
724 | // Make ADD instruction for two registers writing to LEA's destination |
725 | if (SrcR1 != 0 && SrcR2 != 0) { |
726 | const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(Opcode)); |
727 | const MachineOperand &Src = SrcR1 == DstR ? Index : Base; |
728 | NewMI = |
729 | BuildMI(MBB, I, MI.getDebugLoc(), ADDrr, DstR).addReg(DstR).add(Src); |
730 | LLVM_DEBUG(NewMI->dump();)do { } while (false); |
731 | } |
732 | // Make ADD instruction for immediate |
733 | if (Offset.getImm() != 0) { |
734 | const MCInstrDesc &ADDri = |
735 | TII->get(getADDriFromLEA(Opcode, Offset)); |
736 | const MachineOperand &SrcR = SrcR1 == DstR ? Base : Index; |
737 | NewMI = BuildMI(MBB, I, MI.getDebugLoc(), ADDri, DstR) |
738 | .add(SrcR) |
739 | .addImm(Offset.getImm()); |
740 | LLVM_DEBUG(NewMI->dump();)do { } while (false); |
741 | } |
742 | if (NewMI) { |
743 | MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1); |
744 | MBB.erase(I); |
745 | I = NewMI; |
746 | } |
747 | } |
748 | |
749 | void FixupLEAPass::processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I, |
750 | MachineBasicBlock &MBB, |
751 | bool OptIncDec) { |
752 | MachineInstr &MI = *I; |
753 | const unsigned LEAOpcode = MI.getOpcode(); |
754 | |
755 | const MachineOperand &Dest = MI.getOperand(0); |
756 | const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg); |
757 | const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt); |
758 | const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg); |
759 | const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp); |
760 | const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg); |
761 | |
762 | if (!(TII->isThreeOperandsLEA(MI) || hasInefficientLEABaseReg(Base, Index)) || |
763 | MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I, 4) != |
764 | MachineBasicBlock::LQR_Dead || |
765 | Segment.getReg() != X86::NoRegister) |
766 | return; |
767 | |
768 | Register DestReg = Dest.getReg(); |
769 | Register BaseReg = Base.getReg(); |
770 | Register IndexReg = Index.getReg(); |
771 | |
772 | if (MI.getOpcode() == X86::LEA64_32r) { |
773 | if (BaseReg != 0) |
774 | BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit); |
775 | if (IndexReg != 0) |
776 | IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit); |
777 | } |
778 | |
779 | bool IsScale1 = Scale.getImm() == 1; |
780 | bool IsInefficientBase = isInefficientLEAReg(BaseReg); |
781 | bool IsInefficientIndex = isInefficientLEAReg(IndexReg); |
782 | |
783 | // Skip these cases since it takes more than 2 instructions |
784 | // to replace the LEA instruction. |
785 | if (IsInefficientBase && DestReg == BaseReg && !IsScale1) |
786 | return; |
787 | |
788 | LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MI.dump();)do { } while (false); |
789 | LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";)do { } while (false); |
790 | |
791 | MachineInstr *NewMI = nullptr; |
792 | |
793 | // First try to replace LEA with one or two (for the 3-op LEA case) |
794 | // add instructions: |
795 | // 1.lea (%base,%index,1), %base => add %index,%base |
796 | // 2.lea (%base,%index,1), %index => add %base,%index |
797 | if (IsScale1 && (DestReg == BaseReg || DestReg == IndexReg)) { |
798 | unsigned NewOpc = getADDrrFromLEA(MI.getOpcode()); |
799 | if (DestReg != BaseReg) |
800 | std::swap(BaseReg, IndexReg); |
801 | |
802 | if (MI.getOpcode() == X86::LEA64_32r) { |
803 | // TODO: Do we need the super register implicit use? |
804 | NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg) |
805 | .addReg(BaseReg) |
806 | .addReg(IndexReg) |
807 | .addReg(Base.getReg(), RegState::Implicit) |
808 | .addReg(Index.getReg(), RegState::Implicit); |
809 | } else { |
810 | NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg) |
811 | .addReg(BaseReg) |
812 | .addReg(IndexReg); |
813 | } |
814 | } else if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) { |
815 | // If the base is inefficient try switching the index and base operands, |
816 | // otherwise just break the 3-Ops LEA inst into 2-Ops LEA + ADD instruction: |
817 | // lea offset(%base,%index,scale),%dst => |
818 | // lea (%base,%index,scale); add offset,%dst |
819 | NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode)) |
820 | .add(Dest) |
821 | .add(IsInefficientBase ? Index : Base) |
822 | .add(Scale) |
823 | .add(IsInefficientBase ? Base : Index) |
824 | .addImm(0) |
825 | .add(Segment); |
826 | LLVM_DEBUG(NewMI->dump();)do { } while (false); |
827 | } |
828 | |
829 | // If either replacement succeeded above, add the offset if needed, then |
830 | // replace the instruction. |
831 | if (NewMI) { |
832 | // Create ADD instruction for the Offset in case of 3-Ops LEA. |
833 | if (hasLEAOffset(Offset)) { |
834 | if (OptIncDec && Offset.isImm() && |
835 | (Offset.getImm() == 1 || Offset.getImm() == -1)) { |
836 | unsigned NewOpc = |
837 | getINCDECFromLEA(MI.getOpcode(), Offset.getImm() == 1); |
838 | NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg) |
839 | .addReg(DestReg); |
840 | LLVM_DEBUG(NewMI->dump();)do { } while (false); |
841 | } else { |
842 | unsigned NewOpc = getADDriFromLEA(MI.getOpcode(), Offset); |
843 | NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg) |
844 | .addReg(DestReg) |
845 | .add(Offset); |
846 | LLVM_DEBUG(NewMI->dump();)do { } while (false); |
847 | } |
848 | } |
849 | |
850 | MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1); |
851 | MBB.erase(I); |
852 | I = NewMI; |
853 | return; |
854 | } |
855 | |
856 | // Handle the rest of the cases with inefficient base register: |
857 | assert(DestReg != BaseReg && "DestReg == BaseReg should be handled already!")(static_cast<void> (0)); |
858 | assert(IsInefficientBase && "efficient base should be handled already!")(static_cast<void> (0)); |
859 | |
860 | // FIXME: Handle LEA64_32r. |
861 | if (LEAOpcode == X86::LEA64_32r) |
862 | return; |
863 | |
864 | // lea (%base,%index,1), %dst => mov %base,%dst; add %index,%dst |
865 | if (IsScale1 && !hasLEAOffset(Offset)) { |
866 | bool BIK = Base.isKill() && BaseReg != IndexReg; |
867 | TII->copyPhysReg(MBB, MI, MI.getDebugLoc(), DestReg, BaseReg, BIK); |
868 | LLVM_DEBUG(MI.getPrevNode()->dump();)do { } while (false); |
869 | |
870 | unsigned NewOpc = getADDrrFromLEA(MI.getOpcode()); |
871 | NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg) |
872 | .addReg(DestReg) |
873 | .add(Index); |
874 | LLVM_DEBUG(NewMI->dump();)do { } while (false); |
875 | |
876 | MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1); |
877 | MBB.erase(I); |
878 | I = NewMI; |
879 | return; |
880 | } |
881 | |
882 | // lea offset(%base,%index,scale), %dst => |
883 | // lea offset( ,%index,scale), %dst; add %base,%dst |
884 | NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode)) |
Value stored to 'NewMI' is never read | |
885 | .add(Dest) |
886 | .addReg(0) |
887 | .add(Scale) |
888 | .add(Index) |
889 | .add(Offset) |
890 | .add(Segment); |
891 | LLVM_DEBUG(NewMI->dump();)do { } while (false); |
892 | |
893 | unsigned NewOpc = getADDrrFromLEA(MI.getOpcode()); |
894 | NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg) |
895 | .addReg(DestReg) |
896 | .add(Base); |
897 | LLVM_DEBUG(NewMI->dump();)do { } while (false); |
898 | |
899 | MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1); |
900 | MBB.erase(I); |
901 | I = NewMI; |
902 | } |