Line data Source code
1 : //=== A15SDOptimizerPass.cpp - Optimize DPR and SPR register accesses on A15==//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : //
10 : // The Cortex-A15 processor employs a tracking scheme in its register renaming
11 : // in order to process each instruction's micro-ops speculatively and
12 : // out-of-order with appropriate forwarding. The ARM architecture allows VFP
13 : // instructions to read and write 32-bit S-registers. Each S-register
14 : // corresponds to one half (upper or lower) of an overlaid 64-bit D-register.
15 : //
16 : // There are several instruction patterns which can be used to provide this
17 : // capability which can provide higher performance than other, potentially more
18 : // direct patterns, specifically around when one micro-op reads a D-register
19 : // operand that has recently been written as one or more S-register results.
20 : //
21 : // This file defines a pre-regalloc pass which looks for SPR producers that
22 : // are going to be used by DPR (or QPR) consumers and creates the more
23 : // optimized access pattern.
24 : //
25 : //===----------------------------------------------------------------------===//
26 :
27 : #include "ARM.h"
28 : #include "ARMBaseInstrInfo.h"
29 : #include "ARMBaseRegisterInfo.h"
30 : #include "ARMSubtarget.h"
31 : #include "llvm/ADT/Statistic.h"
32 : #include "llvm/CodeGen/MachineFunction.h"
33 : #include "llvm/CodeGen/MachineFunctionPass.h"
34 : #include "llvm/CodeGen/MachineInstr.h"
35 : #include "llvm/CodeGen/MachineInstrBuilder.h"
36 : #include "llvm/CodeGen/MachineRegisterInfo.h"
37 : #include "llvm/CodeGen/TargetRegisterInfo.h"
38 : #include "llvm/CodeGen/TargetSubtargetInfo.h"
39 : #include "llvm/Support/Debug.h"
40 : #include "llvm/Support/raw_ostream.h"
41 : #include <map>
42 : #include <set>
43 :
44 : using namespace llvm;
45 :
46 : #define DEBUG_TYPE "a15-sd-optimizer"
47 :
48 : namespace {
49 : struct A15SDOptimizer : public MachineFunctionPass {
50 : static char ID;
51 2568 : A15SDOptimizer() : MachineFunctionPass(ID) {}
52 :
53 : bool runOnMachineFunction(MachineFunction &Fn) override;
54 :
55 2556 : StringRef getPassName() const override { return "ARM A15 S->D optimizer"; }
56 :
57 : private:
58 : const ARMBaseInstrInfo *TII;
59 : const TargetRegisterInfo *TRI;
60 : MachineRegisterInfo *MRI;
61 :
62 : bool runOnInstruction(MachineInstr *MI);
63 :
64 : //
65 : // Instruction builder helpers
66 : //
67 : unsigned createDupLane(MachineBasicBlock &MBB,
68 : MachineBasicBlock::iterator InsertBefore,
69 : const DebugLoc &DL, unsigned Reg, unsigned Lane,
70 : bool QPR = false);
71 :
72 : unsigned createExtractSubreg(MachineBasicBlock &MBB,
73 : MachineBasicBlock::iterator InsertBefore,
74 : const DebugLoc &DL, unsigned DReg,
75 : unsigned Lane, const TargetRegisterClass *TRC);
76 :
77 : unsigned createVExt(MachineBasicBlock &MBB,
78 : MachineBasicBlock::iterator InsertBefore,
79 : const DebugLoc &DL, unsigned Ssub0, unsigned Ssub1);
80 :
81 : unsigned createRegSequence(MachineBasicBlock &MBB,
82 : MachineBasicBlock::iterator InsertBefore,
83 : const DebugLoc &DL, unsigned Reg1,
84 : unsigned Reg2);
85 :
86 : unsigned createInsertSubreg(MachineBasicBlock &MBB,
87 : MachineBasicBlock::iterator InsertBefore,
88 : const DebugLoc &DL, unsigned DReg,
89 : unsigned Lane, unsigned ToInsert);
90 :
91 : unsigned createImplicitDef(MachineBasicBlock &MBB,
92 : MachineBasicBlock::iterator InsertBefore,
93 : const DebugLoc &DL);
94 :
95 : //
96 : // Various property checkers
97 : //
98 : bool usesRegClass(MachineOperand &MO, const TargetRegisterClass *TRC);
99 : bool hasPartialWrite(MachineInstr *MI);
100 : SmallVector<unsigned, 8> getReadDPRs(MachineInstr *MI);
101 : unsigned getDPRLaneFromSPR(unsigned SReg);
102 :
103 : //
104 : // Methods used for getting the definitions of partial registers
105 : //
106 :
107 : MachineInstr *elideCopies(MachineInstr *MI);
108 : void elideCopiesAndPHIs(MachineInstr *MI,
109 : SmallVectorImpl<MachineInstr*> &Outs);
110 :
111 : //
112 : // Pattern optimization methods
113 : //
114 : unsigned optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg);
115 : unsigned optimizeSDPattern(MachineInstr *MI);
116 : unsigned getPrefSPRLane(unsigned SReg);
117 :
118 : //
119 : // Sanitizing method - used to make sure if don't leave dead code around.
120 : //
121 : void eraseInstrWithNoUses(MachineInstr *MI);
122 :
123 : //
124 : // A map used to track the changes done by this pass.
125 : //
126 : std::map<MachineInstr*, unsigned> Replacements;
127 : std::set<MachineInstr *> DeadInstr;
128 : };
129 : char A15SDOptimizer::ID = 0;
130 : } // end anonymous namespace
131 :
132 : // Returns true if this is a use of a SPR register.
133 0 : bool A15SDOptimizer::usesRegClass(MachineOperand &MO,
134 : const TargetRegisterClass *TRC) {
135 0 : if (!MO.isReg())
136 0 : return false;
137 0 : unsigned Reg = MO.getReg();
138 :
139 0 : if (TargetRegisterInfo::isVirtualRegister(Reg))
140 0 : return MRI->getRegClass(Reg)->hasSuperClassEq(TRC);
141 : else
142 0 : return TRC->contains(Reg);
143 : }
144 :
145 0 : unsigned A15SDOptimizer::getDPRLaneFromSPR(unsigned SReg) {
146 0 : unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1,
147 : &ARM::DPRRegClass);
148 1 : if (DReg != ARM::NoRegister) return ARM::ssub_1;
149 : return ARM::ssub_0;
150 : }
151 :
152 : // Get the subreg type that is most likely to be coalesced
153 : // for an SPR register that will be used in VDUP32d pseudo.
154 5 : unsigned A15SDOptimizer::getPrefSPRLane(unsigned SReg) {
155 5 : if (!TRI->isVirtualRegister(SReg))
156 0 : return getDPRLaneFromSPR(SReg);
157 :
158 5 : MachineInstr *MI = MRI->getVRegDef(SReg);
159 5 : if (!MI) return ARM::ssub_0;
160 : MachineOperand *MO = MI->findRegisterDefOperand(SReg);
161 :
162 : assert(MO->isReg() && "Non-register operand found!");
163 5 : if (!MO) return ARM::ssub_0;
164 :
165 5 : if (MI->isCopy() && usesRegClass(MI->getOperand(1),
166 : &ARM::SPRRegClass)) {
167 1 : SReg = MI->getOperand(1).getReg();
168 : }
169 :
170 5 : if (TargetRegisterInfo::isVirtualRegister(SReg)) {
171 4 : if (MO->getSubReg() == ARM::ssub_1) return ARM::ssub_1;
172 4 : return ARM::ssub_0;
173 : }
174 1 : return getDPRLaneFromSPR(SReg);
175 : }
176 :
177 : // MI is known to be dead. Figure out what instructions
178 : // are also made dead by this and mark them for removal.
179 5 : void A15SDOptimizer::eraseInstrWithNoUses(MachineInstr *MI) {
180 : SmallVector<MachineInstr *, 8> Front;
181 : DeadInstr.insert(MI);
182 :
183 : LLVM_DEBUG(dbgs() << "Deleting base instruction " << *MI << "\n");
184 5 : Front.push_back(MI);
185 :
186 20 : while (Front.size() != 0) {
187 5 : MI = Front.back();
188 : Front.pop_back();
189 :
190 : // MI is already known to be dead. We need to see
191 : // if other instructions can also be removed.
192 33 : for (MachineOperand &MO : MI->operands()) {
193 28 : if ((!MO.isReg()) || (!MO.isUse()))
194 28 : continue;
195 12 : unsigned Reg = MO.getReg();
196 12 : if (!TRI->isVirtualRegister(Reg))
197 : continue;
198 12 : MachineOperand *Op = MI->findRegisterDefOperand(Reg);
199 :
200 0 : if (!Op)
201 : continue;
202 :
203 0 : MachineInstr *Def = Op->getParent();
204 :
205 : // We don't need to do anything if we have already marked
206 : // this instruction as being dead.
207 0 : if (DeadInstr.find(Def) != DeadInstr.end())
208 : continue;
209 :
210 : // Check if all the uses of this instruction are marked as
211 : // dead. If so, we can also mark this instruction as being
212 : // dead.
213 : bool IsDead = true;
214 0 : for (MachineOperand &MODef : Def->operands()) {
215 0 : if ((!MODef.isReg()) || (!MODef.isDef()))
216 : continue;
217 0 : unsigned DefReg = MODef.getReg();
218 0 : if (!TRI->isVirtualRegister(DefReg)) {
219 : IsDead = false;
220 : break;
221 : }
222 0 : for (MachineInstr &Use : MRI->use_instructions(Reg)) {
223 : // We don't care about self references.
224 0 : if (&Use == Def)
225 : continue;
226 0 : if (DeadInstr.find(&Use) == DeadInstr.end()) {
227 : IsDead = false;
228 : break;
229 : }
230 : }
231 : }
232 :
233 0 : if (!IsDead) continue;
234 :
235 : LLVM_DEBUG(dbgs() << "Deleting instruction " << *Def << "\n");
236 : DeadInstr.insert(Def);
237 : }
238 : }
239 5 : }
240 :
241 : // Creates the more optimized patterns and generally does all the code
242 : // transformations in this pass.
243 7 : unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) {
244 7 : if (MI->isCopy()) {
245 0 : return optimizeAllLanesPattern(MI, MI->getOperand(1).getReg());
246 : }
247 :
248 7 : if (MI->isInsertSubreg()) {
249 2 : unsigned DPRReg = MI->getOperand(1).getReg();
250 2 : unsigned SPRReg = MI->getOperand(2).getReg();
251 :
252 2 : if (TRI->isVirtualRegister(DPRReg) && TRI->isVirtualRegister(SPRReg)) {
253 2 : MachineInstr *DPRMI = MRI->getVRegDef(MI->getOperand(1).getReg());
254 2 : MachineInstr *SPRMI = MRI->getVRegDef(MI->getOperand(2).getReg());
255 :
256 2 : if (DPRMI && SPRMI) {
257 : // See if the first operand of this insert_subreg is IMPLICIT_DEF
258 2 : MachineInstr *ECDef = elideCopies(DPRMI);
259 2 : if (ECDef && ECDef->isImplicitDef()) {
260 : // Another corner case - if we're inserting something that is purely
261 : // a subreg copy of a DPR, just use that DPR.
262 :
263 1 : MachineInstr *EC = elideCopies(SPRMI);
264 : // Is it a subreg copy of ssub_0?
265 1 : if (EC && EC->isCopy() &&
266 0 : EC->getOperand(1).getSubReg() == ARM::ssub_0) {
267 : LLVM_DEBUG(dbgs() << "Found a subreg copy: " << *SPRMI);
268 :
269 : // Find the thing we're subreg copying out of - is it of the same
270 : // regclass as DPRMI? (i.e. a DPR or QPR).
271 0 : unsigned FullReg = SPRMI->getOperand(1).getReg();
272 : const TargetRegisterClass *TRC =
273 0 : MRI->getRegClass(MI->getOperand(1).getReg());
274 0 : if (TRC->hasSuperClassEq(MRI->getRegClass(FullReg))) {
275 : LLVM_DEBUG(dbgs() << "Subreg copy is compatible - returning ");
276 : LLVM_DEBUG(dbgs() << printReg(FullReg) << "\n");
277 0 : eraseInstrWithNoUses(MI);
278 0 : return FullReg;
279 : }
280 : }
281 :
282 1 : return optimizeAllLanesPattern(MI, MI->getOperand(2).getReg());
283 : }
284 : }
285 : }
286 1 : return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg());
287 : }
288 :
289 5 : if (MI->isRegSequence() && usesRegClass(MI->getOperand(1),
290 : &ARM::SPRRegClass)) {
291 : // See if all bar one of the operands are IMPLICIT_DEF and insert the
292 : // optimizer pattern accordingly.
293 : unsigned NumImplicit = 0, NumTotal = 0;
294 : unsigned NonImplicitReg = ~0U;
295 :
296 33 : for (unsigned I = 1; I < MI->getNumExplicitOperands(); ++I) {
297 56 : if (!MI->getOperand(I).isReg())
298 : continue;
299 14 : ++NumTotal;
300 14 : unsigned OpReg = MI->getOperand(I).getReg();
301 :
302 14 : if (!TRI->isVirtualRegister(OpReg))
303 : break;
304 :
305 14 : MachineInstr *Def = MRI->getVRegDef(OpReg);
306 14 : if (!Def)
307 : break;
308 14 : if (Def->isImplicitDef())
309 8 : ++NumImplicit;
310 : else
311 12 : NonImplicitReg = MI->getOperand(I).getReg();
312 : }
313 :
314 5 : if (NumImplicit == NumTotal - 1)
315 4 : return optimizeAllLanesPattern(MI, NonImplicitReg);
316 : else
317 1 : return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg());
318 : }
319 :
320 0 : llvm_unreachable("Unhandled update pattern!");
321 : }
322 :
323 : // Return true if this MachineInstr inserts a scalar (SPR) value into
324 : // a D or Q register.
325 37 : bool A15SDOptimizer::hasPartialWrite(MachineInstr *MI) {
326 : // The only way we can do a partial register update is through a COPY,
327 : // INSERT_SUBREG or REG_SEQUENCE.
328 37 : if (MI->isCopy() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass))
329 : return true;
330 :
331 37 : if (MI->isInsertSubreg() && usesRegClass(MI->getOperand(2),
332 : &ARM::SPRRegClass))
333 : return true;
334 :
335 35 : if (MI->isRegSequence() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass))
336 5 : return true;
337 :
338 : return false;
339 : }
340 :
341 : // Looks through full copies to get the instruction that defines the input
342 : // operand for MI.
343 3 : MachineInstr *A15SDOptimizer::elideCopies(MachineInstr *MI) {
344 : if (!MI->isFullCopy())
345 : return MI;
346 2 : if (!TRI->isVirtualRegister(MI->getOperand(1).getReg()))
347 : return nullptr;
348 0 : MachineInstr *Def = MRI->getVRegDef(MI->getOperand(1).getReg());
349 0 : if (!Def)
350 : return nullptr;
351 : return elideCopies(Def);
352 : }
353 :
354 : // Look through full copies and PHIs to get the set of non-copy MachineInstrs
355 : // that can produce MI.
356 0 : void A15SDOptimizer::elideCopiesAndPHIs(MachineInstr *MI,
357 : SmallVectorImpl<MachineInstr*> &Outs) {
358 : // Looking through PHIs may create loops so we need to track what
359 : // instructions we have visited before.
360 : std::set<MachineInstr *> Reached;
361 : SmallVector<MachineInstr *, 8> Front;
362 0 : Front.push_back(MI);
363 0 : while (Front.size() != 0) {
364 0 : MI = Front.back();
365 : Front.pop_back();
366 :
367 : // If we have already explored this MachineInstr, ignore it.
368 0 : if (Reached.find(MI) != Reached.end())
369 0 : continue;
370 : Reached.insert(MI);
371 0 : if (MI->isPHI()) {
372 0 : for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) {
373 0 : unsigned Reg = MI->getOperand(I).getReg();
374 0 : if (!TRI->isVirtualRegister(Reg)) {
375 0 : continue;
376 : }
377 0 : MachineInstr *NewMI = MRI->getVRegDef(Reg);
378 0 : if (!NewMI)
379 0 : continue;
380 0 : Front.push_back(NewMI);
381 : }
382 : } else if (MI->isFullCopy()) {
383 0 : if (!TRI->isVirtualRegister(MI->getOperand(1).getReg()))
384 0 : continue;
385 0 : MachineInstr *NewMI = MRI->getVRegDef(MI->getOperand(1).getReg());
386 0 : if (!NewMI)
387 0 : continue;
388 0 : Front.push_back(NewMI);
389 : } else {
390 : LLVM_DEBUG(dbgs() << "Found partial copy" << *MI << "\n");
391 0 : Outs.push_back(MI);
392 : }
393 : }
394 0 : }
395 :
396 : // Return the DPR virtual registers that are read by this machine instruction
397 : // (if any).
398 724 : SmallVector<unsigned, 8> A15SDOptimizer::getReadDPRs(MachineInstr *MI) {
399 581 : if (MI->isCopyLike() || MI->isInsertSubreg() || MI->isRegSequence() ||
400 : MI->isKill())
401 : return SmallVector<unsigned, 8>();
402 :
403 : SmallVector<unsigned, 8> Defs;
404 3218 : for (MachineOperand &MO : MI->operands()) {
405 2644 : if (!MO.isReg() || !MO.isUse())
406 : continue;
407 2303 : if (!usesRegClass(MO, &ARM::DPRRegClass) &&
408 1165 : !usesRegClass(MO, &ARM::QPRRegClass) &&
409 1112 : !usesRegClass(MO, &ARM::DPairRegClass)) // Treat DPair as QPR
410 : continue;
411 :
412 57 : Defs.push_back(MO.getReg());
413 : }
414 : return Defs;
415 : }
416 :
417 : // Creates a DPR register from an SPR one by using a VDUP.
418 13 : unsigned A15SDOptimizer::createDupLane(MachineBasicBlock &MBB,
419 : MachineBasicBlock::iterator InsertBefore,
420 : const DebugLoc &DL, unsigned Reg,
421 : unsigned Lane, bool QPR) {
422 37 : unsigned Out = MRI->createVirtualRegister(QPR ? &ARM::QPRRegClass :
423 : &ARM::DPRRegClass);
424 13 : BuildMI(MBB, InsertBefore, DL,
425 35 : TII->get(QPR ? ARM::VDUPLN32q : ARM::VDUPLN32d), Out)
426 13 : .addReg(Reg)
427 13 : .addImm(Lane)
428 13 : .add(predOps(ARMCC::AL));
429 :
430 13 : return Out;
431 : }
432 :
433 : // Creates a SPR register from a DPR by copying the value in lane 0.
434 0 : unsigned A15SDOptimizer::createExtractSubreg(
435 : MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
436 : const DebugLoc &DL, unsigned DReg, unsigned Lane,
437 : const TargetRegisterClass *TRC) {
438 0 : unsigned Out = MRI->createVirtualRegister(TRC);
439 0 : BuildMI(MBB,
440 : InsertBefore,
441 : DL,
442 0 : TII->get(TargetOpcode::COPY), Out)
443 0 : .addReg(DReg, 0, Lane);
444 :
445 0 : return Out;
446 : }
447 :
448 : // Takes two SPR registers and creates a DPR by using a REG_SEQUENCE.
449 0 : unsigned A15SDOptimizer::createRegSequence(
450 : MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
451 : const DebugLoc &DL, unsigned Reg1, unsigned Reg2) {
452 0 : unsigned Out = MRI->createVirtualRegister(&ARM::QPRRegClass);
453 0 : BuildMI(MBB,
454 : InsertBefore,
455 : DL,
456 0 : TII->get(TargetOpcode::REG_SEQUENCE), Out)
457 0 : .addReg(Reg1)
458 : .addImm(ARM::dsub_0)
459 0 : .addReg(Reg2)
460 : .addImm(ARM::dsub_1);
461 0 : return Out;
462 : }
463 :
464 : // Takes two DPR registers that have previously been VDUPed (Ssub0 and Ssub1)
465 : // and merges them into one DPR register.
466 0 : unsigned A15SDOptimizer::createVExt(MachineBasicBlock &MBB,
467 : MachineBasicBlock::iterator InsertBefore,
468 : const DebugLoc &DL, unsigned Ssub0,
469 : unsigned Ssub1) {
470 0 : unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass);
471 0 : BuildMI(MBB, InsertBefore, DL, TII->get(ARM::VEXTd32), Out)
472 0 : .addReg(Ssub0)
473 0 : .addReg(Ssub1)
474 : .addImm(1)
475 0 : .add(predOps(ARMCC::AL));
476 0 : return Out;
477 : }
478 :
479 0 : unsigned A15SDOptimizer::createInsertSubreg(
480 : MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
481 : const DebugLoc &DL, unsigned DReg, unsigned Lane, unsigned ToInsert) {
482 0 : unsigned Out = MRI->createVirtualRegister(&ARM::DPR_VFP2RegClass);
483 0 : BuildMI(MBB,
484 : InsertBefore,
485 : DL,
486 0 : TII->get(TargetOpcode::INSERT_SUBREG), Out)
487 0 : .addReg(DReg)
488 0 : .addReg(ToInsert)
489 0 : .addImm(Lane);
490 :
491 0 : return Out;
492 : }
493 :
494 : unsigned
495 0 : A15SDOptimizer::createImplicitDef(MachineBasicBlock &MBB,
496 : MachineBasicBlock::iterator InsertBefore,
497 : const DebugLoc &DL) {
498 0 : unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass);
499 : BuildMI(MBB,
500 : InsertBefore,
501 : DL,
502 0 : TII->get(TargetOpcode::IMPLICIT_DEF), Out);
503 0 : return Out;
504 : }
505 :
506 : // This function inserts instructions in order to optimize interactions between
507 : // SPR registers and DPR/QPR registers. It does so by performing VDUPs on all
508 : // lanes, and the using VEXT instructions to recompose the result.
509 : unsigned
510 7 : A15SDOptimizer::optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg) {
511 : MachineBasicBlock::iterator InsertPt(MI);
512 : DebugLoc DL = MI->getDebugLoc();
513 7 : MachineBasicBlock &MBB = *MI->getParent();
514 : InsertPt++;
515 : unsigned Out;
516 :
517 : // DPair has the same length as QPR and also has two DPRs as subreg.
518 : // Treat DPair as QPR.
519 14 : if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::QPRRegClass) ||
520 : MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::DPairRegClass)) {
521 2 : unsigned DSub0 = createExtractSubreg(MBB, InsertPt, DL, Reg,
522 : ARM::dsub_0, &ARM::DPRRegClass);
523 2 : unsigned DSub1 = createExtractSubreg(MBB, InsertPt, DL, Reg,
524 : ARM::dsub_1, &ARM::DPRRegClass);
525 :
526 2 : unsigned Out1 = createDupLane(MBB, InsertPt, DL, DSub0, 0);
527 2 : unsigned Out2 = createDupLane(MBB, InsertPt, DL, DSub0, 1);
528 2 : Out = createVExt(MBB, InsertPt, DL, Out1, Out2);
529 :
530 2 : unsigned Out3 = createDupLane(MBB, InsertPt, DL, DSub1, 0);
531 2 : unsigned Out4 = createDupLane(MBB, InsertPt, DL, DSub1, 1);
532 2 : Out2 = createVExt(MBB, InsertPt, DL, Out3, Out4);
533 :
534 2 : Out = createRegSequence(MBB, InsertPt, DL, Out, Out2);
535 :
536 5 : } else if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::DPRRegClass)) {
537 0 : unsigned Out1 = createDupLane(MBB, InsertPt, DL, Reg, 0);
538 0 : unsigned Out2 = createDupLane(MBB, InsertPt, DL, Reg, 1);
539 0 : Out = createVExt(MBB, InsertPt, DL, Out1, Out2);
540 :
541 : } else {
542 : assert(MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::SPRRegClass) &&
543 : "Found unexpected regclass!");
544 :
545 5 : unsigned PrefLane = getPrefSPRLane(Reg);
546 : unsigned Lane;
547 5 : switch (PrefLane) {
548 : case ARM::ssub_0: Lane = 0; break;
549 0 : case ARM::ssub_1: Lane = 1; break;
550 0 : default: llvm_unreachable("Unknown preferred lane!");
551 : }
552 :
553 : // Treat DPair as QPR
554 5 : bool UsesQPR = usesRegClass(MI->getOperand(0), &ARM::QPRRegClass) ||
555 3 : usesRegClass(MI->getOperand(0), &ARM::DPairRegClass);
556 :
557 5 : Out = createImplicitDef(MBB, InsertPt, DL);
558 5 : Out = createInsertSubreg(MBB, InsertPt, DL, Out, PrefLane, Reg);
559 5 : Out = createDupLane(MBB, InsertPt, DL, Out, Lane, UsesQPR);
560 5 : eraseInstrWithNoUses(MI);
561 : }
562 7 : return Out;
563 : }
564 :
565 724 : bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) {
566 : // We look for instructions that write S registers that are then read as
567 : // D/Q registers. These can only be caused by COPY, INSERT_SUBREG and
568 : // REG_SEQUENCE pseudos that insert an SPR value into a DPR register or
569 : // merge two SPR values to form a DPR register. In order avoid false
570 : // positives we make sure that there is an SPR producer so we look past
571 : // COPY and PHI nodes to find it.
572 : //
573 : // The best code pattern for when an SPR producer is going to be used by a
574 : // DPR or QPR consumer depends on whether the other lanes of the
575 : // corresponding DPR/QPR are currently defined.
576 : //
577 : // We can handle these efficiently, depending on the type of
578 : // pseudo-instruction that is producing the pattern
579 : //
580 : // * COPY: * VDUP all lanes and merge the results together
581 : // using VEXTs.
582 : //
583 : // * INSERT_SUBREG: * If the SPR value was originally in another DPR/QPR
584 : // lane, and the other lane(s) of the DPR/QPR register
585 : // that we are inserting in are undefined, use the
586 : // original DPR/QPR value.
587 : // * Otherwise, fall back on the same stategy as COPY.
588 : //
589 : // * REG_SEQUENCE: * If all except one of the input operands are
590 : // IMPLICIT_DEFs, insert the VDUP pattern for just the
591 : // defined input operand
592 : // * Otherwise, fall back on the same stategy as COPY.
593 : //
594 :
595 : // First, get all the reads of D-registers done by this instruction.
596 724 : SmallVector<unsigned, 8> Defs = getReadDPRs(MI);
597 : bool Modified = false;
598 :
599 57 : for (SmallVectorImpl<unsigned>::iterator I = Defs.begin(), E = Defs.end();
600 781 : I != E; ++I) {
601 : // Follow the def-use chain for this DPR through COPYs, and also through
602 : // PHIs (which are essentially multi-way COPYs). It is because of PHIs that
603 : // we can end up with multiple defs of this DPR.
604 :
605 : SmallVector<MachineInstr *, 8> DefSrcs;
606 114 : if (!TRI->isVirtualRegister(*I))
607 : continue;
608 52 : MachineInstr *Def = MRI->getVRegDef(*I);
609 52 : if (!Def)
610 : continue;
611 :
612 52 : elideCopiesAndPHIs(Def, DefSrcs);
613 :
614 94 : for (MachineInstr *MI : DefSrcs) {
615 : // If we've already analyzed and replaced this operand, don't do
616 : // anything.
617 42 : if (Replacements.find(MI) != Replacements.end())
618 35 : continue;
619 :
620 : // Now, work out if the instruction causes a SPR->DPR dependency.
621 37 : if (!hasPartialWrite(MI))
622 : continue;
623 :
624 : // Collect all the uses of this MI's DPR def for updating later.
625 : SmallVector<MachineOperand*, 8> Uses;
626 7 : unsigned DPRDefReg = MI->getOperand(0).getReg();
627 7 : for (MachineRegisterInfo::use_iterator I = MRI->use_begin(DPRDefReg),
628 21 : E = MRI->use_end(); I != E; ++I)
629 14 : Uses.push_back(&*I);
630 :
631 : // We can optimize this.
632 7 : unsigned NewReg = optimizeSDPattern(MI);
633 :
634 7 : if (NewReg != 0) {
635 : Modified = true;
636 14 : for (SmallVectorImpl<MachineOperand *>::const_iterator I = Uses.begin(),
637 21 : E = Uses.end(); I != E; ++I) {
638 : // Make sure to constrain the register class of the new register to
639 : // match what we're replacing. Otherwise we can optimize a DPR_VFP2
640 : // reference into a plain DPR, and that will end poorly. NewReg is
641 : // always virtual here, so there will always be a matching subclass
642 : // to find.
643 28 : MRI->constrainRegClass(NewReg, MRI->getRegClass((*I)->getReg()));
644 :
645 : LLVM_DEBUG(dbgs() << "Replacing operand " << **I << " with "
646 : << printReg(NewReg) << "\n");
647 14 : (*I)->substVirtReg(NewReg, 0, *TRI);
648 : }
649 : }
650 7 : Replacements[MI] = NewReg;
651 : }
652 : }
653 724 : return Modified;
654 : }
655 :
656 13368 : bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) {
657 13368 : if (skipFunction(Fn.getFunction()))
658 : return false;
659 :
660 13360 : const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>();
661 : // Since the A15SDOptimizer pass can insert VDUP instructions, it can only be
662 : // enabled when NEON is available.
663 13360 : if (!(STI.useSplatVFPToNeon() && STI.hasNEON()))
664 : return false;
665 :
666 66 : TII = STI.getInstrInfo();
667 66 : TRI = STI.getRegisterInfo();
668 66 : MRI = &Fn.getRegInfo();
669 : bool Modified = false;
670 :
671 : LLVM_DEBUG(dbgs() << "Running on function " << Fn.getName() << "\n");
672 :
673 : DeadInstr.clear();
674 : Replacements.clear();
675 :
676 184 : for (MachineBasicBlock &MBB : Fn) {
677 842 : for (MachineInstr &MI : MBB) {
678 724 : Modified |= runOnInstruction(&MI);
679 : }
680 : }
681 :
682 71 : for (MachineInstr *MI : DeadInstr) {
683 5 : MI->eraseFromParent();
684 : }
685 :
686 : return Modified;
687 : }
688 :
689 2568 : FunctionPass *llvm::createA15SDOptimizerPass() {
690 2568 : return new A15SDOptimizer();
691 : }
|