45 #define DEBUG_TYPE "a15-sd-optimizer"
/// Returns the fixed, human-readable name of this pass:
/// "ARM A15 S->D optimizer".
54 StringRef getPassName()
const override {
return "ARM A15 S->D optimizer"; }
78 const DebugLoc &
DL,
unsigned Ssub0,
unsigned Ssub1);
88 unsigned Lane,
unsigned ToInsert);
100 unsigned getDPRLaneFromSPR(
unsigned SReg);
115 unsigned getPrefSPRLane(
unsigned SReg);
// Maps an instruction to the register recorded for it once it has been
// rewritten (the value stored is the result of optimizeSDPattern). It is
// looked up before an instruction is processed and cleared with
// Replacements.clear().
// NOTE(review): presumably the lookup prevents rewriting the same
// instruction twice and the clear happens once per function — confirm.
125 std::map<MachineInstr*, unsigned> Replacements;
// Instructions that have been identified as dead. Membership is tested
// before a definition or use is considered, and definitions are inserted
// once classified.
// NOTE(review): presumably these are the instructions later removed via
// eraseFromParent() — confirm against the full file.
126 std::set<MachineInstr *> DeadInstr;
144 unsigned A15SDOptimizer::getDPRLaneFromSPR(
unsigned SReg) {
147 if (
DReg != ARM::NoRegister)
return ARM::ssub_1;
153 unsigned A15SDOptimizer::getPrefSPRLane(
unsigned SReg) {
155 return getDPRLaneFromSPR(SReg);
158 if (!
MI)
return ARM::ssub_0;
160 if (!MO)
return ARM::ssub_0;
161 assert(MO->
isReg() &&
"Non-register operand found!");
163 if (
MI->isCopy() && usesRegClass(
MI->getOperand(1),
164 &ARM::SPRRegClass)) {
165 SReg =
MI->getOperand(1).getReg();
169 if (MO->
getSubReg() == ARM::ssub_1)
return ARM::ssub_1;
172 return getDPRLaneFromSPR(SReg);
184 while (Front.size() != 0) {
193 if (!
Reg.isVirtual())
204 if (DeadInstr.find(
Def) != DeadInstr.end())
212 if ((!MODef.isReg()) || (!MODef.isDef()))
223 if (DeadInstr.find(&
Use) == DeadInstr.end()) {
233 DeadInstr.insert(
Def);
242 return optimizeAllLanesPattern(
MI,
MI->getOperand(1).getReg());
245 if (
MI->isInsertSubreg()) {
253 if (DPRMI && SPRMI) {
262 if (EC &&
EC->isCopy() &&
263 EC->getOperand(1).getSubReg() == ARM::ssub_0) {
274 eraseInstrWithNoUses(
MI);
279 return optimizeAllLanesPattern(
MI,
MI->getOperand(2).getReg());
283 return optimizeAllLanesPattern(
MI,
MI->getOperand(0).getReg());
286 if (
MI->isRegSequence() && usesRegClass(
MI->getOperand(1),
287 &ARM::SPRRegClass)) {
290 unsigned NumImplicit = 0, NumTotal = 0;
291 unsigned NonImplicitReg = ~0U;
305 if (
Def->isImplicitDef())
308 NonImplicitReg = MO.
getReg();
311 if (NumImplicit == NumTotal - 1)
312 return optimizeAllLanesPattern(
MI, NonImplicitReg);
314 return optimizeAllLanesPattern(
MI,
MI->getOperand(0).getReg());
325 if (
MI->isCopy() && usesRegClass(
MI->getOperand(1), &ARM::SPRRegClass))
328 if (
MI->isInsertSubreg() && usesRegClass(
MI->getOperand(2),
332 if (
MI->isRegSequence() && usesRegClass(
MI->getOperand(1), &ARM::SPRRegClass))
341 if (!
MI->isFullCopy())
343 if (!
MI->getOperand(1).getReg().isVirtual())
348 return elideCopies(
Def);
357 std::set<MachineInstr *> Reached;
360 while (Front.size() != 0) {
364 if (!Reached.insert(
MI).second)
367 for (
unsigned I = 1,
E =
MI->getNumOperands();
I !=
E;
I += 2) {
369 if (!
Reg.isVirtual()) {
375 Front.push_back(NewMI);
377 }
else if (
MI->isFullCopy()) {
378 if (!
MI->getOperand(1).getReg().isVirtual())
383 Front.push_back(NewMI);
394 if (
MI->isCopyLike() ||
MI->isInsertSubreg() ||
MI->isRegSequence() ||
402 if (!usesRegClass(MO, &ARM::DPRRegClass) &&
403 !usesRegClass(MO, &ARM::QPRRegClass) &&
404 !usesRegClass(MO, &ARM::DPairRegClass))
407 Defs.push_back(MO.
getReg());
416 unsigned Lane,
bool QPR) {
420 TII->get(QPR ? ARM::VDUPLN32q : ARM::VDUPLN32d), Out)
429 unsigned A15SDOptimizer::createExtractSubreg(
437 TII->get(TargetOpcode::COPY), Out)
444 unsigned A15SDOptimizer::createRegSequence(
446 const DebugLoc &
DL,
unsigned Reg1,
unsigned Reg2) {
451 TII->get(TargetOpcode::REG_SEQUENCE), Out)
474 unsigned A15SDOptimizer::createInsertSubreg(
476 const DebugLoc &
DL,
unsigned DReg,
unsigned Lane,
unsigned ToInsert) {
481 TII->get(TargetOpcode::INSERT_SUBREG), Out)
497 TII->get(TargetOpcode::IMPLICIT_DEF), Out);
516 unsigned DSub0 = createExtractSubreg(
MBB, InsertPt,
DL,
Reg,
517 ARM::dsub_0, &ARM::DPRRegClass);
518 unsigned DSub1 = createExtractSubreg(
MBB, InsertPt,
DL,
Reg,
519 ARM::dsub_1, &ARM::DPRRegClass);
521 unsigned Out1 = createDupLane(
MBB, InsertPt,
DL, DSub0, 0);
522 unsigned Out2 = createDupLane(
MBB, InsertPt,
DL, DSub0, 1);
523 Out = createVExt(
MBB, InsertPt,
DL, Out1, Out2);
525 unsigned Out3 = createDupLane(
MBB, InsertPt,
DL, DSub1, 0);
526 unsigned Out4 = createDupLane(
MBB, InsertPt,
DL, DSub1, 1);
527 Out2 = createVExt(
MBB, InsertPt,
DL, Out3, Out4);
529 Out = createRegSequence(
MBB, InsertPt,
DL, Out, Out2);
532 unsigned Out1 = createDupLane(
MBB, InsertPt,
DL,
Reg, 0);
533 unsigned Out2 = createDupLane(
MBB, InsertPt,
DL,
Reg, 1);
534 Out = createVExt(
MBB, InsertPt,
DL, Out1, Out2);
538 "Found unexpected regclass!");
540 unsigned PrefLane = getPrefSPRLane(
Reg);
543 case ARM::ssub_0: Lane = 0;
break;
544 case ARM::ssub_1: Lane = 1;
break;
549 bool UsesQPR = usesRegClass(
MI->getOperand(0), &ARM::QPRRegClass) ||
550 usesRegClass(
MI->getOperand(0), &ARM::DPairRegClass);
552 Out = createImplicitDef(
MBB, InsertPt,
DL);
553 Out = createInsertSubreg(
MBB, InsertPt,
DL, Out, PrefLane,
Reg);
554 Out = createDupLane(
MBB, InsertPt,
DL, Out, Lane, UsesQPR);
555 eraseInstrWithNoUses(
MI);
594 for (
unsigned I : Defs) {
606 elideCopiesAndPHIs(
Def, DefSrcs);
611 if (Replacements.find(
MI) != Replacements.end())
615 if (!hasPartialWrite(
MI))
620 Register DPRDefReg =
MI->getOperand(0).getReg();
625 unsigned NewReg = optimizeSDPattern(
MI);
639 Use->substVirtReg(NewReg, 0, *
TRI);
642 Replacements[
MI] = NewReg;
655 if (!(STI.useSplatVFPToNeon() && STI.hasNEON()))
666 Replacements.clear();
675 MI->eraseFromParent();
682 return new A15SDOptimizer();