45 #define DEBUG_TYPE "a15-sd-optimizer"
// Returns the fixed display-name string for this class. Declared `override`,
// so it replaces a virtual from the base class; it reads no state (const).
54 StringRef getPassName()
const override {
return "ARM A15 S->D optimizer"; }
78 const DebugLoc &
DL,
unsigned Ssub0,
unsigned Ssub1);
88 unsigned Lane,
unsigned ToInsert);
// Declaration; defined out of line as A15SDOptimizer::getDPRLaneFromSPR.
// Takes a register number (SReg) and returns an unsigned value; the visible
// part of the definition returns ARM::ssub_1 when a DReg value is not
// ARM::NoRegister.
// NOTE(review): the rest of the definition is not visible here -- confirm the
// other return value(s) before relying on this description.
100 unsigned getDPRLaneFromSPR(
unsigned SReg);
// Declaration; defined out of line as A15SDOptimizer::getPrefSPRLane.
// From the visible parts of the definition: it returns ARM::ssub_0 when an
// MI or MO pointer is null, returns ARM::ssub_1 when MO's sub-register index
// is ARM::ssub_1, and on other paths forwards to getDPRLaneFromSPR(SReg)
// (SReg may first be replaced by the source register of a copy).
// NOTE(review): the definition is only partly visible -- confirm the exact
// conditions guarding each path.
115 unsigned getPrefSPRLane(
unsigned SReg);
// Instruction -> register record. Written as `Replacements[MI] = NewReg`,
// looked up with find() before an instruction is handled, and emptied with
// clear().
125 std::map<MachineInstr*, unsigned> Replacements;
// Set of instructions added via `DeadInstr.insert(Def)`; membership is tested
// with find() in several places.
// NOTE(review): presumably these are the instructions erased with
// eraseFromParent() later on -- confirm against the full file.
126 std::set<MachineInstr *> DeadInstr;
144 unsigned A15SDOptimizer::getDPRLaneFromSPR(
unsigned SReg) {
147 if (
DReg != ARM::NoRegister)
return ARM::ssub_1;
153 unsigned A15SDOptimizer::getPrefSPRLane(
unsigned SReg) {
155 return getDPRLaneFromSPR(SReg);
158 if (!
MI)
return ARM::ssub_0;
160 if (!MO)
return ARM::ssub_0;
161 assert(MO->
isReg() &&
"Non-register operand found!");
163 if (
MI->isCopy() && usesRegClass(
MI->getOperand(1),
164 &ARM::SPRRegClass)) {
165 SReg =
MI->getOperand(1).getReg();
169 if (MO->
getSubReg() == ARM::ssub_1)
return ARM::ssub_1;
172 return getDPRLaneFromSPR(SReg);
184 while (Front.size() != 0) {
204 if (DeadInstr.find(
Def) != DeadInstr.end())
212 if ((!MODef.isReg()) || (!MODef.isDef()))
223 if (DeadInstr.find(&
Use) == DeadInstr.end()) {
233 DeadInstr.insert(
Def);
242 return optimizeAllLanesPattern(
MI,
MI->getOperand(1).getReg());
245 if (
MI->isInsertSubreg()) {
253 if (DPRMI && SPRMI) {
262 if (EC &&
EC->isCopy() &&
263 EC->getOperand(1).getSubReg() == ARM::ssub_0) {
274 eraseInstrWithNoUses(
MI);
279 return optimizeAllLanesPattern(
MI,
MI->getOperand(2).getReg());
283 return optimizeAllLanesPattern(
MI,
MI->getOperand(0).getReg());
286 if (
MI->isRegSequence() && usesRegClass(
MI->getOperand(1),
287 &ARM::SPRRegClass)) {
290 unsigned NumImplicit = 0, NumTotal = 0;
291 unsigned NonImplicitReg = ~0U;
293 for (
unsigned I = 1;
I <
MI->getNumExplicitOperands(); ++
I) {
294 if (!
MI->getOperand(
I).isReg())
305 if (
Def->isImplicitDef())
308 NonImplicitReg =
MI->getOperand(
I).getReg();
311 if (NumImplicit == NumTotal - 1)
312 return optimizeAllLanesPattern(
MI, NonImplicitReg);
314 return optimizeAllLanesPattern(
MI,
MI->getOperand(0).getReg());
325 if (
MI->isCopy() && usesRegClass(
MI->getOperand(1), &ARM::SPRRegClass))
328 if (
MI->isInsertSubreg() && usesRegClass(
MI->getOperand(2),
332 if (
MI->isRegSequence() && usesRegClass(
MI->getOperand(1), &ARM::SPRRegClass))
341 if (!
MI->isFullCopy())
357 std::set<MachineInstr *> Reached;
360 while (Front.size() != 0) {
364 if (Reached.find(
MI) != Reached.end())
368 for (
unsigned I = 1,
E =
MI->getNumOperands();
I !=
E;
I += 2) {
376 Front.push_back(NewMI);
378 }
else if (
MI->isFullCopy()) {
384 Front.push_back(NewMI);
395 if (
MI->isCopyLike() ||
MI->isInsertSubreg() ||
MI->isRegSequence() ||
403 if (!usesRegClass(MO, &ARM::DPRRegClass) &&
404 !usesRegClass(MO, &ARM::QPRRegClass) &&
405 !usesRegClass(MO, &ARM::DPairRegClass))
408 Defs.push_back(MO.
getReg());
417 unsigned Lane,
bool QPR) {
421 TII->get(QPR ? ARM::VDUPLN32q : ARM::VDUPLN32d), Out)
430 unsigned A15SDOptimizer::createExtractSubreg(
438 TII->get(TargetOpcode::COPY), Out)
445 unsigned A15SDOptimizer::createRegSequence(
447 const DebugLoc &
DL,
unsigned Reg1,
unsigned Reg2) {
452 TII->get(TargetOpcode::REG_SEQUENCE), Out)
475 unsigned A15SDOptimizer::createInsertSubreg(
477 const DebugLoc &
DL,
unsigned DReg,
unsigned Lane,
unsigned ToInsert) {
482 TII->get(TargetOpcode::INSERT_SUBREG), Out)
498 TII->get(TargetOpcode::IMPLICIT_DEF), Out);
517 unsigned DSub0 = createExtractSubreg(
MBB, InsertPt,
DL,
Reg,
518 ARM::dsub_0, &ARM::DPRRegClass);
519 unsigned DSub1 = createExtractSubreg(
MBB, InsertPt,
DL,
Reg,
520 ARM::dsub_1, &ARM::DPRRegClass);
522 unsigned Out1 = createDupLane(
MBB, InsertPt,
DL, DSub0, 0);
523 unsigned Out2 = createDupLane(
MBB, InsertPt,
DL, DSub0, 1);
524 Out = createVExt(
MBB, InsertPt,
DL, Out1, Out2);
526 unsigned Out3 = createDupLane(
MBB, InsertPt,
DL, DSub1, 0);
527 unsigned Out4 = createDupLane(
MBB, InsertPt,
DL, DSub1, 1);
528 Out2 = createVExt(
MBB, InsertPt,
DL, Out3, Out4);
530 Out = createRegSequence(
MBB, InsertPt,
DL, Out, Out2);
533 unsigned Out1 = createDupLane(
MBB, InsertPt,
DL,
Reg, 0);
534 unsigned Out2 = createDupLane(
MBB, InsertPt,
DL,
Reg, 1);
535 Out = createVExt(
MBB, InsertPt,
DL, Out1, Out2);
539 "Found unexpected regclass!");
541 unsigned PrefLane = getPrefSPRLane(
Reg);
544 case ARM::ssub_0: Lane = 0;
break;
545 case ARM::ssub_1: Lane = 1;
break;
550 bool UsesQPR = usesRegClass(
MI->getOperand(0), &ARM::QPRRegClass) ||
551 usesRegClass(
MI->getOperand(0), &ARM::DPairRegClass);
553 Out = createImplicitDef(
MBB, InsertPt,
DL);
554 Out = createInsertSubreg(
MBB, InsertPt,
DL, Out, PrefLane,
Reg);
555 Out = createDupLane(
MBB, InsertPt,
DL, Out, Lane, UsesQPR);
556 eraseInstrWithNoUses(
MI);
595 for (
unsigned I : Defs) {
607 elideCopiesAndPHIs(
Def, DefSrcs);
612 if (Replacements.find(
MI) != Replacements.end())
616 if (!hasPartialWrite(
MI))
621 Register DPRDefReg =
MI->getOperand(0).getReg();
626 unsigned NewReg = optimizeSDPattern(
MI);
640 Use->substVirtReg(NewReg, 0, *
TRI);
643 Replacements[
MI] = NewReg;
656 if (!(STI.useSplatVFPToNeon() && STI.hasNEON()))
667 Replacements.clear();
676 MI->eraseFromParent();
683 return new A15SDOptimizer();