49 #define DEBUG_TYPE "arm-ldst-opt"
51 STATISTIC(NumLDMGened ,
"Number of ldm instructions generated");
52 STATISTIC(NumSTMGened ,
"Number of stm instructions generated");
53 STATISTIC(NumVLDMGened,
"Number of vldm instructions generated");
54 STATISTIC(NumVSTMGened,
"Number of vstm instructions generated");
55 STATISTIC(NumLdStMoved,
"Number of load / store instructions moved");
56 STATISTIC(NumLDRDFormed,
"Number of ldrd created before allocation");
57 STATISTIC(NumSTRDFormed,
"Number of strd created before allocation");
58 STATISTIC(NumLDRD2LDM,
"Number of ldrd instructions turned back into ldm");
59 STATISTIC(NumSTRD2STM,
"Number of strd instructions turned back into stm");
60 STATISTIC(NumLDRD2LDR,
"Number of ldrd instructions turned back into ldr's");
61 STATISTIC(NumSTRD2STR,
"Number of strd instructions turned back into str's");
72 #define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"
91 bool RegClassInfoValid;
92 bool isThumb1, isThumb2;
106 struct MemOpQueueEntry {
111 :
MI(&MI),
Offset(Offset), Position(Position) {}
117 struct MergeCandidate {
121 unsigned LatestMIIdx;
123 unsigned EarliestMIIdx;
128 bool CanMergeToLSMulti;
130 bool CanMergeToLSDouble;
141 unsigned Base,
unsigned WordOffset,
145 int Offset,
unsigned Base,
bool BaseKill,
unsigned Opcode,
150 int Offset,
unsigned Base,
bool BaseKill,
unsigned Opcode,
153 void FormCandidates(
const MemOpQueue &MemOps);
154 MachineInstr *MergeOpsUpdate(
const MergeCandidate &Cand);
171 for (
const auto &MO : MI.operands()) {
174 if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
185 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
189 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
190 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
191 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
192 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
196 if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
197 Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
313 case ARM::tLDMIA_UPD:
314 case ARM::tSTMIA_UPD:
315 case ARM::t2LDMIA_RET:
317 case ARM::t2LDMIA_UPD:
319 case ARM::t2STMIA_UPD:
321 case ARM::VLDMSIA_UPD:
323 case ARM::VSTMSIA_UPD:
325 case ARM::VLDMDIA_UPD:
327 case ARM::VSTMDIA_UPD:
341 case ARM::t2LDMDB_UPD:
343 case ARM::t2STMDB_UPD:
344 case ARM::VLDMSDB_UPD:
345 case ARM::VSTMSDB_UPD:
346 case ARM::VLDMDDB_UPD:
347 case ARM::VSTMDDB_UPD:
359 return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
363 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
371 return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
375 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
383 return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
431 case ARM::tLDMIA_UPD:
432 case ARM::tSTMIA_UPD:
454 assert(isThumb1 &&
"Can only update base register uses for Thumb1!");
457 for (; MBBI != MBB.
end(); ++MBBI) {
458 bool InsertSub =
false;
459 unsigned Opc = MBBI->getOpcode();
461 if (MBBI->readsRegister(Base)) {
464 Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
466 Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;
468 if (IsLoad || IsStore) {
474 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
481 if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
486 }
else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
487 !definesCPSR(*MBBI)) {
492 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
493 Offset = (Opc == ARM::tSUBi8) ?
494 MO.
getImm() + WordOffset * 4 :
495 MO.
getImm() - WordOffset * 4 ;
496 if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
511 }
else if (definesCPSR(*MBBI) || MBBI->isCall() || MBBI->isBranch()) {
525 if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
536 if (MBBI != MBB.
end()) --MBBI;
538 BuildMI(MBB, MBBI, DL,
TII->get(ARM::tSUBi8), Base),
true)
545 if (!RegClassInfoValid) {
546 RegClassInfo.runOnMachineFunction(*MF);
547 RegClassInfoValid =
true;
550 for (
unsigned Reg : RegClassInfo.getOrder(&RegClass))
551 if (!LiveRegs.contains(
Reg))
562 if (!LiveRegsValid) {
564 LiveRegs.addLiveOuts(MBB);
565 LiveRegPos = MBB.
end();
566 LiveRegsValid =
true;
569 while (LiveRegPos != Before) {
571 LiveRegs.stepBackward(*LiveRegPos);
577 for (
const std::pair<unsigned, bool> &R :
Regs)
588 int Offset,
unsigned Base,
bool BaseKill,
unsigned Opcode,
591 unsigned NumRegs =
Regs.size();
596 bool SafeToClobberCPSR = !isThumb1 ||
600 bool Writeback = isThumb1;
606 assert(Base != ARM::SP &&
"Thumb1 does not allow SP in register list");
607 if (Opcode == ARM::tLDRi) {
609 }
else if (Opcode == ARM::tSTRi) {
617 bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
619 if (Offset == 4 && haveIBAndDA) {
621 }
else if (Offset == -4 * (
int)NumRegs + 4 && haveIBAndDA) {
623 }
else if (Offset == -4 * (
int)NumRegs && isNotVFP && !isThumb1) {
626 }
else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
639 if (!SafeToClobberCPSR)
646 NewBase =
Regs[NumRegs-1].first;
650 moveLiveRegsBefore(MBB, InsertBefore);
654 for (
const std::pair<unsigned, bool> &R :
Regs)
655 LiveRegs.addReg(R.first);
657 NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
663 isThumb2 ? ARM::t2ADDri :
664 (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
665 (isThumb1 && Offset < 8) ? ARM::tADDi3 :
666 isThumb1 ? ARM::tADDi8 : ARM::ADDri;
671 isThumb2 ? ARM::t2SUBri :
672 (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
673 isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
676 if (!TL->isLegalAddImmediate(Offset))
682 bool KillOldBase = BaseKill &&
691 if (Base != NewBase &&
692 (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
699 BuildMI(MBB, InsertBefore, DL,
TII->get(ARM::tMOVSr), NewBase)
702 BuildMI(MBB, InsertBefore, DL,
TII->get(ARM::tMOVr), NewBase)
704 .addImm(Pred).
addReg(PredReg);
710 if (BaseOpc == ARM::tADDrSPi) {
711 assert(Offset % 4 == 0 &&
"tADDrSPi offset is scaled by 4");
712 BuildMI(MBB, InsertBefore, DL,
TII->get(BaseOpc), NewBase)
717 BuildMI(MBB, InsertBefore, DL,
TII->get(BaseOpc), NewBase),
true)
721 BuildMI(MBB, InsertBefore, DL,
TII->get(BaseOpc), NewBase)
745 if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
751 assert(isThumb1 &&
"expected Writeback only inThumb1");
752 if (Opcode == ARM::tLDMIA) {
755 Opcode = ARM::tLDMIA_UPD;
758 MIB =
BuildMI(MBB, InsertBefore, DL,
TII->get(Opcode));
767 UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);
771 MIB =
BuildMI(MBB, InsertBefore, DL,
TII->get(Opcode));
777 for (
const std::pair<unsigned, bool> &R : Regs)
785 int Offset,
unsigned Base,
bool BaseKill,
unsigned Opcode,
787 ArrayRef<std::pair<unsigned, bool>> Regs)
const {
789 assert((IsLoad ||
isi32Store(Opcode)) &&
"Must have integer load or store");
790 unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;
794 TII->get(LoadStoreOpcode));
807 MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(
const MergeCandidate &Cand) {
819 bool IsKill = MO.
isKill();
822 Regs.
push_back(std::make_pair(Reg, IsKill));
833 unsigned DefReg = MO.
getReg();
848 iterator InsertBefore = std::next(iterator(LatestMI));
853 unsigned PredReg = 0;
857 if (Cand.CanMergeToLSDouble)
858 Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
859 Opcode, Pred, PredReg, DL, Regs);
860 if (!Merged && Cand.CanMergeToLSMulti)
861 Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
862 Opcode, Pred, PredReg, DL, Regs);
868 iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
869 bool EarliestAtBegin =
false;
870 if (EarliestI == MBB.
begin()) {
871 EarliestAtBegin =
true;
873 EarliestI = std::prev(EarliestI);
882 EarliestI = MBB.
begin();
884 EarliestI = std::next(EarliestI);
885 auto FixupRange =
make_range(EarliestI, iterator(Merged));
891 for (
unsigned &ImpDefReg : ImpDefs) {
904 for (
unsigned ImpDef : ImpDefs)
927 return (Value % 4) == 0 && Value < 1024;
949 void ARMLoadStoreOpt::FormCandidates(
const MemOpQueue &MemOps) {
956 unsigned EIndex = MemOps.size();
960 int Offset = MemOps[SIndex].Offset;
962 unsigned PReg = PMO.
getReg();
963 unsigned PRegNum = PMO.
isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
964 unsigned Latest = SIndex;
965 unsigned Earliest = SIndex;
967 bool CanMergeToLSDouble =
971 if (STI->isCortexM3() &&
isi32Load(Opcode) &&
973 CanMergeToLSDouble =
false;
975 bool CanMergeToLSMulti =
true;
978 if (STI->hasSlowOddRegister() && !isNotVFP && (PRegNum % 2) == 1)
979 CanMergeToLSMulti =
false;
983 if (PReg == ARM::SP || PReg ==
ARM::PC)
984 CanMergeToLSMulti = CanMergeToLSDouble =
false;
988 CanMergeToLSMulti = CanMergeToLSDouble =
false;
991 for (
unsigned I = SIndex+1;
I < EIndex; ++
I, ++Count) {
992 int NewOffset = MemOps[
I].Offset;
993 if (NewOffset != Offset + (
int)Size)
996 unsigned Reg = MO.
getReg();
997 if (Reg == ARM::SP || Reg ==
ARM::PC)
1001 unsigned RegNum = MO.
isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
1002 bool PartOfLSMulti = CanMergeToLSMulti;
1003 if (PartOfLSMulti) {
1005 if (RegNum <= PRegNum)
1006 PartOfLSMulti =
false;
1010 else if (!isNotVFP && RegNum != PRegNum+1)
1011 PartOfLSMulti =
false;
1014 bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;
1016 if (!PartOfLSMulti && !PartOfLSDouble)
1018 CanMergeToLSMulti &= PartOfLSMulti;
1019 CanMergeToLSDouble &= PartOfLSDouble;
1022 unsigned Position = MemOps[
I].Position;
1023 if (Position < MemOps[Latest].Position)
1025 else if (Position > MemOps[Earliest].Position)
1033 MergeCandidate *Candidate =
new(
Allocator.Allocate()) MergeCandidate;
1034 for (
unsigned C = SIndex, CE = SIndex + Count;
C <
CE; ++
C)
1035 Candidate->Instrs.push_back(MemOps[
C].MI);
1036 Candidate->LatestMIIdx = Latest - SIndex;
1037 Candidate->EarliestMIIdx = Earliest - SIndex;
1038 Candidate->InsertPos = MemOps[Latest].Position;
1040 CanMergeToLSMulti = CanMergeToLSDouble =
false;
1041 Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
1042 Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
1043 Candidates.push_back(Candidate);
1046 }
while (SIndex < EIndex);
1124 case ARM::tADDi8: Scale = 4; CheckCPSRDef =
true;
break;
1125 case ARM::tSUBi8: Scale = -4; CheckCPSRDef =
true;
break;
1127 case ARM::SUBri: Scale = -1; CheckCPSRDef =
true;
break;
1129 case ARM::ADDri: Scale = 1; CheckCPSRDef =
true;
break;
1130 case ARM::tADDspi: Scale = 4; CheckCPSRDef =
false;
break;
1131 case ARM::tSUBspi: Scale = -4; CheckCPSRDef =
false;
break;
1139 MIPredReg != PredReg)
1142 if (CheckCPSRDef && definesCPSR(MI))
1155 if (MBBI == BeginMBBI)
1160 while (PrevMBBI->isDebugValue() && PrevMBBI != BeginMBBI)
1164 return Offset == 0 ? EndMBBI : PrevMBBI;
1176 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
1178 if (NextMBBI == EndMBBI)
1182 return Offset == 0 ? EndMBBI : NextMBBI;
1197 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(
MachineInstr *MI) {
1199 if (isThumb1)
return false;
1202 unsigned Base = BaseOP.
getReg();
1203 bool BaseKill = BaseOP.
isKill();
1204 unsigned PredReg = 0;
1222 if (Mode ==
ARM_AM::ia && Offset == -Bytes) {
1224 }
else if (Mode ==
ARM_AM::ib && Offset == -Bytes) {
1238 bool HighRegsUsed =
false;
1241 HighRegsUsed =
true;
1246 MergeInstr = MBB.
end();
1251 if (MergeInstr != MBB.
end())
1252 MBB.
erase(MergeInstr);
1258 .addImm(Pred).
addReg(PredReg);
1261 for (
unsigned OpNum = 3, e = MI->
getNumOperands(); OpNum != e; ++OpNum)
1275 return ARM::LDR_PRE_IMM;
1277 return ARM::STR_PRE_IMM;
1279 return Mode ==
ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1281 return Mode ==
ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1283 return Mode ==
ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1285 return Mode ==
ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1288 return ARM::t2LDR_PRE;
1291 return ARM::t2STR_PRE;
1300 return ARM::LDR_POST_IMM;
1302 return ARM::STR_POST_IMM;
1304 return Mode ==
ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1306 return Mode ==
ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1308 return Mode ==
ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1310 return Mode ==
ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1313 return ARM::t2LDR_POST;
1316 return ARM::t2STR_POST;
1323 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(
MachineInstr *MI) {
1326 if (isThumb1)
return false;
1332 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
1333 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
1334 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
1346 unsigned PredReg = 0;
1355 if (!isAM5 && Offset == Bytes) {
1357 }
else if (Offset == -Bytes) {
1361 if (Offset == Bytes) {
1363 }
else if (!isAM5 && Offset == -Bytes) {
1368 MBB.
erase(MergeInstr);
1382 .addImm(Pred).
addReg(PredReg)
1388 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
1409 if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
1412 BuildMI(MBB, MBBI, DL,
TII->get(NewOpc), Base)
1417 BuildMI(MBB, MBBI, DL,
TII->get(NewOpc), Base)
1427 bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(
MachineInstr &MI)
const {
1429 assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
1430 "Must have t2STRDi8 or t2LDRDi8");
1437 unsigned Base = BaseOp.
getReg();
1440 if (Reg0Op.
getReg() == Base || Reg1Op.
getReg() == Base)
1451 if (Offset == 8 || Offset == -8) {
1452 NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
1455 if (Offset == 8 || Offset == -8) {
1456 NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
1460 MBB.
erase(MergeInstr);
1464 if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
1468 assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
1474 assert(
TII->get(Opcode).getNumOperands() == 6 &&
1475 TII->get(NewOpc).getNumOperands() == 7 &&
1476 "Unexpected number of operands in Opcode specification.");
1544 bool isDef,
const DebugLoc &DL,
unsigned NewOpc,
1545 unsigned Reg,
bool RegDeadKill,
bool RegUndef,
1546 unsigned BaseReg,
bool BaseKill,
bool BaseUndef,
1569 if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
1573 unsigned BaseReg = BaseOp.
getReg();
1576 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg,
false);
1577 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg,
false);
1581 bool Errata602117 = EvenReg == BaseReg &&
1582 (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
1584 bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
1585 (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);
1587 if (!Errata602117 && !NonConsecutiveRegs)
1590 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1591 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
1592 bool EvenDeadKill = isLd ?
1595 bool OddDeadKill = isLd ?
1598 bool BaseKill = BaseOp.
isKill();
1599 bool BaseUndef = BaseOp.
isUndef();
1603 unsigned PredReg = 0;
1606 if (OddRegNum > EvenRegNum && OffImm == 0) {
1609 unsigned NewOpc = (isLd)
1610 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1611 : (isT2 ? ARM::t2STMIA : ARM::STMIA);
1613 BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
TII->get(NewOpc))
1615 .addImm(Pred).
addReg(PredReg)
1620 BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
TII->get(NewOpc))
1622 .addImm(Pred).
addReg(PredReg)
1631 unsigned NewOpc = (isLd)
1632 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1633 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1636 unsigned NewOpc2 = (isLd)
1637 ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1638 : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1643 (BaseKill || OffKill) &&
1644 (TRI->regsOverlap(EvenReg, BaseReg))) {
1645 assert(!TRI->regsOverlap(OddReg, BaseReg));
1647 OddReg, OddDeadKill,
false,
1648 BaseReg,
false, BaseUndef,
false, OffUndef,
1649 Pred, PredReg,
TII, isT2);
1651 EvenReg, EvenDeadKill,
false,
1652 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1653 Pred, PredReg,
TII, isT2);
1655 if (OddReg == EvenReg && EvenDeadKill) {
1659 EvenDeadKill =
false;
1663 if (EvenReg == BaseReg)
1664 EvenDeadKill =
false;
1666 EvenReg, EvenDeadKill, EvenUndef,
1667 BaseReg,
false, BaseUndef,
false, OffUndef,
1668 Pred, PredReg,
TII, isT2);
1670 OddReg, OddDeadKill, OddUndef,
1671 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1672 Pred, PredReg,
TII, isT2);
1680 MBBI = MBB.
erase(MBBI);
1688 unsigned CurrBase = 0;
1689 unsigned CurrOpc = ~0u;
1691 unsigned Position = 0;
1692 assert(Candidates.size() == 0);
1693 assert(MergeBaseCandidates.size() == 0);
1694 LiveRegsValid =
false;
1699 MBBI = std::prev(
I);
1700 if (FixInvalidRegPairOp(MBB, MBBI))
1705 unsigned Opcode = MBBI->getOpcode();
1707 unsigned Reg = MO.
getReg();
1709 unsigned PredReg = 0;
1712 if (CurrBase == 0) {
1717 MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position));
1721 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1729 bool Overlap =
false;
1731 Overlap = (Base ==
Reg);
1733 for (
const MemOpQueueEntry &
E : MemOps) {
1734 if (TRI->regsOverlap(Reg,
E.MI->getOperand(0).getReg())) {
1744 if (Offset > MemOps.back().Offset) {
1745 MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position));
1748 MemOpQueue::iterator
MI, ME;
1749 for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++
MI) {
1750 if (Offset < MI->Offset) {
1754 if (Offset == MI->Offset) {
1760 if (MI != MemOps.end()) {
1761 MemOps.insert(MI, MemOpQueueEntry(*MBBI, Offset, Position));
1772 }
else if (MBBI->isDebugValue()) {
1774 }
else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
1775 MBBI->getOpcode() == ARM::t2STRDi8) {
1778 MergeBaseCandidates.push_back(&*MBBI);
1783 if (MemOps.size() > 0) {
1784 FormCandidates(MemOps);
1792 if (MemOps.size() > 0)
1793 FormCandidates(MemOps);
1797 auto LessThan = [](
const MergeCandidate* M0,
const MergeCandidate *M1) {
1798 return M0->InsertPos < M1->InsertPos;
1800 std::sort(Candidates.begin(), Candidates.end(), LessThan);
1803 bool Changed =
false;
1804 for (
const MergeCandidate *Candidate : Candidates) {
1805 if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
1811 if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
1812 MergeBaseUpdateLSDouble(*Merged);
1814 MergeBaseUpdateLSMultiple(Merged);
1817 if (MergeBaseUpdateLoadStore(MI))
1822 assert(Candidate->Instrs.size() == 1);
1823 if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
1830 MergeBaseUpdateLSDouble(*MI);
1831 MergeBaseCandidates.clear();
1848 if (isThumb1)
return false;
1849 if (MBB.
empty())
return false;
1852 if (MBBI != MBB.
begin() && MBBI != MBB.
end() &&
1853 (MBBI->getOpcode() == ARM::BX_RET ||
1854 MBBI->getOpcode() == ARM::tBX_RET ||
1855 MBBI->getOpcode() == ARM::MOVPCLR)) {
1858 while (PrevI->isDebugValue() && PrevI != MBB.
begin())
1862 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
1863 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
1864 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
1866 if (MO.
getReg() != ARM::LR)
1868 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
1869 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
1870 Opcode == ARM::LDMIA_UPD) &&
"Unsupported multiple load-return!");
1883 if (MBBI == MBB.
begin() || MBBI == MBB.
end() ||
1884 MBBI->getOpcode() != ARM::tBX_RET)
1889 if (Prev->getOpcode() != ARM::tMOVr || !Prev->definesRegister(ARM::LR))
1892 for (
auto Use : Prev->uses())
1913 TII = STI->getInstrInfo();
1916 RegClassInfoValid =
false;
1917 isThumb2 = AFI->isThumb2Function();
1918 isThumb1 = AFI->isThumbFunction() && !isThumb2;
1920 bool Modified =
false;
1924 Modified |= LoadStoreMultipleOpti(MBB);
1925 if (STI->hasV5TOps())
1926 Modified |= MergeReturnIntoLDM(MBB);
1928 Modified |= CombineMovBx(MBB);
1935 #define ARM_PREALLOC_LOAD_STORE_OPT_NAME \
1936 "ARM pre- register allocation load / store optimization pass"
1954 StringRef getPassName()
const override {
1960 unsigned &NewOpc,
unsigned &EvenReg,
1961 unsigned &OddReg,
unsigned &BaseReg,
1967 unsigned Base,
bool isLd,
1981 TD = &Fn.getDataLayout();
1982 STI = &
static_cast<const ARMSubtarget &
>(Fn.getSubtarget());
1983 TII = STI->getInstrInfo();
1984 TRI = STI->getRegisterInfo();
1985 MRI = &Fn.getRegInfo();
1988 bool Modified =
false;
1990 Modified |= RescheduleLoadStoreInstrs(&MFI);
2006 if (I->isDebugValue() || MemOps.
count(&*I))
2008 if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
2010 if (isLd && I->mayStore())
2022 for (
unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
2026 unsigned Reg = MO.
getReg();
2029 if (Reg != Base && !MemRegs.
count(Reg))
2030 AddedRegPressure.
insert(Reg);
2035 if (MemRegs.
size() <= 4)
2038 return AddedRegPressure.
size() <= MemRegs.
size() * 2;
2045 unsigned &SecondReg,
2046 unsigned &BaseReg,
int &Offset,
2051 if (!STI->hasV5TEOps())
2057 if (Opcode == ARM::LDRi12) {
2059 }
else if (Opcode == ARM::STRi12) {
2061 }
else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
2062 NewOpc = ARM::t2LDRDi8;
2065 }
else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
2066 NewOpc = ARM::t2STRDi8;
2082 unsigned ReqAlign = STI->hasV6Ops()
2085 if (Align < ReqAlign)
2091 int Limit = (1 << 8) * Scale;
2092 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
2101 int Limit = (1 << 8) * Scale;
2102 if (OffImm >= Limit || (OffImm & (Scale-1)))
2108 if (FirstReg == SecondReg)
2118 unsigned Base,
bool isLd,
2120 bool RetVal =
false;
2127 assert(LHS == RHS || LOffset != ROffset);
2128 return LOffset > ROffset;
2135 while (Ops.
size() > 1) {
2136 unsigned FirstLoc = ~0U;
2137 unsigned LastLoc = 0;
2141 unsigned LastOpcode = 0;
2142 unsigned LastBytes = 0;
2143 unsigned NumMove = 0;
2144 for (
int i = Ops.
size() - 1;
i >= 0; --
i) {
2146 unsigned Loc = MI2LocMap[
Op];
2147 if (Loc <= FirstLoc) {
2151 if (Loc >= LastLoc) {
2158 if (LastOpcode && LSMOpcode != LastOpcode)
2164 if (Bytes != LastBytes || Offset != (LastOffset + (
int)Bytes))
2169 LastOpcode = LSMOpcode;
2179 for (
int i = NumMove-1;
i >= 0; --
i) {
2186 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4;
2189 MemOps, MemRegs, TRI);
2191 for (
unsigned i = 0; i != NumMove; ++
i)
2196 while (InsertPos != MBB->
end() &&
2197 (MemOps.
count(&*InsertPos) || InsertPos->isDebugValue()))
2204 unsigned FirstReg = 0, SecondReg = 0;
2205 unsigned BaseReg = 0, PredReg = 0;
2208 unsigned NewOpc = 0;
2211 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
2212 FirstReg, SecondReg, BaseReg,
2213 Offset, PredReg, Pred, isT2)) {
2219 MRI->constrainRegClass(FirstReg, TRC);
2220 MRI->constrainRegClass(SecondReg, TRC);
2235 DEBUG(
dbgs() <<
"Formed " << *MIB <<
"\n");
2249 DEBUG(
dbgs() <<
"Formed " << *MIB <<
"\n");
2261 for (
unsigned i = 0; i != NumMove; ++
i) {
2264 MBB->
splice(InsertPos, MBB, Op);
2268 NumLdStMoved += NumMove;
2279 bool RetVal =
false;
2291 for (; MBBI !=
E; ++MBBI) {
2300 MI2LocMap[&MI] = ++Loc;
2304 unsigned PredReg = 0;
2313 bool StopHere =
false;
2316 Base2LdsMap.
find(Base);
2317 if (BI != Base2LdsMap.
end()) {
2318 for (
unsigned i = 0, e = BI->second.
size(); i != e; ++
i) {
2325 BI->second.push_back(&MI);
2327 Base2LdsMap[Base].push_back(&MI);
2332 Base2StsMap.
find(Base);
2333 if (BI != Base2StsMap.
end()) {
2334 for (
unsigned i = 0, e = BI->second.
size(); i != e; ++
i) {
2341 BI->second.push_back(&MI);
2343 Base2StsMap[Base].push_back(&MI);
2357 for (
unsigned i = 0, e = LdBases.
size(); i != e; ++
i) {
2358 unsigned Base = LdBases[
i];
2361 RetVal |= RescheduleOps(MBB, Lds, Base,
true, MI2LocMap);
2365 for (
unsigned i = 0, e = StBases.
size(); i != e; ++
i) {
2366 unsigned Base = StBases[
i];
2369 RetVal |= RescheduleOps(MBB, Sts, Base,
false, MI2LocMap);
2373 Base2LdsMap.
clear();
2374 Base2StsMap.
clear();
2387 return new ARMPreAllocLoadStoreOpt();
2388 return new ARMLoadStoreOpt();
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc, ARM_AM::AddrOpc Mode)
unsigned succ_size() const
void push_back(const T &Elt)
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
A parsed version of the target data layout string in and methods for querying it. ...
static bool isT1i32Load(unsigned Opc)
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
static unsigned char getAM3Offset(unsigned AM3Opc)
STATISTIC(NumFunctions,"Total number of functions")
iterator_range< mop_iterator > uses()
Returns a range that includes all operands that are register uses.
Implements a dense probed hash-table based set.
static unsigned getImmScale(unsigned Opc)
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Describe properties that are true of each instruction in the target description file.
void setIsUndef(bool Val=true)
const ARMTargetLowering * getTargetLowering() const override
size_type count(PtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
static IntegerType * getInt64Ty(LLVMContext &C)
static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg, ARMCC::CondCodes Pred, unsigned PredReg)
Check if the given instruction increments or decrements a register and return the amount it is increm...
bool isTerminator(QueryType Type=AnyInBundle) const
Returns true if this instruction part of the terminator for a basic block.
This file defines the MallocAllocator and BumpPtrAllocator interfaces.
bool optForMinSize() const
Optimize this function for minimum size (-Oz).
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
static const MachineInstrBuilder & AddDefaultPred(const MachineInstrBuilder &MIB)
A description of a memory reference used in the backend.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
A Use represents the edge between a Value definition and its users.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Reg
All possible values of the reg field in the ModR/M byte.
Windows NT (Windows on ARM)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
unsigned getNumOperands() const
Access to explicit operands of the instruction.
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, MachineBasicBlock::iterator I, MachineBasicBlock::iterator E, SmallPtrSetImpl< MachineInstr * > &MemOps, SmallSet< unsigned, 4 > &MemRegs, const TargetRegisterInfo *TRI)
const HexagonRegisterInfo & getRegisterInfo() const
HexagonInstrInfo specifics.
static unsigned getAlignment(GlobalVariable *GV)
void copyImplicitOps(MachineFunction &MF, const MachineInstr &MI)
Copy implicit register operands from specified instruction to this instruction.
static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
uint64_t getAlignment() const
Return the minimum known alignment in bytes of the actual memory reference.
iterator getLastNonDebugInstr()
Returns an iterator to the last non-debug instruction in the basic block, or end().
unsigned getUndefRegState(bool B)
LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, unsigned Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been <def>ined and not <kill>ed as of just before Before...
unsigned getTransientStackAlignment() const
getTransientStackAlignment - This method returns the number of bytes to which the stack pointer must ...
unsigned getKillRegState(bool B)
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
static GCRegistry::Add< CoreCLRGC > E("coreclr","CoreCLR-compatible GC")
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc, ARM_AM::AddrOpc Mode)
const MachineBasicBlock * getParent() const
TargetInstrInfo - Interface to description of machine instruction set.
bool isDebugValue() const
#define ARM_PREALLOC_LOAD_STORE_OPT_NAME
unsigned getDeadRegState(bool B)
mmo_iterator memoperands_end() const
unsigned getDefRegState(bool B)
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
initializer< Ty > init(const Ty &Val)
const MachineInstrBuilder & setMemRefs(MachineInstr::mmo_iterator b, MachineInstr::mmo_iterator e) const
bool regsOverlap(unsigned regA, unsigned regB) const
Returns true if the two registers are equal or alias each other.
static unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO, unsigned IdxMode=0)
unsigned const MachineRegisterInfo * MRI
static unsigned char getAM5Offset(unsigned AM5Opc)
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
static bool isT2i32Load(unsigned Opc)
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
const MachineOperand & getOperand(unsigned i) const
std::pair< iterator, bool > insert(const ValueT &V)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Register is known to be fully dead.
Greedy Register Allocator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
void setImm(int64_t immVal)
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
FunctionPass class - This class is used to implement most global optimizations.
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
ARMCC::CondCodes getInstrPredicate(const MachineInstr &MI, unsigned &PredReg)
getInstrPredicate - If instruction is predicated, returns its predicate condition, otherwise returns AL.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
static bool isMemoryOp(const MachineInstr &MI)
Returns true if instruction is a memory operation that this pass is capable of operating on...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
static cl::opt< bool > AssumeMisalignedLoadStores("arm-assume-misaligned-load-store", cl::Hidden, cl::init(false), cl::desc("Be more conservative in ARM load/store opt"))
This switch disables formation of double/multi instructions that could potentially lead to (new) alig...
FunctionPass * createARMLoadStoreOptimizationPass(bool PreAlloc=false)
Returns an instance of the load / store optimization pass.
void setIsKill(bool Val=true)
static bool mayCombineMisaligned(const TargetSubtargetInfo &STI, const MachineInstr &MI)
Return true for loads/stores that can be combined to a double/multi operation without increasing the ...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
static MachineBasicBlock::iterator findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg, ARMCC::CondCodes Pred, unsigned PredReg, int &Offset)
Searches for an increment or decrement of Reg before MBBI.
static bool isi32Store(unsigned Opc)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool readsRegister(unsigned Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr reads the specified register.
virtual const TargetFrameLowering * getFrameLowering() const
Iterator for intrusive lists based on ilist_node.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements...
void setDesc(const MCInstrDesc &tid)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one...
MachineOperand class - Representation of each machine instruction operand.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
A BumpPtrAllocator that allows only elements of a specific type to be allocated.
static GCRegistry::Add< ShadowStackGC > C("shadow-stack","Very portable GC for uncooperative code generators")
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
#define ARM_LOAD_STORE_OPT_NAME
static unsigned getReg(const void *D, unsigned RC, unsigned RegNo)
static AddrOpc getAM3Op(unsigned AM3Opc)
static unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset, unsigned IdxMode=0)
getAM3Opc - This function encodes the addrmode3 opc field.
iterator_range< mop_iterator > implicit_operands()
static const MachineOperand & getLoadStoreBaseOp(const MachineInstr &MI)
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
MachineFunctionProperties & set(Property P)
TargetSubtargetInfo - Generic base class for all target subtargets.
static bool isARMLowRegister(unsigned Reg)
isARMLowRegister - Returns true if the register is a low register (r0-r7).
Representation of each machine instruction.
static bool isT2i32Store(unsigned Opc)
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
static bool isValidLSDoubleOffset(int Offset)
size_type count(const ValueT &V) const
Return 1 if the specified key is in the set, 0 otherwise.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
A set of live physical registers with functions to track liveness when walking backward/forward throu...
std::pair< mmo_iterator, unsigned > mergeMemRefsWith(const MachineInstr &Other)
Return a set of memrefs (begin iterator, size) which conservatively describe the memory behavior of b...
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
static void InsertLDR_STR(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, int Offset, bool isDef, const DebugLoc &DL, unsigned NewOpc, unsigned Reg, bool RegDeadKill, bool RegUndef, unsigned BaseReg, bool BaseKill, bool BaseUndef, bool OffKill, bool OffUndef, ARMCC::CondCodes Pred, unsigned PredReg, const TargetInstrInfo *TII, bool isT2)
void setReg(unsigned Reg)
Change the register this operand corresponds to.
bool isCall(QueryType Type=AnyInBundle) const
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
APFloat abs(APFloat X)
Returns the absolute value of the argument.
iterator find(const KeyT &Val)
static bool isLoadSingle(unsigned Opc)
static AddrOpc getAM5Op(unsigned AM5Opc)
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, ARM_AM::AMSubMode Mode)
unsigned getReg() const
getReg - Returns the register number.
static bool isi32Load(unsigned Opc)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool killsRegister(unsigned Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr kills the specified register.
static unsigned getLSMultipleTransferSize(const MachineInstr *MI)
LLVM Value Representation.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode)
const MachineInstrBuilder & addOperand(const MachineOperand &MO) const
static const MachineOperand & getLoadStoreRegOp(const MachineInstr &MI)
static MachineBasicBlock::iterator findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg, ARMCC::CondCodes Pred, unsigned PredReg, int &Offset)
Searches for an increment or decrement of Reg after MBBI.
StringRef - Represent a constant reference to a string, i.e.
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register. ...
static bool isVolatile(Instruction *Inst)
static bool isT1i32Store(unsigned Opc)
static int getMemoryOpOffset(const MachineInstr &MI)
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
void setMemRefs(mmo_iterator NewMemRefs, mmo_iterator NewMemRefsEnd)
Assign this MachineInstr's memory reference descriptor list.
static bool ContainsReg(const ArrayRef< std::pair< unsigned, bool >> &Regs, unsigned Reg)
static const MachineInstrBuilder & AddDefaultT1CC(const MachineInstrBuilder &MIB, bool isDead=false)
Properties which a MachineFunction may have at a given point in time.
INITIALIZE_PASS(ARMLoadStoreOpt,"arm-ldst-opt", ARM_LOAD_STORE_OPT_NAME, false, false)
static bool definesCPSR(const MachineInstr &MI)
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly. ...