49 #define DEBUG_TYPE "arm-ldst-opt"
51 STATISTIC(NumLDMGened ,
"Number of ldm instructions generated");
52 STATISTIC(NumSTMGened ,
"Number of stm instructions generated");
53 STATISTIC(NumVLDMGened,
"Number of vldm instructions generated");
54 STATISTIC(NumVSTMGened,
"Number of vstm instructions generated");
55 STATISTIC(NumLdStMoved,
"Number of load / store instructions moved");
56 STATISTIC(NumLDRDFormed,
"Number of ldrd created before allocation");
57 STATISTIC(NumSTRDFormed,
"Number of strd created before allocation");
58 STATISTIC(NumLDRD2LDM,
"Number of ldrd instructions turned back into ldm");
59 STATISTIC(NumSTRD2STM,
"Number of strd instructions turned back into stm");
60 STATISTIC(NumLDRD2LDR,
"Number of ldrd instructions turned back into ldr's");
61 STATISTIC(NumSTRD2STR,
"Number of strd instructions turned back into str's");
81 bool RegClassInfoValid;
82 bool isThumb1, isThumb2;
86 const char *getPassName()
const override {
87 return "ARM load / store optimization pass";
93 struct MemOpQueueEntry {
98 : MI(MI), Offset(Offset), Position(Position) {}
104 struct MergeCandidate {
108 unsigned LatestMIIdx;
110 unsigned EarliestMIIdx;
115 bool CanMergeToLSMulti;
117 bool CanMergeToLSDouble;
127 DebugLoc DL,
unsigned Base,
unsigned WordOffset,
137 void FormCandidates(
const MemOpQueue &MemOps);
138 MachineInstr *MergeOpsUpdate(
const MergeCandidate &Cand);
150 for (
const auto &MO : MI->
operands()) {
153 if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
164 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
168 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
169 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
170 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
171 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
175 if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
176 Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
292 case ARM::tLDMIA_UPD:
293 case ARM::tSTMIA_UPD:
294 case ARM::t2LDMIA_RET:
296 case ARM::t2LDMIA_UPD:
298 case ARM::t2STMIA_UPD:
300 case ARM::VLDMSIA_UPD:
302 case ARM::VSTMSIA_UPD:
304 case ARM::VLDMDIA_UPD:
306 case ARM::VSTMDIA_UPD:
320 case ARM::t2LDMDB_UPD:
322 case ARM::t2STMDB_UPD:
323 case ARM::VLDMSDB_UPD:
324 case ARM::VSTMSDB_UPD:
325 case ARM::VLDMDDB_UPD:
326 case ARM::VSTMDDB_UPD:
338 return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
342 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
350 return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
354 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
362 return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
410 case ARM::tLDMIA_UPD:
411 case ARM::tSTMIA_UPD:
433 assert(isThumb1 &&
"Can only update base register uses for Thumb1!");
436 for (; MBBI != MBB.
end(); ++MBBI) {
437 bool InsertSub =
false;
438 unsigned Opc = MBBI->getOpcode();
440 if (MBBI->readsRegister(Base)) {
443 Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
445 Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;
447 if (IsLoad || IsStore) {
453 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
460 if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
465 }
else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
471 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
472 Offset = (Opc == ARM::tSUBi8) ?
473 MO.
getImm() + WordOffset * 4 :
474 MO.
getImm() - WordOffset * 4 ;
475 if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
490 }
else if (
definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
504 if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
515 if (MBBI != MBB.
end()) --MBBI;
517 BuildMI(MBB, MBBI, DL,
TII->get(ARM::tSUBi8), Base),
true)
524 if (!RegClassInfoValid) {
525 RegClassInfo.runOnMachineFunction(*MF);
526 RegClassInfoValid =
true;
529 for (
unsigned Reg : RegClassInfo.getOrder(&RegClass))
530 if (!LiveRegs.contains(
Reg))
541 if (!LiveRegsValid) {
543 LiveRegs.addLiveOuts(&MBB,
true);
544 LiveRegPos = MBB.
end();
545 LiveRegsValid =
true;
548 while (LiveRegPos != Before) {
550 LiveRegs.stepBackward(*LiveRegPos);
556 for (
const std::pair<unsigned, bool> &R : Regs)
569 unsigned NumRegs = Regs.size();
574 bool SafeToClobberCPSR = !isThumb1 ||
578 bool Writeback = isThumb1;
584 assert(Base != ARM::SP &&
"Thumb1 does not allow SP in register list");
585 if (Opcode == ARM::tLDRi) {
587 }
else if (Opcode == ARM::tSTRi) {
595 bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
597 if (Offset == 4 && haveIBAndDA) {
599 }
else if (Offset == -4 * (
int)NumRegs + 4 && haveIBAndDA) {
601 }
else if (Offset == -4 * (
int)NumRegs && isNotVFP && !isThumb1) {
604 }
else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
617 if (!SafeToClobberCPSR)
624 NewBase = Regs[NumRegs-1].first;
627 moveLiveRegsBefore(MBB, InsertBefore);
631 for (
const std::pair<unsigned, bool> &R : Regs)
632 LiveRegs.addReg(R.first);
634 NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
640 isThumb2 ? ARM::t2ADDri :
641 (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
642 (isThumb1 && Offset < 8) ? ARM::tADDi3 :
643 isThumb1 ? ARM::tADDi8 : ARM::ADDri;
648 isThumb2 ? ARM::t2SUBri :
649 (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
650 isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
653 if (!TL->isLegalAddImmediate(Offset))
659 bool KillOldBase = BaseKill &&
668 if (Base != NewBase &&
669 (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
676 BuildMI(MBB, InsertBefore, DL,
TII->get(ARM::tMOVSr), NewBase)
679 BuildMI(MBB, InsertBefore, DL,
TII->get(ARM::tMOVr), NewBase)
681 .addImm(Pred).
addReg(PredReg);
687 if (BaseOpc == ARM::tADDrSPi) {
688 assert(Offset % 4 == 0 &&
"tADDrSPi offset is scaled by 4");
689 BuildMI(MBB, InsertBefore, DL,
TII->get(BaseOpc), NewBase)
694 BuildMI(MBB, InsertBefore, DL,
TII->get(BaseOpc), NewBase),
true)
698 BuildMI(MBB, InsertBefore, DL,
TII->get(BaseOpc), NewBase)
722 if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
728 if (Opcode == ARM::tLDMIA)
730 Opcode = ARM::tLDMIA_UPD;
732 MIB =
BuildMI(MBB, InsertBefore, DL,
TII->get(Opcode));
741 UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);
745 MIB =
BuildMI(MBB, InsertBefore, DL,
TII->get(Opcode));
751 for (
const std::pair<unsigned, bool> &R : Regs)
762 assert((IsLoad ||
isi32Store(Opcode)) &&
"Must have integer load or store");
763 unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;
765 assert(Regs.size() == 2);
767 TII->get(LoadStoreOpcode));
780 MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(
const MergeCandidate &Cand) {
791 bool IsKill = MO.
isKill();
794 Regs.
push_back(std::make_pair(Reg, IsKill));
804 unsigned DefReg = MO.
getReg();
806 if (std::find(ImpDefs.
begin(), ImpDefs.
end(), DefReg) != ImpDefs.
end())
819 iterator InsertBefore = std::next(iterator(LatestMI));
824 unsigned PredReg = 0;
828 if (Cand.CanMergeToLSDouble)
829 Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
830 Opcode, Pred, PredReg, DL, Regs);
831 if (!Merged && Cand.CanMergeToLSMulti)
832 Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
833 Opcode, Pred, PredReg, DL, Regs);
839 iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
840 bool EarliestAtBegin =
false;
841 if (EarliestI == MBB.
begin()) {
842 EarliestAtBegin =
true;
844 EarliestI = std::prev(EarliestI);
853 EarliestI = MBB.
begin();
855 EarliestI = std::next(EarliestI);
856 auto FixupRange =
make_range(EarliestI, iterator(Merged));
862 for (
unsigned &ImpDefReg : ImpDefs) {
875 for (
unsigned ImpDef : ImpDefs)
879 assert(
isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
888 assert(ImpDefs.empty());
898 return (Value % 4) == 0 && Value < 1024;
902 void ARMLoadStoreOpt::FormCandidates(
const MemOpQueue &MemOps) {
909 unsigned EIndex = MemOps.size();
913 int Offset = MemOps[SIndex].Offset;
915 unsigned PReg = PMO.
getReg();
916 unsigned PRegNum = PMO.
isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
917 unsigned Latest = SIndex;
918 unsigned Earliest = SIndex;
920 bool CanMergeToLSDouble =
924 if (STI->isCortexM3() &&
isi32Load(Opcode) &&
926 CanMergeToLSDouble =
false;
928 bool CanMergeToLSMulti =
true;
931 if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1)
932 CanMergeToLSMulti =
false;
936 if (PReg == ARM::SP || PReg == ARM::PC)
937 CanMergeToLSMulti = CanMergeToLSDouble =
false;
940 for (
unsigned I = SIndex+1;
I < EIndex; ++
I, ++Count) {
941 int NewOffset = MemOps[
I].Offset;
942 if (NewOffset != Offset + (
int)Size)
945 unsigned Reg = MO.
getReg();
946 if (Reg == ARM::SP || Reg == ARM::PC)
950 unsigned RegNum = MO.
isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
951 bool PartOfLSMulti = CanMergeToLSMulti;
954 if (RegNum <= PRegNum)
955 PartOfLSMulti =
false;
959 else if (!isNotVFP && RegNum != PRegNum+1)
960 PartOfLSMulti =
false;
963 bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;
965 if (!PartOfLSMulti && !PartOfLSDouble)
967 CanMergeToLSMulti &= PartOfLSMulti;
968 CanMergeToLSDouble &= PartOfLSDouble;
971 unsigned Position = MemOps[
I].Position;
972 if (Position < MemOps[Latest].Position)
974 else if (Position > MemOps[Earliest].Position)
982 MergeCandidate *Candidate =
new(Allocator.Allocate()) MergeCandidate;
983 for (
unsigned C = SIndex, CE = SIndex + Count;
C <
CE; ++
C)
984 Candidate->Instrs.push_back(MemOps[C].MI);
985 Candidate->LatestMIIdx = Latest - SIndex;
986 Candidate->EarliestMIIdx = Earliest - SIndex;
987 Candidate->InsertPos = MemOps[Latest].Position;
989 CanMergeToLSMulti = CanMergeToLSDouble =
false;
990 Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
991 Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
992 Candidates.push_back(Candidate);
995 }
while (SIndex < EIndex);
999 unsigned Bytes,
unsigned Limit,
1001 unsigned MyPredReg = 0;
1005 bool CheckCPSRDef =
false;
1007 default:
return false;
1011 CheckCPSRDef =
true;
1018 if (Bytes == 0 || (Limit && Bytes >= Limit))
1021 unsigned Scale = (MI->
getOpcode() == ARM::tSUBspi ||
1022 MI->
getOpcode() == ARM::tSUBi8) ? 4 : 1;
1027 MyPredReg == PredReg))
1034 unsigned Bytes,
unsigned Limit,
1036 unsigned MyPredReg = 0;
1040 bool CheckCPSRDef =
false;
1042 default:
return false;
1046 CheckCPSRDef =
true;
1052 if (Bytes == 0 || (Limit && Bytes >= Limit))
1056 unsigned Scale = (MI->
getOpcode() == ARM::tADDspi ||
1057 MI->
getOpcode() == ARM::tADDi8) ? 4 : 1;
1062 MyPredReg == PredReg))
1147 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(
MachineInstr *MI) {
1149 if (isThumb1)
return false;
1152 unsigned Base = BaseOP.
getReg();
1153 bool BaseKill = BaseOP.
isKill();
1155 unsigned PredReg = 0;
1166 bool DoMerge =
false;
1173 if (MBBI != BeginMBBI) {
1175 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
1187 MBB.
erase(PrevMBBI);
1192 if (!DoMerge && MBBI != EndMBBI) {
1194 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
1204 MBB.
erase(NextMBBI);
1214 .addImm(Pred).
addReg(PredReg);
1217 for (
unsigned OpNum = 3, e = MI->
getNumOperands(); OpNum != e; ++OpNum)
1231 return ARM::LDR_PRE_IMM;
1233 return ARM::STR_PRE_IMM;
1235 return Mode ==
ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1237 return Mode ==
ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1239 return Mode ==
ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1241 return Mode ==
ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1244 return ARM::t2LDR_PRE;
1247 return ARM::t2STR_PRE;
1256 return ARM::LDR_POST_IMM;
1258 return ARM::STR_POST_IMM;
1260 return Mode ==
ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1262 return Mode ==
ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1264 return Mode ==
ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1266 return Mode ==
ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1269 return ARM::t2LDR_POST;
1272 return ARM::t2STR_POST;
1279 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(
MachineInstr *MI) {
1282 if (isThumb1)
return false;
1289 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
1290 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
1291 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
1304 unsigned PredReg = 0;
1306 bool DoMerge =
false;
1308 unsigned NewOpc = 0;
1310 unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
1316 if (MBBI != BeginMBBI) {
1318 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
1323 }
else if (!isAM5 &&
1329 MBB.
erase(PrevMBBI);
1335 if (!DoMerge && MBBI != EndMBBI) {
1337 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
1348 MBB.
erase(NextMBBI);
1364 .addImm(Pred).
addReg(PredReg)
1370 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
1371 int Offset = AddSub ==
ARM_AM::sub ? -Bytes : Bytes;
1382 int Offset = AddSub ==
ARM_AM::sub ? -Bytes : Bytes;
1393 if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
1396 BuildMI(MBB, MBBI, DL,
TII->get(NewOpc), Base)
1400 int Offset = AddSub ==
ARM_AM::sub ? -Bytes : Bytes;
1402 BuildMI(MBB, MBBI, DL,
TII->get(NewOpc), Base)
1469 int Offset,
bool isDef,
1471 unsigned Reg,
bool RegDeadKill,
bool RegUndef,
1472 unsigned BaseReg,
bool BaseKill,
bool BaseUndef,
1473 bool OffKill,
bool OffUndef,
1495 if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
1499 unsigned BaseReg = BaseOp.
getReg();
1502 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg,
false);
1503 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg,
false);
1507 bool Errata602117 = EvenReg == BaseReg &&
1508 (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
1510 bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
1511 (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);
1513 if (!Errata602117 && !NonConsecutiveRegs)
1516 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1517 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
1518 bool EvenDeadKill = isLd ?
1521 bool OddDeadKill = isLd ?
1524 bool BaseKill = BaseOp.
isKill();
1525 bool BaseUndef = BaseOp.
isUndef();
1529 unsigned PredReg = 0;
1532 if (OddRegNum > EvenRegNum && OffImm == 0) {
1535 unsigned NewOpc = (isLd)
1536 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1537 : (isT2 ? ARM::t2STMIA : ARM::STMIA);
1539 BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
TII->get(NewOpc))
1541 .addImm(Pred).
addReg(PredReg)
1546 BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
TII->get(NewOpc))
1548 .addImm(Pred).
addReg(PredReg)
1557 unsigned NewOpc = (isLd)
1558 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1559 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1562 unsigned NewOpc2 = (isLd)
1563 ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1564 : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1569 (BaseKill || OffKill) &&
1570 (TRI->regsOverlap(EvenReg, BaseReg))) {
1571 assert(!TRI->regsOverlap(OddReg, BaseReg));
1573 OddReg, OddDeadKill,
false,
1574 BaseReg,
false, BaseUndef,
false, OffUndef,
1575 Pred, PredReg,
TII, isT2);
1577 EvenReg, EvenDeadKill,
false,
1578 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1579 Pred, PredReg,
TII, isT2);
1581 if (OddReg == EvenReg && EvenDeadKill) {
1585 EvenDeadKill =
false;
1589 if (EvenReg == BaseReg)
1590 EvenDeadKill =
false;
1592 EvenReg, EvenDeadKill, EvenUndef,
1593 BaseReg,
false, BaseUndef,
false, OffUndef,
1594 Pred, PredReg,
TII, isT2);
1596 OddReg, OddDeadKill, OddUndef,
1597 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1598 Pred, PredReg,
TII, isT2);
1606 MBBI = MBB.
erase(MBBI);
1614 unsigned CurrBase = 0;
1615 unsigned CurrOpc = ~0u;
1617 unsigned Position = 0;
1618 assert(Candidates.size() == 0);
1619 LiveRegsValid =
false;
1624 MBBI = std::prev(
I);
1625 if (FixInvalidRegPairOp(MBB, MBBI))
1630 unsigned Opcode = MBBI->getOpcode();
1632 unsigned Reg = MO.
getReg();
1634 unsigned PredReg = 0;
1637 if (CurrBase == 0) {
1642 MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
1646 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1654 bool Overlap =
false;
1656 Overlap = (Base ==
Reg);
1658 for (
const MemOpQueueEntry &E : MemOps) {
1659 if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
1669 if (Offset > MemOps.back().Offset) {
1670 MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
1673 MemOpQueue::iterator
MI, ME;
1674 for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++
MI) {
1675 if (Offset < MI->Offset) {
1679 if (Offset == MI->Offset) {
1685 if (MI != MemOps.end()) {
1686 MemOps.insert(MI, MemOpQueueEntry(MBBI, Offset, Position));
1697 }
else if (MBBI->isDebugValue())
1701 if (MemOps.size() > 0) {
1702 FormCandidates(MemOps);
1710 if (MemOps.size() > 0)
1711 FormCandidates(MemOps);
1715 auto LessThan = [](
const MergeCandidate* M0,
const MergeCandidate *M1) {
1716 return M0->InsertPos < M1->InsertPos;
1718 std::sort(Candidates.begin(), Candidates.end(), LessThan);
1721 bool Changed =
false;
1722 for (
const MergeCandidate *Candidate : Candidates) {
1723 if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
1729 if (Opcode != ARM::t2STRDi8 && Opcode != ARM::t2LDRDi8)
1730 MergeBaseUpdateLSMultiple(Merged);
1733 if (MergeBaseUpdateLoadStore(MI))
1738 assert(Candidate->Instrs.size() == 1);
1739 if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
1760 if (isThumb1)
return false;
1761 if (MBB.
empty())
return false;
1764 if (MBBI != MBB.
begin() &&
1765 (MBBI->getOpcode() == ARM::BX_RET ||
1766 MBBI->getOpcode() == ARM::tBX_RET ||
1767 MBBI->getOpcode() == ARM::MOVPCLR)) {
1770 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
1771 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
1772 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
1774 if (MO.
getReg() != ARM::LR)
1776 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
1777 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
1778 Opcode == ARM::LDMIA_UPD) &&
"Unsupported multiple load-return!");
1794 TII = STI->getInstrInfo();
1797 RegClassInfoValid =
false;
1798 isThumb2 = AFI->isThumb2Function();
1799 isThumb1 = AFI->isThumbFunction() && !isThumb2;
1801 bool Modified =
false;
1805 Modified |= LoadStoreMultipleOpti(MBB);
1806 if (STI->hasV5TOps())
1807 Modified |= MergeReturnIntoLDM(MBB);
1810 Allocator.DestroyAll();
1830 const char *getPassName()
const override {
1831 return "ARM pre- register allocation load / store optimization pass";
1836 unsigned &NewOpc,
unsigned &EvenReg,
1837 unsigned &OddReg,
unsigned &BaseReg,
1843 unsigned Base,
bool isLd,
1850 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(
MachineFunction &Fn) {
1853 TII = STI->getInstrInfo();
1854 TRI = STI->getRegisterInfo();
1858 bool Modified =
false;
1861 Modified |= RescheduleLoadStoreInstrs(MFI);
1877 if (I->isDebugValue() || MemOps.
count(&*I))
1879 if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
1881 if (isLd && I->mayStore())
1893 for (
unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1897 unsigned Reg = MO.
getReg();
1900 if (Reg != Base && !MemRegs.
count(Reg))
1901 AddedRegPressure.
insert(Reg);
1906 if (MemRegs.
size() <= 4)
1909 return AddedRegPressure.
size() <= MemRegs.
size() * 2;
1933 unsigned &SecondReg,
1934 unsigned &BaseReg,
int &Offset,
1939 if (!STI->hasV5TEOps())
1945 if (Opcode == ARM::LDRi12) {
1947 }
else if (Opcode == ARM::STRi12) {
1949 }
else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1950 NewOpc = ARM::t2LDRDi8;
1953 }
else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
1954 NewOpc = ARM::t2STRDi8;
1970 unsigned ReqAlign = STI->hasV6Ops()
1973 if (Align < ReqAlign)
1979 int Limit = (1 << 8) * Scale;
1980 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
1989 int Limit = (1 << 8) * Scale;
1990 if (OffImm >= Limit || (OffImm & (Scale-1)))
1996 if (FirstReg == SecondReg)
2006 unsigned Base,
bool isLd,
2008 bool RetVal =
false;
2015 assert(LHS == RHS || LOffset != ROffset);
2016 return LOffset > ROffset;
2023 while (Ops.
size() > 1) {
2024 unsigned FirstLoc = ~0U;
2025 unsigned LastLoc = 0;
2029 unsigned LastOpcode = 0;
2030 unsigned LastBytes = 0;
2031 unsigned NumMove = 0;
2032 for (
int i = Ops.
size() - 1; i >= 0; --i) {
2034 unsigned Loc = MI2LocMap[Op];
2035 if (Loc <= FirstLoc) {
2039 if (Loc >= LastLoc) {
2046 if (LastOpcode && LSMOpcode != LastOpcode)
2052 if (Bytes != LastBytes || Offset != (LastOffset + (
int)Bytes))
2055 LastOffset = Offset;
2057 LastOpcode = LSMOpcode;
2067 for (
int i = NumMove-1; i >= 0; --i) {
2074 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4;
2077 MemOps, MemRegs, TRI);
2079 for (
unsigned i = 0; i != NumMove; ++i)
2084 while (InsertPos != MBB->
end()
2085 && (MemOps.
count(InsertPos) || InsertPos->isDebugValue()))
2092 unsigned FirstReg = 0, SecondReg = 0;
2093 unsigned BaseReg = 0, PredReg = 0;
2096 unsigned NewOpc = 0;
2099 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
2100 FirstReg, SecondReg, BaseReg,
2101 Offset, PredReg, Pred, isT2)) {
2107 MRI->constrainRegClass(FirstReg, TRC);
2108 MRI->constrainRegClass(SecondReg, TRC);
2123 DEBUG(
dbgs() <<
"Formed " << *MIB <<
"\n");
2137 DEBUG(
dbgs() <<
"Formed " << *MIB <<
"\n");
2149 for (
unsigned i = 0; i != NumMove; ++i) {
2152 MBB->
splice(InsertPos, MBB, Op);
2156 NumLdStMoved += NumMove;
2167 bool RetVal =
false;
2179 for (; MBBI != E; ++MBBI) {
2188 MI2LocMap[MI] = ++Loc;
2192 unsigned PredReg = 0;
2201 bool StopHere =
false;
2204 Base2LdsMap.
find(Base);
2205 if (BI != Base2LdsMap.
end()) {
2206 for (
unsigned i = 0, e = BI->second.
size(); i != e; ++i) {
2213 BI->second.push_back(MI);
2215 Base2LdsMap[Base].push_back(MI);
2220 Base2StsMap.
find(Base);
2221 if (BI != Base2StsMap.
end()) {
2222 for (
unsigned i = 0, e = BI->second.
size(); i != e; ++i) {
2229 BI->second.push_back(MI);
2231 Base2StsMap[Base].push_back(MI);
2245 for (
unsigned i = 0, e = LdBases.
size(); i != e; ++i) {
2246 unsigned Base = LdBases[i];
2249 RetVal |= RescheduleOps(MBB, Lds, Base,
true, MI2LocMap);
2253 for (
unsigned i = 0, e = StBases.
size(); i != e; ++i) {
2254 unsigned Base = StBases[i];
2257 RetVal |= RescheduleOps(MBB, Sts, Base,
false, MI2LocMap);
2261 Base2LdsMap.
clear();
2262 Base2StsMap.
clear();
2275 return new ARMPreAllocLoadStoreOpt();
2276 return new ARMLoadStoreOpt();
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc, ARM_AM::AddrOpc Mode)
unsigned succ_size() const
void push_back(const T &Elt)
const MachineFunction * getParent() const
getParent - Return the MachineFunction containing this basic block.
A parsed version of the target data layout string in and methods for querying it. ...
static bool isT1i32Load(unsigned Opc)
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
static unsigned char getAM3Offset(unsigned AM3Opc)
STATISTIC(NumFunctions,"Total number of functions")
iterator_range< mop_iterator > uses()
DenseSet - This implements a dense probed hash-table based set.
static unsigned getImmScale(unsigned Opc)
Describe properties that are true of each instruction in the target description file.
void setIsUndef(bool Val=true)
const ARMTargetLowering * getTargetLowering() const override
size_type count(PtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
static bool isMatchingIncrement(MachineInstr *MI, unsigned Base, unsigned Bytes, unsigned Limit, ARMCC::CondCodes Pred, unsigned PredReg)
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
static IntegerType * getInt64Ty(LLVMContext &C)
iterator_range< mop_iterator > operands()
bool isTerminator(QueryType Type=AnyInBundle) const
Returns true if this instruction part of the terminator for a basic block.
This file defines the MallocAllocator and BumpPtrAllocator interfaces.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
MachineMemOperand - A description of a memory reference used in the backend.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
bool isReg() const
isReg - Tests if this is a MO_Register operand.
static int getMemoryOpOffset(const MachineInstr *MI)
Reg
All possible values of the reg field in the ModR/M byte.
Windows NT (Windows on ARM)
const MachineInstrBuilder & addImm(int64_t Val) const
addImm - Add a new immediate operand.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
unsigned getNumOperands() const
Access to explicit operands of the instruction.
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, MachineBasicBlock::iterator I, MachineBasicBlock::iterator E, SmallPtrSetImpl< MachineInstr * > &MemOps, SmallSet< unsigned, 4 > &MemRegs, const TargetRegisterInfo *TRI)
const HexagonRegisterInfo & getRegisterInfo() const
getRegisterInfo - TargetInstrInfo is a superset of MRegister info.
static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
uint64_t getAlignment() const
getAlignment - Return the minimum known alignment in bytes of the actual memory reference.
iterator getLastNonDebugInstr()
getLastNonDebugInstr - returns an iterator to the last non-debug instruction in the basic block...
unsigned getUndefRegState(bool B)
LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, unsigned Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been <def>ined and not <kill>ed as of just before Before...
unsigned getKillRegState(bool B)
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc, ARM_AM::AddrOpc Mode)
const MachineBasicBlock * getParent() const
TargetInstrInfo - Interface to description of machine instruction set.
bool isDebugValue() const
unsigned getDeadRegState(bool B)
mmo_iterator memoperands_end() const
unsigned getDefRegState(bool B)
bundle_iterator< MachineInstr, instr_iterator > iterator
bool regsOverlap(unsigned regA, unsigned regB) const
regsOverlap - Returns true if the two registers are equal or alias each other.
static unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO, unsigned IdxMode=0)
size_type count(const ValueT &V) const
Return 1 if the specified key is in the set, 0 otherwise.
ARMCC::CondCodes getInstrPredicate(const MachineInstr *MI, unsigned &PredReg)
getInstrPredicate - If instruction is predicated, returns its predicate condition, otherwise returns AL.
static unsigned char getAM5Offset(unsigned AM5Opc)
static bool isT2i32Load(unsigned Opc)
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
const MachineOperand & getOperand(unsigned i) const
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Register is known to be dead.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
void setImm(int64_t immVal)
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
FunctionPass class - This class is used to implement most global optimizations.
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
MachineInstrBuilder BuildMI(MachineFunction &MF, DebugLoc DL, const MCInstrDesc &MCID)
BuildMI - Builder interface.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
FunctionPass * createARMLoadStoreOptimizationPass(bool PreAlloc=false)
Returns an instance of the load / store optimization pass.
void setIsKill(bool Val=true)
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
static bool isi32Store(unsigned Opc)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool readsRegister(unsigned Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr reads the specified register.
bool memoperands_empty() const
std::pair< iterator, bool > insert(const ValueT &V)
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
void setDesc(const MCInstrDesc &tid)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one...
const DataLayout * getDataLayout() const
Deprecated in 3.7, will be removed in 3.8.
MachineOperand class - Representation of each machine instruction operand.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
A BumpPtrAllocator that allows only elements of a specific type to be allocated.
static cl::opt< AlignMode > Align(cl::desc("Load/store alignment support"), cl::Hidden, cl::init(NoStrictAlign), cl::values(clEnumValN(StrictAlign,"aarch64-strict-align","Disallow all unaligned memory accesses"), clEnumValN(NoStrictAlign,"aarch64-no-strict-align","Allow unaligned memory accesses"), clEnumValEnd))
static bool isMemoryOp(const MachineInstr *MI)
Returns true if instruction is a memory operation that this pass is capable of operating on...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
static unsigned getReg(const void *D, unsigned RC, unsigned RegNo)
static AddrOpc getAM3Op(unsigned AM3Opc)
static unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset, unsigned IdxMode=0)
getAM3Opc - This function encodes the addrmode3 opc field.
iterator_range< mop_iterator > implicit_operands()
static const MachineOperand & getLoadStoreBaseOp(const MachineInstr &MI)
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
static bool isARMLowRegister(unsigned Reg)
isARMLowRegister - Returns true if the register is a low register (r0-r7).
Representation of each machine instruction.
static bool isT2i32Store(unsigned Opc)
bundle_iterator< const MachineInstr, const_instr_iterator > const_iterator
void copyImplicitOps(MachineFunction &MF, const MachineInstr *MI)
Copy implicit register operands from specified instruction to this instruction.
static bool definesCPSR(const MachineInstr *MI)
static bool isValidLSDoubleOffset(int Offset)
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
A set of live physical registers with functions to track liveness when walking backward/forward throu...
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
void setReg(unsigned Reg)
Change the register this operand corresponds to.
bool isCall(QueryType Type=AnyInBundle) const
APFloat abs(APFloat X)
Returns the absolute value of the argument.
iterator find(const KeyT &Val)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
static bool isLoadSingle(unsigned Opc)
static AddrOpc getAM5Op(unsigned AM5Opc)
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, ARM_AM::AMSubMode Mode)
unsigned getReg() const
getReg - Returns the register number.
static bool isi32Load(unsigned Opc)
bool killsRegister(unsigned Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr kills the specified register.
static unsigned getLSMultipleTransferSize(const MachineInstr *MI)
static bool isMatchingDecrement(MachineInstr *MI, unsigned Base, unsigned Bytes, unsigned Limit, ARMCC::CondCodes Pred, unsigned PredReg)
LLVM Value Representation.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0, MachineInstr *Op1)
Copy Op0 and Op1 operands into a new array assigned to MI.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode)
const MachineInstrBuilder & addOperand(const MachineOperand &MO) const
BasicBlockListType::iterator iterator
static const MachineOperand & getLoadStoreRegOp(const MachineInstr &MI)
C - The default llvm calling convention, compatible with C.
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register. ...
static bool isVolatile(Instruction *Inst)
static bool isT1i32Store(unsigned Opc)
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
addReg - Add a new virtual register operand...
MachineInstr::mmo_iterator allocateMemRefsArray(unsigned long Num)
allocateMemRefsArray - Allocate an array to hold MachineMemOperand pointers.
void setMemRefs(mmo_iterator NewMemRefs, mmo_iterator NewMemRefsEnd)
Assign this MachineInstr's memory reference descriptor list.
static bool ContainsReg(const ArrayRef< std::pair< unsigned, bool >> &Regs, unsigned Reg)
static const MachineInstrBuilder & AddDefaultT1CC(const MachineInstrBuilder &MIB, bool isDead=false)
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
static void InsertLDR_STR(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, int Offset, bool isDef, DebugLoc DL, unsigned NewOpc, unsigned Reg, bool RegDeadKill, bool RegUndef, unsigned BaseReg, bool BaseKill, bool BaseUndef, bool OffKill, bool OffUndef, ARMCC::CondCodes Pred, unsigned PredReg, const TargetInstrInfo *TII, bool isT2)
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly. ...