72#define DEBUG_TYPE "arm-ldst-opt"
74STATISTIC(NumLDMGened ,
"Number of ldm instructions generated");
75STATISTIC(NumSTMGened ,
"Number of stm instructions generated");
76STATISTIC(NumVLDMGened,
"Number of vldm instructions generated");
77STATISTIC(NumVSTMGened,
"Number of vstm instructions generated");
78STATISTIC(NumLdStMoved,
"Number of load / store instructions moved");
79STATISTIC(NumLDRDFormed,
"Number of ldrd created before allocation");
80STATISTIC(NumSTRDFormed,
"Number of strd created before allocation");
81STATISTIC(NumLDRD2LDM,
"Number of ldrd instructions turned back into ldm");
82STATISTIC(NumSTRD2STM,
"Number of strd instructions turned back into stm");
83STATISTIC(NumLDRD2LDR,
"Number of ldrd instructions turned back into ldr's");
84STATISTIC(NumSTRD2STR,
"Number of strd instructions turned back into str's");
95#define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"
101struct ARMLoadStoreOpt {
112 bool RegClassInfoValid;
113 bool isThumb1, isThumb2;
120 struct MemOpQueueEntry {
132 struct MergeCandidate {
137 unsigned LatestMIIdx;
140 unsigned EarliestMIIdx;
147 bool CanMergeToLSMulti;
150 bool CanMergeToLSDouble;
161 unsigned Base,
unsigned WordOffset,
168 ArrayRef<std::pair<unsigned, bool>> Regs,
175 ArrayRef<std::pair<unsigned, bool>> Regs,
177 void FormCandidates(
const MemOpQueue &MemOps);
178 MachineInstr *MergeOpsUpdate(
const MergeCandidate &Cand);
203char ARMLoadStoreOptLegacy::ID = 0;
211 for (
const auto &MO :
MI.operands()) {
214 if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
224 unsigned Opcode =
MI.getOpcode();
225 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
226 unsigned NumOperands =
MI.getDesc().getNumOperands();
227 unsigned OffField =
MI.getOperand(NumOperands - 3).getImm();
229 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
230 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
231 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
232 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
236 if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
237 Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
252 return MI.getOperand(1);
256 return MI.getOperand(0);
353 case ARM::tLDMIA_UPD:
354 case ARM::tSTMIA_UPD:
355 case ARM::t2LDMIA_RET:
357 case ARM::t2LDMIA_UPD:
359 case ARM::t2STMIA_UPD:
361 case ARM::VLDMSIA_UPD:
363 case ARM::VSTMSIA_UPD:
365 case ARM::VLDMDIA_UPD:
367 case ARM::VSTMDIA_UPD:
381 case ARM::t2LDMDB_UPD:
383 case ARM::t2STMDB_UPD:
384 case ARM::VLDMSDB_UPD:
385 case ARM::VSTMSDB_UPD:
386 case ARM::VLDMDDB_UPD:
387 case ARM::VSTMDDB_UPD:
399 return Opc == ARM::tLDRi ||
Opc == ARM::tLDRspi;
403 return Opc == ARM::t2LDRi12 ||
Opc == ARM::t2LDRi8;
411 return Opc == ARM::tSTRi ||
Opc == ARM::tSTRspi;
415 return Opc == ARM::t2STRi12 ||
Opc == ARM::t2STRi8;
444 switch (
MI->getOpcode()) {
471 case ARM::tLDMIA_UPD:
472 case ARM::tSTMIA_UPD:
479 return (
MI->getNumOperands() -
MI->getDesc().getNumOperands() + 1) * 4;
482 return (
MI->getNumOperands() -
MI->getDesc().getNumOperands() + 1) * 8;
494 assert(isThumb1 &&
"Can only update base register uses for Thumb1!");
498 bool InsertSub =
false;
499 unsigned Opc =
MBBI->getOpcode();
501 if (
MBBI->readsRegister(
Base,
nullptr)) {
504 Opc == ARM::tLDRi ||
Opc == ARM::tLDRHi ||
Opc == ARM::tLDRBi;
506 Opc == ARM::tSTRi ||
Opc == ARM::tSTRHi ||
Opc == ARM::tSTRBi;
508 if (IsLoad || IsStore) {
514 MBBI->getOperand(
MBBI->getDesc().getNumOperands() - 3);
521 if (
Offset >= 0 && !(IsStore && InstrSrcReg ==
Base))
525 }
else if ((
Opc == ARM::tSUBi8 ||
Opc == ARM::tADDi8) &&
526 !definesCPSR(*
MBBI)) {
531 MBBI->getOperand(
MBBI->getDesc().getNumOperands() - 3);
533 MO.
getImm() + WordOffset * 4 :
534 MO.
getImm() - WordOffset * 4 ;
548 }
else if (definesCPSR(*
MBBI) ||
MBBI->isCall() ||
MBBI->isBranch()) {
566 if (
MBBI->killsRegister(
Base,
nullptr) ||
567 MBBI->definesRegister(
Base,
nullptr))
589unsigned ARMLoadStoreOpt::findFreeReg(
const TargetRegisterClass &RegClass) {
590 if (!RegClassInfoValid) {
592 RegClassInfoValid =
true;
595 for (
unsigned Reg : RegClassInfo.
getOrder(&RegClass))
604void ARMLoadStoreOpt::moveLiveRegsBefore(
const MachineBasicBlock &
MBB,
607 if (!LiveRegsValid) {
611 LiveRegsValid =
true;
614 while (LiveRegPos != Before) {
616 if (!LiveRegPos->isDebugInstr())
623 for (
const std::pair<unsigned, bool> &R : Regs)
632MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(
634 int Offset,
unsigned Base,
bool BaseKill,
unsigned Opcode,
636 ArrayRef<std::pair<unsigned, bool>> Regs,
638 unsigned NumRegs = Regs.size();
643 bool SafeToClobberCPSR = !isThumb1 ||
647 bool Writeback = isThumb1;
653 assert(
Base != ARM::SP &&
"Thumb1 does not allow SP in register list");
654 if (Opcode == ARM::tLDRi)
656 else if (Opcode == ARM::tSTRi)
663 bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
665 if (
Offset == 4 && haveIBAndDA) {
667 }
else if (
Offset == -4 * (
int)NumRegs + 4 && haveIBAndDA) {
669 }
else if (
Offset == -4 * (
int)NumRegs && isNotVFP && !isThumb1) {
672 }
else if (
Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
685 if (!SafeToClobberCPSR)
692 NewBase = Regs[NumRegs-1].first;
696 moveLiveRegsBefore(
MBB, InsertBefore);
700 for (
const std::pair<unsigned, bool> &R : Regs)
703 NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
708 int BaseOpc = isThumb2 ? (BaseKill &&
Base == ARM::SP ? ARM::t2ADDspImm
712 : (isThumb1 &&
Offset < 8)
714 : isThumb1 ?
ARM::tADDi8 :
ARM::ADDri;
720 BaseOpc = isThumb2 ? (BaseKill &&
Base == ARM::SP ? ARM::t2SUBspImm
724 : isThumb1 ?
ARM::tSUBi8 :
ARM::SUBri;
733 bool KillOldBase = BaseKill &&
742 if (
Base != NewBase &&
743 (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
761 if (BaseOpc == ARM::tADDrSPi) {
762 assert(
Offset % 4 == 0 &&
"tADDrSPi offset is scaled by 4");
800 if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
803 MachineInstrBuilder MIB;
806 assert(isThumb1 &&
"expected Writeback only inThumb1");
807 if (Opcode == ARM::tLDMIA) {
810 Opcode = ARM::tLDMIA_UPD;
822 UpdateBaseRegUses(
MBB, InsertBefore,
DL,
Base, NumRegs, Pred, PredReg);
831 for (
const std::pair<unsigned, bool> &R : Regs)
839MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(
841 int Offset,
unsigned Base,
bool BaseKill,
unsigned Opcode,
843 ArrayRef<std::pair<unsigned, bool>> Regs,
846 assert((IsLoad ||
isi32Store(Opcode)) &&
"Must have integer load or store");
847 unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;
850 MachineInstrBuilder MIB =
BuildMI(
MBB, InsertBefore,
DL,
851 TII->get(LoadStoreOpcode));
853 MIB.
addReg(Regs[0].first, RegState::Define)
854 .
addReg(Regs[1].first, RegState::Define);
865MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(
const MergeCandidate &Cand) {
866 const MachineInstr *
First = Cand.Instrs.front();
867 unsigned Opcode =
First->getOpcode();
870 SmallVector<unsigned, 4> ImpDefs;
871 DenseSet<unsigned> KilledRegs;
872 DenseSet<unsigned> UsedRegs;
874 for (
const MachineInstr *
MI : Cand.Instrs) {
877 bool IsKill = MO.
isKill();
887 for (
const MachineOperand &MO :
MI->implicit_operands()) {
896 if (
MI->readsRegister(DefReg,
nullptr))
906 MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
915 MachineInstr *Merged =
nullptr;
916 if (Cand.CanMergeToLSDouble)
917 Merged = CreateLoadStoreDouble(
MBB, InsertBefore,
Offset,
Base, BaseKill,
918 Opcode, Pred, PredReg,
DL, Regs,
920 if (!Merged && Cand.CanMergeToLSMulti)
921 Merged = CreateLoadStoreMulti(
MBB, InsertBefore,
Offset,
Base, BaseKill,
922 Opcode, Pred, PredReg,
DL, Regs, Cand.Instrs);
928 iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
929 bool EarliestAtBegin =
false;
931 EarliestAtBegin =
true;
933 EarliestI = std::prev(EarliestI);
937 for (MachineInstr *
MI : Cand.Instrs)
944 EarliestI = std::next(EarliestI);
950 for (MachineInstr &
MI : FixupRange) {
951 for (
unsigned &ImpDefReg : ImpDefs) {
952 for (MachineOperand &MO :
MI.implicit_operands()) {
964 for (
unsigned ImpDef : ImpDefs)
965 MIB.
addReg(ImpDef, RegState::ImplicitDefine);
969 for (MachineInstr &
MI : FixupRange) {
970 for (MachineOperand &MO :
MI.uses()) {
996 unsigned Opcode =
MI.getOpcode();
1009void ARMLoadStoreOpt::FormCandidates(
const MemOpQueue &MemOps) {
1010 const MachineInstr *FirstMI = MemOps[0].MI;
1015 unsigned SIndex = 0;
1016 unsigned EIndex = MemOps.size();
1019 const MachineInstr *
MI = MemOps[SIndex].MI;
1020 int Offset = MemOps[SIndex].Offset;
1023 unsigned PRegNum = PMO.
isUndef() ? std::numeric_limits<unsigned>::max()
1024 :
TRI->getEncodingValue(PReg);
1025 unsigned Latest = SIndex;
1026 unsigned Earliest = SIndex;
1028 bool CanMergeToLSDouble =
1034 CanMergeToLSDouble =
false;
1036 bool CanMergeToLSMulti =
true;
1039 if (STI->hasSlowOddRegister() && !isNotVFP && (PRegNum % 2) == 1)
1040 CanMergeToLSMulti =
false;
1044 if (PReg == ARM::SP || PReg == ARM::PC)
1045 CanMergeToLSMulti = CanMergeToLSDouble =
false;
1049 CanMergeToLSMulti = CanMergeToLSDouble =
false;
1064 for (
unsigned I = SIndex+1;
I < EIndex; ++
I, ++
Count) {
1065 int NewOffset = MemOps[
I].Offset;
1070 if (
Reg == ARM::SP ||
Reg == ARM::PC)
1076 unsigned RegNum = MO.
isUndef() ? std::numeric_limits<unsigned>::max()
1077 :
TRI->getEncodingValue(
Reg);
1078 bool PartOfLSMulti = CanMergeToLSMulti;
1079 if (PartOfLSMulti) {
1081 if (RegNum <= PRegNum)
1082 PartOfLSMulti =
false;
1086 else if (!isNotVFP && RegNum != PRegNum+1)
1087 PartOfLSMulti =
false;
1090 bool PartOfLSDouble = CanMergeToLSDouble &&
Count <= 1;
1092 if (!PartOfLSMulti && !PartOfLSDouble)
1094 CanMergeToLSMulti &= PartOfLSMulti;
1095 CanMergeToLSDouble &= PartOfLSDouble;
1098 unsigned Position = MemOps[
I].Position;
1099 if (Position < MemOps[Latest].Position)
1101 else if (Position > MemOps[Earliest].Position)
1109 MergeCandidate *Candidate =
new(
Allocator.Allocate()) MergeCandidate;
1110 for (
unsigned C = SIndex, CE = SIndex +
Count;
C <
CE; ++
C)
1111 Candidate->Instrs.push_back(MemOps[
C].
MI);
1112 Candidate->LatestMIIdx = Latest - SIndex;
1113 Candidate->EarliestMIIdx = Earliest - SIndex;
1114 Candidate->InsertPos = MemOps[Latest].Position;
1116 CanMergeToLSMulti = CanMergeToLSDouble =
false;
1117 Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
1118 Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
1119 Candidates.push_back(Candidate);
1122 }
while (SIndex < EIndex);
1199 switch (
MI.getOpcode()) {
1200 case ARM::tADDi8: Scale = 4; CheckCPSRDef =
true;
break;
1201 case ARM::tSUBi8: Scale = -4; CheckCPSRDef =
true;
break;
1203 case ARM::t2SUBspImm:
1204 case ARM::SUBri: Scale = -1; CheckCPSRDef =
true;
break;
1206 case ARM::t2ADDspImm:
1207 case ARM::ADDri: Scale = 1; CheckCPSRDef =
true;
break;
1208 case ARM::tADDspi: Scale = 4; CheckCPSRDef =
false;
break;
1209 case ARM::tSUBspi: Scale = -4; CheckCPSRDef =
false;
break;
1214 if (
MI.getOperand(0).getReg() !=
Reg ||
1215 MI.getOperand(1).getReg() !=
Reg ||
1217 MIPredReg != PredReg)
1220 if (CheckCPSRDef && definesCPSR(
MI))
1222 return MI.getOperand(2).getImm() * Scale;
1233 if (
MBBI == BeginMBBI)
1238 while (PrevMBBI->isDebugInstr() && PrevMBBI != BeginMBBI)
1242 return Offset == 0 ? EndMBBI : PrevMBBI;
1254 while (NextMBBI != EndMBBI) {
1256 while (NextMBBI != EndMBBI && NextMBBI->isDebugInstr())
1258 if (NextMBBI == EndMBBI)
1272 if (
Reg == ARM::SP || NextMBBI->readsRegister(
Reg,
TRI) ||
1273 NextMBBI->definesRegister(
Reg,
TRI))
1293bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *
MI) {
1295 if (isThumb1)
return false;
1298 const MachineOperand &BaseOP =
MI->getOperand(0);
1300 bool BaseKill = BaseOP.
isKill();
1303 unsigned Opcode =
MI->getOpcode();
1313 MachineBasicBlock &
MBB = *
MI->getParent();
1335 bool HighRegsUsed =
false;
1337 if (MO.
getReg() >= ARM::R8) {
1338 HighRegsUsed =
true;
1348 if (MergeInstr !=
MBB.
end()) {
1375 return ARM::LDR_PRE_IMM;
1377 return ARM::STR_PRE_IMM;
1388 return ARM::t2LDR_PRE;
1391 return ARM::t2STR_PRE;
1400 return ARM::LDR_POST_IMM;
1402 return ARM::STR_POST_IMM;
1413 return ARM::t2LDR_POST;
1415 case ARM::t2LDRBi12:
1416 return ARM::t2LDRB_POST;
1417 case ARM::t2LDRSBi8:
1418 case ARM::t2LDRSBi12:
1419 return ARM::t2LDRSB_POST;
1421 case ARM::t2LDRHi12:
1422 return ARM::t2LDRH_POST;
1423 case ARM::t2LDRSHi8:
1424 case ARM::t2LDRSHi12:
1425 return ARM::t2LDRSH_POST;
1428 return ARM::t2STR_POST;
1430 case ARM::t2STRBi12:
1431 return ARM::t2STRB_POST;
1433 case ARM::t2STRHi12:
1434 return ARM::t2STRH_POST;
1436 case ARM::MVE_VLDRBS16:
1437 return ARM::MVE_VLDRBS16_post;
1438 case ARM::MVE_VLDRBS32:
1439 return ARM::MVE_VLDRBS32_post;
1440 case ARM::MVE_VLDRBU16:
1441 return ARM::MVE_VLDRBU16_post;
1442 case ARM::MVE_VLDRBU32:
1443 return ARM::MVE_VLDRBU32_post;
1444 case ARM::MVE_VLDRHS32:
1445 return ARM::MVE_VLDRHS32_post;
1446 case ARM::MVE_VLDRHU32:
1447 return ARM::MVE_VLDRHU32_post;
1448 case ARM::MVE_VLDRBU8:
1449 return ARM::MVE_VLDRBU8_post;
1450 case ARM::MVE_VLDRHU16:
1451 return ARM::MVE_VLDRHU16_post;
1452 case ARM::MVE_VLDRWU32:
1453 return ARM::MVE_VLDRWU32_post;
1454 case ARM::MVE_VSTRB16:
1455 return ARM::MVE_VSTRB16_post;
1456 case ARM::MVE_VSTRB32:
1457 return ARM::MVE_VSTRB32_post;
1458 case ARM::MVE_VSTRH32:
1459 return ARM::MVE_VSTRH32_post;
1460 case ARM::MVE_VSTRBU8:
1461 return ARM::MVE_VSTRBU8_post;
1462 case ARM::MVE_VSTRHU16:
1463 return ARM::MVE_VSTRHU16_post;
1464 case ARM::MVE_VSTRWU32:
1465 return ARM::MVE_VSTRWU32_post;
1473bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *
MI) {
1476 if (isThumb1)
return false;
1481 unsigned Opcode =
MI->getOpcode();
1483 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
1484 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
1485 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
1487 if (
MI->getOperand(2).getImm() != 0)
1494 if (
MI->getOperand(0).getReg() ==
Base)
1500 MachineBasicBlock &
MBB = *
MI->getParent();
1506 if (!isAM5 &&
Offset == Bytes) {
1508 }
else if (
Offset == -Bytes) {
1512 if (MergeInstr ==
MBB.
end())
1516 if ((isAM5 &&
Offset != Bytes) ||
1534 MachineOperand &MO =
MI->getOperand(0);
1548 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
1585 MachineOperand &MO =
MI->getOperand(0);
1589 if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
1618bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &
MI)
const {
1619 unsigned Opcode =
MI.getOpcode();
1620 assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
1621 "Must have t2STRDi8 or t2LDRDi8");
1622 if (
MI.getOperand(3).getImm() != 0)
1628 const MachineOperand &BaseOp =
MI.getOperand(2);
1630 const MachineOperand &Reg0Op =
MI.getOperand(0);
1631 const MachineOperand &Reg1Op =
MI.getOperand(1);
1638 MachineBasicBlock &
MBB = *
MI.getParent();
1644 NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
1647 if (MergeInstr ==
MBB.
end())
1649 NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
1658 if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
1661 assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
1666 assert(
TII->get(Opcode).getNumOperands() == 6 &&
1667 TII->get(NewOpc).getNumOperands() == 7 &&
1668 "Unexpected number of operands in Opcode specification.");
1671 for (
const MachineOperand &MO :
MI.implicit_operands())
1683 unsigned Opcode =
MI.getOpcode();
1703 if (!
MI.getOperand(1).isReg())
1708 if (!
MI.hasOneMemOperand())
1727 if (
MI.getOperand(0).isReg() &&
MI.getOperand(0).isUndef())
1731 if (
MI.getOperand(1).isUndef())
1739 bool isDef,
unsigned NewOpc,
unsigned Reg,
1740 bool RegDeadKill,
bool RegUndef,
unsigned BaseReg,
1765bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &
MBB,
1767 MachineInstr *
MI = &*
MBBI;
1768 unsigned Opcode =
MI->getOpcode();
1771 if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
1774 const MachineOperand &BaseOp =
MI->getOperand(2);
1776 Register EvenReg =
MI->getOperand(0).getReg();
1777 Register OddReg =
MI->getOperand(1).getReg();
1778 unsigned EvenRegNum =
TRI->getDwarfRegNum(EvenReg,
false);
1779 unsigned OddRegNum =
TRI->getDwarfRegNum(OddReg,
false);
1783 bool Errata602117 = EvenReg ==
BaseReg &&
1784 (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->
isCortexM3();
1786 bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
1787 (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);
1789 if (!Errata602117 && !NonConsecutiveRegs)
1792 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1793 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
1794 bool EvenDeadKill = isLd ?
1795 MI->getOperand(0).isDead() :
MI->getOperand(0).isKill();
1796 bool EvenUndef =
MI->getOperand(0).isUndef();
1797 bool OddDeadKill = isLd ?
1798 MI->getOperand(1).isDead() :
MI->getOperand(1).isKill();
1799 bool OddUndef =
MI->getOperand(1).isUndef();
1800 bool BaseKill = BaseOp.
isKill();
1801 bool BaseUndef = BaseOp.
isUndef();
1802 assert((isT2 ||
MI->getOperand(3).getReg() == ARM::NoRegister) &&
1803 "register offset not handled below");
1808 if (OddRegNum > EvenRegNum && OffImm == 0) {
1811 unsigned NewOpc = (isLd)
1812 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1813 : (isT2 ? ARM::t2STMIA : ARM::STMIA);
1837 unsigned NewOpc = (isLd)
1838 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1839 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1842 unsigned NewOpc2 = (isLd)
1843 ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1844 : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1847 if (isLd &&
TRI->regsOverlap(EvenReg, BaseReg)) {
1848 assert(!
TRI->regsOverlap(OddReg, BaseReg));
1850 false, BaseReg,
false, BaseUndef, Pred, PredReg,
TII,
MI);
1852 false, BaseReg, BaseKill, BaseUndef, Pred, PredReg,
TII,
1855 if (OddReg == EvenReg && EvenDeadKill) {
1859 EvenDeadKill =
false;
1863 if (EvenReg == BaseReg)
1864 EvenDeadKill =
false;
1866 EvenUndef, BaseReg,
false, BaseUndef, Pred, PredReg,
TII,
1869 OddUndef, BaseReg, BaseKill, BaseUndef, Pred, PredReg,
TII,
1884bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &
MBB) {
1886 unsigned CurrBase = 0;
1887 unsigned CurrOpc = ~0
u;
1889 unsigned Position = 0;
1890 assert(Candidates.size() == 0);
1892 LiveRegsValid =
false;
1897 MBBI = std::prev(
I);
1898 if (FixInvalidRegPairOp(
MBB,
MBBI))
1903 unsigned Opcode =
MBBI->getOpcode();
1904 const MachineOperand &MO =
MBBI->getOperand(0);
1910 if (CurrBase == 0) {
1915 MemOps.push_back(MemOpQueueEntry(*
MBBI,
Offset, Position));
1919 if (CurrOpc == Opcode && CurrBase ==
Base && CurrPred == Pred) {
1927 bool Overlap =
false;
1931 for (
const MemOpQueueEntry &
E : MemOps) {
1932 if (
TRI->regsOverlap(
Reg,
E.MI->getOperand(0).getReg())) {
1942 if (
Offset > MemOps.back().Offset) {
1943 MemOps.push_back(MemOpQueueEntry(*
MBBI,
Offset, Position));
1946 MemOpQueue::iterator
MI, ME;
1947 for (
MI = MemOps.begin(), ME = MemOps.end();
MI != ME; ++
MI) {
1958 if (
MI != MemOps.end()) {
1959 MemOps.insert(
MI, MemOpQueueEntry(*
MBBI,
Offset, Position));
1970 }
else if (
MBBI->isDebugInstr()) {
1972 }
else if (
MBBI->getOpcode() == ARM::t2LDRDi8 ||
1973 MBBI->getOpcode() == ARM::t2STRDi8) {
1980 if (MemOps.size() > 0) {
1981 FormCandidates(MemOps);
1989 if (MemOps.size() > 0)
1990 FormCandidates(MemOps);
1994 auto LessThan = [](
const MergeCandidate*
M0,
const MergeCandidate *
M1) {
1995 return M0->InsertPos <
M1->InsertPos;
2001 for (
const MergeCandidate *Candidate : Candidates) {
2002 if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
2003 MachineInstr *Merged = MergeOpsUpdate(*Candidate);
2008 if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
2009 MergeBaseUpdateLSDouble(*Merged);
2011 MergeBaseUpdateLSMultiple(Merged);
2013 for (MachineInstr *
MI : Candidate->Instrs) {
2014 if (MergeBaseUpdateLoadStore(
MI))
2019 assert(Candidate->Instrs.size() == 1);
2020 if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
2026 for (MachineInstr *
MI : MergeBaseCandidates)
2027 MergeBaseUpdateLSDouble(*
MI);
2028 MergeBaseCandidates.clear();
2043bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &
MBB) {
2045 if (isThumb1)
return false;
2050 (
MBBI->getOpcode() == ARM::BX_RET ||
2051 MBBI->getOpcode() == ARM::tBX_RET ||
2052 MBBI->getOpcode() == ARM::MOVPCLR)) {
2055 while (PrevI->isDebugInstr() && PrevI !=
MBB.
begin())
2057 MachineInstr &PrevMI = *PrevI;
2059 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
2060 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
2061 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
2063 if (MO.
getReg() != ARM::LR)
2065 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
2066 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
2067 Opcode == ARM::LDMIA_UPD) &&
"Unsupported multiple load-return!");
2078bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &
MBB) {
2081 MBBI->getOpcode() != ARM::tBX_RET)
2086 if (Prev->getOpcode() != ARM::tMOVr ||
2087 !Prev->definesRegister(ARM::LR,
nullptr))
2090 for (
auto Use : Prev->uses())
2092 assert(STI->hasV4TOps());
2105bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
2109 AFI = Fn.
getInfo<ARMFunctionInfo>();
2113 RegClassInfoValid =
false;
2117 bool Modified =
false, ModifiedLDMReturn =
false;
2118 for (MachineBasicBlock &
MBB : Fn) {
2121 ModifiedLDMReturn |= MergeReturnIntoLDM(
MBB);
2131 if (ModifiedLDMReturn)
2138bool ARMLoadStoreOptLegacy::runOnMachineFunction(MachineFunction &MF) {
2141 ARMLoadStoreOpt Impl;
2142 return Impl.runOnMachineFunction(MF);
2145#define ARM_PREALLOC_LOAD_STORE_OPT_NAME \
2146 "ARM pre- register allocation load / store optimization pass"
2152struct ARMPreAllocLoadStoreOpt {
2175 bool DistributeIncrements();
2186 StringRef getPassName()
const override {
2198char ARMPreAllocLoadStoreOptLegacy::ID = 0;
2211 cl::init(8),
cl::Hidden);
2213bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(
MachineFunction &Fn,
2221 TD = &Fn.getDataLayout();
2223 TII = STI->getInstrInfo();
2224 TRI = STI->getRegisterInfo();
2225 MRI = &Fn.getRegInfo();
2228 bool Modified = DistributeIncrements();
2230 Modified |= RescheduleLoadStoreInstrs(&MFI);
2235bool ARMPreAllocLoadStoreOptLegacy::runOnMachineFunction(MachineFunction &Fn) {
2239 ARMPreAllocLoadStoreOpt Impl;
2240 AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2241 MachineDominatorTree *DT =
2242 &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
2243 return Impl.runOnMachineFunction(Fn, AA, DT);
2256 if (
I->isDebugInstr() || MemOps.
count(&*
I))
2258 if (
I->isCall() ||
I->isTerminator() ||
I->hasUnmodeledSideEffects())
2260 if (
I->mayStore() || (!isLd &&
I->mayLoad()))
2262 if (
I->mayAlias(
AA, *
MemOp,
false))
2264 for (
unsigned j = 0,
NumOps =
I->getNumOperands(); j !=
NumOps; ++j) {
2277 if (MemRegs.
size() <= 4)
2280 return AddedRegPressure.
size() <= MemRegs.
size() * 2;
2283bool ARMPreAllocLoadStoreOpt::CanFormLdStDWord(
2284 MachineInstr *Op0, MachineInstr *Op1,
DebugLoc &dl,
unsigned &NewOpc,
2288 if (!STI->hasV5TEOps())
2294 if (Opcode == ARM::LDRi12) {
2296 }
else if (Opcode == ARM::STRi12) {
2298 }
else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
2299 NewOpc = ARM::t2LDRDi8;
2302 }
else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
2303 NewOpc = ARM::t2STRDi8;
2320 if (Alignment < ReqAlign)
2326 int Limit = (1 << 8) * Scale;
2327 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
2336 int Limit = (1 << 8) * Scale;
2337 if (OffImm >= Limit || (OffImm & (Scale-1)))
2343 if (FirstReg == SecondReg)
2351bool ARMPreAllocLoadStoreOpt::RescheduleOps(
2352 MachineBasicBlock *
MBB, SmallVectorImpl<MachineInstr *> &
Ops,
unsigned Base,
2353 bool isLd, DenseMap<MachineInstr *, unsigned> &MI2LocMap,
2355 bool RetVal =
false;
2362 return LOffset > ROffset;
2369 while (
Ops.size() > 1) {
2370 unsigned FirstLoc = ~0
U;
2371 unsigned LastLoc = 0;
2372 MachineInstr *FirstOp =
nullptr;
2373 MachineInstr *LastOp =
nullptr;
2375 unsigned LastOpcode = 0;
2376 unsigned LastBytes = 0;
2377 unsigned NumMove = 0;
2382 if (LastOpcode && LSMOpcode != LastOpcode)
2389 if (Bytes != LastBytes ||
Offset != (LastOffset + (
int)Bytes))
2401 LastOpcode = LSMOpcode;
2403 unsigned Loc = MI2LocMap[
Op];
2404 if (Loc <= FirstLoc) {
2408 if (Loc >= LastLoc) {
2417 SmallPtrSet<MachineInstr*, 4> MemOps;
2418 SmallSet<unsigned, 4> MemRegs;
2419 for (
size_t i =
Ops.size() - NumMove, e =
Ops.size(); i != e; ++i) {
2426 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4;
2429 MemOps, MemRegs,
TRI, AA);
2431 for (
unsigned i = 0; i != NumMove; ++i)
2436 while (InsertPos !=
MBB->
end() &&
2437 (MemOps.
count(&*InsertPos) || InsertPos->isDebugInstr()))
2442 MachineInstr *Op0 =
Ops.back();
2443 MachineInstr *Op1 =
Ops[
Ops.size()-2];
2448 unsigned NewOpc = 0;
2451 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
2452 FirstReg, SecondReg, BaseReg,
2453 Offset, PredReg, Pred, isT2)) {
2457 const MCInstrDesc &MCID =
TII->get(NewOpc);
2458 const TargetRegisterClass *TRC =
TII->getRegClass(MCID, 0);
2464 MachineInstrBuilder MIB =
BuildMI(*
MBB, InsertPos, dl, MCID)
2465 .
addReg(FirstReg, RegState::Define)
2466 .
addReg(SecondReg, RegState::Define)
2478 MachineInstrBuilder MIB =
BuildMI(*
MBB, InsertPos, dl, MCID)
2501 for (
unsigned i = 0; i != NumMove; ++i) {
2502 MachineInstr *
Op =
Ops.pop_back_val();
2513 NumLdStMoved += NumMove;
2524 if (
MI->isNonListDebugValue()) {
2525 auto &
Op =
MI->getOperand(0);
2529 for (
unsigned I = 2;
I <
MI->getNumOperands();
I++) {
2530 auto &
Op =
MI->getOperand(
I);
2544 auto RegIt = RegisterMap.find(
Op.getReg());
2545 if (RegIt == RegisterMap.end())
2547 auto &InstrVec = RegIt->getSecond();
2554 MI->getDebugLoc()->getInlinedAt());
2559ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *
MBB) {
2560 bool RetVal =
false;
2562 DenseMap<MachineInstr *, unsigned> MI2LocMap;
2563 using Base2InstMap = DenseMap<unsigned, SmallVector<MachineInstr *, 4>>;
2564 using BaseVec = SmallVector<unsigned, 4>;
2565 Base2InstMap Base2LdsMap;
2566 Base2InstMap Base2StsMap;
2572 SmallDenseMap<Register, SmallVector<MachineInstr *>, 8> RegisterMap;
2579 MachineInstr &
MI = *
MBBI;
2580 if (
MI.isCall() ||
MI.isTerminator()) {
2586 if (!
MI.isDebugInstr())
2587 MI2LocMap[&
MI] = ++Loc;
2595 int Opc =
MI.getOpcode();
2599 bool StopHere =
false;
2600 auto FindBases = [&](Base2InstMap &Base2Ops, BaseVec &Bases) {
2603 BI->second.push_back(&
MI);
2604 Bases.push_back(
Base);
2607 for (
const MachineInstr *
MI : BI->second) {
2614 BI->second.push_back(&
MI);
2618 FindBases(Base2LdsMap, LdBases);
2620 FindBases(Base2StsMap, StBases);
2631 for (
unsigned Base : LdBases) {
2632 SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[
Base];
2634 RetVal |= RescheduleOps(
MBB, Lds,
Base,
true, MI2LocMap, RegisterMap);
2638 for (
unsigned Base : StBases) {
2639 SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[
Base];
2641 RetVal |= RescheduleOps(
MBB, Sts,
Base,
false, MI2LocMap, RegisterMap);
2645 Base2LdsMap.clear();
2646 Base2StsMap.clear();
2802 SmallDenseMap<DebugVariable, MachineInstr *, 8> DbgValueSinkCandidates;
2805 SmallDenseMap<MachineInstr *, SmallVector<Register>, 8> InstrMap;
2807 MachineInstr &
MI = *
MBBI;
2809 auto PopulateRegisterAndInstrMapForDebugInstr = [&](
Register Reg) {
2810 auto RegIt = RegisterMap.
find(
Reg);
2811 if (RegIt == RegisterMap.
end())
2813 auto &InstrVec = RegIt->getSecond();
2814 InstrVec.push_back(&
MI);
2815 InstrMap[&
MI].push_back(
Reg);
2818 if (
MI.isDebugValue()) {
2820 "DBG_VALUE or DBG_VALUE_LIST must contain a DILocalVariable");
2828 PopulateRegisterAndInstrMapForDebugInstr(
Op.getReg());
2836 auto InstrIt = DbgValueSinkCandidates.
find(DbgVar);
2837 if (InstrIt != DbgValueSinkCandidates.
end()) {
2838 auto *
Instr = InstrIt->getSecond();
2839 auto RegIt = InstrMap.
find(Instr);
2840 if (RegIt != InstrMap.
end()) {
2841 const auto &RegVec = RegIt->getSecond();
2844 for (
auto &
Reg : RegVec) {
2845 auto RegIt = RegisterMap.
find(
Reg);
2846 if (RegIt == RegisterMap.
end())
2848 auto &InstrVec = RegIt->getSecond();
2849 auto IsDbgVar = [&](MachineInstr *
I) ->
bool {
2851 return Var == DbgVar;
2857 [&](MachineOperand &
Op) {
Op.setReg(0); });
2860 DbgValueSinkCandidates[DbgVar] = &
MI;
2864 auto Opc =
MI.getOpcode();
2867 auto Reg =
MI.getOperand(0).getReg();
2868 auto RegIt = RegisterMap.
find(
Reg);
2869 if (RegIt == RegisterMap.
end())
2871 auto &DbgInstrVec = RegIt->getSecond();
2872 if (!DbgInstrVec.size())
2874 for (
auto *DbgInstr : DbgInstrVec) {
2876 auto *ClonedMI =
MI.getMF()->CloneMachineInstr(DbgInstr);
2885 DbgValueSinkCandidates.
erase(DbgVar);
2888 [&](MachineOperand &
Op) {
Op.setReg(0); });
2891 if (DbgInstr->isDebugValueList())
2905 switch (
MI.getOpcode()) {
2906 case ARM::MVE_VLDRBS16:
2907 case ARM::MVE_VLDRBS32:
2908 case ARM::MVE_VLDRBU16:
2909 case ARM::MVE_VLDRBU32:
2910 case ARM::MVE_VLDRHS32:
2911 case ARM::MVE_VLDRHU32:
2912 case ARM::MVE_VLDRBU8:
2913 case ARM::MVE_VLDRHU16:
2914 case ARM::MVE_VLDRWU32:
2915 case ARM::MVE_VSTRB16:
2916 case ARM::MVE_VSTRB32:
2917 case ARM::MVE_VSTRH32:
2918 case ARM::MVE_VSTRBU8:
2919 case ARM::MVE_VSTRHU16:
2920 case ARM::MVE_VSTRWU32:
2922 case ARM::t2LDRHi12:
2923 case ARM::t2LDRSHi8:
2924 case ARM::t2LDRSHi12:
2926 case ARM::t2LDRBi12:
2927 case ARM::t2LDRSBi8:
2928 case ARM::t2LDRSBi12:
2930 case ARM::t2STRBi12:
2932 case ARM::t2STRHi12:
2934 case ARM::MVE_VLDRBS16_post:
2935 case ARM::MVE_VLDRBS32_post:
2936 case ARM::MVE_VLDRBU16_post:
2937 case ARM::MVE_VLDRBU32_post:
2938 case ARM::MVE_VLDRHS32_post:
2939 case ARM::MVE_VLDRHU32_post:
2940 case ARM::MVE_VLDRBU8_post:
2941 case ARM::MVE_VLDRHU16_post:
2942 case ARM::MVE_VLDRWU32_post:
2943 case ARM::MVE_VSTRB16_post:
2944 case ARM::MVE_VSTRB32_post:
2945 case ARM::MVE_VSTRH32_post:
2946 case ARM::MVE_VSTRBU8_post:
2947 case ARM::MVE_VSTRHU16_post:
2948 case ARM::MVE_VSTRWU32_post:
2949 case ARM::MVE_VLDRBS16_pre:
2950 case ARM::MVE_VLDRBS32_pre:
2951 case ARM::MVE_VLDRBU16_pre:
2952 case ARM::MVE_VLDRBU32_pre:
2953 case ARM::MVE_VLDRHS32_pre:
2954 case ARM::MVE_VLDRHU32_pre:
2955 case ARM::MVE_VLDRBU8_pre:
2956 case ARM::MVE_VLDRHU16_pre:
2957 case ARM::MVE_VLDRWU32_pre:
2958 case ARM::MVE_VSTRB16_pre:
2959 case ARM::MVE_VSTRB32_pre:
2960 case ARM::MVE_VSTRH32_pre:
2961 case ARM::MVE_VSTRBU8_pre:
2962 case ARM::MVE_VSTRHU16_pre:
2963 case ARM::MVE_VSTRWU32_pre:
2970 switch (
MI.getOpcode()) {
2971 case ARM::MVE_VLDRBS16_post:
2972 case ARM::MVE_VLDRBS32_post:
2973 case ARM::MVE_VLDRBU16_post:
2974 case ARM::MVE_VLDRBU32_post:
2975 case ARM::MVE_VLDRHS32_post:
2976 case ARM::MVE_VLDRHU32_post:
2977 case ARM::MVE_VLDRBU8_post:
2978 case ARM::MVE_VLDRHU16_post:
2979 case ARM::MVE_VLDRWU32_post:
2980 case ARM::MVE_VSTRB16_post:
2981 case ARM::MVE_VSTRB32_post:
2982 case ARM::MVE_VSTRH32_post:
2983 case ARM::MVE_VSTRBU8_post:
2984 case ARM::MVE_VSTRHU16_post:
2985 case ARM::MVE_VSTRWU32_post:
2992 switch (
MI.getOpcode()) {
2993 case ARM::MVE_VLDRBS16_pre:
2994 case ARM::MVE_VLDRBS32_pre:
2995 case ARM::MVE_VLDRBU16_pre:
2996 case ARM::MVE_VLDRBU32_pre:
2997 case ARM::MVE_VLDRHS32_pre:
2998 case ARM::MVE_VLDRHU32_pre:
2999 case ARM::MVE_VLDRBU8_pre:
3000 case ARM::MVE_VLDRHU16_pre:
3001 case ARM::MVE_VLDRWU32_pre:
3002 case ARM::MVE_VSTRB16_pre:
3003 case ARM::MVE_VSTRB32_pre:
3004 case ARM::MVE_VSTRH32_pre:
3005 case ARM::MVE_VSTRBU8_pre:
3006 case ARM::MVE_VSTRHU16_pre:
3007 case ARM::MVE_VSTRWU32_pre:
3020 int &CodesizeEstimate) {
3029 CodesizeEstimate += 1;
3030 return Imm < 0 && -Imm < ((1 << 8) * 1);
3043 MI->getOperand(BaseOp).setReg(NewBaseReg);
3051 int OldOffset =
MI->getOperand(BaseOp + 1).getImm();
3053 MI->getOperand(BaseOp + 1).setImm(OldOffset -
Offset);
3055 unsigned ConvOpcode;
3056 switch (
MI->getOpcode()) {
3057 case ARM::t2LDRHi12:
3058 ConvOpcode = ARM::t2LDRHi8;
3060 case ARM::t2LDRSHi12:
3061 ConvOpcode = ARM::t2LDRSHi8;
3063 case ARM::t2LDRBi12:
3064 ConvOpcode = ARM::t2LDRBi8;
3066 case ARM::t2LDRSBi12:
3067 ConvOpcode = ARM::t2LDRSBi8;
3069 case ARM::t2STRHi12:
3070 ConvOpcode = ARM::t2STRHi8;
3072 case ARM::t2STRBi12:
3073 ConvOpcode = ARM::t2STRBi8;
3079 "Illegal Address Immediate after convert!");
3083 .
add(
MI->getOperand(0))
3084 .
add(
MI->getOperand(1))
3086 .
add(
MI->getOperand(3))
3087 .
add(
MI->getOperand(4))
3089 MI->eraseFromParent();
3108 TRC =
TII->getRegClass(
MCID, 2);
3119 .
add(
MI->getOperand(0))
3120 .
add(
MI->getOperand(1))
3122 .
add(
MI->getOperand(3))
3123 .
add(
MI->getOperand(4))
3124 .
add(
MI->getOperand(5))
3127 if (
MI->mayLoad()) {
3129 .
add(
MI->getOperand(0))
3131 .
add(
MI->getOperand(1))
3133 .
add(
MI->getOperand(3))
3134 .
add(
MI->getOperand(4))
3139 .
add(
MI->getOperand(0))
3140 .
add(
MI->getOperand(1))
3142 .
add(
MI->getOperand(3))
3143 .
add(
MI->getOperand(4))
3167bool ARMPreAllocLoadStoreOpt::DistributeIncrements(
Register Base) {
3170 MachineInstr *BaseAccess =
nullptr;
3171 MachineInstr *PrePostInc =
nullptr;
3176 SmallPtrSet<MachineInstr *, 8> OtherAccesses;
3187 if (!
Use.getOperand(BaseOp).isReg() ||
3188 Use.getOperand(BaseOp).getReg() !=
Base)
3192 else if (
Use.getOperand(BaseOp + 1).getImm() == 0)
3195 OtherAccesses.
insert(&Use);
3198 int IncrementOffset;
3204 if (
Increment->definesRegister(ARM::CPSR,
nullptr) ||
3208 LLVM_DEBUG(
dbgs() <<
"\nAttempting to distribute increments on VirtualReg "
3209 <<
Base.virtRegIndex() <<
"\n");
3213 for (MachineInstr &Use :
3215 if (&Use == BaseAccess || (
Use.getOpcode() != TargetOpcode::PHI &&
3217 LLVM_DEBUG(
dbgs() <<
" BaseAccess doesn't dominate use of increment\n");
3227 LLVM_DEBUG(
dbgs() <<
" Illegal addressing mode immediate on postinc\n");
3231 else if (PrePostInc) {
3239 LLVM_DEBUG(
dbgs() <<
"\nAttempting to distribute increments on already "
3240 <<
"indexed VirtualReg " <<
Base.virtRegIndex() <<
"\n");
3243 BaseAccess = PrePostInc;
3257 SmallPtrSet<MachineInstr *, 4> SuccessorAccesses;
3258 int CodesizeEstimate = -1;
3259 for (
auto *Use : OtherAccesses) {
3261 SuccessorAccesses.
insert(Use);
3264 Use->getOperand(BaseOp + 1).getImm() -
3266 TII, CodesizeEstimate)) {
3267 LLVM_DEBUG(
dbgs() <<
" Illegal addressing mode immediate on use\n");
3270 }
else if (!DT->
dominates(Use, BaseAccess)) {
3272 dbgs() <<
" Unknown dominance relation between Base and Use\n");
3276 if (STI->
hasMinSize() && CodesizeEstimate > 0) {
3277 LLVM_DEBUG(
dbgs() <<
" Expected to grow instructions under minsize\n");
3285 NewBaseReg =
Increment->getOperand(0).getReg();
3286 MachineInstr *BaseAccessPost =
3290 (void)BaseAccessPost;
3294 for (
auto *Use : SuccessorAccesses) {
3303 Op.setIsKill(
false);
3307bool ARMPreAllocLoadStoreOpt::DistributeIncrements() {
3309 SmallSetVector<Register, 4> Visited;
3310 for (
auto &
MBB : *MF) {
3311 for (
auto &
MI :
MBB) {
3313 if (BaseOp == -1 || !
MI.getOperand(BaseOp).isReg())
3317 if (!
Base.isVirtual())
3324 for (
auto Base : Visited)
3333 return new ARMPreAllocLoadStoreOptLegacy();
3334 return new ARMLoadStoreOptLegacy();
3340 ARMLoadStoreOpt Impl;
3341 bool Changed = Impl.runOnMachineFunction(MF);
3352 ARMPreAllocLoadStoreOpt Impl;
3358 bool Changed = Impl.runOnMachineFunction(MF,
AA, DT);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool isLoadSingle(unsigned Opc)
static int getMemoryOpOffset(const MachineInstr &MI)
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc, ARM_AM::AddrOpc Mode)
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, MachineBasicBlock::iterator I, MachineBasicBlock::iterator E, SmallPtrSetImpl< MachineInstr * > &MemOps, SmallSet< unsigned, 4 > &MemRegs, const TargetRegisterInfo *TRI, AliasAnalysis *AA)
static bool ContainsReg(ArrayRef< std::pair< unsigned, bool > > Regs, unsigned Reg)
static bool isPreIndex(MachineInstr &MI)
static void forEachDbgRegOperand(MachineInstr *MI, std::function< void(MachineOperand &)> Fn)
static bool isPostIndex(MachineInstr &MI)
static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode)
static unsigned getLSMultipleTransferSize(const MachineInstr *MI)
static bool isLegalOrConvertibleAddressImm(unsigned Opcode, int Imm, const TargetInstrInfo *TII, int &CodesizeEstimate)
static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode)
static bool isT1i32Load(unsigned Opc)
static void AdjustBaseAndOffset(MachineInstr *MI, Register NewBaseReg, int Offset, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc, ARM_AM::AddrOpc Mode)
static MachineInstr * createPostIncLoadStore(MachineInstr *MI, int Offset, Register NewReg, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static bool isi32Store(unsigned Opc)
static MachineBasicBlock::iterator findIncDecAfter(MachineBasicBlock::iterator MBBI, Register Reg, ARMCC::CondCodes Pred, Register PredReg, int &Offset, const TargetRegisterInfo *TRI)
Searches for an increment or decrement of Reg after MBBI.
static MachineBasicBlock::iterator findIncDecBefore(MachineBasicBlock::iterator MBBI, Register Reg, ARMCC::CondCodes Pred, Register PredReg, int &Offset)
Searches for an increment or decrement of Reg before MBBI.
static const MachineOperand & getLoadStoreBaseOp(const MachineInstr &MI)
static void updateRegisterMapForDbgValueListAfterMove(SmallDenseMap< Register, SmallVector< MachineInstr * >, 8 > &RegisterMap, MachineInstr *DbgValueListInstr, MachineInstr *InstrToReplace)
static cl::opt< unsigned > InstReorderLimit("arm-prera-ldst-opt-reorder-limit", cl::init(8), cl::Hidden)
static void InsertLDR_STR(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, int Offset, bool isDef, unsigned NewOpc, unsigned Reg, bool RegDeadKill, bool RegUndef, unsigned BaseReg, bool BaseKill, bool BaseUndef, ARMCC::CondCodes Pred, unsigned PredReg, const TargetInstrInfo *TII, MachineInstr *MI)
static int isIncrementOrDecrement(const MachineInstr &MI, Register Reg, ARMCC::CondCodes Pred, Register PredReg)
Check if the given instruction increments or decrements a register and return the amount it is increm...
static bool isT2i32Store(unsigned Opc)
static bool mayCombineMisaligned(const TargetSubtargetInfo &STI, const MachineInstr &MI)
Return true for loads/stores that can be combined to a double/multi operation without increasing the ...
static int getBaseOperandIndex(MachineInstr &MI)
static bool isT2i32Load(unsigned Opc)
static bool isi32Load(unsigned Opc)
static unsigned getImmScale(unsigned Opc)
static bool isT1i32Store(unsigned Opc)
#define ARM_PREALLOC_LOAD_STORE_OPT_NAME
#define ARM_LOAD_STORE_OPT_NAME
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, ARM_AM::AMSubMode Mode)
static bool isMemoryOp(const MachineInstr &MI)
Returns true if instruction is a memory operation that this pass is capable of operating on.
static const MachineOperand & getLoadStoreRegOp(const MachineInstr &MI)
static bool isValidLSDoubleOffset(int Offset)
static DebugVariable createDebugVariableFromMachineInstr(MachineInstr *MI)
static cl::opt< bool > AssumeMisalignedLoadStores("arm-assume-misaligned-load-store", cl::Hidden, cl::init(false), cl::desc("Be more conservative in ARM load/store opt"))
This switch disables formation of double/multi instructions that could potentially lead to (new) alig...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
This file defines the BumpPtrAllocator interface.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
const HexagonInstrInfo * TII
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
This file describes how to lower LLVM code to machine code.
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
static void updateLRRestored(MachineFunction &MF)
Update the IsRestored flag on LR if it is spilled, based on the return instructions.
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
bool isThumb2Function() const
bool isThumbFunction() const
bool shouldSignReturnAddress() const
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
const ARMBaseInstrInfo * getInstrInfo() const override
const ARMTargetLowering * getTargetLowering() const override
const ARMBaseRegisterInfo * getRegisterInfo() const override
Align getDualLoadStoreAlignment() const
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Represents analyses that only rely on functions' control flow.
A parsed version of the target data layout string, and methods for querying it.
Identifies a unique instance of a variable.
iterator find(const_arg_type_t< KeyT > Val)
bool erase(const KeyT &Val)
FunctionPass class - This class is used to implement most global optimizations.
A set of register units used to track register liveness.
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
void init(const TargetRegisterInfo &TRI)
Initialize and clear the set.
void addReg(MCRegister Reg)
Adds register units covered by physical register Reg.
LLVM_ABI void stepBackward(const MachineInstr &MI)
Updates liveness when stepping backwards over the instruction MI.
LLVM_ABI void addLiveOuts(const MachineBasicBlock &MBB)
Adds registers living out of block MBB.
Describe properties that are true of each instruction in the target description file.
MachineInstrBundleIterator< const MachineInstr > const_iterator
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LLVM_ABI LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI iterator getLastNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the last non-debug instruction in the basic block, or end().
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
@ LQR_Dead
Register is known to be fully dead.
Analysis pass which computes a MachineDominatorTree.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool dominates(const MachineInstr *A, const MachineInstr *B) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Properties which a MachineFunction may have at a given point in time.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineInstrBuilder & cloneMergedMemRefs(ArrayRef< const MachineInstr * > OtherMIs) const
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
unsigned getNumOperands() const
Returns the total number of operands.
LLVM_ABI void copyImplicitOps(MachineFunction &MF, const MachineInstr &MI)
Copy implicit register operands from specified instruction to this instruction.
bool killsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr kills the specified register.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI void dump() const
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
A description of a memory reference used in the backend.
bool isAtomic() const
Returns true if this operation has an atomic ordering requirement of unordered or higher,...
LLVM_ABI Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
MachineOperand class - Representation of each machine instruction operand.
void setImm(int64_t immVal)
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
iterator_range< use_nodbg_iterator > use_nodbg_operands(Register Reg) const
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(Register Reg) const
void setRegAllocationHint(Register VReg, unsigned Type, Register PrefReg)
setRegAllocationHint - Specify a register allocation hint for the specified virtual register.
LLVM_ABI const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
LLVM_ABI void runOnMachineFunction(const MachineFunction &MF, bool Rev=false)
runOnFunction - Prepare to answer questions about MF.
ArrayRef< MCPhysReg > getOrder(const TargetRegisterClass *RC) const
getOrder - Returns the preferred allocation order for RC.
Wrapper class representing virtual and physical registers.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A BumpPtrAllocator that allows only elements of a specific type to be allocated.
Represent a constant reference to a string, i.e.
Align getTransientStackAlign() const
getTransientStackAlignment - This method returns the number of bytes to which the stack pointer must ...
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetFrameLowering * getFrameLowering() const
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned char getAM3Offset(unsigned AM3Opc)
unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO, unsigned IdxMode=0)
AddrOpc getAM5Op(unsigned AM5Opc)
unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset, unsigned IdxMode=0)
getAM3Opc - This function encodes the addrmode3 opc field.
unsigned char getAM5Offset(unsigned AM5Opc)
AddrOpc getAM3Op(unsigned AM3Opc)
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
@ CE
Windows NT (Windows on ARM)
This namespace contains all of the command line option processing machinery.
initializer< Ty > init(const Ty &Val)
NodeAddr< InstrNode * > Instr
NodeAddr< UseNode * > Use
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
@ Define
Register definition.
constexpr RegState getKillRegState(bool B)
static bool isARMLowRegister(MCRegister Reg)
isARMLowRegister - Returns true if the register is a low register (r0-r7).
APFloat abs(APFloat X)
Returns the absolute value of the argument.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool isLegalAddressImm(unsigned Opcode, int Imm, const TargetInstrInfo *TII)
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
constexpr RegState getDeadRegState(bool B)
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
unsigned M1(unsigned Val)
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
auto reverse(ContainerTy &&C)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionAddr VTableAddr Count
constexpr RegState getDefRegState(bool B)
FunctionPass * createARMLoadStoreOptLegacyPass(bool PreAlloc=false)
Returns an instance of the load / store optimization pass.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
void replace(R &&Range, const T &OldValue, const T &NewValue)
Provide wrappers to std::replace which take ranges instead of having to pass begin/end explicitly.
ARMCC::CondCodes getInstrPredicate(const MachineInstr &MI, Register &PredReg)
getInstrPredicate - If instruction is predicated, returns its predicate condition,...
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
ArrayRef(const T &OneElt) -> ArrayRef< T >
static MachineOperand t1CondCodeOp(bool isDead=false)
Get the operand corresponding to the conditional code result for Thumb1.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
@ Increment
Incrementally increasing token ID.
int getAddSubImmediate(MachineInstr &MI)
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
constexpr RegState getUndefRegState(bool B)
This struct is a compact representation of a valid (non-zero power of two) alignment.