// Tag used by LLVM's debug machinery (LLVM_DEBUG / -debug-only=arm-ldst-opt)
// for debug output emitted from this file.
72#define DEBUG_TYPE "arm-ldst-opt"
// Pass statistics. Each STATISTIC declares a counter that is accumulated as
// the pass transforms instructions and reported by LLVM's statistics
// infrastructure (e.g. when running with -stats).
74STATISTIC(NumLDMGened ,
"Number of ldm instructions generated");
75STATISTIC(NumSTMGened ,
"Number of stm instructions generated");
76STATISTIC(NumVLDMGened,
"Number of vldm instructions generated");
77STATISTIC(NumVSTMGened,
"Number of vstm instructions generated");
78STATISTIC(NumLdStMoved,
"Number of load / store instructions moved");
79STATISTIC(NumLDRDFormed,
"Number of ldrd created before allocation");
80STATISTIC(NumSTRDFormed,
"Number of strd created before allocation");
// The next four count de-optimizations: ldrd/strd that had to be turned back
// into multiple-register or single-register forms (see FixInvalidRegPairOp).
81STATISTIC(NumLDRD2LDM,
"Number of ldrd instructions turned back into ldm");
82STATISTIC(NumSTRD2STM,
"Number of strd instructions turned back into stm");
83STATISTIC(NumLDRD2LDR,
"Number of ldrd instructions turned back into ldr's");
84STATISTIC(NumSTRD2STR,
"Number of strd instructions turned back into str's");
// Human-readable name for the post-RA load/store optimization pass, used when
// the pass is registered/printed.
95#define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"
114 bool RegClassInfoValid;
115 bool isThumb1, isThumb2;
130 struct MemOpQueueEntry {
142 struct MergeCandidate {
147 unsigned LatestMIIdx;
150 unsigned EarliestMIIdx;
157 bool CanMergeToLSMulti;
160 bool CanMergeToLSDouble;
171 unsigned Base,
unsigned WordOffset,
178 ArrayRef<std::pair<unsigned, bool>> Regs,
185 ArrayRef<std::pair<unsigned, bool>> Regs,
187 void FormCandidates(
const MemOpQueue &MemOps);
188 MachineInstr *MergeOpsUpdate(
const MergeCandidate &Cand);
// Pass identification: LLVM's pass infrastructure keys off the *address* of
// this static, not its value.
201char ARMLoadStoreOpt::ID = 0;
207 for (
const auto &MO :
MI.operands()) {
210 if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
220 unsigned Opcode =
MI.getOpcode();
221 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
222 unsigned NumOperands =
MI.getDesc().getNumOperands();
223 unsigned OffField =
MI.getOperand(NumOperands - 3).getImm();
225 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
226 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
227 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
228 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
232 if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
233 Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
248 return MI.getOperand(1);
252 return MI.getOperand(0);
349 case ARM::tLDMIA_UPD:
350 case ARM::tSTMIA_UPD:
351 case ARM::t2LDMIA_RET:
353 case ARM::t2LDMIA_UPD:
355 case ARM::t2STMIA_UPD:
357 case ARM::VLDMSIA_UPD:
359 case ARM::VSTMSIA_UPD:
361 case ARM::VLDMDIA_UPD:
363 case ARM::VSTMDIA_UPD:
377 case ARM::t2LDMDB_UPD:
379 case ARM::t2STMDB_UPD:
380 case ARM::VLDMSDB_UPD:
381 case ARM::VSTMSDB_UPD:
382 case ARM::VLDMDDB_UPD:
383 case ARM::VSTMDDB_UPD:
395 return Opc == ARM::tLDRi ||
Opc == ARM::tLDRspi;
399 return Opc == ARM::t2LDRi12 ||
Opc == ARM::t2LDRi8;
407 return Opc == ARM::tSTRi ||
Opc == ARM::tSTRspi;
411 return Opc == ARM::t2STRi12 ||
Opc == ARM::t2STRi8;
440 switch (
MI->getOpcode()) {
467 case ARM::tLDMIA_UPD:
468 case ARM::tSTMIA_UPD:
475 return (
MI->getNumOperands() -
MI->getDesc().getNumOperands() + 1) * 4;
478 return (
MI->getNumOperands() -
MI->getDesc().getNumOperands() + 1) * 8;
490 assert(isThumb1 &&
"Can only update base register uses for Thumb1!");
494 bool InsertSub =
false;
495 unsigned Opc =
MBBI->getOpcode();
497 if (
MBBI->readsRegister(
Base,
nullptr)) {
500 Opc == ARM::tLDRi ||
Opc == ARM::tLDRHi ||
Opc == ARM::tLDRBi;
502 Opc == ARM::tSTRi ||
Opc == ARM::tSTRHi ||
Opc == ARM::tSTRBi;
504 if (IsLoad || IsStore) {
510 MBBI->getOperand(
MBBI->getDesc().getNumOperands() - 3);
517 if (
Offset >= 0 && !(IsStore && InstrSrcReg ==
Base))
521 }
else if ((
Opc == ARM::tSUBi8 ||
Opc == ARM::tADDi8) &&
522 !definesCPSR(*
MBBI)) {
527 MBBI->getOperand(
MBBI->getDesc().getNumOperands() - 3);
529 MO.
getImm() + WordOffset * 4 :
530 MO.
getImm() - WordOffset * 4 ;
544 }
else if (definesCPSR(*
MBBI) ||
MBBI->isCall() ||
MBBI->isBranch()) {
562 if (
MBBI->killsRegister(
Base,
nullptr) ||
563 MBBI->definesRegister(
Base,
nullptr))
585unsigned ARMLoadStoreOpt::findFreeReg(
const TargetRegisterClass &RegClass) {
586 if (!RegClassInfoValid) {
588 RegClassInfoValid =
true;
591 for (
unsigned Reg : RegClassInfo.
getOrder(&RegClass))
600void ARMLoadStoreOpt::moveLiveRegsBefore(
const MachineBasicBlock &
MBB,
603 if (!LiveRegsValid) {
607 LiveRegsValid =
true;
610 while (LiveRegPos != Before) {
618 for (
const std::pair<unsigned, bool> &R : Regs)
627MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(
629 int Offset,
unsigned Base,
bool BaseKill,
unsigned Opcode,
631 ArrayRef<std::pair<unsigned, bool>> Regs,
633 unsigned NumRegs = Regs.size();
638 bool SafeToClobberCPSR = !isThumb1 ||
642 bool Writeback = isThumb1;
648 assert(
Base != ARM::SP &&
"Thumb1 does not allow SP in register list");
649 if (Opcode == ARM::tLDRi)
651 else if (Opcode == ARM::tSTRi)
658 bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
660 if (
Offset == 4 && haveIBAndDA) {
662 }
else if (
Offset == -4 * (
int)NumRegs + 4 && haveIBAndDA) {
664 }
else if (
Offset == -4 * (
int)NumRegs && isNotVFP && !isThumb1) {
667 }
else if (
Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
680 if (!SafeToClobberCPSR)
687 NewBase = Regs[NumRegs-1].first;
691 moveLiveRegsBefore(
MBB, InsertBefore);
695 for (
const std::pair<unsigned, bool> &R : Regs)
698 NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
703 int BaseOpc = isThumb2 ? (BaseKill &&
Base == ARM::SP ? ARM::t2ADDspImm
707 : (isThumb1 &&
Offset < 8)
709 : isThumb1 ?
ARM::tADDi8 :
ARM::ADDri;
715 BaseOpc = isThumb2 ? (BaseKill &&
Base == ARM::SP ? ARM::t2SUBspImm
719 : isThumb1 ?
ARM::tSUBi8 :
ARM::SUBri;
728 bool KillOldBase = BaseKill &&
737 if (
Base != NewBase &&
738 (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
756 if (BaseOpc == ARM::tADDrSPi) {
757 assert(
Offset % 4 == 0 &&
"tADDrSPi offset is scaled by 4");
795 if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
798 MachineInstrBuilder MIB;
801 assert(isThumb1 &&
"expected Writeback only inThumb1");
802 if (Opcode == ARM::tLDMIA) {
805 Opcode = ARM::tLDMIA_UPD;
817 UpdateBaseRegUses(
MBB, InsertBefore,
DL,
Base, NumRegs, Pred, PredReg);
826 for (
const std::pair<unsigned, bool> &R : Regs)
834MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(
836 int Offset,
unsigned Base,
bool BaseKill,
unsigned Opcode,
838 ArrayRef<std::pair<unsigned, bool>> Regs,
841 assert((IsLoad ||
isi32Store(Opcode)) &&
"Must have integer load or store");
842 unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;
845 MachineInstrBuilder MIB =
BuildMI(
MBB, InsertBefore,
DL,
846 TII->get(LoadStoreOpcode));
848 MIB.
addReg(Regs[0].first, RegState::Define)
849 .
addReg(Regs[1].first, RegState::Define);
860MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(
const MergeCandidate &Cand) {
861 const MachineInstr *
First = Cand.Instrs.front();
862 unsigned Opcode =
First->getOpcode();
865 SmallVector<unsigned, 4> ImpDefs;
866 DenseSet<unsigned> KilledRegs;
867 DenseSet<unsigned> UsedRegs;
869 for (
const MachineInstr *
MI : Cand.Instrs) {
872 bool IsKill = MO.
isKill();
882 for (
const MachineOperand &MO :
MI->implicit_operands()) {
891 if (
MI->readsRegister(DefReg,
nullptr))
901 MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
910 MachineInstr *Merged =
nullptr;
911 if (Cand.CanMergeToLSDouble)
912 Merged = CreateLoadStoreDouble(
MBB, InsertBefore,
Offset,
Base, BaseKill,
913 Opcode, Pred, PredReg,
DL, Regs,
915 if (!Merged && Cand.CanMergeToLSMulti)
916 Merged = CreateLoadStoreMulti(
MBB, InsertBefore,
Offset,
Base, BaseKill,
917 Opcode, Pred, PredReg,
DL, Regs, Cand.Instrs);
923 iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
924 bool EarliestAtBegin =
false;
926 EarliestAtBegin =
true;
928 EarliestI = std::prev(EarliestI);
932 for (MachineInstr *
MI : Cand.Instrs)
939 EarliestI = std::next(EarliestI);
945 for (MachineInstr &
MI : FixupRange) {
946 for (
unsigned &ImpDefReg : ImpDefs) {
947 for (MachineOperand &MO :
MI.implicit_operands()) {
959 for (
unsigned ImpDef : ImpDefs)
960 MIB.
addReg(ImpDef, RegState::ImplicitDefine);
964 for (MachineInstr &
MI : FixupRange) {
965 for (MachineOperand &MO :
MI.uses()) {
991 unsigned Opcode =
MI.getOpcode();
1004void ARMLoadStoreOpt::FormCandidates(
const MemOpQueue &MemOps) {
1005 const MachineInstr *FirstMI = MemOps[0].MI;
1010 unsigned SIndex = 0;
1011 unsigned EIndex = MemOps.size();
1014 const MachineInstr *
MI = MemOps[SIndex].MI;
1015 int Offset = MemOps[SIndex].Offset;
1018 unsigned PRegNum = PMO.
isUndef() ? std::numeric_limits<unsigned>::max()
1019 :
TRI->getEncodingValue(PReg);
1020 unsigned Latest = SIndex;
1021 unsigned Earliest = SIndex;
1023 bool CanMergeToLSDouble =
1029 CanMergeToLSDouble =
false;
1031 bool CanMergeToLSMulti =
true;
1034 if (STI->hasSlowOddRegister() && !isNotVFP && (PRegNum % 2) == 1)
1035 CanMergeToLSMulti =
false;
1039 if (PReg == ARM::SP || PReg == ARM::PC)
1040 CanMergeToLSMulti = CanMergeToLSDouble =
false;
1044 CanMergeToLSMulti = CanMergeToLSDouble =
false;
1059 for (
unsigned I = SIndex+1;
I < EIndex; ++
I, ++
Count) {
1060 int NewOffset = MemOps[
I].Offset;
1065 if (
Reg == ARM::SP ||
Reg == ARM::PC)
1071 unsigned RegNum = MO.
isUndef() ? std::numeric_limits<unsigned>::max()
1072 :
TRI->getEncodingValue(
Reg);
1073 bool PartOfLSMulti = CanMergeToLSMulti;
1074 if (PartOfLSMulti) {
1076 if (RegNum <= PRegNum)
1077 PartOfLSMulti =
false;
1081 else if (!isNotVFP && RegNum != PRegNum+1)
1082 PartOfLSMulti =
false;
1085 bool PartOfLSDouble = CanMergeToLSDouble &&
Count <= 1;
1087 if (!PartOfLSMulti && !PartOfLSDouble)
1089 CanMergeToLSMulti &= PartOfLSMulti;
1090 CanMergeToLSDouble &= PartOfLSDouble;
1093 unsigned Position = MemOps[
I].Position;
1094 if (Position < MemOps[Latest].Position)
1096 else if (Position > MemOps[Earliest].Position)
1104 MergeCandidate *Candidate =
new(
Allocator.Allocate()) MergeCandidate;
1105 for (
unsigned C = SIndex, CE = SIndex +
Count;
C <
CE; ++
C)
1106 Candidate->Instrs.push_back(MemOps[
C].
MI);
1107 Candidate->LatestMIIdx = Latest - SIndex;
1108 Candidate->EarliestMIIdx = Earliest - SIndex;
1109 Candidate->InsertPos = MemOps[Latest].Position;
1111 CanMergeToLSMulti = CanMergeToLSDouble =
false;
1112 Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
1113 Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
1114 Candidates.push_back(Candidate);
1117 }
while (SIndex < EIndex);
1194 switch (
MI.getOpcode()) {
1195 case ARM::tADDi8: Scale = 4; CheckCPSRDef =
true;
break;
1196 case ARM::tSUBi8: Scale = -4; CheckCPSRDef =
true;
break;
1198 case ARM::t2SUBspImm:
1199 case ARM::SUBri: Scale = -1; CheckCPSRDef =
true;
break;
1201 case ARM::t2ADDspImm:
1202 case ARM::ADDri: Scale = 1; CheckCPSRDef =
true;
break;
1203 case ARM::tADDspi: Scale = 4; CheckCPSRDef =
false;
break;
1204 case ARM::tSUBspi: Scale = -4; CheckCPSRDef =
false;
break;
1209 if (
MI.getOperand(0).getReg() !=
Reg ||
1210 MI.getOperand(1).getReg() !=
Reg ||
1212 MIPredReg != PredReg)
1215 if (CheckCPSRDef && definesCPSR(
MI))
1217 return MI.getOperand(2).getImm() * Scale;
1228 if (
MBBI == BeginMBBI)
1233 while (PrevMBBI->isDebugInstr() && PrevMBBI != BeginMBBI)
1237 return Offset == 0 ? EndMBBI : PrevMBBI;
1249 while (NextMBBI != EndMBBI) {
1251 while (NextMBBI != EndMBBI && NextMBBI->isDebugInstr())
1253 if (NextMBBI == EndMBBI)
1267 if (
Reg == ARM::SP || NextMBBI->readsRegister(
Reg,
TRI) ||
1268 NextMBBI->definesRegister(
Reg,
TRI))
1288bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *
MI) {
1290 if (isThumb1)
return false;
1293 const MachineOperand &BaseOP =
MI->getOperand(0);
1295 bool BaseKill = BaseOP.
isKill();
1298 unsigned Opcode =
MI->getOpcode();
1308 MachineBasicBlock &
MBB = *
MI->getParent();
1330 bool HighRegsUsed =
false;
1332 if (MO.
getReg() >= ARM::R8) {
1333 HighRegsUsed =
true;
1343 if (MergeInstr !=
MBB.
end()) {
1370 return ARM::LDR_PRE_IMM;
1372 return ARM::STR_PRE_IMM;
1383 return ARM::t2LDR_PRE;
1386 return ARM::t2STR_PRE;
1395 return ARM::LDR_POST_IMM;
1397 return ARM::STR_POST_IMM;
1408 return ARM::t2LDR_POST;
1410 case ARM::t2LDRBi12:
1411 return ARM::t2LDRB_POST;
1412 case ARM::t2LDRSBi8:
1413 case ARM::t2LDRSBi12:
1414 return ARM::t2LDRSB_POST;
1416 case ARM::t2LDRHi12:
1417 return ARM::t2LDRH_POST;
1418 case ARM::t2LDRSHi8:
1419 case ARM::t2LDRSHi12:
1420 return ARM::t2LDRSH_POST;
1423 return ARM::t2STR_POST;
1425 case ARM::t2STRBi12:
1426 return ARM::t2STRB_POST;
1428 case ARM::t2STRHi12:
1429 return ARM::t2STRH_POST;
1431 case ARM::MVE_VLDRBS16:
1432 return ARM::MVE_VLDRBS16_post;
1433 case ARM::MVE_VLDRBS32:
1434 return ARM::MVE_VLDRBS32_post;
1435 case ARM::MVE_VLDRBU16:
1436 return ARM::MVE_VLDRBU16_post;
1437 case ARM::MVE_VLDRBU32:
1438 return ARM::MVE_VLDRBU32_post;
1439 case ARM::MVE_VLDRHS32:
1440 return ARM::MVE_VLDRHS32_post;
1441 case ARM::MVE_VLDRHU32:
1442 return ARM::MVE_VLDRHU32_post;
1443 case ARM::MVE_VLDRBU8:
1444 return ARM::MVE_VLDRBU8_post;
1445 case ARM::MVE_VLDRHU16:
1446 return ARM::MVE_VLDRHU16_post;
1447 case ARM::MVE_VLDRWU32:
1448 return ARM::MVE_VLDRWU32_post;
1449 case ARM::MVE_VSTRB16:
1450 return ARM::MVE_VSTRB16_post;
1451 case ARM::MVE_VSTRB32:
1452 return ARM::MVE_VSTRB32_post;
1453 case ARM::MVE_VSTRH32:
1454 return ARM::MVE_VSTRH32_post;
1455 case ARM::MVE_VSTRBU8:
1456 return ARM::MVE_VSTRBU8_post;
1457 case ARM::MVE_VSTRHU16:
1458 return ARM::MVE_VSTRHU16_post;
1459 case ARM::MVE_VSTRWU32:
1460 return ARM::MVE_VSTRWU32_post;
1468bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *
MI) {
1471 if (isThumb1)
return false;
1476 unsigned Opcode =
MI->getOpcode();
1478 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
1479 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
1480 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
1482 if (
MI->getOperand(2).getImm() != 0)
1489 if (
MI->getOperand(0).getReg() ==
Base)
1495 MachineBasicBlock &
MBB = *
MI->getParent();
1501 if (!isAM5 &&
Offset == Bytes) {
1503 }
else if (
Offset == -Bytes) {
1507 if (MergeInstr ==
MBB.
end())
1511 if ((isAM5 &&
Offset != Bytes) ||
1529 MachineOperand &MO =
MI->getOperand(0);
1543 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
1580 MachineOperand &MO =
MI->getOperand(0);
1584 if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
1613bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &
MI)
const {
1614 unsigned Opcode =
MI.getOpcode();
1615 assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
1616 "Must have t2STRDi8 or t2LDRDi8");
1617 if (
MI.getOperand(3).getImm() != 0)
1623 const MachineOperand &BaseOp =
MI.getOperand(2);
1625 const MachineOperand &Reg0Op =
MI.getOperand(0);
1626 const MachineOperand &Reg1Op =
MI.getOperand(1);
1633 MachineBasicBlock &
MBB = *
MI.getParent();
1639 NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
1642 if (MergeInstr ==
MBB.
end())
1644 NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
1653 if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
1656 assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
1661 assert(
TII->get(Opcode).getNumOperands() == 6 &&
1662 TII->get(NewOpc).getNumOperands() == 7 &&
1663 "Unexpected number of operands in Opcode specification.");
1666 for (
const MachineOperand &MO :
MI.implicit_operands())
1678 unsigned Opcode =
MI.getOpcode();
1698 if (!
MI.getOperand(1).isReg())
1703 if (!
MI.hasOneMemOperand())
1722 if (
MI.getOperand(0).isReg() &&
MI.getOperand(0).isUndef())
1726 if (
MI.getOperand(1).isUndef())
1734 bool isDef,
unsigned NewOpc,
unsigned Reg,
1735 bool RegDeadKill,
bool RegUndef,
unsigned BaseReg,
1760bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &
MBB,
1762 MachineInstr *
MI = &*
MBBI;
1763 unsigned Opcode =
MI->getOpcode();
1766 if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
1769 const MachineOperand &BaseOp =
MI->getOperand(2);
1771 Register EvenReg =
MI->getOperand(0).getReg();
1772 Register OddReg =
MI->getOperand(1).getReg();
1773 unsigned EvenRegNum =
TRI->getDwarfRegNum(EvenReg,
false);
1774 unsigned OddRegNum =
TRI->getDwarfRegNum(OddReg,
false);
1778 bool Errata602117 = EvenReg ==
BaseReg &&
1779 (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->
isCortexM3();
1781 bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
1782 (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);
1784 if (!Errata602117 && !NonConsecutiveRegs)
1787 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1788 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
1789 bool EvenDeadKill = isLd ?
1790 MI->getOperand(0).isDead() :
MI->getOperand(0).isKill();
1791 bool EvenUndef =
MI->getOperand(0).isUndef();
1792 bool OddDeadKill = isLd ?
1793 MI->getOperand(1).isDead() :
MI->getOperand(1).isKill();
1794 bool OddUndef =
MI->getOperand(1).isUndef();
1795 bool BaseKill = BaseOp.
isKill();
1796 bool BaseUndef = BaseOp.
isUndef();
1797 assert((isT2 ||
MI->getOperand(3).getReg() == ARM::NoRegister) &&
1798 "register offset not handled below");
1803 if (OddRegNum > EvenRegNum && OffImm == 0) {
1806 unsigned NewOpc = (isLd)
1807 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1808 : (isT2 ? ARM::t2STMIA : ARM::STMIA);
1830 unsigned NewOpc = (isLd)
1831 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1832 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1835 unsigned NewOpc2 = (isLd)
1836 ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1837 : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1840 if (isLd &&
TRI->regsOverlap(EvenReg, BaseReg)) {
1841 assert(!
TRI->regsOverlap(OddReg, BaseReg));
1843 false, BaseReg,
false, BaseUndef, Pred, PredReg,
TII,
MI);
1845 false, BaseReg, BaseKill, BaseUndef, Pred, PredReg,
TII,
1848 if (OddReg == EvenReg && EvenDeadKill) {
1852 EvenDeadKill =
false;
1856 if (EvenReg == BaseReg)
1857 EvenDeadKill =
false;
1859 EvenUndef, BaseReg,
false, BaseUndef, Pred, PredReg,
TII,
1862 OddUndef, BaseReg, BaseKill, BaseUndef, Pred, PredReg,
TII,
1877bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &
MBB) {
1879 unsigned CurrBase = 0;
1880 unsigned CurrOpc = ~0
u;
1882 unsigned Position = 0;
1883 assert(Candidates.size() == 0);
1885 LiveRegsValid =
false;
1890 MBBI = std::prev(
I);
1891 if (FixInvalidRegPairOp(
MBB,
MBBI))
1896 unsigned Opcode =
MBBI->getOpcode();
1897 const MachineOperand &MO =
MBBI->getOperand(0);
1903 if (CurrBase == 0) {
1908 MemOps.push_back(MemOpQueueEntry(*
MBBI,
Offset, Position));
1912 if (CurrOpc == Opcode && CurrBase ==
Base && CurrPred == Pred) {
1920 bool Overlap =
false;
1924 for (
const MemOpQueueEntry &
E : MemOps) {
1925 if (
TRI->regsOverlap(
Reg,
E.MI->getOperand(0).getReg())) {
1935 if (
Offset > MemOps.back().Offset) {
1936 MemOps.push_back(MemOpQueueEntry(*
MBBI,
Offset, Position));
1939 MemOpQueue::iterator
MI, ME;
1940 for (
MI = MemOps.begin(), ME = MemOps.end();
MI != ME; ++
MI) {
1951 if (
MI != MemOps.end()) {
1952 MemOps.insert(
MI, MemOpQueueEntry(*
MBBI,
Offset, Position));
1963 }
else if (
MBBI->isDebugInstr()) {
1965 }
else if (
MBBI->getOpcode() == ARM::t2LDRDi8 ||
1966 MBBI->getOpcode() == ARM::t2STRDi8) {
1973 if (MemOps.size() > 0) {
1974 FormCandidates(MemOps);
1982 if (MemOps.size() > 0)
1983 FormCandidates(MemOps);
1987 auto LessThan = [](
const MergeCandidate*
M0,
const MergeCandidate *
M1) {
1988 return M0->InsertPos <
M1->InsertPos;
1994 for (
const MergeCandidate *Candidate : Candidates) {
1995 if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
1996 MachineInstr *Merged = MergeOpsUpdate(*Candidate);
2001 if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
2002 MergeBaseUpdateLSDouble(*Merged);
2004 MergeBaseUpdateLSMultiple(Merged);
2006 for (MachineInstr *
MI : Candidate->Instrs) {
2007 if (MergeBaseUpdateLoadStore(
MI))
2012 assert(Candidate->Instrs.size() == 1);
2013 if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
2019 for (MachineInstr *
MI : MergeBaseCandidates)
2020 MergeBaseUpdateLSDouble(*
MI);
2021 MergeBaseCandidates.clear();
2036bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &
MBB) {
2038 if (isThumb1)
return false;
2043 (
MBBI->getOpcode() == ARM::BX_RET ||
2044 MBBI->getOpcode() == ARM::tBX_RET ||
2045 MBBI->getOpcode() == ARM::MOVPCLR)) {
2048 while (PrevI->isDebugInstr() && PrevI !=
MBB.
begin())
2050 MachineInstr &PrevMI = *PrevI;
2052 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
2053 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
2054 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
2056 if (MO.
getReg() != ARM::LR)
2058 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
2059 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
2060 Opcode == ARM::LDMIA_UPD) &&
"Unsupported multiple load-return!");
2071bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &
MBB) {
2074 MBBI->getOpcode() != ARM::tBX_RET)
2079 if (Prev->getOpcode() != ARM::tMOVr ||
2080 !Prev->definesRegister(ARM::LR,
nullptr))
2083 for (
auto Use : Prev->uses())
2085 assert(STI->hasV4TOps());
2098bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
2105 AFI = Fn.
getInfo<ARMFunctionInfo>();
2109 RegClassInfoValid =
false;
2113 bool Modified =
false, ModifiedLDMReturn =
false;
2114 for (MachineBasicBlock &
MBB : Fn) {
2117 ModifiedLDMReturn |= MergeReturnIntoLDM(
MBB);
2127 if (ModifiedLDMReturn)
// Human-readable name for the pre-register-allocation variant of the pass
// (ARMPreAllocLoadStoreOpt below), used when the pass is registered/printed.
2134#define ARM_PREALLOC_LOAD_STORE_OPT_NAME \
 2135 "ARM pre- register allocation load / store optimization pass"
2157 StringRef getPassName()
const override {
2178 bool DistributeIncrements();
// Pass identification for the pre-RA pass; as above, the address of this
// static uniquely identifies the pass.
2184char ARMPreAllocLoadStoreOpt::ID = 0;
2195 cl::init(8),
cl::Hidden);
2197bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(
MachineFunction &Fn) {
2201 TD = &Fn.getDataLayout();
2203 TII = STI->getInstrInfo();
2204 TRI = STI->getRegisterInfo();
2205 MRI = &Fn.getRegInfo();
2206 DT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
2208 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2210 bool Modified = DistributeIncrements();
2212 Modified |= RescheduleLoadStoreInstrs(&MFI);
2227 if (
I->isDebugInstr() || MemOps.
count(&*
I))
2229 if (
I->isCall() ||
I->isTerminator() ||
I->hasUnmodeledSideEffects())
2231 if (
I->mayStore() || (!isLd &&
I->mayLoad()))
2233 if (
I->mayAlias(
AA, *
MemOp,
false))
2235 for (
unsigned j = 0,
NumOps =
I->getNumOperands(); j !=
NumOps; ++j) {
2248 if (MemRegs.
size() <= 4)
2251 return AddedRegPressure.
size() <= MemRegs.
size() * 2;
2254bool ARMPreAllocLoadStoreOpt::CanFormLdStDWord(
2255 MachineInstr *Op0, MachineInstr *Op1,
DebugLoc &dl,
unsigned &NewOpc,
2259 if (!STI->hasV5TEOps())
2265 if (Opcode == ARM::LDRi12) {
2267 }
else if (Opcode == ARM::STRi12) {
2269 }
else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
2270 NewOpc = ARM::t2LDRDi8;
2273 }
else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
2274 NewOpc = ARM::t2STRDi8;
2291 if (Alignment < ReqAlign)
2297 int Limit = (1 << 8) * Scale;
2298 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
2307 int Limit = (1 << 8) * Scale;
2308 if (OffImm >= Limit || (OffImm & (Scale-1)))
2314 if (FirstReg == SecondReg)
2322bool ARMPreAllocLoadStoreOpt::RescheduleOps(
2323 MachineBasicBlock *
MBB, SmallVectorImpl<MachineInstr *> &
Ops,
unsigned Base,
2324 bool isLd, DenseMap<MachineInstr *, unsigned> &MI2LocMap,
2326 bool RetVal =
false;
2333 return LOffset > ROffset;
2340 while (
Ops.size() > 1) {
2341 unsigned FirstLoc = ~0
U;
2342 unsigned LastLoc = 0;
2343 MachineInstr *FirstOp =
nullptr;
2344 MachineInstr *LastOp =
nullptr;
2346 unsigned LastOpcode = 0;
2347 unsigned LastBytes = 0;
2348 unsigned NumMove = 0;
2353 if (LastOpcode && LSMOpcode != LastOpcode)
2360 if (Bytes != LastBytes ||
Offset != (LastOffset + (
int)Bytes))
2372 LastOpcode = LSMOpcode;
2374 unsigned Loc = MI2LocMap[
Op];
2375 if (Loc <= FirstLoc) {
2379 if (Loc >= LastLoc) {
2388 SmallPtrSet<MachineInstr*, 4> MemOps;
2389 SmallSet<unsigned, 4> MemRegs;
2390 for (
size_t i =
Ops.size() - NumMove, e =
Ops.size(); i != e; ++i) {
2397 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4;
2400 MemOps, MemRegs,
TRI, AA);
2402 for (
unsigned i = 0; i != NumMove; ++i)
2407 while (InsertPos !=
MBB->
end() &&
2408 (MemOps.
count(&*InsertPos) || InsertPos->isDebugInstr()))
2413 MachineInstr *Op0 =
Ops.back();
2414 MachineInstr *Op1 =
Ops[
Ops.size()-2];
2419 unsigned NewOpc = 0;
2422 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
2423 FirstReg, SecondReg, BaseReg,
2424 Offset, PredReg, Pred, isT2)) {
2428 const MCInstrDesc &MCID =
TII->get(NewOpc);
2429 const TargetRegisterClass *TRC =
TII->getRegClass(MCID, 0);
2430 MRI->constrainRegClass(FirstReg, TRC);
2431 MRI->constrainRegClass(SecondReg, TRC);
2435 MachineInstrBuilder MIB =
BuildMI(*
MBB, InsertPos, dl, MCID)
2436 .
addReg(FirstReg, RegState::Define)
2437 .
addReg(SecondReg, RegState::Define)
2449 MachineInstrBuilder MIB =
BuildMI(*
MBB, InsertPos, dl, MCID)
2472 for (
unsigned i = 0; i != NumMove; ++i) {
2473 MachineInstr *
Op =
Ops.pop_back_val();
2484 NumLdStMoved += NumMove;
2495 if (
MI->isNonListDebugValue()) {
2496 auto &
Op =
MI->getOperand(0);
2500 for (
unsigned I = 2;
I <
MI->getNumOperands();
I++) {
2501 auto &
Op =
MI->getOperand(
I);
2515 auto RegIt = RegisterMap.find(
Op.getReg());
2516 if (RegIt == RegisterMap.end())
2518 auto &InstrVec = RegIt->getSecond();
2525 MI->getDebugLoc()->getInlinedAt());
2530ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *
MBB) {
2531 bool RetVal =
false;
2533 DenseMap<MachineInstr *, unsigned> MI2LocMap;
2534 using Base2InstMap = DenseMap<unsigned, SmallVector<MachineInstr *, 4>>;
2535 using BaseVec = SmallVector<unsigned, 4>;
2536 Base2InstMap Base2LdsMap;
2537 Base2InstMap Base2StsMap;
2543 SmallDenseMap<Register, SmallVector<MachineInstr *>, 8> RegisterMap;
2550 MachineInstr &
MI = *
MBBI;
2551 if (
MI.isCall() ||
MI.isTerminator()) {
2557 if (!
MI.isDebugInstr())
2558 MI2LocMap[&
MI] = ++Loc;
2566 int Opc =
MI.getOpcode();
2570 bool StopHere =
false;
2571 auto FindBases = [&](Base2InstMap &Base2Ops, BaseVec &Bases) {
2574 BI->second.push_back(&
MI);
2575 Bases.push_back(
Base);
2578 for (
const MachineInstr *
MI : BI->second) {
2585 BI->second.push_back(&
MI);
2589 FindBases(Base2LdsMap, LdBases);
2591 FindBases(Base2StsMap, StBases);
2602 for (
unsigned Base : LdBases) {
2603 SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[
Base];
2605 RetVal |= RescheduleOps(
MBB, Lds,
Base,
true, MI2LocMap, RegisterMap);
2609 for (
unsigned Base : StBases) {
2610 SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[
Base];
2612 RetVal |= RescheduleOps(
MBB, Sts,
Base,
false, MI2LocMap, RegisterMap);
2616 Base2LdsMap.clear();
2617 Base2StsMap.clear();
2773 SmallDenseMap<DebugVariable, MachineInstr *, 8> DbgValueSinkCandidates;
2776 SmallDenseMap<MachineInstr *, SmallVector<Register>, 8> InstrMap;
2778 MachineInstr &
MI = *
MBBI;
2780 auto PopulateRegisterAndInstrMapForDebugInstr = [&](
Register Reg) {
2781 auto RegIt = RegisterMap.
find(
Reg);
2782 if (RegIt == RegisterMap.
end())
2784 auto &InstrVec = RegIt->getSecond();
2785 InstrVec.push_back(&
MI);
2786 InstrMap[&
MI].push_back(
Reg);
2789 if (
MI.isDebugValue()) {
2791 "DBG_VALUE or DBG_VALUE_LIST must contain a DILocalVariable");
2799 PopulateRegisterAndInstrMapForDebugInstr(
Op.getReg());
2807 auto InstrIt = DbgValueSinkCandidates.
find(DbgVar);
2808 if (InstrIt != DbgValueSinkCandidates.
end()) {
2809 auto *
Instr = InstrIt->getSecond();
2810 auto RegIt = InstrMap.
find(Instr);
2811 if (RegIt != InstrMap.
end()) {
2812 const auto &RegVec = RegIt->getSecond();
2815 for (
auto &
Reg : RegVec) {
2816 auto RegIt = RegisterMap.
find(
Reg);
2817 if (RegIt == RegisterMap.
end())
2819 auto &InstrVec = RegIt->getSecond();
2820 auto IsDbgVar = [&](MachineInstr *
I) ->
bool {
2822 return Var == DbgVar;
2828 [&](MachineOperand &
Op) {
Op.setReg(0); });
2831 DbgValueSinkCandidates[DbgVar] = &
MI;
2835 auto Opc =
MI.getOpcode();
2838 auto Reg =
MI.getOperand(0).getReg();
2839 auto RegIt = RegisterMap.
find(
Reg);
2840 if (RegIt == RegisterMap.
end())
2842 auto &DbgInstrVec = RegIt->getSecond();
2843 if (!DbgInstrVec.size())
2845 for (
auto *DbgInstr : DbgInstrVec) {
2847 auto *ClonedMI =
MI.getMF()->CloneMachineInstr(DbgInstr);
2856 DbgValueSinkCandidates.
erase(DbgVar);
2859 [&](MachineOperand &
Op) {
Op.setReg(0); });
2862 if (DbgInstr->isDebugValueList())
2876 switch (
MI.getOpcode()) {
2877 case ARM::MVE_VLDRBS16:
2878 case ARM::MVE_VLDRBS32:
2879 case ARM::MVE_VLDRBU16:
2880 case ARM::MVE_VLDRBU32:
2881 case ARM::MVE_VLDRHS32:
2882 case ARM::MVE_VLDRHU32:
2883 case ARM::MVE_VLDRBU8:
2884 case ARM::MVE_VLDRHU16:
2885 case ARM::MVE_VLDRWU32:
2886 case ARM::MVE_VSTRB16:
2887 case ARM::MVE_VSTRB32:
2888 case ARM::MVE_VSTRH32:
2889 case ARM::MVE_VSTRBU8:
2890 case ARM::MVE_VSTRHU16:
2891 case ARM::MVE_VSTRWU32:
2893 case ARM::t2LDRHi12:
2894 case ARM::t2LDRSHi8:
2895 case ARM::t2LDRSHi12:
2897 case ARM::t2LDRBi12:
2898 case ARM::t2LDRSBi8:
2899 case ARM::t2LDRSBi12:
2901 case ARM::t2STRBi12:
2903 case ARM::t2STRHi12:
2905 case ARM::MVE_VLDRBS16_post:
2906 case ARM::MVE_VLDRBS32_post:
2907 case ARM::MVE_VLDRBU16_post:
2908 case ARM::MVE_VLDRBU32_post:
2909 case ARM::MVE_VLDRHS32_post:
2910 case ARM::MVE_VLDRHU32_post:
2911 case ARM::MVE_VLDRBU8_post:
2912 case ARM::MVE_VLDRHU16_post:
2913 case ARM::MVE_VLDRWU32_post:
2914 case ARM::MVE_VSTRB16_post:
2915 case ARM::MVE_VSTRB32_post:
2916 case ARM::MVE_VSTRH32_post:
2917 case ARM::MVE_VSTRBU8_post:
2918 case ARM::MVE_VSTRHU16_post:
2919 case ARM::MVE_VSTRWU32_post:
2920 case ARM::MVE_VLDRBS16_pre:
2921 case ARM::MVE_VLDRBS32_pre:
2922 case ARM::MVE_VLDRBU16_pre:
2923 case ARM::MVE_VLDRBU32_pre:
2924 case ARM::MVE_VLDRHS32_pre:
2925 case ARM::MVE_VLDRHU32_pre:
2926 case ARM::MVE_VLDRBU8_pre:
2927 case ARM::MVE_VLDRHU16_pre:
2928 case ARM::MVE_VLDRWU32_pre:
2929 case ARM::MVE_VSTRB16_pre:
2930 case ARM::MVE_VSTRB32_pre:
2931 case ARM::MVE_VSTRH32_pre:
2932 case ARM::MVE_VSTRBU8_pre:
2933 case ARM::MVE_VSTRHU16_pre:
2934 case ARM::MVE_VSTRWU32_pre:
2941 switch (
MI.getOpcode()) {
2942 case ARM::MVE_VLDRBS16_post:
2943 case ARM::MVE_VLDRBS32_post:
2944 case ARM::MVE_VLDRBU16_post:
2945 case ARM::MVE_VLDRBU32_post:
2946 case ARM::MVE_VLDRHS32_post:
2947 case ARM::MVE_VLDRHU32_post:
2948 case ARM::MVE_VLDRBU8_post:
2949 case ARM::MVE_VLDRHU16_post:
2950 case ARM::MVE_VLDRWU32_post:
2951 case ARM::MVE_VSTRB16_post:
2952 case ARM::MVE_VSTRB32_post:
2953 case ARM::MVE_VSTRH32_post:
2954 case ARM::MVE_VSTRBU8_post:
2955 case ARM::MVE_VSTRHU16_post:
2956 case ARM::MVE_VSTRWU32_post:
2963 switch (
MI.getOpcode()) {
2964 case ARM::MVE_VLDRBS16_pre:
2965 case ARM::MVE_VLDRBS32_pre:
2966 case ARM::MVE_VLDRBU16_pre:
2967 case ARM::MVE_VLDRBU32_pre:
2968 case ARM::MVE_VLDRHS32_pre:
2969 case ARM::MVE_VLDRHU32_pre:
2970 case ARM::MVE_VLDRBU8_pre:
2971 case ARM::MVE_VLDRHU16_pre:
2972 case ARM::MVE_VLDRWU32_pre:
2973 case ARM::MVE_VSTRB16_pre:
2974 case ARM::MVE_VSTRB32_pre:
2975 case ARM::MVE_VSTRH32_pre:
2976 case ARM::MVE_VSTRBU8_pre:
2977 case ARM::MVE_VSTRHU16_pre:
2978 case ARM::MVE_VSTRWU32_pre:
2991 int &CodesizeEstimate) {
3000 CodesizeEstimate += 1;
3001 return Imm < 0 && -Imm < ((1 << 8) * 1);
3014 MI->getOperand(BaseOp).setReg(NewBaseReg);
3020 MRI.constrainRegClass(NewBaseReg, TRC);
3022 int OldOffset =
MI->getOperand(BaseOp + 1).getImm();
3024 MI->getOperand(BaseOp + 1).setImm(OldOffset -
Offset);
3026 unsigned ConvOpcode;
3027 switch (
MI->getOpcode()) {
3028 case ARM::t2LDRHi12:
3029 ConvOpcode = ARM::t2LDRHi8;
3031 case ARM::t2LDRSHi12:
3032 ConvOpcode = ARM::t2LDRSHi8;
3034 case ARM::t2LDRBi12:
3035 ConvOpcode = ARM::t2LDRBi8;
3037 case ARM::t2LDRSBi12:
3038 ConvOpcode = ARM::t2LDRSBi8;
3040 case ARM::t2STRHi12:
3041 ConvOpcode = ARM::t2STRHi8;
3043 case ARM::t2STRBi12:
3044 ConvOpcode = ARM::t2STRBi8;
3050 "Illegal Address Immediate after convert!");
3054 .
add(
MI->getOperand(0))
3055 .
add(
MI->getOperand(1))
3057 .
add(
MI->getOperand(3))
3058 .
add(
MI->getOperand(4))
3060 MI->eraseFromParent();
3077 MRI.constrainRegClass(NewReg, TRC);
3079 TRC =
TII->getRegClass(
MCID, 2);
3080 MRI.constrainRegClass(
MI->getOperand(1).getReg(), TRC);
3090 .
add(
MI->getOperand(0))
3091 .
add(
MI->getOperand(1))
3093 .
add(
MI->getOperand(3))
3094 .
add(
MI->getOperand(4))
3095 .
add(
MI->getOperand(5))
3098 if (
MI->mayLoad()) {
3100 .
add(
MI->getOperand(0))
3102 .
add(
MI->getOperand(1))
3104 .
add(
MI->getOperand(3))
3105 .
add(
MI->getOperand(4))
3110 .
add(
MI->getOperand(0))
3111 .
add(
MI->getOperand(1))
3113 .
add(
MI->getOperand(3))
3114 .
add(
MI->getOperand(4))
3138bool ARMPreAllocLoadStoreOpt::DistributeIncrements(
Register Base) {
3141 MachineInstr *BaseAccess =
nullptr;
3142 MachineInstr *PrePostInc =
nullptr;
3147 SmallPtrSet<MachineInstr *, 8> OtherAccesses;
3148 for (
auto &Use :
MRI->use_nodbg_instructions(
Base)) {
3158 if (!
Use.getOperand(BaseOp).isReg() ||
3159 Use.getOperand(BaseOp).getReg() !=
Base)
3163 else if (
Use.getOperand(BaseOp + 1).getImm() == 0)
3166 OtherAccesses.
insert(&Use);
3169 int IncrementOffset;
3175 if (
Increment->definesRegister(ARM::CPSR,
nullptr) ||
3179 LLVM_DEBUG(
dbgs() <<
"\nAttempting to distribute increments on VirtualReg "
3180 <<
Base.virtRegIndex() <<
"\n");
3184 for (MachineInstr &Use :
3185 MRI->use_nodbg_instructions(
Increment->getOperand(0).getReg())) {
3186 if (&Use == BaseAccess || (
Use.getOpcode() != TargetOpcode::PHI &&
3188 LLVM_DEBUG(
dbgs() <<
" BaseAccess doesn't dominate use of increment\n");
3198 LLVM_DEBUG(
dbgs() <<
" Illegal addressing mode immediate on postinc\n");
3202 else if (PrePostInc) {
3210 LLVM_DEBUG(
dbgs() <<
"\nAttempting to distribute increments on already "
3211 <<
"indexed VirtualReg " <<
Base.virtRegIndex() <<
"\n");
3214 BaseAccess = PrePostInc;
3228 SmallPtrSet<MachineInstr *, 4> SuccessorAccesses;
3229 int CodesizeEstimate = -1;
3230 for (
auto *Use : OtherAccesses) {
3232 SuccessorAccesses.
insert(Use);
3235 Use->getOperand(BaseOp + 1).getImm() -
3237 TII, CodesizeEstimate)) {
3238 LLVM_DEBUG(
dbgs() <<
" Illegal addressing mode immediate on use\n");
3241 }
else if (!DT->
dominates(Use, BaseAccess)) {
3243 dbgs() <<
" Unknown dominance relation between Base and Use\n");
3247 if (STI->
hasMinSize() && CodesizeEstimate > 0) {
3248 LLVM_DEBUG(
dbgs() <<
" Expected to grow instructions under minsize\n");
3256 NewBaseReg =
Increment->getOperand(0).getReg();
3257 MachineInstr *BaseAccessPost =
3261 (void)BaseAccessPost;
3265 for (
auto *Use : SuccessorAccesses) {
3273 for (MachineOperand &
Op :
MRI->use_nodbg_operands(NewBaseReg))
3274 Op.setIsKill(
false);
3278bool ARMPreAllocLoadStoreOpt::DistributeIncrements() {
3280 SmallSetVector<Register, 4> Visited;
3281 for (
auto &
MBB : *MF) {
3282 for (
auto &
MI :
MBB) {
3284 if (BaseOp == -1 || !
MI.getOperand(BaseOp).isReg())
3288 if (!
Base.isVirtual())
3295 for (
auto Base : Visited)
3304 return new ARMPreAllocLoadStoreOpt();
3305 return new ARMLoadStoreOpt();
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool isLoadSingle(unsigned Opc)
static int getMemoryOpOffset(const MachineInstr &MI)
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc, ARM_AM::AddrOpc Mode)
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, MachineBasicBlock::iterator I, MachineBasicBlock::iterator E, SmallPtrSetImpl< MachineInstr * > &MemOps, SmallSet< unsigned, 4 > &MemRegs, const TargetRegisterInfo *TRI, AliasAnalysis *AA)
static bool ContainsReg(ArrayRef< std::pair< unsigned, bool > > Regs, unsigned Reg)
static bool isPreIndex(MachineInstr &MI)
static void forEachDbgRegOperand(MachineInstr *MI, std::function< void(MachineOperand &)> Fn)
static bool isPostIndex(MachineInstr &MI)
static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode)
static unsigned getLSMultipleTransferSize(const MachineInstr *MI)
static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode)
static bool isT1i32Load(unsigned Opc)
static void AdjustBaseAndOffset(MachineInstr *MI, Register NewBaseReg, int Offset, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc, ARM_AM::AddrOpc Mode)
static MachineInstr * createPostIncLoadStore(MachineInstr *MI, int Offset, Register NewReg, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static bool isi32Store(unsigned Opc)
static MachineBasicBlock::iterator findIncDecAfter(MachineBasicBlock::iterator MBBI, Register Reg, ARMCC::CondCodes Pred, Register PredReg, int &Offset, const TargetRegisterInfo *TRI)
Searches for a increment or decrement of Reg after MBBI.
static MachineBasicBlock::iterator findIncDecBefore(MachineBasicBlock::iterator MBBI, Register Reg, ARMCC::CondCodes Pred, Register PredReg, int &Offset)
Searches for an increment or decrement of Reg before MBBI.
static const MachineOperand & getLoadStoreBaseOp(const MachineInstr &MI)
static void updateRegisterMapForDbgValueListAfterMove(SmallDenseMap< Register, SmallVector< MachineInstr * >, 8 > &RegisterMap, MachineInstr *DbgValueListInstr, MachineInstr *InstrToReplace)
arm prera ldst static false cl::opt< unsigned > InstReorderLimit("arm-prera-ldst-opt-reorder-limit", cl::init(8), cl::Hidden)
static void InsertLDR_STR(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, int Offset, bool isDef, unsigned NewOpc, unsigned Reg, bool RegDeadKill, bool RegUndef, unsigned BaseReg, bool BaseKill, bool BaseUndef, ARMCC::CondCodes Pred, unsigned PredReg, const TargetInstrInfo *TII, MachineInstr *MI)
static int isIncrementOrDecrement(const MachineInstr &MI, Register Reg, ARMCC::CondCodes Pred, Register PredReg)
Check if the given instruction increments or decrements a register and return the amount it is increm...
static bool isT2i32Store(unsigned Opc)
static bool isLegalOrConvertableAddressImm(unsigned Opcode, int Imm, const TargetInstrInfo *TII, int &CodesizeEstimate)
static bool mayCombineMisaligned(const TargetSubtargetInfo &STI, const MachineInstr &MI)
Return true for loads/stores that can be combined to a double/multi operation without increasing the ...
static int getBaseOperandIndex(MachineInstr &MI)
static bool isT2i32Load(unsigned Opc)
static bool isi32Load(unsigned Opc)
static unsigned getImmScale(unsigned Opc)
static bool isT1i32Store(unsigned Opc)
#define ARM_PREALLOC_LOAD_STORE_OPT_NAME
#define ARM_LOAD_STORE_OPT_NAME
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, ARM_AM::AMSubMode Mode)
static bool isMemoryOp(const MachineInstr &MI)
Returns true if instruction is a memory operation that this pass is capable of operating on.
static const MachineOperand & getLoadStoreRegOp(const MachineInstr &MI)
static bool isValidLSDoubleOffset(int Offset)
static DebugVariable createDebugVariableFromMachineInstr(MachineInstr *MI)
static cl::opt< bool > AssumeMisalignedLoadStores("arm-assume-misaligned-load-store", cl::Hidden, cl::init(false), cl::desc("Be more conservative in ARM load/store opt"))
This switch disables formation of double/multi instructions that could potentially lead to (new) alig...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
This file defines the BumpPtrAllocator interface.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
const HexagonInstrInfo * TII
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
This file describes how to lower LLVM code to machine code.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
static void updateLRRestored(MachineFunction &MF)
Update the IsRestored flag on LR if it is spilled, based on the return instructions.
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
bool isThumb2Function() const
bool isThumbFunction() const
bool shouldSignReturnAddress() const
const ARMBaseInstrInfo * getInstrInfo() const override
const ARMTargetLowering * getTargetLowering() const override
const ARMBaseRegisterInfo * getRegisterInfo() const override
Align getDualLoadStoreAlignment() const
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
A parsed version of the target data layout string in and methods for querying it.
Identifies a unique instance of a variable.
iterator find(const_arg_type_t< KeyT > Val)
bool erase(const KeyT &Val)
FunctionPass class - This class is used to implement most global optimizations.
A set of register units used to track register liveness.
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
void init(const TargetRegisterInfo &TRI)
Initialize and clear the set.
void addReg(MCRegister Reg)
Adds register units covered by physical register Reg.
LLVM_ABI void stepBackward(const MachineInstr &MI)
Updates liveness when stepping backwards over the instruction MI.
LLVM_ABI void addLiveOuts(const MachineBasicBlock &MBB)
Adds registers living out of block MBB.
Describe properties that are true of each instruction in the target description file.
MachineInstrBundleIterator< const MachineInstr > const_iterator
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LLVM_ABI LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI iterator getLastNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the last non-debug instruction in the basic block, or end().
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
@ LQR_Dead
Register is known to be fully dead.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool dominates(const MachineInstr *A, const MachineInstr *B) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Properties which a MachineFunction may have at a given point in time.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineInstrBuilder & cloneMergedMemRefs(ArrayRef< const MachineInstr * > OtherMIs) const
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
unsigned getNumOperands() const
Retuns the total number of operands.
LLVM_ABI void copyImplicitOps(MachineFunction &MF, const MachineInstr &MI)
Copy implicit register operands from specified instruction to this instruction.
bool killsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr kills the specified register.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI void dump() const
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
bool isAtomic() const
Returns true if this operation has an atomic ordering requirement of unordered or higher,...
LLVM_ABI Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
MachineOperand class - Representation of each machine instruction operand.
void setImm(int64_t immVal)
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
LLVM_ABI void runOnMachineFunction(const MachineFunction &MF, bool Rev=false)
runOnFunction - Prepare to answer questions about MF.
ArrayRef< MCPhysReg > getOrder(const TargetRegisterClass *RC) const
getOrder - Returns the preferred allocation order for RC.
Wrapper class representing virtual and physical registers.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A BumpPtrAllocator that allows only elements of a specific type to be allocated.
StringRef - Represent a constant reference to a string, i.e.
Align getTransientStackAlign() const
getTransientStackAlignment - This method returns the number of bytes to which the stack pointer must ...
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetFrameLowering * getFrameLowering() const
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned char getAM3Offset(unsigned AM3Opc)
unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO, unsigned IdxMode=0)
AddrOpc getAM5Op(unsigned AM5Opc)
unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset, unsigned IdxMode=0)
getAM3Opc - This function encodes the addrmode3 opc field.
unsigned char getAM5Offset(unsigned AM5Opc)
AddrOpc getAM3Op(unsigned AM3Opc)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
@ CE
Windows NT (Windows on ARM)
This namespace contains all of the command line option processing machinery.
initializer< Ty > init(const Ty &Val)
NodeAddr< InstrNode * > Instr
NodeAddr< UseNode * > Use
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
@ Define
Register definition.
constexpr RegState getKillRegState(bool B)
static bool isARMLowRegister(MCRegister Reg)
isARMLowRegister - Returns true if the register is a low register (r0-r7).
APFloat abs(APFloat X)
Returns the absolute value of the argument.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool isLegalAddressImm(unsigned Opcode, int Imm, const TargetInstrInfo *TII)
constexpr RegState getDeadRegState(bool B)
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
FunctionPass * createARMLoadStoreOptimizationPass(bool PreAlloc=false)
Returns an instance of the load / store optimization pass.
unsigned M1(unsigned Val)
auto reverse(ContainerTy &&C)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionAddr VTableAddr Count
constexpr RegState getDefRegState(bool B)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
void replace(R &&Range, const T &OldValue, const T &NewValue)
Provide wrappers to std::replace which take ranges instead of having to pass begin/end explicitly.
ARMCC::CondCodes getInstrPredicate(const MachineInstr &MI, Register &PredReg)
getInstrPredicate - If instruction is predicated, returns its predicate condition,...
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
ArrayRef(const T &OneElt) -> ArrayRef< T >
static MachineOperand t1CondCodeOp(bool isDead=false)
Get the operand corresponding to the conditional code result for Thumb1.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
@ Increment
Incrementally increasing token ID.
int getAddSubImmediate(MachineInstr &MI)
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
constexpr RegState getUndefRegState(bool B)
This struct is a compact representation of a valid (non-zero power of two) alignment.