#define DEBUG_TYPE "thumb2-reduce-size"
#define THUMB2_SIZE_REDUCE_NAME "Thumb2 instruction size reduce pass"
STATISTIC(NumNarrows, "Number of 32-bit instrs reduced to 16-bit ones");
STATISTIC(Num2Addrs,  "Number of 32-bit instrs reduced to 2addr 16-bit ones");
STATISTIC(NumLdSts,   "Number of 32-bit load / store reduced to 16-bit ones");
    uint16_t WideOpc;       // Wide opcode
    uint16_t NarrowOpc1;    // Narrow opcode to transform to
    uint16_t NarrowOpc2;    // Narrow opcode when it's two-address
    uint8_t  Imm1Limit;     // Limit of immediate field (bits)
    uint8_t  Imm2Limit;     // Limit of immediate field when it's two-address
    unsigned LowRegs1 : 1;  // Only possible if low-registers are used
    unsigned LowRegs2 : 1;  // Only possible if low-registers are used (2addr)
    unsigned PredCC1  : 2;  // 0 - If predicated, cc is on and vice versa.
                            // 1 - No cc field. 2 - Always set CPSR.
    unsigned PredCC2  : 2;
    unsigned PartFlag : 1;  // 16-bit instruction does partial flag update
    unsigned Special  : 1;  // Needs to be dealt with specially
    unsigned AvoidMovs: 1;  // Avoid movs with shifter operand (for Swift)
  static const ReduceEntry ReduceTable[] = {
  // Wide,           Narrow1,       Narrow2,      imm1,imm2, lo1, lo2, P/C,PF,S,AM
  { ARM::t2ADCrr,    0,             ARM::tADC,     0,   0,    0,   1,  0,0, 0,0,0 },
  { ARM::t2ADDri,    ARM::tADDi3,   ARM::tADDi8,   3,   8,    1,   1,  0,0, 0,1,0 },
  { ARM::t2ADDrr,    ARM::tADDrr,   ARM::tADDhirr, 0,   0,    1,   0,  0,1, 0,0,0 },
  { ARM::t2ADDSri,   ARM::tADDi3,   ARM::tADDi8,   3,   8,    1,   1,  2,2, 0,1,0 },
  { ARM::t2ADDSrr,   ARM::tADDrr,   0,             0,   0,    1,   0,  2,0, 0,1,0 },
  { ARM::t2ANDrr,    0,             ARM::tAND,     0,   0,    0,   1,  0,0, 1,0,0 },
  { ARM::t2ASRri,    ARM::tASRri,   0,             5,   0,    1,   0,  0,0, 1,0,1 },
  { ARM::t2ASRrr,    0,             ARM::tASRrr,   0,   0,    0,   1,  0,0, 1,0,1 },
  { ARM::t2BICrr,    0,             ARM::tBIC,     0,   0,    0,   1,  0,0, 1,0,0 },
  { ARM::t2CMNzrr,   ARM::tCMNz,    0,             0,   0,    1,   0,  2,0, 0,0,0 },
  { ARM::t2CMPri,    ARM::tCMPi8,   0,             8,   0,    1,   0,  2,0, 0,0,0 },
  { ARM::t2CMPrr,    ARM::tCMPhir,  0,             0,   0,    0,   0,  2,0, 0,1,0 },
  { ARM::t2EORrr,    0,             ARM::tEOR,     0,   0,    0,   1,  0,0, 1,0,0 },
  { ARM::t2LSLri,    ARM::tLSLri,   0,             5,   0,    1,   0,  0,0, 1,0,1 },
  { ARM::t2LSLrr,    0,             ARM::tLSLrr,   0,   0,    0,   1,  0,0, 1,0,1 },
  { ARM::t2LSRri,    ARM::tLSRri,   0,             5,   0,    1,   0,  0,0, 1,0,1 },
  { ARM::t2LSRrr,    0,             ARM::tLSRrr,   0,   0,    0,   1,  0,0, 1,0,1 },
  { ARM::t2MOVi,     ARM::tMOVi8,   0,             8,   0,    1,   0,  0,0, 1,0,0 },
  { ARM::t2MOVi16,   ARM::tMOVi8,   0,             8,   0,    1,   0,  0,0, 1,1,0 },
  { ARM::t2MOVr,     ARM::tMOVr,    0,             0,   0,    0,   0,  1,0, 0,0,0 },
  { ARM::t2MUL,      0,             ARM::tMUL,     0,   0,    0,   1,  0,0, 1,0,0 },
  { ARM::t2MVNr,     ARM::tMVN,     0,             0,   0,    1,   0,  0,0, 0,0,0 },
  { ARM::t2ORRrr,    0,             ARM::tORR,     0,   0,    0,   1,  0,0, 1,0,0 },
  { ARM::t2REV,      ARM::tREV,     0,             0,   0,    1,   0,  1,0, 0,0,0 },
  { ARM::t2REV16,    ARM::tREV16,   0,             0,   0,    1,   0,  1,0, 0,0,0 },
  { ARM::t2REVSH,    ARM::tREVSH,   0,             0,   0,    1,   0,  1,0, 0,0,0 },
  { ARM::t2RORrr,    0,             ARM::tROR,     0,   0,    0,   1,  0,0, 1,0,0 },
  { ARM::t2RSBri,    ARM::tRSB,     0,             0,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2RSBSri,   ARM::tRSB,     0,             0,   0,    1,   0,  2,0, 0,1,0 },
  { ARM::t2SBCrr,    0,             ARM::tSBC,     0,   0,    0,   1,  0,0, 0,0,0 },
  { ARM::t2SUBri,    ARM::tSUBi3,   ARM::tSUBi8,   3,   8,    1,   1,  0,0, 0,0,0 },
  { ARM::t2SUBrr,    ARM::tSUBrr,   0,             0,   0,    1,   0,  0,0, 0,0,0 },
  { ARM::t2SUBSri,   ARM::tSUBi3,   ARM::tSUBi8,   3,   8,    1,   1,  2,2, 0,0,0 },
  { ARM::t2SUBSrr,   ARM::tSUBrr,   0,             0,   0,    1,   0,  2,0, 0,0,0 },
  { ARM::t2SXTB,     ARM::tSXTB,    0,             0,   0,    1,   0,  1,0, 0,1,0 },
  { ARM::t2SXTH,     ARM::tSXTH,    0,             0,   0,    1,   0,  1,0, 0,1,0 },
  { ARM::t2TEQrr,    ARM::tEOR,     0,             0,   0,    1,   0,  2,0, 0,1,0 },
  { ARM::t2TSTrr,    ARM::tTST,     0,             0,   0,    1,   0,  2,0, 0,0,0 },
  { ARM::t2UXTB,     ARM::tUXTB,    0,             0,   0,    1,   0,  1,0, 0,1,0 },
  { ARM::t2UXTH,     ARM::tUXTH,    0,             0,   0,    1,   0,  1,0, 0,1,0 },

  { ARM::t2LDRi12,   ARM::tLDRi,    ARM::tLDRspi,  5,   8,    1,   0,  0,0, 0,1,0 },
  { ARM::t2LDRs,     ARM::tLDRr,    0,             0,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2LDRBi12,  ARM::tLDRBi,   0,             5,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2LDRBs,    ARM::tLDRBr,   0,             0,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2LDRHi12,  ARM::tLDRHi,   0,             5,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2LDRHs,    ARM::tLDRHr,   0,             0,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2LDRSBs,   ARM::tLDRSB,   0,             0,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2LDRSHs,   ARM::tLDRSH,   0,             0,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2LDR_POST, ARM::tLDMIA_UPD,0,            0,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2STRi12,   ARM::tSTRi,    ARM::tSTRspi,  5,   8,    1,   0,  0,0, 0,1,0 },
  { ARM::t2STRs,     ARM::tSTRr,    0,             0,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2STRBi12,  ARM::tSTRBi,   0,             5,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2STRBs,    ARM::tSTRBr,   0,             0,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2STRHi12,  ARM::tSTRHi,   0,             5,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2STRHs,    ARM::tSTRHr,   0,             0,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2STR_POST, ARM::tSTMIA_UPD,0,            0,   0,    1,   0,  0,0, 0,1,0 },

  { ARM::t2LDMIA,    ARM::tLDMIA,   0,             0,   0,    1,   1,  1,1, 0,1,0 },
  { ARM::t2LDMIA_RET,0,             ARM::tPOP_RET, 0,   0,    1,   1,  1,1, 0,1,0 },
  { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,    0,   0,    1,   1,  1,1, 0,1,0 },
  // ARM::t2STMIA (with no base-register writeback) has no Thumb1 equivalent.
  // tSTMIA_UPD is a change in semantics which can only be used if the base
  // register is killed. This difference is handled in ReduceLoadStore below.
  { ARM::t2STMIA,    ARM::tSTMIA_UPD,0,            0,   0,    1,   1,  1,1, 0,1,0 },
  { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD,0,            0,   0,    1,   1,  1,1, 0,1,0 },
  { ARM::t2STMDB_UPD,0,             ARM::tPUSH,    0,   0,    1,   1,  1,1, 0,1,0 }
  };
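  // How to read a row, using t2ADDri above as the example:
  //   { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0,1,0 }
  // means the 32-bit add-immediate can narrow to tADDi3 (3-bit immediate)
  // or, when destination and source coincide, to the two-address tADDi8
  // (8-bit immediate); both forms need low registers, and the entry is
  // marked Special because the SP-relative case is handled separately below.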
    bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
                         bool is2Addr, ARMCC::CondCodes Pred,
                         bool LiveCPSR, bool &HasCC, bool &CCDead);

    bool ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
                         const ReduceEntry &Entry);

    bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
                       const ReduceEntry &Entry, bool LiveCPSR,
                       bool IsSelfLoop);

    bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
                       const ReduceEntry &Entry, bool LiveCPSR,
                       bool IsSelfLoop);

    bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
                        const ReduceEntry &Entry, bool LiveCPSR,
                        bool IsSelfLoop);

    bool ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI, bool LiveCPSR,
                  bool IsSelfLoop);
    bool HighLatencyCPSR;
      // The flags leaving this block have high latency.
      bool HighLatencyCPSR = false;

      // Has this block been visited yet?
      bool Visited = false;
  OptimizeSize = MinimizeSize = false;
    unsigned FromOpc = ReduceTable[i].WideOpc;
    if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second)
      llvm_unreachable("Duplicated entries?");
    if (*Regs == ARM::CPSR)
      return true;
  switch (Def->getOpcode()) {
bool
Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Use, bool FirstInSelfLoop) {
  // Disable the check for -Oz (aka OptimizeForSizeHarder).
  if (MinimizeSize || !STI->avoidCPSRPartialUpdate())
    return false;
    return HighLatencyCPSR || FirstInSelfLoop;
    if (!MO.isReg() || MO.isUndef() || MO.isUse())
      continue;
    Register Reg = MO.getReg();
    if (Reg == 0 || Reg == ARM::CPSR)
      continue;
    if (!MO.isReg() || MO.isUndef() || MO.isDef())
      continue;
  // tMOVi8 usually doesn't start long dependency chains, and there are a lot
  // of them, so always shrink them when CPSR doesn't have high latency.
  if (Use->getOpcode() == ARM::t2MOVi ||
      Use->getOpcode() == ARM::t2MOVi16)
    return false;
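// The hazard this function guards against, as a concrete example: narrowing
// "add.w r0, r1, #1" to the flag-setting 16-bit "adds r0, r1, #1" makes the
// instruction partially update CPSR. On out-of-order cores such as
// Cortex-A9, that partial update is a false dependency on the previous CPSR
// writer, which blocks register renaming, so the 32-bit form is kept.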
bool
Thumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
                                  bool is2Addr, ARMCC::CondCodes Pred,
                                  bool LiveCPSR, bool &HasCC, bool &CCDead) {
  if ((is2Addr  && Entry.PredCC2 == 0) ||
      (!is2Addr && Entry.PredCC1 == 0)) {
  } else if ((is2Addr  && Entry.PredCC2 == 2) ||
             (!is2Addr && Entry.PredCC1 == 2)) {
  unsigned Opc = MI->getOpcode();
  bool isPCOk = (Opc == ARM::t2LDMIA_RET || Opc == ARM::t2LDMIA_UPD);
  bool isLROk = (Opc == ARM::t2STMDB_UPD);
  bool isSPOk = isPCOk || isLROk;
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    if (Reg == 0 || Reg == ARM::CPSR)
      continue;
    if (isPCOk && Reg == ARM::PC)
      continue;
    if (isLROk && Reg == ARM::LR)
      continue;
    if (Reg == ARM::SP) {
      if (isSPOk)
        continue;
      if (i == 1 && (Opc == ARM::t2LDRi12 || Opc == ARM::t2STRi12))
        // Special case for these ldr / str with sp as base register.
        continue;
    }
    if (!isARMLowRegister(Reg))
      return false;
  }
  return true;
}
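// A minimal standalone model of the register test above (the real
// isARMLowRegister lives in ARMBaseInfo.h); the name here is illustrative,
// not part of the pass. Most 16-bit encodings have 3-bit register fields,
// so only r0-r7 qualify:
static bool isLowRegModel(unsigned RegNo) {
  return RegNo < 8; // r0-r7 fit in a 3-bit field
}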
bool
Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
                                  const ReduceEntry &Entry) {
  unsigned Scale = 1;
  bool HasImmOffset = false;
  bool HasShift = false;
  bool HasOffReg = true;
  bool isLdStMul = false;
  unsigned Opc = Entry.NarrowOpc1;
  uint8_t ImmLimit = Entry.Imm1Limit;
  switch (Entry.WideOpc) {
  case ARM::t2LDRi12:
  case ARM::t2STRi12:
    if (MI->getOperand(1).getReg() == ARM::SP) {
      // SP-relative form: use the narrow SP encoding and its immediate limit.
      Opc = Entry.NarrowOpc2;
      ImmLimit = Entry.Imm2Limit;
    }
  case ARM::t2LDR_POST:
  case ARM::t2STR_POST: {
    if (!MI->hasOneMemOperand() ||
        (*MI->memoperands_begin())->getAlign() < Align(4))
      return false;
    bool IsStore = Entry.WideOpc == ARM::t2STR_POST;
    Register Rt = MI->getOperand(IsStore ? 1 : 0).getReg();
    Register Rn = MI->getOperand(IsStore ? 0 : 1).getReg();
    unsigned Offset = MI->getOperand(3).getImm();
    unsigned PredImm = MI->getOperand(4).getImm();
    Register PredReg = MI->getOperand(5).getReg();
    MI->eraseFromBundle();
    Register BaseReg = MI->getOperand(0).getReg();
      if (MO.getReg() == BaseReg) {
    // t2STMIA is reduced to tSTMIA_UPD, which has writeback; this is only
    // valid if the base register is killed.
    if (!MI->getOperand(0).isKill())
      return false;
    Register BaseReg = MI->getOperand(0).getReg();
    for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 4))
      if (MO.getReg() == BaseReg)
        return false;
  case ARM::t2LDMIA_RET: {
    Register BaseReg = MI->getOperand(1).getReg();
    if (BaseReg != ARM::SP)
      return false;
    Opc = Entry.NarrowOpc2; // tPOP_RET
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA_UPD:
  case ARM::t2STMDB_UPD: {
    Register BaseReg = MI->getOperand(1).getReg();
    if (BaseReg == ARM::SP &&
        (Entry.WideOpc == ARM::t2LDMIA_UPD ||
         Entry.WideOpc == ARM::t2STMDB_UPD)) {
      Opc = Entry.NarrowOpc2; // tPOP or tPUSH
    } else if (!isARMLowRegister(BaseReg) ||
               (Entry.WideOpc != ARM::t2LDMIA_UPD &&
                Entry.WideOpc != ARM::t2STMIA_UPD)) {
      return false;
    }
  unsigned OffsetReg = 0;
  bool OffsetKill = false;
  bool OffsetInternal = false;
  if (HasShift) {
    OffsetReg = MI->getOperand(2).getReg();
    OffsetKill = MI->getOperand(2).isKill();
    OffsetInternal = MI->getOperand(2).isInternalRead();
    if (MI->getOperand(3).getImm())
      // Thumb1 addressing mode doesn't support shift.
      return false;
  }
  unsigned OffsetImm = 0;
  if (HasImmOffset) {
    OffsetImm = MI->getOperand(2).getImm();
    unsigned MaxOffset = ((1 << ImmLimit) - 1) * Scale;
    if ((OffsetImm & (Scale - 1)) || OffsetImm > MaxOffset)
      // Make sure the immediate field fits.
      return false;
  }
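// A standalone sketch of the fit test above, under the same assumptions
// (a narrow immediate field of ImmLimit bits, scaled by Scale bytes); the
// helper name is illustrative only. For tLDRi, ImmLimit = 5 and Scale = 4,
// so word-aligned offsets 0..124 are encodable.
static bool fitsNarrowImm(unsigned OffsetImm, unsigned ImmLimit,
                          unsigned Scale) {
  unsigned MaxOffset = ((1u << ImmLimit) - 1) * Scale;
  return (OffsetImm & (Scale - 1)) == 0 && OffsetImm <= MaxOffset;
}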
  // tSTMIA_UPD takes a defining register operand; the base is known to be
  // killed (checked above), so mark it dead here.
  if (Entry.WideOpc == ARM::t2STMIA)
    MIB.addReg(MI->getOperand(0).getReg(), RegState::Define | RegState::Dead);
  if (!isLdStMul) {
    MIB.add(MI->getOperand(0));
    MIB.add(MI->getOperand(1));

    if (HasImmOffset)
      MIB.addImm(OffsetImm / Scale); // The encoding stores the scaled value.
    assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!");
  LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
                    << "       to 16-bit: " << *MIB);
bool
Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
                                const ReduceEntry &Entry,
                                bool LiveCPSR, bool IsSelfLoop) {
  unsigned Opc = MI->getOpcode();
  if (Opc == ARM::t2ADDri) {
    // If the source register is SP, try to reduce to tADDrSPi; otherwise
    // it's a normal reduce.
    if (MI->getOperand(1).getReg() != ARM::SP) {
      if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
        return true;
      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
    }
    // Try to reduce to tADDrSPi.
    unsigned Imm = MI->getOperand(2).getImm();
    // The immediate must be in range and word-aligned.
    if (Imm & 3 || Imm > 1020)
      return false;
    MachineInstrBuilder MIB =
        BuildMI(MBB, MI, MI->getDebugLoc(), TII->get(ARM::tADDrSPi))
            .add(MI->getOperand(0))
            .add(MI->getOperand(1))
            .addImm(Imm / 4) // The tADDrSPi immediate is scaled by four.
            .add(predOps(ARMCC::AL));
    LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
                      << "       to 16-bit: " << *MIB);
  if (MI->mayLoadOrStore())
    return ReduceLoadStore(MBB, MI, Entry);
  case ARM::t2ADDSrr: {
      if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
        return true;
      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
    // Only the zero-immediate / zero-rotate forms have 16-bit equivalents.
    if (MI->getOperand(2).getImm() == 0)
      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
    // Can convert only 'pure' immediate operands, not immediates obtained as
    // globals' addresses.
    if (MI->getOperand(1).isImm())
      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
    // Try the low-register-only tCMPr first; fall back to tCMPhir.
    static const ReduceEntry NarrowEntry =
      { ARM::t2CMPrr, ARM::tCMPr, 0, 0, 0, 1, 1, 2,0, 0,1,0 };
    if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, IsSelfLoop))
      return true;
    return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
    if (MI->getOperand(0).isKill())
      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
bool
Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
                                const ReduceEntry &Entry,
                                bool LiveCPSR, bool IsSelfLoop) {
  if (!OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand())
    // Don't issue movs with shifter operand for some CPUs unless we
    // are optimizing for size.
    return false;
  // t2MUL is "special": the tied source operand is second, not first.
  if (MI->getOpcode() == ARM::t2MUL) {
  } else if (Reg0 != Reg1) {
    // Try to commute the operands to make it a 2-address instruction.
    unsigned CommOpIdx1 = 1;
    unsigned CommOpIdx2 = TargetInstrInfo::CommuteAnyOperandIndex;
    if (!TII->findCommutedOpIndices(*MI, CommOpIdx1, CommOpIdx2) ||
        MI->getOperand(CommOpIdx2).getReg() != Reg0)
      return false;
    MachineInstr *CommutedMI =
        TII->commuteInstruction(*MI, false, CommOpIdx1, CommOpIdx2);
    if (!CommutedMI)
      return false;
  if (Entry.Imm2Limit) {
    unsigned Imm = MI->getOperand(2).getImm();
    unsigned Limit = (1 << Entry.Imm2Limit) - 1;
    if (Imm > Limit)
      return false;
  }
  bool SkipPred = false;
    HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
    if (HasCC && MI->getOperand(NumOps-1).isDead())
      CCDead = true;
  if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead))
    return false;
  // Avoid adding a false dependency on partial flag update by some 16-bit
  // instructions which have the 's' bit set.
  if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
      canAddPseudoFlagDep(MI, IsSelfLoop))
    return false;
  MIB.add(MI->getOperand(0));
  // Transfer the rest of operands.
  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
    MIB.add(MI->getOperand(i));
  LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
                    << "       to 16-bit: " << *MIB);
bool
Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
                                 const ReduceEntry &Entry,
                                 bool LiveCPSR, bool IsSelfLoop) {
  if (!OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand())
    // Don't issue movs with shifter operand for some CPUs unless we
    // are optimizing for size.
    return false;
  unsigned Limit = ~0U;
  if (Entry.Imm1Limit)
    Limit = (1 << Entry.Imm1Limit) - 1;
      if (!Reg || Reg == ARM::CPSR)
        continue;
      if (Entry.LowRegs1 && !isARMLowRegister(Reg))
        return false;
    } else if (MO.isImm() &&
               !MCID.OpInfo[i].isPredicate()) {
      if (((unsigned)MO.getImm()) > Limit)
        return false;
    }
  bool SkipPred = false;
    HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
    if (HasCC && MI->getOperand(NumOps-1).isDead())
      CCDead = true;
  if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead))
    return false;
  // Avoid adding a false dependency on partial flag update by some 16-bit
  // instructions which have the 's' bit set.
  if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
      canAddPseudoFlagDep(MI, IsSelfLoop))
    return false;
  // TEQ is special in that it doesn't define a register but we're converting
  // it into an EOR which does. Add the first operand as a def and then
  // again as a use.
  if (MCID.getOpcode() == ARM::t2TEQrr) {
    MIB.add(MI->getOperand(0));
    MIB->getOperand(0).setIsKill(false);
    MIB->getOperand(0).setIsDef(true);
    MIB->getOperand(0).setIsDead(true);

    if (NewMCID.hasOptionalDef())
      MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
    MIB.add(MI->getOperand(0));
  } else {
    MIB.add(MI->getOperand(0));
    if (NewMCID.hasOptionalDef())
      MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
  }
  // Transfer the rest of operands.
  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
    if ((MCID.getOpcode() == ARM::t2RSBSri ||
         MCID.getOpcode() == ARM::t2RSBri ||
         MCID.getOpcode() == ARM::t2SXTB ||
         MCID.getOpcode() == ARM::t2SXTH ||
         MCID.getOpcode() == ARM::t2UXTB ||
         MCID.getOpcode() == ARM::t2UXTH) && i == 2)
      // Skip the zero immediate operand, it's now implicit.
      continue;
    bool isPred = (i < NumOps && MCID.OpInfo[i].isPredicate());
    if (SkipPred && isPred)
      continue;
  LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
                    << "       to 16-bit: " << *MIB);
    if (MO.getReg() != ARM::CPSR)
      continue;
    DefCPSR = true;
    if (!MO.isDead())
      HasDef = true;
  }

  return HasDef || LiveCPSR;
    if (MO.getReg() != ARM::CPSR)
      continue;
    assert(LiveCPSR && "CPSR liveness tracking is wrong!");
bool Thumb2SizeReduce::ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
                                bool LiveCPSR, bool IsSelfLoop) {
  unsigned Opcode = MI->getOpcode();
  DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
  if (OPI == ReduceOpcodeMap.end())
    return false;
  const ReduceEntry &Entry = ReduceTable[OPI->second];
  // Don't attempt normal reductions on "special" cases for now.
  if (Entry.Special)
    return ReduceSpecial(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
  // Try to transform to a 16-bit two-address instruction.
  if (Entry.NarrowOpc2 &&
      ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
    return true;
  // Try to transform to a 16-bit non-two-address instruction.
  if (Entry.NarrowOpc1 &&
      ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
    return true;

  return false;
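// Example of the dispatch order: for t2SUBri the two-address tSUBi8 (8-bit
// immediate, destination tied to the source) is tried before tSUBi3 (3-bit
// immediate, untied low registers), presumably because the wider immediate
// range succeeds far more often.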
  HighLatencyCPSR = false;
    const MBBInfo &PInfo = BlockInfo[Pred->getNumber()];
    if (!PInfo.Visited) {
      // Since blocks are visited in RPO, this must be a back-edge.
      continue;
    }
    if (PInfo.HighLatencyCPSR) {
      HighLatencyCPSR = true;
      break;
    }
  for (; MII != E; MII = NextMII) {
    NextMII = std::next(MII);
    if (MI->isBundle()) {
      BundleMI = MI;
      continue;
    }
    if (MI->isDebugInstr())
      continue;
    // Does NextMII belong to the same bundle as MI?
    bool NextInSameBundle = NextMII != E && NextMII->isBundledWithPred();
    if (ReduceMI(MBB, MI, LiveCPSR, IsSelfLoop)) {
      Modified = true;
      // Removing and reinserting the first instruction in a bundle will break
      // up the bundle. Fix the bundling if it was broken.
      if (NextInSameBundle && !NextMII->isBundledWithPred())
        NextMII->bundleWithPred();
    }
    if (BundleMI && !NextInSameBundle && MI->isInsideBundle()) {
    bool DefCPSR = false;
      HighLatencyCPSR = false;
    } else if (DefCPSR) {
  MBBInfo &Info = BlockInfo[MBB.getNumber()];
  Info.HighLatencyCPSR = HighLatencyCPSR;
  Info.Visited = true;
  return Modified;
}
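// The bookkeeping above is a tiny forward dataflow: because blocks are
// visited in reverse post-order, every forward predecessor's Visited and
// HighLatencyCPSR bits are final before a block reads them, so the
// invariant (stated with the pass's own MBBInfo fields) is simply:
//   for every Pred of MBB that precedes MBB in RPO:
//     BlockInfo[Pred->getNumber()].Visited == true
// and an unvisited predecessor can only be reached through a back-edge.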
  if (PredicateFtor && !PredicateFtor(MF.getFunction()))
    return false;
FunctionPass *llvm::createThumb2SizeReductionPass(
    std::function<bool(const Function &)> Ftor) {
  return new Thumb2SizeReduce(std::move(Ftor));
}