48#define DEBUG_TYPE "hexbit"
60 cl::init(std::numeric_limits<unsigned>::max()));
63 cl::init(std::numeric_limits<unsigned>::max()));
80 RegisterSet() =
default;
81 explicit RegisterSet(
unsigned s,
bool t =
false) : Bits(s, t) {}
89 unsigned count()
const {
93 unsigned find_first()
const {
100 unsigned find_next(
unsigned Prev)
const {
101 int Next =
Bits.find_next(v2x(Prev));
108 unsigned Idx = v2x(R);
115 unsigned T = LRU.front();
123 unsigned Idx = v2x(R);
137 for (
unsigned R = Rs.find_first(); R; R = Rs.find_next(R))
142 for (
unsigned R = Rs.find_first(); R; R = Rs.find_next(R))
147 bool operator[](
unsigned R)
const {
148 unsigned Idx = v2x(R);
151 bool has(
unsigned R)
const {
152 unsigned Idx = v2x(R);
161 bool includes(
const RegisterSet &Rs)
const {
163 return !Rs.Bits.test(Bits);
165 bool intersects(
const RegisterSet &Rs)
const {
166 return Bits.anyCommon(Rs.Bits);
171 std::deque<unsigned> LRU;
173 void ensure(
unsigned Idx) {
175 Bits.resize(std::max(
Idx+1, 32U));
178 static inline unsigned v2x(
unsigned v) {
182 static inline unsigned x2v(
unsigned x) {
192 const PrintRegSet &
P);
203 for (
unsigned R =
P.RS.find_first(); R; R =
P.RS.find_next(R))
209 class Transformation;
218 return "Hexagon bit simplification";
229 static void getInstrDefs(
const MachineInstr &
MI, RegisterSet &Defs);
249 static bool getUsedBitsInStore(
unsigned Opc,
BitVector &Bits,
251 static bool getUsedBits(
unsigned Opc,
unsigned OpN,
BitVector &Bits,
264 unsigned NewSub = Hexagon::NoSubRegister);
267 using HBS = HexagonBitSimplify;
272 class Transformation {
276 Transformation(
bool TD) :
TopDown(TD) {}
277 virtual ~Transformation() =
default;
284char HexagonBitSimplify::ID = 0;
287 "Hexagon bit simplification",
false,
false)
294 bool Changed =
false;
297 Changed =
T.processBlock(
B, AVs);
301 getInstrDefs(
I, Defs);
302 RegisterSet NewAVs = AVs;
305 for (
auto *DTN : children<MachineDomTreeNode*>(MDT->getNode(&
B)))
306 Changed |= visitBlock(*(DTN->getBlock()),
T, NewAVs);
309 Changed |=
T.processBlock(
B, AVs);
319 for (
auto &
Op :
MI.operands()) {
320 if (!
Op.isReg() || !
Op.isDef())
331 for (
auto &
Op :
MI.operands()) {
332 if (!
Op.isReg() || !
Op.isUse())
352 if (RC1[B1+i] != RC2[B2+i])
387 auto Begin =
MRI.use_begin(OldR),
End =
MRI.use_end();
389 for (
auto I = Begin;
I !=
End;
I = NextI) {
390 NextI = std::next(
I);
401 if (hasTiedUse(OldR,
MRI, NewSR))
403 auto Begin =
MRI.use_begin(OldR),
End =
MRI.use_end();
405 for (
auto I = Begin;
I !=
End;
I = NextI) {
406 NextI = std::next(
I);
413bool HexagonBitSimplify::replaceSubWithSub(
Register OldR,
unsigned OldSR,
418 if (OldSR != NewSR && hasTiedUse(OldR,
MRI, NewSR))
420 auto Begin =
MRI.use_begin(OldR),
End =
MRI.use_end();
422 for (
auto I = Begin;
I !=
End;
I = NextI) {
423 NextI = std::next(
I);
424 if (
I->getSubReg() != OldSR)
440 Width =
MRI.getTargetRegisterInfo()->getRegSizeInBits(*RC);
446 switch (RC->
getID()) {
447 case Hexagon::DoubleRegsRegClassID:
448 case Hexagon::HvxWRRegClassID:
449 Width =
MRI.getTargetRegisterInfo()->getRegSizeInBits(*RC) / 2;
450 if (RR.
Sub == Hexagon::isub_hi || RR.
Sub == Hexagon::vsub_hi)
462bool HexagonBitSimplify::parseRegSequence(
const MachineInstr &
I,
465 assert(
I.getOpcode() == TargetOpcode::REG_SEQUENCE);
466 unsigned Sub1 =
I.getOperand(2).getImm(), Sub2 =
I.getOperand(4).getImm();
467 auto &DstRC = *
MRI.getRegClass(
I.getOperand(0).getReg());
469 *
MRI.getTargetRegisterInfo());
472 assert((Sub1 == SubLo && Sub2 == SubHi) || (Sub1 == SubHi && Sub2 == SubLo));
473 if (Sub1 == SubLo && Sub2 == SubHi) {
474 SL =
I.getOperand(1);
475 SH =
I.getOperand(3);
478 if (Sub1 == SubHi && Sub2 == SubLo) {
479 SH =
I.getOperand(1);
480 SL =
I.getOperand(3);
492bool HexagonBitSimplify::getUsedBitsInStore(
unsigned Opc,
BitVector &Bits,
494 using namespace Hexagon;
499 case S2_storerbnew_io:
500 case S2_pstorerbt_io:
501 case S2_pstorerbf_io:
502 case S4_pstorerbtnew_io:
503 case S4_pstorerbfnew_io:
504 case S2_pstorerbnewt_io:
505 case S2_pstorerbnewf_io:
506 case S4_pstorerbnewtnew_io:
507 case S4_pstorerbnewfnew_io:
509 case S2_storerbnew_pi:
510 case S2_pstorerbt_pi:
511 case S2_pstorerbf_pi:
512 case S2_pstorerbtnew_pi:
513 case S2_pstorerbfnew_pi:
514 case S2_pstorerbnewt_pi:
515 case S2_pstorerbnewf_pi:
516 case S2_pstorerbnewtnew_pi:
517 case S2_pstorerbnewfnew_pi:
519 case S4_storerbnew_ap:
521 case S2_storerbnew_pr:
523 case S4_storerbnew_ur:
525 case S2_storerbnew_pbr:
527 case S2_storerbnew_pci:
529 case S2_storerbnew_pcr:
531 case S4_storerbnew_rr:
532 case S4_pstorerbt_rr:
533 case S4_pstorerbf_rr:
534 case S4_pstorerbtnew_rr:
535 case S4_pstorerbfnew_rr:
536 case S4_pstorerbnewt_rr:
537 case S4_pstorerbnewf_rr:
538 case S4_pstorerbnewtnew_rr:
539 case S4_pstorerbnewfnew_rr:
541 case S2_storerbnewgp:
542 case S4_pstorerbt_abs:
543 case S4_pstorerbf_abs:
544 case S4_pstorerbtnew_abs:
545 case S4_pstorerbfnew_abs:
546 case S4_pstorerbnewt_abs:
547 case S4_pstorerbnewf_abs:
548 case S4_pstorerbnewtnew_abs:
549 case S4_pstorerbnewfnew_abs:
550 Bits.set(Begin, Begin+8);
555 case S2_storerhnew_io:
556 case S2_pstorerht_io:
557 case S2_pstorerhf_io:
558 case S4_pstorerhtnew_io:
559 case S4_pstorerhfnew_io:
560 case S2_pstorerhnewt_io:
561 case S2_pstorerhnewf_io:
562 case S4_pstorerhnewtnew_io:
563 case S4_pstorerhnewfnew_io:
565 case S2_storerhnew_pi:
566 case S2_pstorerht_pi:
567 case S2_pstorerhf_pi:
568 case S2_pstorerhtnew_pi:
569 case S2_pstorerhfnew_pi:
570 case S2_pstorerhnewt_pi:
571 case S2_pstorerhnewf_pi:
572 case S2_pstorerhnewtnew_pi:
573 case S2_pstorerhnewfnew_pi:
575 case S4_storerhnew_ap:
577 case S2_storerhnew_pr:
579 case S4_storerhnew_ur:
581 case S2_storerhnew_pbr:
583 case S2_storerhnew_pci:
585 case S2_storerhnew_pcr:
587 case S4_pstorerht_rr:
588 case S4_pstorerhf_rr:
589 case S4_pstorerhtnew_rr:
590 case S4_pstorerhfnew_rr:
591 case S4_storerhnew_rr:
592 case S4_pstorerhnewt_rr:
593 case S4_pstorerhnewf_rr:
594 case S4_pstorerhnewtnew_rr:
595 case S4_pstorerhnewfnew_rr:
597 case S2_storerhnewgp:
598 case S4_pstorerht_abs:
599 case S4_pstorerhf_abs:
600 case S4_pstorerhtnew_abs:
601 case S4_pstorerhfnew_abs:
602 case S4_pstorerhnewt_abs:
603 case S4_pstorerhnewf_abs:
604 case S4_pstorerhnewtnew_abs:
605 case S4_pstorerhnewfnew_abs:
606 Bits.set(Begin, Begin+16);
611 case S2_pstorerft_io:
612 case S2_pstorerff_io:
613 case S4_pstorerftnew_io:
614 case S4_pstorerffnew_io:
616 case S2_pstorerft_pi:
617 case S2_pstorerff_pi:
618 case S2_pstorerftnew_pi:
619 case S2_pstorerffnew_pi:
627 case S4_pstorerft_rr:
628 case S4_pstorerff_rr:
629 case S4_pstorerftnew_rr:
630 case S4_pstorerffnew_rr:
632 case S4_pstorerft_abs:
633 case S4_pstorerff_abs:
634 case S4_pstorerftnew_abs:
635 case S4_pstorerffnew_abs:
636 Bits.set(Begin+16, Begin+32);
651bool HexagonBitSimplify::getUsedBits(
unsigned Opc,
unsigned OpN,
653 using namespace Hexagon;
657 if (OpN ==
D.getNumOperands()-1)
658 return getUsedBitsInStore(Opc, Bits, Begin);
670 Bits.set(Begin, Begin+8);
683 Bits.set(Begin, Begin+16);
691 Bits.set(Begin+16, Begin+32);
701 Bits.set(Begin, Begin+8);
711 case A2_addh_h16_sat_ll:
713 case A2_addh_l16_sat_ll:
716 case A2_subh_h16_sat_ll:
718 case A2_subh_l16_sat_ll:
719 case M2_mpy_acc_ll_s0:
720 case M2_mpy_acc_ll_s1:
721 case M2_mpy_acc_sat_ll_s0:
722 case M2_mpy_acc_sat_ll_s1:
725 case M2_mpy_nac_ll_s0:
726 case M2_mpy_nac_ll_s1:
727 case M2_mpy_nac_sat_ll_s0:
728 case M2_mpy_nac_sat_ll_s1:
729 case M2_mpy_rnd_ll_s0:
730 case M2_mpy_rnd_ll_s1:
731 case M2_mpy_sat_ll_s0:
732 case M2_mpy_sat_ll_s1:
733 case M2_mpy_sat_rnd_ll_s0:
734 case M2_mpy_sat_rnd_ll_s1:
735 case M2_mpyd_acc_ll_s0:
736 case M2_mpyd_acc_ll_s1:
739 case M2_mpyd_nac_ll_s0:
740 case M2_mpyd_nac_ll_s1:
741 case M2_mpyd_rnd_ll_s0:
742 case M2_mpyd_rnd_ll_s1:
743 case M2_mpyu_acc_ll_s0:
744 case M2_mpyu_acc_ll_s1:
747 case M2_mpyu_nac_ll_s0:
748 case M2_mpyu_nac_ll_s1:
749 case M2_mpyud_acc_ll_s0:
750 case M2_mpyud_acc_ll_s1:
753 case M2_mpyud_nac_ll_s0:
754 case M2_mpyud_nac_ll_s1:
755 if (OpN == 1 || OpN == 2) {
756 Bits.set(Begin, Begin+16);
763 case A2_addh_h16_sat_lh:
766 case A2_subh_h16_sat_lh:
767 case M2_mpy_acc_lh_s0:
768 case M2_mpy_acc_lh_s1:
769 case M2_mpy_acc_sat_lh_s0:
770 case M2_mpy_acc_sat_lh_s1:
773 case M2_mpy_nac_lh_s0:
774 case M2_mpy_nac_lh_s1:
775 case M2_mpy_nac_sat_lh_s0:
776 case M2_mpy_nac_sat_lh_s1:
777 case M2_mpy_rnd_lh_s0:
778 case M2_mpy_rnd_lh_s1:
779 case M2_mpy_sat_lh_s0:
780 case M2_mpy_sat_lh_s1:
781 case M2_mpy_sat_rnd_lh_s0:
782 case M2_mpy_sat_rnd_lh_s1:
783 case M2_mpyd_acc_lh_s0:
784 case M2_mpyd_acc_lh_s1:
787 case M2_mpyd_nac_lh_s0:
788 case M2_mpyd_nac_lh_s1:
789 case M2_mpyd_rnd_lh_s0:
790 case M2_mpyd_rnd_lh_s1:
791 case M2_mpyu_acc_lh_s0:
792 case M2_mpyu_acc_lh_s1:
795 case M2_mpyu_nac_lh_s0:
796 case M2_mpyu_nac_lh_s1:
797 case M2_mpyud_acc_lh_s0:
798 case M2_mpyud_acc_lh_s1:
801 case M2_mpyud_nac_lh_s0:
802 case M2_mpyud_nac_lh_s1:
805 case A2_addh_l16_sat_hl:
807 case A2_subh_l16_sat_hl:
809 Bits.set(Begin, Begin+16);
813 Bits.set(Begin+16, Begin+32);
820 case A2_addh_h16_sat_hl:
823 case A2_subh_h16_sat_hl:
824 case M2_mpy_acc_hl_s0:
825 case M2_mpy_acc_hl_s1:
826 case M2_mpy_acc_sat_hl_s0:
827 case M2_mpy_acc_sat_hl_s1:
830 case M2_mpy_nac_hl_s0:
831 case M2_mpy_nac_hl_s1:
832 case M2_mpy_nac_sat_hl_s0:
833 case M2_mpy_nac_sat_hl_s1:
834 case M2_mpy_rnd_hl_s0:
835 case M2_mpy_rnd_hl_s1:
836 case M2_mpy_sat_hl_s0:
837 case M2_mpy_sat_hl_s1:
838 case M2_mpy_sat_rnd_hl_s0:
839 case M2_mpy_sat_rnd_hl_s1:
840 case M2_mpyd_acc_hl_s0:
841 case M2_mpyd_acc_hl_s1:
844 case M2_mpyd_nac_hl_s0:
845 case M2_mpyd_nac_hl_s1:
846 case M2_mpyd_rnd_hl_s0:
847 case M2_mpyd_rnd_hl_s1:
848 case M2_mpyu_acc_hl_s0:
849 case M2_mpyu_acc_hl_s1:
852 case M2_mpyu_nac_hl_s0:
853 case M2_mpyu_nac_hl_s1:
854 case M2_mpyud_acc_hl_s0:
855 case M2_mpyud_acc_hl_s1:
858 case M2_mpyud_nac_hl_s0:
859 case M2_mpyud_nac_hl_s1:
861 Bits.set(Begin+16, Begin+32);
865 Bits.set(Begin, Begin+16);
872 case A2_addh_h16_sat_hh:
875 case A2_subh_h16_sat_hh:
876 case M2_mpy_acc_hh_s0:
877 case M2_mpy_acc_hh_s1:
878 case M2_mpy_acc_sat_hh_s0:
879 case M2_mpy_acc_sat_hh_s1:
882 case M2_mpy_nac_hh_s0:
883 case M2_mpy_nac_hh_s1:
884 case M2_mpy_nac_sat_hh_s0:
885 case M2_mpy_nac_sat_hh_s1:
886 case M2_mpy_rnd_hh_s0:
887 case M2_mpy_rnd_hh_s1:
888 case M2_mpy_sat_hh_s0:
889 case M2_mpy_sat_hh_s1:
890 case M2_mpy_sat_rnd_hh_s0:
891 case M2_mpy_sat_rnd_hh_s1:
892 case M2_mpyd_acc_hh_s0:
893 case M2_mpyd_acc_hh_s1:
896 case M2_mpyd_nac_hh_s0:
897 case M2_mpyd_nac_hh_s1:
898 case M2_mpyd_rnd_hh_s0:
899 case M2_mpyd_rnd_hh_s1:
900 case M2_mpyu_acc_hh_s0:
901 case M2_mpyu_acc_hh_s1:
904 case M2_mpyu_nac_hh_s0:
905 case M2_mpyu_nac_hh_s1:
906 case M2_mpyud_acc_hh_s0:
907 case M2_mpyud_acc_hh_s1:
910 case M2_mpyud_nac_hh_s0:
911 case M2_mpyud_nac_hh_s1:
912 if (OpN == 1 || OpN == 2) {
913 Bits.set(Begin+16, Begin+32);
929 auto *RC =
MRI.getRegClass(RR.
Reg);
933 *
MRI.getTargetRegisterInfo());
941 switch (RC->
getID()) {
942 case Hexagon::DoubleRegsRegClassID:
943 VerifySR(RC, RR.
Sub);
944 return &Hexagon::IntRegsRegClass;
945 case Hexagon::HvxWRRegClassID:
946 VerifySR(RC, RR.
Sub);
947 return &Hexagon::HvxVRRegClass;
961 auto *DRC = getFinalVRegClass(RD,
MRI);
965 return DRC == getFinalVRegClass(RS,
MRI);
974 return Op.getSubReg() != NewSub && Op.isTied();
980 class DeadCodeElimination {
984 MDT(mdt),
MRI(mf.getRegInfo()) {}
987 return runOnNode(MDT.getRootNode());
991 bool isDead(
unsigned R)
const;
1002bool DeadCodeElimination::isDead(
unsigned R)
const {
1007 if (UseI->
isPHI()) {
1019 bool Changed =
false;
1021 for (
auto *DTN : children<MachineDomTreeNode*>(
N))
1022 Changed |= runOnNode(DTN);
1025 std::vector<MachineInstr*> Instrs;
1027 Instrs.push_back(&
MI);
1029 for (
auto *
MI : Instrs) {
1030 unsigned Opc =
MI->getOpcode();
1033 if (Opc == TargetOpcode::LIFETIME_START ||
1034 Opc == TargetOpcode::LIFETIME_END)
1037 if (
MI->isInlineAsm())
1040 if (!
MI->isPHI() && !
MI->isSafeToMove(Store))
1043 bool AllDead =
true;
1045 for (
auto &
Op :
MI->operands()) {
1046 if (!
Op.isReg() || !
Op.isDef())
1049 if (!
R.isVirtual() || !
isDead(R)) {
1059 for (
unsigned Reg : Regs)
1060 MRI.markUsesInDebugValueAsUndef(Reg);
1079 class RedundantInstrElimination :
public Transformation {
1083 : Transformation(
true), HII(hii), HRI(hri),
MRI(mri),
BT(bt) {}
1089 unsigned &LostB,
unsigned &LostE);
1091 unsigned &LostB,
unsigned &LostE);
1092 bool computeUsedBits(
unsigned Reg,
BitVector &Bits);
1108bool RedundantInstrElimination::isLossyShiftLeft(
const MachineInstr &
MI,
1109 unsigned OpN,
unsigned &LostB,
unsigned &LostE) {
1110 using namespace Hexagon;
1112 unsigned Opc =
MI.getOpcode();
1113 unsigned ImN, RegN, Width;
1120 case S2_asl_i_p_acc:
1121 case S2_asl_i_p_and:
1122 case S2_asl_i_p_nac:
1124 case S2_asl_i_p_xacc:
1134 case S2_addasl_rrri:
1135 case S4_andi_asl_ri:
1137 case S4_addi_asl_ri:
1138 case S4_subi_asl_ri:
1139 case S2_asl_i_r_acc:
1140 case S2_asl_i_r_and:
1141 case S2_asl_i_r_nac:
1143 case S2_asl_i_r_sat:
1144 case S2_asl_i_r_xacc:
1156 assert(
MI.getOperand(ImN).isImm());
1157 unsigned S =
MI.getOperand(ImN).getImm();
1168bool RedundantInstrElimination::isLossyShiftRight(
const MachineInstr &
MI,
1169 unsigned OpN,
unsigned &LostB,
unsigned &LostE) {
1170 using namespace Hexagon;
1172 unsigned Opc =
MI.getOpcode();
1180 case S2_asr_i_p_acc:
1181 case S2_asr_i_p_and:
1182 case S2_asr_i_p_nac:
1184 case S2_lsr_i_p_acc:
1185 case S2_lsr_i_p_and:
1186 case S2_lsr_i_p_nac:
1188 case S2_lsr_i_p_xacc:
1197 case S4_andi_lsr_ri:
1199 case S4_addi_lsr_ri:
1200 case S4_subi_lsr_ri:
1201 case S2_asr_i_r_acc:
1202 case S2_asr_i_r_and:
1203 case S2_asr_i_r_nac:
1205 case S2_lsr_i_r_acc:
1206 case S2_lsr_i_r_and:
1207 case S2_lsr_i_r_nac:
1209 case S2_lsr_i_r_xacc:
1221 assert(
MI.getOperand(ImN).isImm());
1222 unsigned S =
MI.getOperand(ImN).getImm();
1232bool RedundantInstrElimination::computeUsedBits(
unsigned Reg,
BitVector &Bits) {
1235 std::vector<unsigned> Pending;
1236 Pending.push_back(Reg);
1238 for (
unsigned i = 0; i < Pending.size(); ++i) {
1239 unsigned R = Pending[i];
1243 for (
auto I =
MRI.use_begin(R), E =
MRI.use_end();
I != E; ++
I) {
1246 if (!HBS::getSubregMask(UR,
B, W,
MRI))
1253 Pending.push_back(DefR);
1255 if (!computeUsedBits(UseI,
I.getOperandNo(), Used,
B))
1278bool RedundantInstrElimination::computeUsedBits(
const MachineInstr &
MI,
1280 unsigned Opc =
MI.getOpcode();
1282 bool GotBits = HBS::getUsedBits(Opc, OpN,
T, Begin, HII);
1287 if (isLossyShiftLeft(
MI, OpN, LB, LE) || isLossyShiftRight(
MI, OpN, LB, LE)) {
1288 assert(
MI.getOperand(OpN).isReg());
1291 uint16_t Width = HRI.getRegSizeInBits(*RC);
1294 T.set(Begin, Begin+Width);
1295 assert(LB <= LE && LB < Width && LE <= Width);
1296 T.reset(Begin+LB, Begin+LE);
1312 if (!HBS::getSubregMask(RD, DB, DW,
MRI))
1315 if (!HBS::getSubregMask(RS, SB, SW,
MRI))
1321 if (!computeUsedBits(RD.
Reg, Used))
1324 for (
unsigned i = 0; i != DW; ++i)
1325 if (Used[i+DB] && DC[DB+i] != SC[SB+i])
1331 const RegisterSet&) {
1334 bool Changed =
false;
1336 for (
auto I =
B.begin(), E =
B.end();
I != E; ++
I) {
1339 if (
MI->getOpcode() == TargetOpcode::COPY)
1341 if (
MI->isPHI() ||
MI->hasUnmodeledSideEffects() ||
MI->isInlineAsm())
1343 unsigned NumD =
MI->getDesc().getNumDefs();
1354 for (
auto &
Op :
MI->uses()) {
1360 if (!HBS::isTransparentCopy(RD, RS,
MRI))
1364 if (!HBS::getSubregMask(RS, BN, BW,
MRI))
1368 if (!usedBitsEqual(RD, RS) && !HBS::isEqual(DC, 0, SC, BN, BW))
1376 BuildMI(
B, At,
DL, HII.get(TargetOpcode::COPY), NewR)
1378 HBS::replaceSubWithSub(RD.
Reg, RD.
Sub, NewR, 0,
MRI);
1401 class ConstGeneration :
public Transformation {
1405 : Transformation(
true), HII(hii),
MRI(mri),
BT(bt) {}
1423 unsigned Opc =
MI.getOpcode();
1425 case Hexagon::A2_combineii:
1426 case Hexagon::A4_combineii:
1427 case Hexagon::A2_tfrsi:
1428 case Hexagon::A2_tfrpi:
1429 case Hexagon::PS_true:
1430 case Hexagon::PS_false:
1431 case Hexagon::CONST32:
1432 case Hexagon::CONST64:
1445 if (RC == &Hexagon::IntRegsRegClass) {
1446 BuildMI(
B, At,
DL, HII.get(Hexagon::A2_tfrsi), Reg)
1451 if (RC == &Hexagon::DoubleRegsRegClass) {
1453 BuildMI(
B, At,
DL, HII.get(Hexagon::A2_tfrpi), Reg)
1459 if (isInt<8>(
Lo) || isInt<8>(
Hi)) {
1460 unsigned Opc = isInt<8>(
Lo) ? Hexagon::A2_combineii
1461 : Hexagon::A4_combineii;
1471 if (!HST.isTinyCore() ||
1473 BuildMI(
B, At,
DL, HII.get(Hexagon::CONST64), Reg)
1479 if (RC == &Hexagon::PredRegsRegClass) {
1482 Opc = Hexagon::PS_false;
1483 else if ((
C & 0xFF) == 0xFF)
1484 Opc = Hexagon::PS_true;
1497 bool Changed =
false;
1500 for (
auto I =
B.begin(), E =
B.end();
I != E; ++
I) {
1504 HBS::getInstrDefs(*
I, Defs);
1505 if (Defs.count() != 1)
1512 if (HBS::getConst(DRC, 0, DRC.
width(), U)) {
1515 auto At =
I->isPHI() ?
B.getFirstNonPHI() :
I;
1518 HBS::replaceReg(DR, ImmReg,
MRI);
1519 BT.
put(ImmReg, DRC);
1532 class CopyGeneration :
public Transformation {
1536 : Transformation(
true), HII(hii), HRI(hri),
MRI(mri),
BT(bt) {}
1553 class CopyPropagation :
public Transformation {
1556 : Transformation(
false), HRI(hri),
MRI(mri) {}
1560 static bool isCopyReg(
unsigned Opc,
bool NoConv);
1578 auto *FRC = HBS::getFinalVRegClass(Inp,
MRI);
1580 if (!HBS::getSubregMask(Inp,
B, W,
MRI))
1583 for (
Register R = AVs.find_first(); R; R = AVs.find_next(R)) {
1584 if (!
BT.
has(R) || Forbidden[R])
1587 unsigned RW = RC.
width();
1589 if (FRC !=
MRI.getRegClass(R))
1591 if (!HBS::isTransparentCopy(R, Inp,
MRI))
1593 if (!HBS::isEqual(InpRC,
B, RC, 0, W))
1604 if (
MRI.getRegClass(R) != &Hexagon::DoubleRegsRegClass)
1607 if (HBS::isEqual(InpRC,
B, RC, 0, W))
1608 Out.
Sub = Hexagon::isub_lo;
1609 else if (HBS::isEqual(InpRC,
B, RC, W, W))
1610 Out.
Sub = Hexagon::isub_hi;
1614 if (HBS::isTransparentCopy(Out, Inp,
MRI))
1621 const RegisterSet &AVs) {
1625 bool Changed =
false;
1628 for (
auto I =
B.begin(), E =
B.end();
I != E; ++
I, AVB.insert(Defs)) {
1630 HBS::getInstrDefs(*
I, Defs);
1632 unsigned Opc =
I->getOpcode();
1633 if (CopyPropagation::isCopyReg(Opc,
false) ||
1634 ConstGeneration::isTfrConst(*
I))
1638 auto At =
I->isPHI() ?
B.getFirstNonPHI() :
I;
1640 for (
Register R = Defs.find_first(); R; R = Defs.find_next(R)) {
1642 auto *FRC = HBS::getFinalVRegClass(R,
MRI);
1644 if (findMatch(R, MR, AVB)) {
1646 BuildMI(
B, At,
DL, HII.get(TargetOpcode::COPY), NewR)
1649 HBS::replaceReg(R, NewR,
MRI);
1650 Forbidden.insert(R);
1654 if (FRC == &Hexagon::DoubleRegsRegClass ||
1655 FRC == &Hexagon::HvxWRRegClass) {
1662 if (findMatch(TL,
ML, AVB) && findMatch(TH, MH, AVB)) {
1663 auto *FRC = HBS::getFinalVRegClass(R,
MRI);
1665 BuildMI(
B, At,
DL, HII.get(TargetOpcode::REG_SEQUENCE), NewR)
1671 HBS::replaceReg(R, NewR,
MRI);
1672 Forbidden.insert(R);
1681bool CopyPropagation::isCopyReg(
unsigned Opc,
bool NoConv) {
1683 case TargetOpcode::COPY:
1684 case TargetOpcode::REG_SEQUENCE:
1685 case Hexagon::A4_combineir:
1686 case Hexagon::A4_combineri:
1688 case Hexagon::A2_tfr:
1689 case Hexagon::A2_tfrp:
1690 case Hexagon::A2_combinew:
1691 case Hexagon::V6_vcombine:
1700 bool Changed =
false;
1701 unsigned Opc =
MI.getOpcode();
1703 assert(
MI.getOperand(0).getSubReg() == 0);
1706 case TargetOpcode::COPY:
1707 case Hexagon::A2_tfr:
1708 case Hexagon::A2_tfrp: {
1710 if (!HBS::isTransparentCopy(RD, RS,
MRI))
1713 Changed = HBS::replaceRegWithSub(RD.
Reg, RS.
Reg, RS.
Sub,
MRI);
1715 Changed = HBS::replaceReg(RD.
Reg, RS.
Reg,
MRI);
1718 case TargetOpcode::REG_SEQUENCE: {
1720 if (HBS::parseRegSequence(
MI, SL, SH,
MRI)) {
1724 Changed = HBS::replaceSubWithSub(RD.
Reg, SubLo, SL.
Reg, SL.
Sub,
MRI);
1725 Changed |= HBS::replaceSubWithSub(RD.
Reg, SubHi, SH.
Reg, SH.
Sub,
MRI);
1729 case Hexagon::A2_combinew:
1730 case Hexagon::V6_vcombine: {
1735 Changed = HBS::replaceSubWithSub(RD.
Reg, SubLo, RL.Reg, RL.Sub,
MRI);
1736 Changed |= HBS::replaceSubWithSub(RD.
Reg, SubHi, RH.
Reg, RH.
Sub,
MRI);
1739 case Hexagon::A4_combineir:
1740 case Hexagon::A4_combineri: {
1741 unsigned SrcX = (Opc == Hexagon::A4_combineir) ? 2 : 1;
1742 unsigned Sub = (Opc == Hexagon::A4_combineir) ? Hexagon::isub_lo
1745 Changed = HBS::replaceSubWithSub(RD.
Reg, Sub, RS.
Reg, RS.
Sub,
MRI);
1753 std::vector<MachineInstr*> Instrs;
1755 Instrs.push_back(&
MI);
1757 bool Changed =
false;
1758 for (
auto *
I : Instrs) {
1759 unsigned Opc =
I->getOpcode();
1760 if (!CopyPropagation::isCopyReg(Opc,
true))
1762 Changed |= propagateRegCopy(*
I);
1773 class BitSimplification :
public Transformation {
1778 : Transformation(
true), MDT(mdt), HII(hii), HRI(hri),
MRI(mri),
1789 unsigned B, RegHalf &RH);
1794 unsigned getCombineOpcode(
bool HLow,
bool LLow);
1816 std::vector<MachineInstr*> NewMIs;
1831bool BitSimplification::matchHalf(
unsigned SelfR,
1842 while (
I <
B+16 && RC[
I].num())
1848 unsigned P = RC[
I].RefI.Pos;
1851 unsigned Pos =
P - (
I-
B);
1853 if (Reg == 0 || Reg == SelfR)
1855 if (!
Reg.isVirtual())
1861 if (Pos+16 >
SC.width())
1864 for (
unsigned i = 0; i < 16; ++i) {
1873 if (RC[i+
B] != SC[i+Pos])
1880 Sub = Hexagon::isub_lo;
1884 Sub = Hexagon::isub_lo;
1888 Sub = Hexagon::isub_hi;
1892 Sub = Hexagon::isub_hi;
1903 if (!HBS::getFinalVRegClass(RH,
MRI))
1911 auto *OpRC = HII.getRegClass(HII.get(Opc), OpNum, &HRI, MF);
1912 auto *
RRC = HBS::getFinalVRegClass(R,
MRI);
1913 return OpRC->hasSubClassEq(RRC);
1918bool BitSimplification::matchPackhl(
unsigned SelfR,
1921 RegHalf L1, H1, L2, H2;
1923 if (!matchHalf(SelfR, RC, 0, L2) || !matchHalf(SelfR, RC, 16, L1))
1925 if (!matchHalf(SelfR, RC, 32, H2) || !matchHalf(SelfR, RC, 48, H1))
1929 if (H1.Reg != L1.Reg || H1.Sub != L1.Sub || H1.Low || !L1.Low)
1931 if (H2.Reg != L2.Reg || H2.Sub != L2.Sub || H2.Low || !L2.Low)
1939unsigned BitSimplification::getCombineOpcode(
bool HLow,
bool LLow) {
1940 return HLow ? LLow ? Hexagon::A2_combine_ll
1941 : Hexagon::A2_combine_lh
1942 : LLow ? Hexagon::A2_combine_hl
1943 : Hexagon::A2_combine_hh;
1950 unsigned Opc =
MI->getOpcode();
1951 if (Opc != Hexagon::S2_storerh_io)
1960 unsigned B = (RS.
Sub == Hexagon::isub_hi) ? 32 : 0;
1961 if (!matchHalf(0, RC,
B,
H))
1965 MI->setDesc(HII.get(Hexagon::S2_storerf_io));
1974 unsigned Opc =
MI->getOpcode();
1977 case Hexagon::S2_storeri_io:
1980 case Hexagon::S2_storerh_io:
1983 case Hexagon::S2_storerb_io:
1990 if (!
MI->getOperand(0).isReg())
2006 if (!HBS::getConst(RC, 0, RC.
width(), U))
2012 case Hexagon::S2_storerb_io:
2015 case Hexagon::S2_storerh_io:
2018 case Hexagon::S2_storeri_io:
2029 MI->removeOperand(2);
2031 case Hexagon::S2_storerb_io:
2032 MI->setDesc(HII.get(Hexagon::S4_storeirb_io));
2034 case Hexagon::S2_storerh_io:
2035 MI->setDesc(HII.get(Hexagon::S4_storeirh_io));
2037 case Hexagon::S2_storeri_io:
2038 MI->setDesc(HII.get(Hexagon::S4_storeiri_io));
2051 unsigned Opc =
MI->getOpcode();
2052 if (Opc == Hexagon::S2_packhl)
2055 if (!matchPackhl(RD.
Reg, RC, Rs, Rt))
2057 if (!validateReg(Rs, Hexagon::S2_packhl, 1) ||
2058 !validateReg(Rt, Hexagon::S2_packhl, 2))
2062 Register NewR =
MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
2064 auto At =
MI->isPHI() ?
B.getFirstNonPHI()
2066 BuildMI(
B, At,
DL, HII.get(Hexagon::S2_packhl), NewR)
2069 HBS::replaceSubWithSub(RD.
Reg, RD.
Sub, NewR, 0,
MRI);
2080 if (!matchHalf(RD.
Reg, RC, 0, L) || !HBS::isZero(RC, 16, 16))
2083 unsigned Opc =
MI->getOpcode();
2090 auto At =
MI->isPHI() ?
B.getFirstNonPHI()
2092 if (
L.Low && Opc != Hexagon::A2_zxth) {
2093 if (validateReg(L, Hexagon::A2_zxth, 1)) {
2094 NewR =
MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
2095 BuildMI(
B, At,
DL, HII.get(Hexagon::A2_zxth), NewR)
2098 }
else if (!
L.Low && Opc != Hexagon::S2_lsr_i_r) {
2099 if (validateReg(L, Hexagon::S2_lsr_i_r, 1)) {
2100 NewR =
MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
2108 HBS::replaceSubWithSub(RD.
Reg, RD.
Sub, NewR, 0,
MRI);
2119 if (!matchHalf(RD.
Reg, RC, 0, L) || !matchHalf(RD.
Reg, RC, 16,
H))
2122 if (
L.Reg ==
H.Reg &&
L.Sub ==
H.Sub && !
H.Low &&
L.Low)
2125 unsigned Opc =
MI->getOpcode();
2126 unsigned COpc = getCombineOpcode(
H.Low,
L.Low);
2129 if (!validateReg(
H, COpc, 1) || !validateReg(L, COpc, 2))
2134 Register NewR =
MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
2135 auto At =
MI->isPHI() ?
B.getFirstNonPHI()
2140 HBS::replaceSubWithSub(RD.
Reg, RD.
Sub, NewR, 0,
MRI);
2149 unsigned Opc =
MI->getOpcode();
2151 case Hexagon::A2_zxtb:
2152 case Hexagon::A2_zxth:
2153 case Hexagon::S2_extractu:
2156 if (Opc == Hexagon::A2_andir &&
MI->getOperand(2).isImm()) {
2157 int32_t
Imm =
MI->getOperand(2).getImm();
2162 if (
MI->hasUnmodeledSideEffects() ||
MI->isInlineAsm())
2165 while (W > 0 && RC[W-1].is(0))
2167 if (W == 0 || W == RC.
width())
2169 unsigned NewOpc = (
W == 8) ? Hexagon::A2_zxtb
2170 : (W == 16) ? Hexagon::A2_zxth
2171 : (
W < 10) ? Hexagon::A2_andir
2172 : Hexagon::S2_extractu;
2176 for (
auto &
Op :
MI->uses()) {
2184 if (!HBS::getSubregMask(RS, BN, BW,
MRI))
2186 if (BW < W || !HBS::isEqual(RC, 0, SC, BN, W))
2188 if (!validateReg(RS, NewOpc, 1))
2191 Register NewR =
MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
2192 auto At =
MI->isPHI() ?
B.getFirstNonPHI()
2194 auto MIB =
BuildMI(
B, At,
DL, HII.get(NewOpc), NewR)
2196 if (NewOpc == Hexagon::A2_andir)
2197 MIB.
addImm((1 << W) - 1);
2198 else if (NewOpc == Hexagon::S2_extractu)
2200 HBS::replaceSubWithSub(RD.
Reg, RD.
Sub, NewR, 0,
MRI);
2209 const RegisterSet &AVs) {
2217 unsigned Opc =
MI->getOpcode();
2219 case Hexagon::A4_bitsplit:
2220 case Hexagon::A4_bitspliti:
2229 unsigned Z =
C.width();
2230 while (Z > 0 &&
C[Z-1].is(0))
2232 return C.width() -
Z;
2236 unsigned Z = ctlz(RC);
2237 if (Z == 0 || Z == W)
2251 for (
unsigned i = 1; i <
W-
Z; ++i) {
2255 if (
V.RefI.Reg != SrcR ||
V.RefI.Pos != Pos+i)
2260 for (
unsigned S = AVs.find_first(); S; S = AVs.find_next(S)) {
2263 unsigned SRC =
MRI.getRegClass(S)->getID();
2264 if (SRC != Hexagon::IntRegsRegClassID &&
2265 SRC != Hexagon::DoubleRegsRegClassID)
2270 if (
SC.width() != W || ctlz(SC) != W-Z)
2278 if (Pos <=
P && (Pos + W-Z) !=
P)
2280 if (
P < Pos && (
P + Z) != Pos)
2283 if (std::min(
P, Pos) != 0 && std::min(
P, Pos) != 32)
2287 for (
I = 1;
I <
Z; ++
I) {
2291 if (
V.RefI.Reg != SrcR ||
V.RefI.Pos !=
P+
I)
2304 auto At = DefS->
isPHI() ?
B.getFirstNonPHI()
2306 if (
MRI.getRegClass(SrcR)->getID() == Hexagon::DoubleRegsRegClassID)
2307 SrcSR = (std::min(Pos,
P) == 32) ? Hexagon::isub_hi : Hexagon::isub_lo;
2308 if (!validateReg({SrcR,SrcSR}, Hexagon::A4_bitspliti, 1))
2310 unsigned ImmOp = Pos <=
P ?
W-
Z :
Z;
2315 if (
In->getOpcode() != Hexagon::A4_bitspliti)
2320 if (
In->getOperand(2).getImm() != ImmOp)
2326 if (!MDT.dominates(DefI, &*At))
2335 NewR =
MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
2336 auto NewBS =
BuildMI(
B, At,
DL, HII.get(Hexagon::A4_bitspliti), NewR)
2339 NewMIs.push_back(NewBS);
2342 HBS::replaceRegWithSub(RD.
Reg, NewR, Hexagon::isub_lo,
MRI);
2343 HBS::replaceRegWithSub(S, NewR, Hexagon::isub_hi,
MRI);
2345 HBS::replaceRegWithSub(S, NewR, Hexagon::isub_lo,
MRI);
2346 HBS::replaceRegWithSub(RD.
Reg, NewR, Hexagon::isub_hi,
MRI);
2362 unsigned Opc =
MI->getOpcode();
2363 if (Opc != Hexagon::S2_tstbit_i)
2366 unsigned BN =
MI->getOperand(2).getImm();
2370 if (!
BT.
has(RS.
Reg) || !HBS::getSubregMask(RS,
F, W,
MRI))
2373 auto At =
MI->isPHI() ?
B.getFirstNonPHI()
2383 unsigned P = std::numeric_limits<unsigned>::max();
2385 if (TC == &Hexagon::DoubleRegsRegClass) {
2387 RR.
Sub = Hexagon::isub_lo;
2390 RR.
Sub = Hexagon::isub_hi;
2392 }
else if (TC == &Hexagon::IntRegsRegClass) {
2395 if (
P != std::numeric_limits<unsigned>::max()) {
2396 Register NewR =
MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
2397 BuildMI(
B, At,
DL, HII.get(Hexagon::S2_tstbit_i), NewR)
2400 HBS::replaceReg(RD.
Reg, NewR,
MRI);
2404 }
else if (
V.is(0) ||
V.is(1)) {
2405 Register NewR =
MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
2406 unsigned NewOpc =
V.is(0) ? Hexagon::PS_false : Hexagon::PS_true;
2408 HBS::replaceReg(RD.
Reg, NewR,
MRI);
2422 const RegisterSet &AVs) {
2440 if (FRC != &Hexagon::IntRegsRegClass && FRC != &Hexagon::DoubleRegsRegClass)
2460 for (
unsigned I = 0;
I !=
W; ++
I) {
2469 if (TopV.
is(0) || TopV.
is(1)) {
2470 bool S = TopV.
is(1);
2471 for (--W;
W > 0 && RC[
W-1].is(S); --
W)
2483 for (--W;
W > 0 && RC[
W-1] == TopV; --
W)
2498 dbgs() <<
"Cell: " << RC <<
'\n';
2499 dbgs() <<
"Expected bitfield size: " <<
Len <<
" bits, "
2500 << (
Signed ?
"sign" :
"zero") <<
"-extended\n";
2503 bool Changed =
false;
2505 for (
unsigned R = AVs.find_first();
R != 0;
R = AVs.find_next(R)) {
2509 unsigned SW =
SC.width();
2515 if (SW < RW || (SW % RW) != 0)
2522 while (Off <= SW-Len) {
2523 unsigned OE = (
Off+
Len)/RW;
2534 if (HBS::isEqual(RC, 0, SC, Off, Len))
2543 unsigned ExtOpc = 0;
2546 ExtOpc =
Signed ? Hexagon::A2_sxtb : Hexagon::A2_zxtb;
2548 ExtOpc =
Signed ? Hexagon::A2_sxth : Hexagon::A2_zxth;
2549 else if (Len < 10 && !
Signed)
2550 ExtOpc = Hexagon::A2_andir;
2554 Signed ? (RW == 32 ? Hexagon::S4_extract : Hexagon::S4_extractp)
2555 : (RW == 32 ? Hexagon::S2_extractu : Hexagon::S2_extractup);
2559 if (RW != SW && RW*2 != SW)
2562 SR = (
Off/RW == 0) ? Hexagon::isub_lo : Hexagon::isub_hi;
2565 if (!validateReg({
R,SR}, ExtOpc, 1))
2569 if (
MI->getOpcode() == ExtOpc) {
2579 auto At =
MI->isPHI() ?
B.getFirstNonPHI()
2581 auto MIB =
BuildMI(
B, At,
DL, HII.get(ExtOpc), NewR)
2584 case Hexagon::A2_sxtb:
2585 case Hexagon::A2_zxtb:
2586 case Hexagon::A2_sxth:
2587 case Hexagon::A2_zxth:
2589 case Hexagon::A2_andir:
2590 MIB.
addImm((1u << Len) - 1);
2592 case Hexagon::S4_extract:
2593 case Hexagon::S2_extractu:
2594 case Hexagon::S4_extractp:
2595 case Hexagon::S2_extractup:
2603 HBS::replaceReg(RD.
Reg, NewR,
MRI);
2614 unsigned Opc =
MI->getOpcode();
2615 if (Opc != Hexagon::A4_rcmpeqi && Opc != Hexagon::A4_rcmpneqi)
2622 if (FRC != &Hexagon::IntRegsRegClass && FRC != &Hexagon::DoubleRegsRegClass)
2628 auto At =
MI->isPHI() ?
B.getFirstNonPHI()
2631 bool KnownNZ =
false;
2638 if (!HBS::getSubregMask(SR,
F, W,
MRI))
2649 auto ReplaceWithConst = [&](
int C) {
2651 BuildMI(
B, At,
DL, HII.get(Hexagon::A2_tfrsi), NewR)
2653 HBS::replaceReg(RD.
Reg, NewR,
MRI);
2664 if (
Op.isGlobal() ||
Op.isBlockAddress())
2667 return Op.getImm() != 0;
2669 return !
Op.getCImm()->isZero();
2671 return !
Op.getFPImm()->isZero();
2676 if (
Op.isGlobal() ||
Op.isBlockAddress())
2679 return Op.getImm() == 0;
2681 return Op.getCImm()->isZero();
2683 return Op.getFPImm()->isZero();
2689 if (KnownZ || KnownNZ) {
2690 assert(KnownZ != KnownNZ &&
"Register cannot be both 0 and non-0");
2691 return ReplaceWithConst(KnownZ == (Opc == Hexagon::A4_rcmpeqi));
2699 if (SR.
Sub == 0 && InpDef->
getOpcode() == Hexagon::C2_muxii) {
2703 bool KnownNZ1 = IsNonZero(Src1), KnownNZ2 = IsNonZero(Src2);
2704 if (KnownNZ1 && KnownNZ2)
2705 return ReplaceWithConst(Opc == Hexagon::A4_rcmpneqi);
2707 bool KnownZ1 = IsZero(Src1), KnownZ2 = IsZero(Src2);
2708 if (KnownZ1 && KnownZ2)
2709 return ReplaceWithConst(Opc == Hexagon::A4_rcmpeqi);
2714 if ((KnownZ1 || KnownNZ1) && (KnownZ2 || KnownNZ2)) {
2716 BuildMI(
B, At,
DL, HII.get(Hexagon::C2_muxii), NewR)
2718 .
addImm(KnownZ1 == (Opc == Hexagon::A4_rcmpeqi))
2719 .
addImm(KnownZ2 == (Opc == Hexagon::A4_rcmpeqi));
2720 HBS::replaceReg(RD.
Reg, NewR,
MRI);
2734 const RegisterSet &AVs) {
2737 bool Changed =
false;
2741 for (
auto I =
B.begin(), E =
B.end();
I != E; ++
I, AVB.insert(Defs)) {
2744 HBS::getInstrDefs(*
MI, Defs);
2746 unsigned Opc =
MI->getOpcode();
2747 if (Opc == TargetOpcode::COPY || Opc == TargetOpcode::REG_SEQUENCE)
2750 if (
MI->mayStore()) {
2751 bool T = genStoreUpperHalf(
MI);
2752 T =
T || genStoreImmediate(
MI);
2757 if (Defs.count() != 1)
2768 if (FRC->
getID() == Hexagon::DoubleRegsRegClassID) {
2769 bool T = genPackhl(
MI, RD, RC);
2770 T =
T || simplifyExtractLow(
MI, RD, RC, AVB);
2775 if (FRC->
getID() == Hexagon::IntRegsRegClassID) {
2776 bool T = genBitSplit(
MI, RD, RC, AVB);
2777 T =
T || simplifyExtractLow(
MI, RD, RC, AVB);
2778 T =
T || genExtractHalf(
MI, RD, RC);
2779 T =
T || genCombineHalf(
MI, RD, RC);
2780 T =
T || genExtractLow(
MI, RD, RC);
2781 T =
T || simplifyRCmp0(
MI, RD);
2786 if (FRC->
getID() == Hexagon::PredRegsRegClassID) {
2787 bool T = simplifyTstbit(
MI, RD, RC);
2801 auto &HII = *HST.getInstrInfo();
2803 MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
2807 Changed = DeadCodeElimination(MF, *MDT).run();
2817 ConstGeneration ImmG(
BT, HII,
MRI);
2818 Changed |= visitBlock(Entry, ImmG, AIG);
2821 RedundantInstrElimination RIE(
BT, HII, HRI,
MRI);
2822 bool Ried = visitBlock(Entry, RIE, ARE);
2829 CopyGeneration CopyG(
BT, HII, HRI,
MRI);
2830 Changed |= visitBlock(Entry, CopyG, ACG);
2833 CopyPropagation CopyP(HRI,
MRI);
2834 Changed |= visitBlock(Entry, CopyP, ACP);
2836 Changed = DeadCodeElimination(MF, *MDT).run() || Changed;
2840 BitSimplification BitS(
BT, *MDT, HII, HRI,
MRI, MF);
2841 Changed |= visitBlock(Entry, BitS, ABS);
2843 Changed = DeadCodeElimination(MF, *MDT).run() || Changed;
2849 DeadCodeElimination(MF, *MDT).run();
2947 using InstrList = std::vector<MachineInstr *>;
2961 bool isConst(
unsigned Reg)
const;
2964 bool isShuffleOf(
unsigned OutR,
unsigned InpR)
const;
2965 bool isSameShuffle(
unsigned OutR1,
unsigned InpR1,
unsigned OutR2,
2966 unsigned &InpR2)
const;
2969 bool processLoop(LoopCand &
C);
2974char HexagonLoopRescheduling::ID = 0;
2977 "Hexagon Loop Rescheduling",
false,
false)
2981 DefR = HexagonLoopRescheduling::getDefReg(&
P);
2984 for (
unsigned i = 1, n =
P.getNumOperands(); i < n; i += 2) {
2987 LR =
P.getOperand(i);
2991 PR =
P.getOperand(i);
2995unsigned HexagonLoopRescheduling::getDefReg(
const MachineInstr *
MI) {
2997 HBS::getInstrDefs(*
MI, Defs);
2998 if (Defs.count() != 1)
3000 return Defs.find_first();
3003bool HexagonLoopRescheduling::isConst(
unsigned Reg)
const {
3007 for (
unsigned i = 0, w = RC.
width(); i < w; ++i) {
3009 if (!
V.is(0) && !
V.is(1))
3015bool HexagonLoopRescheduling::isBitShuffle(
const MachineInstr *
MI,
3016 unsigned DefR)
const {
3017 unsigned Opc =
MI->getOpcode();
3019 case TargetOpcode::COPY:
3020 case Hexagon::S2_lsr_i_r:
3021 case Hexagon::S2_asr_i_r:
3022 case Hexagon::S2_asl_i_r:
3023 case Hexagon::S2_lsr_i_p:
3024 case Hexagon::S2_asr_i_p:
3025 case Hexagon::S2_asl_i_p:
3026 case Hexagon::S2_insert:
3027 case Hexagon::A2_or:
3028 case Hexagon::A2_orp:
3029 case Hexagon::A2_and:
3030 case Hexagon::A2_andp:
3031 case Hexagon::A2_combinew:
3032 case Hexagon::A4_combineri:
3033 case Hexagon::A4_combineir:
3034 case Hexagon::A2_combineii:
3035 case Hexagon::A4_combineii:
3036 case Hexagon::A2_combine_ll:
3037 case Hexagon::A2_combine_lh:
3038 case Hexagon::A2_combine_hl:
3039 case Hexagon::A2_combine_hh:
3045bool HexagonLoopRescheduling::isStoreInput(
const MachineInstr *
MI,
3046 unsigned InpR)
const {
3047 for (
unsigned i = 0, n =
MI->getNumOperands(); i < n; ++i) {
3051 if (
Op.getReg() == InpR)
3057bool HexagonLoopRescheduling::isShuffleOf(
unsigned OutR,
unsigned InpR)
const {
3058 if (!BTP->has(OutR) || !BTP->has(InpR))
3061 for (
unsigned i = 0, w = OutC.
width(); i < w; ++i) {
3065 if (
V.RefI.Reg != InpR)
3071bool HexagonLoopRescheduling::isSameShuffle(
unsigned OutR1,
unsigned InpR1,
3072 unsigned OutR2,
unsigned &InpR2)
const {
3073 if (!BTP->has(OutR1) || !BTP->has(InpR1) || !BTP->has(OutR2))
3077 unsigned W = OutC1.
width();
3078 unsigned MatchR = 0;
3079 if (W != OutC2.
width())
3081 for (
unsigned i = 0; i <
W; ++i) {
3091 if (
V2.RefI.Reg == 0 ||
V2.RefI.Reg == OutR2)
3094 MatchR =
V2.RefI.Reg;
3095 else if (
V2.RefI.Reg != MatchR)
3104 unsigned NewPredR) {
3108 Register PhiR =
MRI->createVirtualRegister(PhiRC);
3109 BuildMI(LB, At, At->getDebugLoc(), HII->get(TargetOpcode::PHI), PhiR)
3114 RegMap.
insert(std::make_pair(
G.Inp.Reg, PhiR));
3117 unsigned DR = getDefReg(SI);
3122 auto MIB =
BuildMI(LB, At,
DL, HII->get(
SI->getOpcode()), NewDR);
3130 unsigned UseR = RegMap[
Op.getReg()];
3131 MIB.addReg(UseR, 0,
Op.getSubReg());
3133 RegMap.
insert(std::make_pair(DR, NewDR));
3136 HBS::replaceReg(OldPhiR, RegMap[
G.Out.Reg], *
MRI);
3139bool HexagonLoopRescheduling::processLoop(LoopCand &
C) {
3142 std::vector<PhiInfo> Phis;
3143 for (
auto &
I : *
C.LB) {
3146 unsigned PR = getDefReg(&
I);
3149 bool BadUse =
false, GoodUse =
false;
3156 if (isBitShuffle(UseI, PR) || isStoreInput(UseI, PR))
3159 if (BadUse || !GoodUse)
3162 Phis.push_back(PhiInfo(
I, *
C.LB));
3166 dbgs() <<
"Phis: {";
3167 for (
auto &
I : Phis) {
3169 <<
printReg(
I.PR.Reg, HRI,
I.PR.Sub) <<
":b" <<
I.PB->getNumber()
3170 <<
',' <<
printReg(
I.LR.Reg, HRI,
I.LR.Sub) <<
":b"
3171 <<
I.LB->getNumber() <<
')';
3179 bool Changed =
false;
3187 if (
MI.isTerminator())
3193 HBS::getInstrDefs(
MI, Defs);
3194 if (Defs.count() != 1)
3199 if (!isBitShuffle(&
MI, DefR))
3202 bool BadUse =
false;
3203 for (
auto UI =
MRI->use_begin(DefR), UE =
MRI->use_end(); UI != UE; ++UI) {
3206 if (UseI->
isPHI()) {
3209 unsigned Idx = UI.getOperandNo();
3219 if (
C.EB ==
nullptr)
3228 ShufIns.push_back(&
MI);
3240 using InstrGroupList = std::vector<InstrGroup>;
3243 for (
unsigned i = 0, n = ShufIns.size(); i < n; ++i) {
3249 G.Ins.push_back(SI);
3250 G.Out.Reg = getDefReg(SI);
3252 HBS::getInstrUses(*SI, Inputs);
3254 for (
unsigned j = i+1;
j < n; ++
j) {
3259 HBS::getInstrDefs(*
MI, Defs);
3261 if (!Defs.intersects(Inputs))
3265 G.Ins.push_back(
MI);
3266 Inputs.remove(Defs);
3268 HBS::getInstrUses(*
MI, Inputs);
3269 ShufIns[
j] =
nullptr;
3273 if (Inputs.count() > 1)
3275 auto LoopInpEq = [
G] (
const PhiInfo &
P) ->
bool {
3276 return G.Out.Reg ==
P.LR.Reg;
3281 G.Inp.Reg = Inputs.find_first();
3286 for (
unsigned i = 0, n =
Groups.size(); i < n; ++i) {
3287 InstrGroup &G = Groups[i];
3288 dbgs() <<
"Group[" << i <<
"] inp: "
3289 << printReg(G.Inp.Reg, HRI, G.Inp.Sub)
3290 <<
" out: " << printReg(G.Out.Reg, HRI, G.Out.Sub) <<
"\n";
3291 for (const MachineInstr *MI : G.Ins)
3292 dbgs() <<
" " << MI;
3296 for (InstrGroup &
G :
Groups) {
3297 if (!isShuffleOf(
G.Out.Reg,
G.Inp.Reg))
3299 auto LoopInpEq = [
G] (
const PhiInfo &
P) ->
bool {
3300 return G.Out.Reg ==
P.LR.Reg;
3303 if (
F == Phis.end())
3306 if (!isSameShuffle(
G.Out.Reg,
G.Inp.Reg,
F->PR.Reg, PrehR)) {
3309 if (Opc != Hexagon::A2_tfrsi && Opc != Hexagon::A2_tfrpi)
3316 if (RC !=
MRI->getRegClass(
F->PR.Reg)) {
3317 PrehR =
MRI->createVirtualRegister(RC);
3318 unsigned TfrI = (RC == &Hexagon::IntRegsRegClass) ? Hexagon::A2_tfrsi
3319 : Hexagon::A2_tfrpi;
3320 auto T =
C.PB->getFirstTerminator();
3333 if (
MRI->getRegClass(PrehR) !=
MRI->getRegClass(
G.Inp.Reg))
3335 moveGroup(
G, *
F->LB, *
F->PB,
F->LB->getFirstNonPHI(),
F->DefR, PrehR);
3342bool HexagonLoopRescheduling::runOnMachineFunction(
MachineFunction &MF) {
3347 HII = HST.getInstrInfo();
3356 std::vector<LoopCand> Cand;
3358 for (
auto &
B : MF) {
3359 if (
B.pred_size() != 2 ||
B.succ_size() != 2)
3362 bool IsLoop =
false;
3378 if (Succ->pred_size() == 1)
3386 bool Changed =
false;
3387 for (
auto &
C : Cand)
3388 Changed |= processLoop(
C);
3398 return new HexagonLoopRescheduling();
3402 return new HexagonBitSimplify();
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file implements the BitVector class.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_ATTRIBUTE_UNUSED
static std::optional< ArrayRef< InsnRange >::iterator > intersects(const MachineInstr *StartMI, const MachineInstr *EndMI, const ArrayRef< InsnRange > &Ranges, const InstructionOrdering &Ordering)
Check if the instruction range [StartMI, EndMI] intersects any instruction range in Ranges.
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
This file defines the little GraphTraits<X> template class that should be specialized by classes that...
static unsigned CountBitSplit
static cl::opt< bool > PreserveTiedOps("hexbit-keep-tied", cl::Hidden, cl::init(true), cl::desc("Preserve subregisters in tied operands"))
static cl::opt< bool > GenExtract("hexbit-extract", cl::Hidden, cl::init(true), cl::desc("Generate extract instructions"))
static cl::opt< unsigned > MaxBitSplit("hexbit-max-bitsplit", cl::Hidden, cl::init(std::numeric_limits< unsigned >::max()))
hexagon bit Hexagon bit simplification
static cl::opt< bool > GenBitSplit("hexbit-bitsplit", cl::Hidden, cl::init(true), cl::desc("Generate bitsplit instructions"))
static cl::opt< unsigned > MaxExtract("hexbit-max-extract", cl::Hidden, cl::init(std::numeric_limits< unsigned >::max()))
static cl::opt< unsigned > RegisterSetLimit("hexbit-registerset-limit", cl::Hidden, cl::init(1000))
static unsigned CountExtract
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
unsigned const TargetRegisterInfo * TRI
PassBuilder PB(Machine, PassOpts->PTO, std::nullopt, &PIC)
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Remove Loads Into Fake Uses
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool isDead(const MachineInstr &MI, const MachineRegisterInfo &MRI)
This file defines the SmallVector class.
static const X86InstrFMA3Group Groups[]
support::ulittle16_t & Lo
support::ulittle16_t & Hi
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
This class represents an Operation in the Expression.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Base class for the actual dominator tree node.
FunctionPass class - This class is used to implement most global optimizations.
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
unsigned getHexagonSubRegIndex(const TargetRegisterClass &RC, unsigned GenIdx) const
const HexagonRegisterInfo * getRegisterInfo() const override
Describe properties that are true of each instruction in the target description file.
void push_back(MachineInstr *MI)
MachineInstrBundleIterator< MachineInstr > iterator
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineBasicBlock & front() const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
bool isDebugInstr() const
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Wrapper class representing virtual and physical registers.
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
static unsigned virtReg2Index(Register Reg)
Convert a virtual register number to a 0-based index.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
A global registry used in conjunction with static constructors to make pluggable components (like tar...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
unsigned getID() const
Return the register class ID number.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
The instances of the Type class are immutable: once they are created, they are never changed.
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char IsConst[]
Key for Kernel::Arg::Metadata::mIsConst.
@ C
The default llvm calling convention, compatible with C.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
@ RRC
Y = RRC X, rotate right via carry.
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
SmallVector< MachineInstr * > InstrList
bool isConst(unsigned Opc)
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
std::set< RegisterRef > RegisterSet
std::error_code remove(const Twine &path, bool IgnoreNonExisting=true)
Remove path.
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
bool isEqual(const GCNRPTracker::LiveRegSet &S1, const GCNRPTracker::LiveRegSet &S2)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
FunctionPass * createHexagonBitSimplify()
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
void initializeHexagonBitSimplifyPass(PassRegistry &Registry)
auto reverse(ContainerTy &&C)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
FunctionPass * createHexagonLoopRescheduling()
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
void initializeHexagonLoopReschedulingPass(PassRegistry &)
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool is(unsigned T) const
static BitValue self(const BitRef &Self=BitRef())
bool has(unsigned Reg) const
const RegisterCell & lookup(unsigned Reg) const
bool reached(const MachineBasicBlock *B) const
void trace(bool On=false)
void put(RegisterRef RR, const RegisterCell &RC)
void visit(const MachineInstr &MI)
RegisterCell get(RegisterRef RR) const