47 #define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"
75 unsigned LdarOp,
unsigned StlrOp,
unsigned CmpOp,
76 unsigned ExtendImm,
unsigned ZeroReg,
108 assert(MO.isReg() && MO.getReg());
127 if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
130 MI.eraseFromParent();
140 bool LastItem = std::next(
I) ==
E;
145 case AArch64::ORRWri:
146 case AArch64::ORRXri:
148 .
add(
MI.getOperand(0))
149 .
addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
152 case AArch64::MOVNWi:
153 case AArch64::MOVNXi:
154 case AArch64::MOVZWi:
155 case AArch64::MOVZXi: {
156 bool DstIsDead =
MI.getOperand(0).isDead();
164 case AArch64::MOVKWi:
165 case AArch64::MOVKXi: {
167 bool DstIsDead =
MI.getOperand(0).isDead();
179 transferImpOps(
MI, MIBS.front(), MIBS.back());
180 MI.eraseFromParent();
184 bool AArch64ExpandPseudo::expandCMP_SWAP(
186 unsigned StlrOp,
unsigned CmpOp,
unsigned ExtendImm,
unsigned ZeroReg,
191 Register StatusReg =
MI.getOperand(1).getReg();
192 bool StatusDead =
MI.getOperand(1).isDead();
195 assert(!
MI.getOperand(2).isUndef() &&
"cannot handle undef");
197 Register DesiredReg =
MI.getOperand(3).getReg();
206 MF->
insert(++LoadCmpBB->getIterator(), StoreBB);
207 MF->
insert(++StoreBB->getIterator(), DoneBB);
215 BuildMI(LoadCmpBB,
DL,
TII->get(AArch64::MOVZWi), StatusReg)
227 LoadCmpBB->addSuccessor(DoneBB);
228 LoadCmpBB->addSuccessor(StoreBB);
239 StoreBB->addSuccessor(LoadCmpBB);
240 StoreBB->addSuccessor(DoneBB);
242 DoneBB->splice(DoneBB->end(), &
MBB,
MI,
MBB.
end());
243 DoneBB->transferSuccessors(&
MBB);
248 MI.eraseFromParent();
256 StoreBB->clearLiveIns();
258 LoadCmpBB->clearLiveIns();
264 bool AArch64ExpandPseudo::expandCMP_SWAP_128(
271 Register StatusReg =
MI.getOperand(2).getReg();
272 bool StatusDead =
MI.getOperand(2).isDead();
275 assert(!
MI.getOperand(3).isUndef() &&
"cannot handle undef");
277 Register DesiredLoReg =
MI.getOperand(4).getReg();
278 Register DesiredHiReg =
MI.getOperand(5).getReg();
279 Register NewLoReg =
MI.getOperand(6).getReg();
280 Register NewHiReg =
MI.getOperand(7).getReg();
282 unsigned LdxpOp, StxpOp;
284 switch (
MI.getOpcode()) {
285 case AArch64::CMP_SWAP_128_MONOTONIC:
286 LdxpOp = AArch64::LDXPX;
287 StxpOp = AArch64::STXPX;
289 case AArch64::CMP_SWAP_128_RELEASE:
290 LdxpOp = AArch64::LDXPX;
291 StxpOp = AArch64::STLXPX;
293 case AArch64::CMP_SWAP_128_ACQUIRE:
294 LdxpOp = AArch64::LDAXPX;
295 StxpOp = AArch64::STXPX;
297 case AArch64::CMP_SWAP_128:
298 LdxpOp = AArch64::LDAXPX;
299 StxpOp = AArch64::STLXPX;
312 MF->
insert(++LoadCmpBB->getIterator(), StoreBB);
313 MF->
insert(++StoreBB->getIterator(), FailBB);
314 MF->
insert(++FailBB->getIterator(), DoneBB);
325 BuildMI(LoadCmpBB,
DL,
TII->get(AArch64::SUBSXrs), AArch64::XZR)
329 BuildMI(LoadCmpBB,
DL,
TII->get(AArch64::CSINCWr), StatusReg)
333 BuildMI(LoadCmpBB,
DL,
TII->get(AArch64::SUBSXrs), AArch64::XZR)
337 BuildMI(LoadCmpBB,
DL,
TII->get(AArch64::CSINCWr), StatusReg)
344 LoadCmpBB->addSuccessor(FailBB);
345 LoadCmpBB->addSuccessor(StoreBB);
358 StoreBB->addSuccessor(LoadCmpBB);
359 StoreBB->addSuccessor(DoneBB);
371 FailBB->addSuccessor(LoadCmpBB);
372 FailBB->addSuccessor(DoneBB);
374 DoneBB->splice(DoneBB->end(), &
MBB,
MI,
MBB.
end());
375 DoneBB->transferSuccessors(&
MBB);
380 MI.eraseFromParent();
390 FailBB->clearLiveIns();
392 StoreBB->clearLiveIns();
394 LoadCmpBB->clearLiveIns();
438 bool AArch64ExpandPseudo::expand_DestructiveOp(
448 bool DstIsDead =
MI.getOperand(0).isDead();
451 assert(DstReg !=
MI.getOperand(3).getReg());
454 unsigned PredIdx, DOPIdx, SrcIdx, Src2Idx;
458 if (DstReg ==
MI.getOperand(3).getReg()) {
460 std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 3, 2);
467 std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 2, 3);
470 std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(2, 3, 3);
473 std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 2, 3, 4);
474 if (DstReg ==
MI.getOperand(3).getReg()) {
476 std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 3, 4, 2);
478 }
else if (DstReg ==
MI.getOperand(4).getReg()) {
480 std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 4, 3, 2);
492 bool DOPRegIsUnique =
false;
497 DstReg !=
MI.getOperand(DOPIdx).getReg() ||
498 MI.getOperand(DOPIdx).getReg() !=
MI.getOperand(SrcIdx).getReg();
502 DOPRegIsUnique =
true;
506 DstReg !=
MI.getOperand(DOPIdx).getReg() ||
507 (
MI.getOperand(DOPIdx).getReg() !=
MI.getOperand(SrcIdx).getReg() &&
508 MI.getOperand(DOPIdx).getReg() !=
MI.getOperand(Src2Idx).getReg());
525 uint64_t ElementSize =
TII->getElementSizeForOpcode(Opcode);
526 unsigned MovPrfx, MovPrfxZero;
527 switch (ElementSize) {
530 MovPrfx = AArch64::MOVPRFX_ZZ;
531 MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B;
534 MovPrfx = AArch64::MOVPRFX_ZZ;
535 MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H;
538 MovPrfx = AArch64::MOVPRFX_ZZ;
539 MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S;
542 MovPrfx = AArch64::MOVPRFX_ZZ;
543 MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D;
555 assert(DOPRegIsUnique &&
"The destructive operand should be unique");
558 "This instruction is unpredicated");
563 .
addReg(
MI.getOperand(PredIdx).getReg())
564 .
addReg(
MI.getOperand(DOPIdx).getReg());
568 }
else if (DstReg !=
MI.getOperand(DOPIdx).getReg()) {
570 assert(DOPRegIsUnique &&
"The destructive operand should be unique");
574 .
addReg(
MI.getOperand(DOPIdx).getReg());
587 .
add(
MI.getOperand(PredIdx))
588 .
add(
MI.getOperand(SrcIdx));
593 DOP.
add(
MI.getOperand(PredIdx))
595 .
add(
MI.getOperand(SrcIdx));
598 DOP.
add(
MI.getOperand(PredIdx))
600 .
add(
MI.getOperand(SrcIdx))
601 .
add(
MI.getOperand(Src2Idx));
607 transferImpOps(
MI, PRFX, DOP);
609 transferImpOps(
MI, DOP, DOP);
611 MI.eraseFromParent();
615 bool AArch64ExpandPseudo::expandSetTagLoop(
621 Register AddressReg =
MI.getOperand(1).getReg();
625 bool ZeroData =
MI.getOpcode() == AArch64::STZGloop_wback;
626 const unsigned OpCode1 =
627 ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex;
628 const unsigned OpCode2 =
629 ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;
631 unsigned Size =
MI.getOperand(2).getImm();
632 assert(Size > 0 && Size % 16 == 0);
633 if (Size % (16 * 2) != 0) {
649 MF->
insert(++LoopBB->getIterator(), DoneBB);
665 LoopBB->addSuccessor(LoopBB);
666 LoopBB->addSuccessor(DoneBB);
668 DoneBB->splice(DoneBB->end(), &
MBB,
MI,
MBB.
end());
669 DoneBB->transferSuccessors(&
MBB);
674 MI.eraseFromParent();
681 LoopBB->clearLiveIns();
683 DoneBB->clearLiveIns();
691 unsigned Opc,
unsigned N) {
696 int ImmOffset =
MI.getOperand(2).getImm() +
Offset;
697 bool Kill = (
Offset + 1 ==
N) ?
MI.getOperand(1).isKill() :
false;
698 assert(ImmOffset >= -256 && ImmOffset < 256 &&
699 "Immediate spill offset out of range");
702 TRI->
getSubReg(
MI.getOperand(0).getReg(), AArch64::zsub0 + Offset),
707 MI.eraseFromParent();
711 bool AArch64ExpandPseudo::expandCALL_RVMARKER(
724 "invalid operand for regular call");
725 assert(RVTarget.
isGlobal() &&
"invalid operand for attached call");
730 unsigned RegMaskStartIdx = 2;
733 while (!
MI.getOperand(RegMaskStartIdx).isRegMask()) {
734 auto MOP =
MI.getOperand(RegMaskStartIdx);
735 assert(MOP.isReg() &&
"can only add register operands");
737 MOP.getReg(),
false,
true));
754 if (
MI.shouldUpdateCallSiteInfo())
757 MI.eraseFromParent();
759 std::next(RVCall->getIterator()));
774 "invalid operand for regular call");
778 Call->addOperand(CallTarget);
786 if (
MI.shouldUpdateCallSiteInfo())
789 MI.eraseFromParent();
794 bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(
802 if (STI.getTargetTriple().getArchName() !=
"arm64e") {
819 unsigned Opc =
Offset >= 0 ? AArch64::ADDXri : AArch64::SUBXri;
857 unsigned Opcode =
MI.getOpcode();
861 if (OrigInstr != -1) {
862 auto &Orig =
TII->get(OrigInstr);
865 return expand_DestructiveOp(
MI,
MBB,
MBBI);
873 case AArch64::BSPv8i8:
874 case AArch64::BSPv16i8: {
876 if (DstReg ==
MI.getOperand(3).getReg()) {
879 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BITv8i8
880 : AArch64::BITv16i8))
881 .
add(
MI.getOperand(0))
882 .
add(
MI.getOperand(3))
883 .
add(
MI.getOperand(2))
884 .
add(
MI.getOperand(1));
885 }
else if (DstReg ==
MI.getOperand(2).getReg()) {
888 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BIFv8i8
889 : AArch64::BIFv16i8))
890 .
add(
MI.getOperand(0))
891 .
add(
MI.getOperand(2))
892 .
add(
MI.getOperand(3))
893 .
add(
MI.getOperand(1));
896 if (DstReg ==
MI.getOperand(1).getReg()) {
898 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
899 : AArch64::BSLv16i8))
900 .
add(
MI.getOperand(0))
901 .
add(
MI.getOperand(1))
902 .
add(
MI.getOperand(2))
903 .
add(
MI.getOperand(3));
906 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::ORRv8i8
907 : AArch64::ORRv16i8))
911 .
add(
MI.getOperand(1))
912 .
add(
MI.getOperand(1));
914 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
915 : AArch64::BSLv16i8))
916 .
add(
MI.getOperand(0))
920 .
add(
MI.getOperand(2))
921 .
add(
MI.getOperand(3));
924 MI.eraseFromParent();
928 case AArch64::ADDWrr:
929 case AArch64::SUBWrr:
930 case AArch64::ADDXrr:
931 case AArch64::SUBXrr:
932 case AArch64::ADDSWrr:
933 case AArch64::SUBSWrr:
934 case AArch64::ADDSXrr:
935 case AArch64::SUBSXrr:
936 case AArch64::ANDWrr:
937 case AArch64::ANDXrr:
938 case AArch64::BICWrr:
939 case AArch64::BICXrr:
940 case AArch64::ANDSWrr:
941 case AArch64::ANDSXrr:
942 case AArch64::BICSWrr:
943 case AArch64::BICSXrr:
944 case AArch64::EONWrr:
945 case AArch64::EONXrr:
946 case AArch64::EORWrr:
947 case AArch64::EORXrr:
948 case AArch64::ORNWrr:
949 case AArch64::ORNXrr:
950 case AArch64::ORRWrr:
951 case AArch64::ORRXrr: {
953 switch (
MI.getOpcode()) {
956 case AArch64::ADDWrr: Opcode = AArch64::ADDWrs;
break;
957 case AArch64::SUBWrr: Opcode = AArch64::SUBWrs;
break;
958 case AArch64::ADDXrr: Opcode = AArch64::ADDXrs;
break;
959 case AArch64::SUBXrr: Opcode = AArch64::SUBXrs;
break;
960 case AArch64::ADDSWrr: Opcode = AArch64::ADDSWrs;
break;
961 case AArch64::SUBSWrr: Opcode = AArch64::SUBSWrs;
break;
962 case AArch64::ADDSXrr: Opcode = AArch64::ADDSXrs;
break;
963 case AArch64::SUBSXrr: Opcode = AArch64::SUBSXrs;
break;
964 case AArch64::ANDWrr: Opcode = AArch64::ANDWrs;
break;
965 case AArch64::ANDXrr: Opcode = AArch64::ANDXrs;
break;
966 case AArch64::BICWrr: Opcode = AArch64::BICWrs;
break;
967 case AArch64::BICXrr: Opcode = AArch64::BICXrs;
break;
968 case AArch64::ANDSWrr: Opcode = AArch64::ANDSWrs;
break;
969 case AArch64::ANDSXrr: Opcode = AArch64::ANDSXrs;
break;
970 case AArch64::BICSWrr: Opcode = AArch64::BICSWrs;
break;
971 case AArch64::BICSXrr: Opcode = AArch64::BICSXrs;
break;
972 case AArch64::EONWrr: Opcode = AArch64::EONWrs;
break;
973 case AArch64::EONXrr: Opcode = AArch64::EONXrs;
break;
974 case AArch64::EORWrr: Opcode = AArch64::EORWrs;
break;
975 case AArch64::EORXrr: Opcode = AArch64::EORXrs;
break;
976 case AArch64::ORNWrr: Opcode = AArch64::ORNWrs;
break;
977 case AArch64::ORNXrr: Opcode = AArch64::ORNXrs;
break;
978 case AArch64::ORRWrr: Opcode = AArch64::ORRWrs;
break;
979 case AArch64::ORRXrr: Opcode = AArch64::ORRXrs;
break;
984 TII->get(Opcode),
MI.getDebugLoc(),
true);
988 .add(
MI.getOperand(1))
989 .add(
MI.getOperand(2))
991 transferImpOps(
MI, MIB1, MIB1);
992 MI.eraseFromParent();
1005 TII->get(AArch64::LDRXl), DstReg);
1013 "Only expect globals, externalsymbols, or constant pools");
1026 unsigned Reg32 =
TRI->
getSubReg(DstReg, AArch64::sub_32);
1027 unsigned DstFlags =
MI.getOperand(0).getTargetFlags();
1035 .
add(
MI.getOperand(0))
1050 "Only expect globals, externalsymbols, or constant pools");
1058 transferImpOps(
MI, MIB1, MIB2);
1060 MI.eraseFromParent();
1063 case AArch64::MOVaddrBA: {
1070 assert(
MI.getOperand(1).getOffset() == 0 &&
"unexpected offset");
1080 TII->get(AArch64::LDRXui), DstReg)
1084 transferImpOps(
MI, MIB1, MIB2);
1085 MI.eraseFromParent();
1090 case AArch64::MOVaddr:
1091 case AArch64::MOVaddrJT:
1092 case AArch64::MOVaddrCP:
1093 case AArch64::MOVaddrTLS:
1094 case AArch64::MOVaddrEXT: {
1097 assert(DstReg != AArch64::XZR);
1100 .
add(
MI.getOperand(1));
1110 auto Tag =
MI.getOperand(1);
1112 Tag.setOffset(0x100000000);
1121 .
add(
MI.getOperand(0))
1123 .
add(
MI.getOperand(2))
1126 transferImpOps(
MI, MIB1, MIB2);
1127 MI.eraseFromParent();
1130 case AArch64::ADDlowTLS:
1133 .
add(
MI.getOperand(0))
1134 .
add(
MI.getOperand(1))
1135 .
add(
MI.getOperand(2))
1137 MI.eraseFromParent();
1140 case AArch64::MOVbaseTLS: {
1142 auto SysReg = AArch64SysReg::TPIDR_EL0;
1145 SysReg = AArch64SysReg::TPIDR_EL3;
1147 SysReg = AArch64SysReg::TPIDR_EL2;
1149 SysReg = AArch64SysReg::TPIDR_EL1;
1152 MI.eraseFromParent();
1156 case AArch64::MOVi32imm:
1158 case AArch64::MOVi64imm:
1160 case AArch64::RET_ReallyLR: {
1169 transferImpOps(
MI, MIB, MIB);
1170 MI.eraseFromParent();
1173 case AArch64::CMP_SWAP_8:
1174 return expandCMP_SWAP(
MBB,
MBBI, AArch64::LDAXRB, AArch64::STLXRB,
1177 AArch64::WZR, NextMBBI);
1178 case AArch64::CMP_SWAP_16:
1179 return expandCMP_SWAP(
MBB,
MBBI, AArch64::LDAXRH, AArch64::STLXRH,
1182 AArch64::WZR, NextMBBI);
1183 case AArch64::CMP_SWAP_32:
1184 return expandCMP_SWAP(
MBB,
MBBI, AArch64::LDAXRW, AArch64::STLXRW,
1187 AArch64::WZR, NextMBBI);
1188 case AArch64::CMP_SWAP_64:
1189 return expandCMP_SWAP(
MBB,
MBBI,
1190 AArch64::LDAXRX, AArch64::STLXRX, AArch64::SUBSXrs,
1192 AArch64::XZR, NextMBBI);
1193 case AArch64::CMP_SWAP_128:
1194 case AArch64::CMP_SWAP_128_RELEASE:
1195 case AArch64::CMP_SWAP_128_ACQUIRE:
1196 case AArch64::CMP_SWAP_128_MONOTONIC:
1197 return expandCMP_SWAP_128(
MBB,
MBBI, NextMBBI);
1199 case AArch64::AESMCrrTied:
1200 case AArch64::AESIMCrrTied: {
1203 TII->get(Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr :
1205 .
add(
MI.getOperand(0))
1206 .
add(
MI.getOperand(1));
1207 transferImpOps(
MI, MIB, MIB);
1208 MI.eraseFromParent();
1211 case AArch64::IRGstack: {
1222 StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
1223 MF, BaseOffset,
false ,
false , FrameReg,
1227 if (FrameRegOffset) {
1229 SrcReg =
MI.getOperand(0).getReg();
1231 FrameRegOffset,
TII);
1234 .
add(
MI.getOperand(0))
1236 .
add(
MI.getOperand(2));
1237 MI.eraseFromParent();
1240 case AArch64::TAGPstack: {
1241 int64_t
Offset =
MI.getOperand(2).getImm();
1243 TII->get(Offset >= 0 ? AArch64::ADDG : AArch64::SUBG))
1244 .
add(
MI.getOperand(0))
1245 .
add(
MI.getOperand(1))
1247 .
add(
MI.getOperand(4));
1248 MI.eraseFromParent();
1251 case AArch64::STGloop_wback:
1252 case AArch64::STZGloop_wback:
1253 return expandSetTagLoop(
MBB,
MBBI, NextMBBI);
1254 case AArch64::STGloop:
1255 case AArch64::STZGloop:
1257 "Non-writeback variants of STGloop / STZGloop should not "
1258 "survive past PrologEpilogInserter.");
1259 case AArch64::STR_ZZZZXI:
1260 return expandSVESpillFill(
MBB,
MBBI, AArch64::STR_ZXI, 4);
1261 case AArch64::STR_ZZZXI:
1262 return expandSVESpillFill(
MBB,
MBBI, AArch64::STR_ZXI, 3);
1263 case AArch64::STR_ZZXI:
1264 return expandSVESpillFill(
MBB,
MBBI, AArch64::STR_ZXI, 2);
1265 case AArch64::LDR_ZZZZXI:
1266 return expandSVESpillFill(
MBB,
MBBI, AArch64::LDR_ZXI, 4);
1267 case AArch64::LDR_ZZZXI:
1268 return expandSVESpillFill(
MBB,
MBBI, AArch64::LDR_ZXI, 3);
1269 case AArch64::LDR_ZZXI:
1270 return expandSVESpillFill(
MBB,
MBBI, AArch64::LDR_ZXI, 2);
1271 case AArch64::BLR_RVMARKER:
1272 return expandCALL_RVMARKER(
MBB,
MBBI);
1273 case AArch64::BLR_BTI:
1274 return expandCALL_BTI(
MBB,
MBBI);
1275 case AArch64::StoreSwiftAsyncContext:
1276 return expandStoreSwiftAsyncContext(
MBB,
MBBI);
1300 for (
auto &
MBB : MF)
1307 return new AArch64ExpandPseudo();