#define DEBUG_TYPE "legalizer"

using namespace LegalizeActions;
using namespace MIPatternMatch;
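
// The helper below breaks a scalar of Size bits into NumParts pieces of
// NarrowSize bits plus a leftover piece. For example, Size = 100 and
// NarrowSize = 32 give NumParts = 3 with a 4-bit leftover; how NumLeftover is
// derived from that leftover (presumably by dividing by EltSize) is in code
// not shown in this excerpt.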
static std::pair<int, int>
  unsigned NumParts = Size / NarrowSize;
  unsigned LeftoverSize = Size - NumParts * NarrowSize;
  if (LeftoverSize == 0)
  if (LeftoverSize % EltSize != 0)
  return std::make_pair(NumParts, NumLeftover);
    : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
      LI(*MF.getSubtarget().getLegalizerInfo()),
      TLI(*MF.getSubtarget().getTargetLowering()) {}

    : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
      TLI(*MF.getSubtarget().getTargetLowering()) {}
  if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
      MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
  switch (Step.Action) {
    return bitcast(MI, Step.TypeIdx, Step.NewType);
    return lower(MI, Step.TypeIdx, Step.NewType);
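
// extractParts splits Reg into NumParts values of type Ty, one register per
// part; the overload further down also collects leftover registers when the
// register size is not an exact multiple of MainTy's size.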
void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
  for (int i = 0; i < NumParts; ++i)

                                   LLT MainTy, LLT &LeftoverTy,
  unsigned NumParts = RegSize / MainSize;
  unsigned LeftoverSize = RegSize - NumParts * MainSize;
  if (LeftoverSize == 0) {
    for (unsigned I = 0; I < NumParts; ++I)
  for (unsigned i = 0; i < RegPieces.size() - 1; ++i)
    VRegs.push_back(RegPieces[i]);
  LeftoverRegs.push_back(RegPieces[RegPieces.size() - 1]);
  LeftoverTy = MRI.getType(LeftoverRegs[0]);
  for (unsigned I = 0; I != NumParts; ++I) {
    VRegs.push_back(NewReg);
  for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
    LeftoverRegs.push_back(NewReg);
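
// extractVectorParts splits a vector register into pieces of NumElts elements.
// When NumElts divides the element count evenly it defers to extractParts;
// otherwise it splits down to single elements, regroups them, and keeps the
// remainder as a leftover piece (a single leftover element is pushed as-is).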
void LegalizerHelper::extractVectorParts(Register Reg, unsigned NumElts,
  unsigned LeftoverNumElts = RegNumElts % NumElts;
  unsigned NumNarrowTyPieces = RegNumElts / NumElts;
  if (LeftoverNumElts == 0)
    return extractParts(Reg, NarrowTy, NumNarrowTyPieces, VRegs);
  extractParts(Reg, EltTy, RegNumElts, Elts);
  for (unsigned i = 0; i < NumNarrowTyPieces; ++i, Offset += NumElts) {
    if (LeftoverNumElts == 1) {
      VRegs.push_back(Elts[Offset]);
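
// insertParts is the inverse of extractParts: it reassembles DstReg from the
// main part registers plus any leftover registers, either by merging the mixed
// subvectors directly or by funnelling everything through a common GCD type
// and an LCM-sized remerge (extractGCDType, buildLCMMergePieces,
// buildWidenedRemergeToDst).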
void LegalizerHelper::insertParts(Register DstReg,
    assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
    for (auto Reg : concat<const Register>(PartRegs, LeftoverRegs))
      AllRegs.push_back(Reg);
    return mergeMixedSubvectors(DstReg, AllRegs);
  for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
    extractGCDType(GCDRegs, GCDTy, PartReg);
  LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
  buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
  for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
    appendVectorElts(AllElts, PartRegs[i]);
    AllElts.push_back(Leftover);
    appendVectorElts(AllElts, Leftover);

  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
  const int StartIdx = Regs.size();
  const int NumResults = MI.getNumOperands() - 1;
  Regs.resize(Regs.size() + NumResults);
  for (int I = 0; I != NumResults; ++I)
    Regs[StartIdx + I] = MI.getOperand(I).getReg();

  if (SrcTy == GCDTy) {
    Parts.push_back(SrcReg);
  extractGCDType(Parts, GCDTy, SrcReg);
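
// buildLCMMergePieces combines GCD-typed pieces into NarrowTy-sized registers
// that cover an LCM-sized value. Source pieces that are missing get padded
// according to PadStrategy (G_ZEXT, G_ANYEXT or G_SEXT), and a part made
// entirely of padding is cached in AllPadReg so it is only materialized once.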
LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
                                         unsigned PadStrategy) {
  int NumOrigSrc = VRegs.size();
  if (NumOrigSrc < NumParts * NumSubParts) {
    if (PadStrategy == TargetOpcode::G_ZEXT)
    else if (PadStrategy == TargetOpcode::G_ANYEXT)
      assert(PadStrategy == TargetOpcode::G_SEXT);
  for (int I = 0; I != NumParts; ++I) {
    bool AllMergePartsArePadding = true;
    for (int J = 0; J != NumSubParts; ++J) {
      int Idx = I * NumSubParts + J;
      if (Idx >= NumOrigSrc) {
        SubMerge[J] = PadReg;
      SubMerge[J] = VRegs[Idx];
      AllMergePartsArePadding = false;
    if (AllMergePartsArePadding && !AllPadReg) {
      if (PadStrategy == TargetOpcode::G_ANYEXT)
      else if (PadStrategy == TargetOpcode::G_ZEXT)
      Remerge[I] = AllPadReg;
    if (NumSubParts == 1)
      Remerge[I] = SubMerge[0];
    if (AllMergePartsArePadding && !AllPadReg)
      AllPadReg = Remerge[I];
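
// buildWidenedRemergeToDst writes the LCM-typed merge result back into DstReg:
// directly when DstTy == LCMTy, otherwise via an unmerge whose first def is
// DstReg (the remaining defs presumably end up unused).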
void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
  if (DstTy == LCMTy) {
  UnmergeDefs[0] = DstReg;
  for (unsigned I = 1; I != NumDefs; ++I)

#define RTLIBCASE_INT(LibcallPrefix) \
      return RTLIB::LibcallPrefix##32; \
      return RTLIB::LibcallPrefix##64; \
      return RTLIB::LibcallPrefix##128; \
      llvm_unreachable("unexpected size"); \

#define RTLIBCASE(LibcallPrefix) \
      return RTLIB::LibcallPrefix##32; \
      return RTLIB::LibcallPrefix##64; \
      return RTLIB::LibcallPrefix##80; \
      return RTLIB::LibcallPrefix##128; \
      llvm_unreachable("unexpected size"); \
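
// The RTLIBCASE* macros expand to a dispatch on the scalar size: for example,
// RTLIBCASE(SQRT_F) yields RTLIB::SQRT_F32, SQRT_F64, SQRT_F80 or SQRT_F128,
// while RTLIBCASE_INT has no 80-bit case. The switch on the size value itself
// is not shown in this excerpt.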
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
  if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||

  switch (MI.getOpcode()) {
  case TargetOpcode::G_BZERO:
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMMOVE:
  case TargetOpcode::G_MEMSET:
  if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
  Register PReg = Next->getOperand(0).getReg();
  if (Ret->getNumImplicitOperands() != 1)
  if (PReg != Ret->getOperand(0).getReg())

  Info.OrigRet = Result;
  if (!CLI.lowerCall(MIRBuilder, Info))
    Args.push_back({MO.getReg(), OpType, 0});

  for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
    Type *OpTy = nullptr;
    Args.push_back({Reg, OpTy, 0});
  unsigned Opc = MI.getOpcode();
  case TargetOpcode::G_BZERO:
    RTLibcall = RTLIB::BZERO;
  case TargetOpcode::G_MEMCPY:
    Args[0].Flags[0].setReturned();
  case TargetOpcode::G_MEMMOVE:
    RTLibcall = RTLIB::MEMMOVE;
    Args[0].Flags[0].setReturned();
  case TargetOpcode::G_MEMSET:
    RTLibcall = RTLIB::MEMSET;
    Args[0].Flags[0].setReturned();
  const char *Name = TLI.getLibcallName(RTLibcall);
  Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
  Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() &&
  if (!CLI.lowerCall(MIRBuilder, Info))
  if (Info.LoweredTailCall) {
    assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
           "Expected instr following MI to be return or debug inst?");
  } while (MI.getNextNode());
  case TargetOpcode::G_FPEXT:
  case TargetOpcode::G_FPTRUNC:
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP:
                       {MI.getOperand(0).getReg(), ToType, 0},
  switch (MI.getOpcode()) {
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
    if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
      LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
  case TargetOpcode::G_FPEXT:
  case TargetOpcode::G_FPTRUNC: {
    if (!FromTy || !ToTy)
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
  case TargetOpcode::G_BZERO:
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMMOVE:
  case TargetOpcode::G_MEMSET: {
      MI.eraseFromParent();
  MI.eraseFromParent();
  switch (MI.getOpcode()) {
  case TargetOpcode::G_IMPLICIT_DEF: {
    if (SizeOp0 % NarrowSize != 0) {
      LLT ImplicitTy = NarrowTy;
      MI.eraseFromParent();
    int NumParts = SizeOp0 / NarrowSize;
    for (int i = 0; i < NumParts; ++i)
    MI.eraseFromParent();
  case TargetOpcode::G_CONSTANT: {
    const APInt &Val = MI.getOperand(1).getCImm()->getValue();
    int NumParts = TotalSize / NarrowSize;
    for (int I = 0; I != NumParts; ++I) {
      unsigned Offset = I * NarrowSize;
      PartRegs.push_back(K.getReg(0));
    unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
    if (LeftoverBits != 0) {
          Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
      LeftoverRegs.push_back(K.getReg(0));
    insertParts(MI.getOperand(0).getReg(),
                Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
    MI.eraseFromParent();
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
  case TargetOpcode::G_TRUNC: {
      LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
    MI.eraseFromParent();
  case TargetOpcode::G_FREEZE: {
    for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
    MI.eraseFromParent();
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_UMULH:
  case TargetOpcode::G_EXTRACT:
  case TargetOpcode::G_INSERT:
  case TargetOpcode::G_LOAD: {
    auto &LoadMI = cast<GLoad>(MI);
    Register DstReg = LoadMI.getDstReg();
    LoadMI.eraseFromParent();
  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_SEXTLOAD: {
    auto &LoadMI = cast<GExtLoad>(MI);
    Register DstReg = LoadMI.getDstReg();
    Register PtrReg = LoadMI.getPointerReg();
    auto &MMO = LoadMI.getMMO();
    unsigned MemSize = MMO.getSizeInBits();
    if (MemSize == NarrowSize) {
    } else if (MemSize < NarrowSize) {
    } else if (MemSize > NarrowSize) {
    if (isa<GZExtLoad>(LoadMI))
    LoadMI.eraseFromParent();
  case TargetOpcode::G_STORE: {
    auto &StoreMI = cast<GStore>(MI);
    Register SrcReg = StoreMI.getValueReg();
    int NumParts = SizeOp0 / NarrowSize;
    unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
    if (SrcTy.isVector() && LeftoverBits != 0)
    StoreMI.eraseFromParent();
  case TargetOpcode::G_SELECT:
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP:
    switch (MI.getOpcode()) {
    case TargetOpcode::G_CTLZ:
    case TargetOpcode::G_CTLZ_ZERO_UNDEF:
    case TargetOpcode::G_CTTZ:
    case TargetOpcode::G_CTTZ_ZERO_UNDEF:
    case TargetOpcode::G_CTPOP:
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_PTRTOINT:
  case TargetOpcode::G_PHI: {
    if (SizeOp0 % NarrowSize != 0)
    unsigned NumParts = SizeOp0 / NarrowSize;
    for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
      extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
    for (unsigned i = 0; i < NumParts; ++i) {
      for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
    MI.eraseFromParent();
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
  case TargetOpcode::G_INSERT_VECTOR_ELT: {
    int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
  case TargetOpcode::G_ICMP: {
    if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
    if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
                      RHSPartRegs, RHSLeftoverRegs))
      for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
        auto LHS = std::get<0>(LHSAndRHS);
        auto RHS = std::get<1>(LHSAndRHS);
        Xors.push_back(Xor);
      for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
        auto LHS = std::get<0>(LHSAndRHS);
        auto RHS = std::get<1>(LHSAndRHS);
        LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
        buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
                            TargetOpcode::G_ZEXT);
        Xors.insert(Xors.end(), WidenedXors.begin(), WidenedXors.end());
      assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
      for (unsigned I = 2, E = Xors.size(); I < E; ++I)
      assert(LHSPartRegs.size() == 2 && "Expected exactly 2 LHS part regs?");
      assert(RHSPartRegs.size() == 2 && "Expected exactly 2 RHS part regs?");
    MI.eraseFromParent();
  case TargetOpcode::G_SEXT_INREG: {
    int64_t SizeInBits = MI.getOperand(2).getImm();
      MO1.setReg(TruncMIB.getReg(0));
    if (SizeOp0 % NarrowSize != 0)
    int NumParts = SizeOp0 / NarrowSize;
    for (int i = 0; i < NumParts; ++i) {
      SrcRegs.push_back(SrcReg);
    for (int i = 0; i < NumParts; ++i) {
        DstRegs.push_back(SrcRegs[i]);
        assert(PartialExtensionReg &&
               "Expected to visit partial extension before full");
        if (FullExtensionReg) {
          DstRegs.push_back(FullExtensionReg);
        FullExtensionReg = DstRegs.back();
                         TargetOpcode::G_SEXT_INREG, {NarrowTy},
        PartialExtensionReg = DstRegs.back();
    MI.eraseFromParent();
  case TargetOpcode::G_BSWAP:
  case TargetOpcode::G_BITREVERSE: {
    if (SizeOp0 % NarrowSize != 0)
    unsigned NumParts = SizeOp0 / NarrowSize;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
    for (unsigned i = 0; i < NumParts; ++i) {
                                   {SrcRegs[NumParts - 1 - i]});
      DstRegs.push_back(DstPart.getReg(0));
    MI.eraseFromParent();
  case TargetOpcode::G_PTR_ADD:
  case TargetOpcode::G_PTRMASK: {
  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPEXT:
                                     unsigned OpIdx, unsigned ExtOpcode) {
  MO.setReg(ExtB.getReg(0));

  MO.setReg(ExtB.getReg(0));

                                     unsigned OpIdx, unsigned TruncOpcode) {

                                     unsigned OpIdx, unsigned ExtOpcode) {
LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
  const int NumMerge = (DstSize + WideSize - 1) / WideSize;
  unsigned NumOps = MI.getNumOperands();
  unsigned NumSrc = MI.getNumOperands() - 1;
  if (WideSize >= DstSize) {
    for (unsigned I = 2; I != NumOps; ++I) {
      const unsigned Offset = (I - 1) * PartSize;
      Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
      ResultReg = NextResult;
    if (WideSize > DstSize)
    MI.eraseFromParent();
    if (GCD == SrcSize) {
      Unmerges.push_back(SrcReg);
    for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
      Unmerges.push_back(Unmerge.getReg(J));
  if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
    for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
      Unmerges.push_back(UndefReg);
  const int PartsPerGCD = WideSize / GCD;
  for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
    NewMergeRegs.push_back(Merge.getReg(0));
  MI.eraseFromParent();
LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
  int NumDst = MI.getNumOperands() - 1;
  Register SrcReg = MI.getOperand(NumDst).getReg();
  Register Dst0Reg = MI.getOperand(0).getReg();
        dbgs() << "Not casting non-integral address space integer\n");
    for (int I = 1; I != NumDst; ++I) {
    MI.eraseFromParent();
    LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
  const int NumUnmerge = Unmerge->getNumOperands() - 1;
  if (PartsPerRemerge == 1) {
    for (int I = 0; I != NumUnmerge; ++I) {
      for (int J = 0; J != PartsPerUnmerge; ++J) {
        int Idx = I * PartsPerUnmerge + J;
          MIB.addDef(MI.getOperand(Idx).getReg());
      MIB.addUse(Unmerge.getReg(I));
    for (int J = 0; J != NumUnmerge; ++J)
      extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
    for (int I = 0; I != NumDst; ++I) {
      for (int J = 0; J < PartsPerRemerge; ++J) {
        const int Idx = I * PartsPerRemerge + J;
      RemergeParts.clear();
  MI.eraseFromParent();
LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
  unsigned Offset = MI.getOperand(2).getImm();
      MI.eraseFromParent();
  LLT ShiftTy = SrcTy;
  MI.eraseFromParent();

LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
  if (TypeIdx != 0 || WideTy.isVector())
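
// widenScalarAddSubOverflow maps each overflow opcode onto the plain (or
// carry-using) wide operation plus the extension that preserves its semantics:
// signed variants re-extend with G_SEXT, unsigned ones with G_ZEXT, and the
// *E forms additionally thread CarryIn through from operand 4.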
LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
  switch (MI.getOpcode()) {
  case TargetOpcode::G_SADDO:
    Opcode = TargetOpcode::G_ADD;
    ExtOpcode = TargetOpcode::G_SEXT;
  case TargetOpcode::G_SSUBO:
    Opcode = TargetOpcode::G_SUB;
    ExtOpcode = TargetOpcode::G_SEXT;
  case TargetOpcode::G_UADDO:
    Opcode = TargetOpcode::G_ADD;
    ExtOpcode = TargetOpcode::G_ZEXT;
  case TargetOpcode::G_USUBO:
    Opcode = TargetOpcode::G_SUB;
    ExtOpcode = TargetOpcode::G_ZEXT;
  case TargetOpcode::G_SADDE:
    Opcode = TargetOpcode::G_UADDE;
    ExtOpcode = TargetOpcode::G_SEXT;
    CarryIn = MI.getOperand(4).getReg();
  case TargetOpcode::G_SSUBE:
    Opcode = TargetOpcode::G_USUBE;
    ExtOpcode = TargetOpcode::G_SEXT;
    CarryIn = MI.getOperand(4).getReg();
  case TargetOpcode::G_UADDE:
    Opcode = TargetOpcode::G_UADDE;
    ExtOpcode = TargetOpcode::G_ZEXT;
    CarryIn = MI.getOperand(4).getReg();
  case TargetOpcode::G_USUBE:
    Opcode = TargetOpcode::G_USUBE;
    ExtOpcode = TargetOpcode::G_ZEXT;
    CarryIn = MI.getOperand(4).getReg();
  LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
                      {LHSExt, RHSExt, *CarryIn})
  MI.eraseFromParent();
LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
                  MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
                  MI.getOpcode() == TargetOpcode::G_SSHLSAT;
  bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
                 MI.getOpcode() == TargetOpcode::G_USHLSAT;
                               {ShiftL, ShiftR}, MI.getFlags());
  MI.eraseFromParent();

LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
  Register OriginalOverflow = MI.getOperand(1).getReg();
  LLT OverflowTy = MRI.getType(OriginalOverflow);
  unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
                             {LeftOperand, RightOperand});
  MI.eraseFromParent();
  switch (MI.getOpcode()) {
  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR:
  case TargetOpcode::G_ATOMICRMW_MIN:
  case TargetOpcode::G_ATOMICRMW_MAX:
  case TargetOpcode::G_ATOMICRMW_UMIN:
  case TargetOpcode::G_ATOMICRMW_UMAX:
    assert(TypeIdx == 0 && "atomicrmw with second scalar type");
  case TargetOpcode::G_ATOMIC_CMPXCHG:
    assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
           "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
  case TargetOpcode::G_EXTRACT:
    return widenScalarExtract(MI, TypeIdx, WideTy);
  case TargetOpcode::G_INSERT:
    return widenScalarInsert(MI, TypeIdx, WideTy);
  case TargetOpcode::G_MERGE_VALUES:
    return widenScalarMergeValues(MI, TypeIdx, WideTy);
  case TargetOpcode::G_UNMERGE_VALUES:
    return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
    return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
  case TargetOpcode::G_UMULO:
  case TargetOpcode::G_SMULO:
    return widenScalarMulo(MI, TypeIdx, WideTy);
  case TargetOpcode::G_SADDSAT:
  case TargetOpcode::G_SSUBSAT:
  case TargetOpcode::G_SSHLSAT:
  case TargetOpcode::G_UADDSAT:
  case TargetOpcode::G_USUBSAT:
  case TargetOpcode::G_USHLSAT:
    return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP: {
    unsigned ExtOpc = MI.getOpcode() == TargetOpcode::G_CTTZ ||
                              MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF
                          ? TargetOpcode::G_ANYEXT
                          : TargetOpcode::G_ZEXT;
    unsigned NewOpc = MI.getOpcode();
    if (NewOpc == TargetOpcode::G_CTTZ) {
      NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
    if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
        MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
    MI.eraseFromParent();
  case TargetOpcode::G_BSWAP: {
    MI.getOperand(0).setReg(DstExt);
  case TargetOpcode::G_BITREVERSE: {
    MI.getOperand(0).setReg(DstExt);
  case TargetOpcode::G_FREEZE:
  case TargetOpcode::G_ABS:
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_AND:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_SBFX:
  case TargetOpcode::G_UBFX:
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
  case TargetOpcode::G_SDIVREM:
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
    unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
      TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX:
  case TargetOpcode::G_UDIVREM:
  case TargetOpcode::G_SELECT:
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP:
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_STORE: {
        TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
  case TargetOpcode::G_CONSTANT: {
        MRI.getType(MI.getOperand(0).getReg()));
    assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
            ExtOpc == TargetOpcode::G_ANYEXT) &&
    const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
  case TargetOpcode::G_FCONSTANT: {
    assert(!LosesInfo && "extend should always be lossless");
  case TargetOpcode::G_IMPLICIT_DEF: {
  case TargetOpcode::G_BRCOND:
  case TargetOpcode::G_FCMP:
  case TargetOpcode::G_ICMP:
                                MI.getOperand(1).getPredicate()))
                                ? TargetOpcode::G_SEXT
                                : TargetOpcode::G_ZEXT;
  case TargetOpcode::G_PTR_ADD:
    assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
  case TargetOpcode::G_PHI: {
    assert(TypeIdx == 0 && "Expecting only Idx 0");
    for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
                       TargetOpcode::G_ANYEXT);
  case TargetOpcode::G_INSERT_VECTOR_ELT: {
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FMAD:
  case TargetOpcode::G_FNEG:
  case TargetOpcode::G_FABS:
  case TargetOpcode::G_FCANONICALIZE:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM_IEEE:
  case TargetOpcode::G_FMAXNUM_IEEE:
  case TargetOpcode::G_FMINIMUM:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_INTRINSIC_TRUNC:
  case TargetOpcode::G_INTRINSIC_ROUND:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
    for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
  case TargetOpcode::G_FPOWI: {
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_PTRTOINT:
  case TargetOpcode::G_BUILD_VECTOR: {
    for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
  case TargetOpcode::G_SEXT_INREG:
  case TargetOpcode::G_PTRMASK: {
  auto Unmerge = B.buildUnmerge(Ty, Src);
  for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
    Pieces.push_back(Unmerge.getReg(I));

  LLT DstCastTy = DstEltTy;
  LLT SrcPartTy = SrcEltTy;
  if (NumSrcElt < NumDstElt) {
    SrcPartTy = SrcEltTy;
  } else if (NumSrcElt > NumDstElt) {
    DstCastTy = DstEltTy;
    MI.eraseFromParent();
  MI.eraseFromParent();
                                                   unsigned NewEltSize,
                                                   unsigned OldEltSize) {
  const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
  LLT IdxTy = B.getMRI()->getType(Idx);
  auto OffsetMask = B.buildConstant(
  auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
  return B.buildShl(IdxTy, OffsetIdx,
                    B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
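
// The helper above turns an element index expressed in old (narrow) elements
// into a bit offset inside one new (wider) element: it masks the index by the
// element ratio (OffsetMask, presumably Ratio - 1) and shifts left by
// log2(OldEltSize). E.g. bitcasting 8-bit elements to 32-bit ones gives a
// ratio of 4, so index 6 maps to sub-index 2, i.e. a bit offset of 16.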
  if (NewNumElts > OldNumElts) {
    if (NewNumElts % OldNumElts != 0)
    const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
    for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
      NewOps[I] = Elt.getReg(0);
    MI.eraseFromParent();
  if (NewNumElts < OldNumElts) {
    if (NewEltSize % OldEltSize != 0)
    const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
    MI.eraseFromParent();
  LLT TargetTy = B.getMRI()->getType(TargetReg);
  LLT InsertTy = B.getMRI()->getType(InsertReg);
  auto ZextVal = B.buildZExt(TargetTy, InsertReg);
  auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
  auto EltMask = B.buildConstant(
  auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
  auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
  auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
  return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
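
// In short, the sequence above computes
//   (TargetReg & ~(EltMask << OffsetBits)) | (zext(InsertReg) << OffsetBits),
// i.e. a bitfield insert of InsertReg into TargetReg at the given bit offset.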
  if (NewNumElts < OldNumElts) {
    if (NewEltSize % OldEltSize != 0)
    const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
        CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
    MI.eraseFromParent();

  if (MemSizeInBits != MemStoreSizeInBits) {
    if (isa<GSExtLoad>(LoadMI)) {
    } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
    if (DstTy != LoadTy)
  uint64_t LargeSplitSize, SmallSplitSize;
    SmallSplitSize = MemSizeInBits - LargeSplitSize;
    SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
                                       LargeSplitSize / 8);
                                      SmallPtr, *SmallMMO);
  if (AnyExtTy == DstTy)

  if (StoreWidth != StoreSizeInBits) {
  uint64_t LargeSplitSize, SmallSplitSize;
    SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
  switch (MI.getOpcode()) {
  case TargetOpcode::G_LOAD: {
  case TargetOpcode::G_STORE: {
  case TargetOpcode::G_SELECT: {
        dbgs() << "bitcast action not implemented for vector select\n");
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
  case TargetOpcode::G_INSERT_VECTOR_ELT:
void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {

  using namespace TargetOpcode;
  switch (MI.getOpcode()) {
  case TargetOpcode::G_BITCAST:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM: {
                                  {MI.getOperand(1), MI.getOperand(2)});
    MI.eraseFromParent();
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_UMULH:
  case TargetOpcode::G_SMULH:
  case TargetOpcode::G_SMULO:
  case TargetOpcode::G_UMULO: {
    Register Overflow = MI.getOperand(1).getReg();
    unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
                          ? TargetOpcode::G_SMULH
                          : TargetOpcode::G_UMULH;
    MI.setDesc(TII.get(TargetOpcode::G_MUL));
    MI.removeOperand(1);
    if (Opcode == TargetOpcode::G_SMULH) {
  case TargetOpcode::G_FNEG: {
    Register SubByReg = MI.getOperand(1).getReg();
    MI.eraseFromParent();
  case TargetOpcode::G_FSUB: {
    if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
    MI.eraseFromParent();
  case TargetOpcode::G_FMAD:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_INTRINSIC_ROUND:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
      changeOpcode(MI, TargetOpcode::G_FRINT);
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
    Register OldValRes = MI.getOperand(0).getReg();
    Register SuccessRes = MI.getOperand(1).getReg();
                                    **MI.memoperands_begin());
    MI.eraseFromParent();
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_STORE:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTPOP:
    Register CarryOut = MI.getOperand(1).getReg();
    MI.eraseFromParent();
    Register CarryOut = MI.getOperand(1).getReg();
    Register CarryIn = MI.getOperand(4).getReg();
    MI.eraseFromParent();
    Register BorrowOut = MI.getOperand(1).getReg();
    MI.eraseFromParent();
    Register BorrowOut = MI.getOperand(1).getReg();
    Register BorrowIn = MI.getOperand(4).getReg();
    MI.eraseFromParent();
  case G_MERGE_VALUES:
  case G_UNMERGE_VALUES:
  case TargetOpcode::G_SEXT_INREG: {
    assert(MI.getOperand(2).isImm() && "Expected immediate");
    int64_t SizeInBits = MI.getOperand(2).getImm();
    MI.eraseFromParent();
  case G_EXTRACT_VECTOR_ELT:
  case G_INSERT_VECTOR_ELT:
  case G_SHUFFLE_VECTOR:
  case G_DYN_STACKALLOC:
  case G_READ_REGISTER:
  case G_WRITE_REGISTER:
    if (LI.isLegalOrCustom({G_UMIN, Ty}))
    if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
  case G_MEMCPY_INLINE:
    return lowerMemcpyInline(MI);
  unsigned AddrSpace = DL.getAllocaAddrSpace();

  LLT IdxTy = B.getMRI()->getType(IdxReg);
    return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
  return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))

         "Converting bits to bytes lost precision");
    std::initializer_list<unsigned> NonVecOpIndices) {
  if (MI.getNumMemOperands() != 0)
  for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {

    int NumParts, NumLeftover;
    std::tie(NumParts, NumLeftover) =
    assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
    for (int i = 0; i < NumParts; ++i) {
      DstOps.push_back(NarrowTy);
      assert(NumLeftover == 1 && "expected exactly one leftover");
      DstOps.push_back(LeftoverTy);

  for (unsigned i = 0; i < N; ++i) {
      Ops.push_back(Op.getReg());
    else if (Op.isImm())
      Ops.push_back(Op.getImm());
    else if (Op.isPredicate())
    std::initializer_list<unsigned> NonVecOpIndices) {
         "Non-compatible opcode or not specified non-vector operands");
  unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
  unsigned NumDefs = MI.getNumDefs();
  for (unsigned i = 0; i < NumDefs; ++i) {
  for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
       ++UseIdx, ++UseNo) {
                    MI.getOperand(UseIdx));
      extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces);
      for (auto Reg : SplitPieces)
        InputOpsPieces[UseNo].push_back(Reg);
  unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
  for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
    for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
      Defs.push_back(OutputOpsPieces[DstNo][i]);
    for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
      Uses.push_back(InputOpsPieces[InputNo][i]);
    for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
      OutputRegs[DstNo].push_back(I.getReg(DstNo));
    for (unsigned i = 0; i < NumDefs; ++i)
      mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
    for (unsigned i = 0; i < NumDefs; ++i)
  MI.eraseFromParent();
  unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
  unsigned NumDefs = MI.getNumDefs();
  for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
       UseIdx += 2, ++UseNo) {
    extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo]);
  unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
  for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
    OutputRegs.push_back(Phi.getReg(0));
    for (unsigned j = 0; j < NumInputs / 2; ++j) {
      Phi.addUse(InputOpsPieces[j][i]);
      Phi.add(MI.getOperand(1 + j * 2 + 1));
    mergeMixedSubvectors(MI.getReg(0), OutputRegs);
  MI.eraseFromParent();
  const int NumDst = MI.getNumOperands() - 1;
  const Register SrcReg = MI.getOperand(NumDst).getReg();
  if (TypeIdx != 1 || NarrowTy == DstTy)
  const int PartsPerUnmerge = NumDst / NumUnmerge;
  for (int I = 0; I != NumUnmerge; ++I) {
    for (int J = 0; J != PartsPerUnmerge; ++J)
      MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
    MIB.addUse(Unmerge.getReg(I));
  MI.eraseFromParent();
  if (NarrowTy == SrcTy)
    for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
      for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
        Elts.push_back(Unmerge.getReg(j));
    unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
    for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
         ++i, Offset += NumNarrowTyElts) {
    MI.eraseFromParent();

  assert(TypeIdx == 0 && "Bad type index");
  for (unsigned i = 0; i < NumParts; ++i) {
    for (unsigned j = 0; j < NumElts; ++j)
      Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
  MI.eraseFromParent();
  bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
  assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
    InsertVal = MI.getOperand(2).getReg();
  Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
    IdxVal = MaybeCst->Value.getSExtValue();
    MI.eraseFromParent();
  LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
  LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
                                  TargetOpcode::G_ANYEXT);
  int64_t PartIdx = IdxVal / NewNumElts;
        PartTy, VecParts[PartIdx], InsertVal, NewIdx);
    VecParts[PartIdx] = InsertPart.getReg(0);
    buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
  MI.eraseFromParent();
  bool IsLoad = isa<GLoad>(LdStMI);
  int NumLeftover = -1;
    if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
                     NarrowLeftoverRegs)) {
      NumParts = NarrowRegs.size();
      NumLeftover = NarrowLeftoverRegs.size();
  auto MMO = LdStMI.getMMO();
                             unsigned NumParts, unsigned Offset) -> unsigned {
    for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
      unsigned ByteOffset = Offset / 8;
      ValRegs.push_back(Dst);
      Offset = isBigEndian ? Offset - PartSize : Offset + PartSize;
  unsigned HandledOffset =
      splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);
    splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
    insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
                LeftoverTy, NarrowLeftoverRegs);
  using namespace TargetOpcode;
  switch (MI.getOpcode()) {
  case G_IMPLICIT_DEF:
  case G_FCANONICALIZE:
  case G_INTRINSIC_ROUND:
  case G_INTRINSIC_ROUNDEVEN:
  case G_INTRINSIC_TRUNC:
  case G_FMINNUM_IEEE:
  case G_FMAXNUM_IEEE:
  case G_CTLZ_ZERO_UNDEF:
  case G_CTTZ_ZERO_UNDEF:
  case G_ADDRSPACE_CAST:
  case G_UNMERGE_VALUES:
  case G_BUILD_VECTOR:
    assert(TypeIdx == 0 && "not a vector type index");
  case G_CONCAT_VECTORS:
  case G_EXTRACT_VECTOR_ELT:
  case G_INSERT_VECTOR_ELT:
  case G_SHUFFLE_VECTOR:
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  Register Src1Reg = MI.getOperand(1).getReg();
  Register Src2Reg = MI.getOperand(2).getReg();
  if (DstTy != Src1Ty)
  if (DstTy != Src2Ty)
  extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs);
  extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs);
  Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
    unsigned InputUsed[2] = {-1U, -1U};
    unsigned FirstMaskIdx = High * NewElts;
    bool UseBuildVector = false;
    for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
      int Idx = Mask[FirstMaskIdx + MaskOffset];
      unsigned Input = (unsigned)Idx / NewElts;
      Idx -= Input * NewElts;
      if (InputUsed[OpNo] == Input) {
      } else if (InputUsed[OpNo] == -1U) {
        InputUsed[OpNo] = Input;
        UseBuildVector = true;
      Ops.push_back(Idx + OpNo * NewElts);
    if (UseBuildVector) {
      for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
        int Idx = Mask[FirstMaskIdx + MaskOffset];
        unsigned Input = (unsigned)Idx / NewElts;
        Idx -= Input * NewElts;
                .buildExtractVectorElement(
                    EltTy, Inputs[Input],
    } else if (InputUsed[0] == -1U) {
      Register Op0 = Inputs[InputUsed[0]];
                         : Inputs[InputUsed[1]];
  MI.eraseFromParent();
  case TargetOpcode::G_VECREDUCE_FADD:
    ScalarOpc = TargetOpcode::G_FADD;
  case TargetOpcode::G_VECREDUCE_FMUL:
    ScalarOpc = TargetOpcode::G_FMUL;
  case TargetOpcode::G_VECREDUCE_FMAX:
    ScalarOpc = TargetOpcode::G_FMAXNUM;
  case TargetOpcode::G_VECREDUCE_FMIN:
    ScalarOpc = TargetOpcode::G_FMINNUM;
  case TargetOpcode::G_VECREDUCE_ADD:
    ScalarOpc = TargetOpcode::G_ADD;
  case TargetOpcode::G_VECREDUCE_MUL:
    ScalarOpc = TargetOpcode::G_MUL;
  case TargetOpcode::G_VECREDUCE_AND:
    ScalarOpc = TargetOpcode::G_AND;
  case TargetOpcode::G_VECREDUCE_OR:
    ScalarOpc = TargetOpcode::G_OR;
  case TargetOpcode::G_VECREDUCE_XOR:
    ScalarOpc = TargetOpcode::G_XOR;
  case TargetOpcode::G_VECREDUCE_SMAX:
    ScalarOpc = TargetOpcode::G_SMAX;
  case TargetOpcode::G_VECREDUCE_SMIN:
    ScalarOpc = TargetOpcode::G_SMIN;
  case TargetOpcode::G_VECREDUCE_UMAX:
    ScalarOpc = TargetOpcode::G_UMAX;
  case TargetOpcode::G_VECREDUCE_UMIN:
    ScalarOpc = TargetOpcode::G_UMIN;
  unsigned Opc = MI.getOpcode();
  assert(Opc != TargetOpcode::G_VECREDUCE_SEQ_FADD &&
         Opc != TargetOpcode::G_VECREDUCE_SEQ_FMUL &&
         "Sequential reductions not expected");
  const unsigned NumParts =
  extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs);
    if (DstTy != NarrowTy)
    unsigned NumPartsLeft = NumParts;
    while (NumPartsLeft > 1) {
      for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
                .buildInstr(ScalarOpc, {NarrowTy},
                            {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
      SplitSrcs = PartialResults;
      PartialResults.clear();
      NumPartsLeft = SplitSrcs.size();
    assert(SplitSrcs.size() == 1);
    MI.eraseFromParent();
    for (unsigned Idx = 1; Idx < NumParts; ++Idx)
    MI.eraseFromParent();
  for (unsigned Part = 0; Part < NumParts; ++Part) {
    PartialReductions.push_back(
    return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
  Register Acc = PartialReductions[0];
  for (unsigned Part = 1; Part < NumParts; ++Part) {
    if (Part == NumParts - 1) {
                   {Acc, PartialReductions[Part]});
                .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
  MI.eraseFromParent();

                                             unsigned ScalarOpc) {
  extractParts(SrcReg, NarrowTy,
  while (SplitSrcs.size() > 1) {
    for (unsigned Idx = 0; Idx < SplitSrcs.size() - 1; Idx += 2) {
      PartialRdxs.push_back(Res);
  MI.getOperand(1).setReg(SplitSrcs[0]);
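
// tryNarrowPow2Reduction performs a pairwise reduction tree: it keeps
// combining adjacent NarrowTy pieces with ScalarOpc until a single piece
// remains, then rewires the original reduction to take that piece as its
// source operand.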
                                             const LLT HalfTy, const LLT AmtTy) {
    MI.eraseFromParent();
  unsigned VTBits = 2 * NVTBits;
  if (MI.getOpcode() == TargetOpcode::G_SHL) {
    if (Amt.ugt(VTBits)) {
    } else if (Amt.ugt(NVTBits)) {
    } else if (Amt == NVTBits) {
  } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
    if (Amt.ugt(VTBits)) {
    } else if (Amt.ugt(NVTBits)) {
    } else if (Amt == NVTBits) {
    if (Amt.ugt(VTBits)) {
    } else if (Amt.ugt(NVTBits)) {
    } else if (Amt == NVTBits) {
  MI.eraseFromParent();

  if (DstEltSize % 2 != 0)
  const unsigned NewBitSize = DstEltSize / 2;
  switch (MI.getOpcode()) {
  case TargetOpcode::G_SHL: {
    ResultRegs[0] = Lo.getReg(0);
    ResultRegs[1] = Hi.getReg(0);
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR: {
    if (MI.getOpcode() == TargetOpcode::G_LSHR) {
      ResultRegs[0] = Lo.getReg(0);
      ResultRegs[1] = Hi.getReg(0);
  MI.eraseFromParent();

  assert(TypeIdx == 0 && "Expecting only Idx 0");
  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {