#define DEBUG_TYPE "legalizer"

using namespace LegalizeActions;
using namespace MIPatternMatch;
/// Try to break down \p OrigTy into \p NarrowTy-sized pieces, returning the
/// number of full parts and the number of leftover parts.
static std::pair<int, int>
getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
  unsigned Size = OrigTy.getSizeInBits();
  unsigned NarrowSize = NarrowTy.getSizeInBits();
  unsigned EltSize = OrigTy.getScalarSizeInBits();
  unsigned NumParts = Size / NarrowSize;
  unsigned LeftoverSize = Size - NumParts * NarrowSize;

  if (LeftoverSize == 0)
    return {NumParts, 0};

  // The leftover must be a whole number of the original elements.
  if (LeftoverSize % EltSize != 0)
    return {-1, -1};

  LeftoverTy = LLT::scalarOrVector(
      ElementCount::getFixed(LeftoverSize / EltSize), EltSize);
  int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
  return std::make_pair(NumParts, NumLeftover);
}
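// A worked example of the breakdown arithmetic above (illustrative values,
// not from any particular target): narrowing <6 x s16> (96 bits) to
// <4 x s16> (64 bits) gives NumParts = 1 and LeftoverSize = 32; since
// 32 % 16 == 0 the leftover becomes one <2 x s16> piece, so the result is
// {1, 1}. Narrowing s96 to s32 gives {3, 0} with no leftover at all.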
LegalizerHelper::LegalizerHelper(MachineFunction &MF,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &Builder)
    : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
      LI(*MF.getSubtarget().getLegalizerInfo()),
      TLI(*MF.getSubtarget().getTargetLowering()), KB(nullptr) {}

LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &B, GISelKnownBits *KB)
    : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
      TLI(*MF.getSubtarget().getTargetLowering()), KB(KB) {}
LegalizerHelper::LegalizeResult
LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
                                   LostDebugLocObserver &LocObserver) {
  if (isa<GIntrinsic>(MI))
    return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;

  auto Step = LI.getAction(MI, MRI);
  switch (Step.Action) {
  case Bitcast:
    return bitcast(MI, Step.TypeIdx, Step.NewType);
  case Lower:
    return lower(MI, Step.TypeIdx, Step.NewType);
void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
                                   SmallVectorImpl<Register> &VRegs) {
  for (int i = 0; i < NumParts; ++i)
    VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
  MIRBuilder.buildUnmerge(VRegs, Reg);
}
bool LegalizerHelper::extractParts(Register Reg, LLT RegTy, LLT MainTy,
                                   LLT &LeftoverTy,
                                   SmallVectorImpl<Register> &VRegs,
                                   SmallVectorImpl<Register> &LeftoverRegs) {
  unsigned RegSize = RegTy.getSizeInBits();
  unsigned MainSize = MainTy.getSizeInBits();
  unsigned NumParts = RegSize / MainSize;
  unsigned LeftoverSize = RegSize - NumParts * MainSize;

  // Use an unmerge when possible.
  if (LeftoverSize == 0) {
    for (unsigned I = 0; I < NumParts; ++I)
      VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
    MIRBuilder.buildUnmerge(VRegs, Reg);
    return true;
  }

  // Perform an irregular split. The leftover is the last element of
  // RegPieces.
  if (MainTy.isVector()) {
    SmallVector<Register, 8> RegPieces;
    extractVectorParts(Reg, MainTy.getNumElements(), RegPieces);
    for (unsigned i = 0; i < RegPieces.size() - 1; ++i)
      VRegs.push_back(RegPieces[i]);
    LeftoverRegs.push_back(RegPieces[RegPieces.size() - 1]);
    LeftoverTy = MRI.getType(LeftoverRegs[0]);
    return true;
  }

  // For irregular sizes, extract the individual parts.
  for (unsigned I = 0; I != NumParts; ++I) {
void LegalizerHelper::extractVectorParts(Register Reg, unsigned NumElts,
                                         SmallVectorImpl<Register> &VRegs) {
  LLT RegTy = MRI.getType(Reg);
  LLT EltTy = RegTy.getElementType();
  LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
  unsigned RegNumElts = RegTy.getNumElements();
  unsigned LeftoverNumElts = RegNumElts % NumElts;
  unsigned NumNarrowTyPieces = RegNumElts / NumElts;

  // Perfect split without leftover.
  if (LeftoverNumElts == 0)
    return extractParts(Reg, NarrowTy, NumNarrowTyPieces, VRegs);

  // Irregular split: take the elements individually, then remerge them into
  // the requested sub-vectors.
  SmallVector<Register, 8> Elts;
  extractParts(Reg, EltTy, RegNumElts, Elts);

  unsigned Offset = 0;
  for (unsigned i = 0; i < NumNarrowTyPieces; ++i, Offset += NumElts) {
    ArrayRef<Register> Pieces(&Elts[Offset], NumElts);
    VRegs.push_back(MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
  }

  // A single leftover element can be used directly.
  if (LeftoverNumElts == 1) {
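  // For instance (illustrative numbers): splitting a <7 x s8> register into
  // NumElts == 2 pieces yields three <2 x s8> sub-vectors plus a single
  // leftover s8 element, which the LeftoverNumElts == 1 path pushes directly
  // without an extra merge.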
void LegalizerHelper::insertParts(Register DstReg, LLT ResultTy, LLT PartTy,
                                  ArrayRef<Register> PartRegs, LLT LeftoverTy,
                                  ArrayRef<Register> LeftoverRegs) {
  // Merge sub-vectors with different number of elements and insert into
  // DstReg.
  if (ResultTy.isVector()) {
    assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
    SmallVector<Register, 8> AllRegs;
    for (auto Reg : concat<const Register>(PartRegs, LeftoverRegs))
      AllRegs.push_back(Reg);
    return mergeMixedSubvectors(DstReg, AllRegs);
  }

  SmallVector<Register> GCDRegs;
  LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
  for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
    extractGCDType(GCDRegs, GCDTy, PartReg);
  LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
  buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
}
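// The GCD/LCM remerge above is the generic glue for mixed-size pieces: every
// part is first unmerged down to the greatest-common-divisor type, the GCD
// pieces are merged back up to a least-common-multiple-sized value (padding
// with undef where needed), and that is finally truncated into the
// destination. E.g. three s32 parts plus an s16 leftover re-form an s112
// destination through the GCD type s16.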
void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
                                           ArrayRef<Register> PartRegs) {
  SmallVector<Register, 32> AllElts;
  for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
    appendVectorElts(AllElts, PartRegs[i]);

  Register Leftover = PartRegs[PartRegs.size() - 1];
  if (MRI.getType(Leftover).isScalar())
    AllElts.push_back(Leftover);
  else
    appendVectorElts(AllElts, Leftover);

  MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
}
/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
                              const MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);

  const int StartIdx = Regs.size();
  const int NumResults = MI.getNumOperands() - 1;
  Regs.resize(Regs.size() + NumResults);
  for (int I = 0; I != NumResults; ++I)
    Regs[StartIdx + I] = MI.getOperand(I).getReg();
}

void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
                                     LLT GCDTy, Register SrcReg) {
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy == GCDTy) {
    // If the source already evenly divides the result type, no split is
    // needed.
    Parts.push_back(SrcReg);
  } else {
    // Split into GCD-sized pieces.
    auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
    getUnmergeResults(Parts, *Unmerge);
  }
}

LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
                                    LLT DstTy, LLT NarrowTy, Register SrcReg) {
  LLT SrcTy = MRI.getType(SrcReg);
  LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
  extractGCDType(Parts, GCDTy, SrcReg);
  return GCDTy;
}
LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
                                         SmallVectorImpl<Register> &VRegs,
                                         unsigned PadStrategy) {
  LLT LCMTy = getLCMType(DstTy, NarrowTy);

  int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
  int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
  int NumOrigSrc = VRegs.size();

  Register PadReg;

  // Get a value we can use to pad the source value if the sources won't
  // evenly cover the result type.
  if (NumOrigSrc < NumParts * NumSubParts) {
    if (PadStrategy == TargetOpcode::G_ZEXT)
      PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
    else if (PadStrategy == TargetOpcode::G_ANYEXT)
      PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
    else {
      assert(PadStrategy == TargetOpcode::G_SEXT);

      // Shift the sign bit of the low register through the high register.
      auto ShiftAmt =
          MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
      PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
    }
  }

  // Registers for the final merge to be produced.
  SmallVector<Register, 4> Remerge(NumParts);

  // Registers for intermediate merges, which will be merged into a source for
  // Remerge.
  SmallVector<Register, 4> SubMerge(NumSubParts);

  // A single register holding nothing but padding, if any part is all
  // padding.
  Register AllPadReg;

  for (int I = 0; I != NumParts; ++I) {
    bool AllMergePartsArePadding = true;

    // Build the requested merges to the requested type.
    for (int J = 0; J != NumSubParts; ++J) {
      int Idx = I * NumSubParts + J;
      if (Idx >= NumOrigSrc) {
        SubMerge[J] = PadReg;
        continue;
      }

      SubMerge[J] = VRegs[Idx];

      // There are meaningful bits here, so this part isn't all padding.
      AllMergePartsArePadding = false;
    }

    // If a complete piece is filled with padding bits, directly emit the
    // natural-sized constant or undef rather than a merge of smaller ones.
    if (AllMergePartsArePadding && !AllPadReg) {
      if (PadStrategy == TargetOpcode::G_ANYEXT)
        AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
      else if (PadStrategy == TargetOpcode::G_ZEXT)
        AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
      // A sign-extension padding value can't be materialized as a trivial
      // constant; it is handled below by reusing the first all-signbit merge.
    }

    if (AllPadReg) {
      // Avoid creating additional instructions if this is just a duplicate
      // pad-only part.
      Remerge[I] = AllPadReg;
      continue;
    }

    if (NumSubParts == 1)
      Remerge[I] = SubMerge[0];
    else
      Remerge[I] = MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);

    // In the sign-extend padding case, reuse the first all-signbit merge.
    if (AllMergePartsArePadding && !AllPadReg)
      AllPadReg = Remerge[I];
  }

  VRegs = std::move(Remerge);
  return LCMTy;
}
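// PadStrategy picks how missing high pieces are materialized when the source
// registers don't cover the full LCM width: G_ZEXT pads with zero constants,
// G_ANYEXT pads with undef, and G_SEXT replicates the sign bit of the highest
// real piece. The AllPadReg caching above simply avoids emitting one
// identical pad merge per part.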
void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
                                               ArrayRef<Register> RemergeRegs) {
  LLT DstTy = MRI.getType(DstReg);

  // Create the merge to the widened source, and extract the relevant bits
  // into the result.
  if (DstTy == LCMTy) {
    MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
    return;
  }

  // Unmerge the widened value, reusing DstReg for the first (low) piece and
  // creating dead defs for the rest.
  UnmergeDefs[0] = DstReg;
  for (unsigned I = 1; I != NumDefs; ++I)
    UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
#define RTLIBCASE_INT(LibcallPrefix)                                           \
  do {                                                                         \
    switch (Size) {                                                            \
    case 32:                                                                   \
      return RTLIB::LibcallPrefix##32;                                         \
    case 64:                                                                   \
      return RTLIB::LibcallPrefix##64;                                         \
    case 128:                                                                  \
      return RTLIB::LibcallPrefix##128;                                        \
    default:                                                                   \
      llvm_unreachable("unexpected size");                                     \
    }                                                                          \
  } while (0)

#define RTLIBCASE(LibcallPrefix)                                               \
  do {                                                                         \
    switch (Size) {                                                            \
    case 32:                                                                   \
      return RTLIB::LibcallPrefix##32;                                         \
    case 64:                                                                   \
      return RTLIB::LibcallPrefix##64;                                         \
    case 80:                                                                   \
      return RTLIB::LibcallPrefix##80;                                         \
    case 128:                                                                  \
      return RTLIB::LibcallPrefix##128;                                        \
    default:                                                                   \
      llvm_unreachable("unexpected size");                                     \
    }                                                                          \
  } while (0)
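// So, for example, RTLIBCASE(ADD_F) expands to a switch over Size returning
// RTLIB::ADD_F32, RTLIB::ADD_F64, RTLIB::ADD_F80, or RTLIB::ADD_F128; the
// _INT variant omits the 80-bit (x87 extended precision) case, which only
// exists for floating-point libcalls.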
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
  switch (Opcode) {
  case TargetOpcode::G_MUL:
    RTLIBCASE_INT(MUL_I);
  case TargetOpcode::G_SDIV:
    RTLIBCASE_INT(SDIV_I);
  case TargetOpcode::G_UDIV:
    RTLIBCASE_INT(UDIV_I);
  case TargetOpcode::G_SREM:
    RTLIBCASE_INT(SREM_I);
  case TargetOpcode::G_UREM:
    RTLIBCASE_INT(UREM_I);
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
    RTLIBCASE_INT(CTLZ_I);
  case TargetOpcode::G_FADD:
    RTLIBCASE(ADD_F);
  case TargetOpcode::G_FSUB:
    RTLIBCASE(SUB_F);
  case TargetOpcode::G_FMUL:
    RTLIBCASE(MUL_F);
  case TargetOpcode::G_FDIV:
    RTLIBCASE(DIV_F);
  case TargetOpcode::G_FEXP:
    RTLIBCASE(EXP_F);
  case TargetOpcode::G_FEXP2:
    RTLIBCASE(EXP2_F);
  case TargetOpcode::G_FEXP10:
    RTLIBCASE(EXP10_F);
  case TargetOpcode::G_FREM:
    RTLIBCASE(REM_F);
  case TargetOpcode::G_FPOW:
    RTLIBCASE(POW_F);
  case TargetOpcode::G_FMA:
    RTLIBCASE(FMA_F);
  case TargetOpcode::G_FSIN:
    RTLIBCASE(SIN_F);
  case TargetOpcode::G_FCOS:
    RTLIBCASE(COS_F);
  case TargetOpcode::G_FLOG10:
    RTLIBCASE(LOG10_F);
  case TargetOpcode::G_FLOG:
    RTLIBCASE(LOG_F);
  case TargetOpcode::G_FLOG2:
    RTLIBCASE(LOG2_F);
  case TargetOpcode::G_FLDEXP:
    RTLIBCASE(LDEXP_F);
  case TargetOpcode::G_FCEIL:
    RTLIBCASE(CEIL_F);
  case TargetOpcode::G_FFLOOR:
    RTLIBCASE(FLOOR_F);
  case TargetOpcode::G_FMINNUM:
    RTLIBCASE(FMIN_F);
  case TargetOpcode::G_FMAXNUM:
    RTLIBCASE(FMAX_F);
  case TargetOpcode::G_FSQRT:
    RTLIBCASE(SQRT_F);
  case TargetOpcode::G_FRINT:
    RTLIBCASE(RINT_F);
  case TargetOpcode::G_FNEARBYINT:
    RTLIBCASE(NEARBYINT_F);
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
    RTLIBCASE(ROUNDEVEN_F);
  }
  llvm_unreachable("Unknown libcall function");
}
  // It's not safe to eliminate the sign / zero extension of the return value.
  if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
      CallerAttrs.hasRetAttr(Attribute::SExt))
    return false;

  // Only G_MEMCPY-family opcodes are expected here.
  switch (MI.getOpcode()) {
  case TargetOpcode::G_BZERO:
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMMOVE:
  case TargetOpcode::G_MEMSET:

  if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
    return false;

  Register PReg = Next->getOperand(0).getReg();

  if (Ret->getNumImplicitOperands() != 1)
    return false;

  if (PReg != Ret->getOperand(0).getReg())
    return false;

  Info.OrigRet = Result;
  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))
    return LegalizerHelper::UnableToLegalize;
    Args.push_back({MO.getReg(), OpType, 0});
  return createLibcall(MIRBuilder, Libcall,
                       {MI.getOperand(0).getReg(), OpType, 0}, Args);

  // Gather the operand registers and their IR types.
  for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
    Register Reg = MI.getOperand(i).getReg();
    LLT OpLLT = MRI.getType(Reg);
    Type *OpTy = nullptr;
    if (OpLLT.isPointer())
      OpTy = PointerType::get(Ctx, OpLLT.getAddressSpace());
    else
      OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
    Args.push_back({Reg, OpTy, 0});
  }

  RTLIB::Libcall RTLibcall;
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_BZERO:
    RTLibcall = RTLIB::BZERO;
    break;
  case TargetOpcode::G_MEMCPY:
    RTLibcall = RTLIB::MEMCPY;
    Args[0].Flags[0].setReturned();
    break;
  case TargetOpcode::G_MEMMOVE:
    RTLibcall = RTLIB::MEMMOVE;
    Args[0].Flags[0].setReturned();
    break;
  case TargetOpcode::G_MEMSET:
    RTLibcall = RTLIB::MEMSET;
    Args[0].Flags[0].setReturned();
    break;
  default:
    llvm_unreachable("unsupported opcode");
  }

  const char *Name = TLI.getLibcallName(RTLibcall);

  CallLowering::CallLoweringInfo Info;
  Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
  // The last operand is a 'tail' immediate; only emit a tail call when the
  // libcall is actually in tail position.
  Info.IsTailCall =
      MI.getOperand(MI.getNumOperands() - 1).getImm() &&
      isLibCallInTailPosition(Info.OrigRet, MI, MIRBuilder.getTII(), MRI);

  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))
    return LegalizerHelper::UnableToLegalize;

  if (Info.LoweredTailCall) {
    assert(Info.IsTailCall &&
           "Lowered tail call when it wasn't a tail call?");

    // Delete the instructions after the call, which are the return and any
    // trailing debug instructions.
    do {
      MachineInstr *Next = MI.getNextNode();
      assert(Next && (Next->isReturn() || Next->isDebugInstr()) &&
             "Expected instr following MI to be return or debug inst?");
      Next->eraseFromParent();
    } while (MI.getNextNode());
  }
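// Note: Args[0].Flags[0].setReturned() above marks the destination pointer of
// memcpy/memmove/memset as a 'returned' argument, matching the C library
// contract that these functions return their first argument; bzero returns
// void, so it is not marked.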
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
                                       Type *FromType) {
  auto ToMVT = MVT::getVT(ToType);
  auto FromMVT = MVT::getVT(FromType);

  switch (Opcode) {
  case TargetOpcode::G_FPEXT:
    return RTLIB::getFPEXT(FromMVT, ToMVT);
  case TargetOpcode::G_FPTRUNC:
    return RTLIB::getFPROUND(FromMVT, ToMVT);
  case TargetOpcode::G_FPTOSI:
    return RTLIB::getFPTOSINT(FromMVT, ToMVT);
  case TargetOpcode::G_FPTOUI:
    return RTLIB::getFPTOUINT(FromMVT, ToMVT);
  case TargetOpcode::G_SITOFP:
    return RTLIB::getSINTTOFP(FromMVT, ToMVT);
  case TargetOpcode::G_UITOFP:
    return RTLIB::getUINTTOFP(FromMVT, ToMVT);
  }
  llvm_unreachable("Unsupported libcall function");
}

  return createLibcall(MIRBuilder, Libcall,
                       {MI.getOperand(0).getReg(), ToType, 0},
                       {{MI.getOperand(1).getReg(), FromType, 0}});
LegalizerHelper::LegalizeResult
LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
  LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
    unsigned Size = LLTy.getSizeInBits();
    Type *HLTy = IntegerType::get(Ctx, Size);
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FEXP10:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FLDEXP:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
    LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
    unsigned Size = LLTy.getSizeInBits();
    Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
    if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
      LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
      return UnableToLegalize;
    }
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPEXT:
  case TargetOpcode::G_FPTRUNC: {
    Type *FromTy =
        getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
    Type *ToTy =
        getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
    if (!FromTy || !ToTy)
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(MI, MIRBuilder, ToTy, FromTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    // FIXME: Support other types.
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder,
        ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
        FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    // FIXME: Support other types.
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder,
        ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
        FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_BZERO:
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMMOVE:
  case TargetOpcode::G_MEMSET: {
    LegalizeResult Result =
        createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver);
    if (Result != Legalized)
      return Result;
    MI.eraseFromParent();
    return Result;
  }
  }

  MI.eraseFromParent();
  return Legalized;
}
LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
                                                              unsigned TypeIdx,
                                                              LLT NarrowTy) {
  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_IMPLICIT_DEF: {
    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);

    // If SizeOp0 is not an exact multiple of NarrowSize, emit
    // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
    if (SizeOp0 % NarrowSize != 0) {
      LLT ImplicitTy = NarrowTy;
      if (DstTy.isVector())
        ImplicitTy = LLT::vector(DstTy.getElementCount(), ImplicitTy);

      Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
      MIRBuilder.buildAnyExt(DstReg, ImplicitReg);

      MI.eraseFromParent();
      return Legalized;
    }

    int NumParts = SizeOp0 / NarrowSize;

    SmallVector<Register, 2> DstRegs;
    for (int i = 0; i < NumParts; ++i)
      DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));

    if (DstTy.isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CONSTANT: {
    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    const APInt &Val = MI.getOperand(1).getCImm()->getValue();
    unsigned TotalSize = Ty.getSizeInBits();
    int NumParts = TotalSize / NarrowSize;

    SmallVector<Register, 4> PartRegs;
    for (int I = 0; I != NumParts; ++I) {
      unsigned Offset = I * NarrowSize;
      auto K = MIRBuilder.buildConstant(NarrowTy,
                                        Val.lshr(Offset).trunc(NarrowSize));
      PartRegs.push_back(K.getReg(0));
    }

    LLT LeftoverTy;
    unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
    SmallVector<Register, 1> LeftoverRegs;
    if (LeftoverBits != 0) {
      LeftoverTy = LLT::scalar(LeftoverBits);
      auto K = MIRBuilder.buildConstant(
          LeftoverTy, Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
      LeftoverRegs.push_back(K.getReg(0));
    }

    insertParts(MI.getOperand(0).getReg(),
                Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);

    MI.eraseFromParent();
    return Legalized;
  }
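  // Example of the G_CONSTANT split above: narrowing a 96-bit constant to
  // s32 emits three G_CONSTANTs holding Val.lshr(0), Val.lshr(32) and
  // Val.lshr(64), each truncated to 32 bits, and remerges them; a 70-bit
  // constant narrowed to s32 would additionally produce a 6-bit leftover
  // piece via the LeftoverBits path.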
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
    return narrowScalarExt(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_TRUNC: {
    if (TypeIdx != 1)
      return UnableToLegalize;

    uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
      LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
      return UnableToLegalize;
    }

    auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
    MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FREEZE: {
    Register Dst = MI.getOperand(0).getReg();

    // Freeze each narrow piece, then merge the frozen pieces back together.
    auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
    SmallVector<Register, 8> Parts;
    for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
      Parts.push_back(
          MIRBuilder.buildFreeze(NarrowTy, Unmerge.getReg(i)).getReg(0));
    }

    MIRBuilder.buildMergeLikeInstr(Dst, Parts);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
    return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_UMULH:
    return narrowScalarMul(MI, NarrowTy);
  case TargetOpcode::G_EXTRACT:
    return narrowScalarExtract(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_INSERT:
    return narrowScalarInsert(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_LOAD: {
    auto &LoadMI = cast<GLoad>(MI);
    Register DstReg = LoadMI.getDstReg();
    // ...
    LoadMI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_SEXTLOAD: {
    auto &LoadMI = cast<GExtLoad>(MI);
    Register DstReg = LoadMI.getDstReg();
    Register PtrReg = LoadMI.getPointerReg();

    Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
    auto &MMO = LoadMI.getMMO();
    unsigned MemSize = MMO.getSizeInBits();

    if (MemSize == NarrowSize) {
      MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
    } else if (MemSize < NarrowSize) {
      MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
    } else if (MemSize > NarrowSize) {
      // FIXME: Need to split the load.
      return UnableToLegalize;
    }

    if (isa<GZExtLoad>(LoadMI))
      MIRBuilder.buildZExt(DstReg, TmpReg);
    else
      MIRBuilder.buildSExt(DstReg, TmpReg);

    LoadMI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_STORE: {
    auto &StoreMI = cast<GStore>(MI);

    Register SrcReg = StoreMI.getValueReg();
    LLT SrcTy = MRI.getType(SrcReg);
    // ...
    int NumParts = SizeOp0 / NarrowSize;
    unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
    unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
    if (SrcTy.isVector() && LeftoverBits != 0)
      return UnableToLegalize;
    // ...
    StoreMI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SELECT:
    return narrowScalarSelect(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    // Legalize bitwise operation:
    // A = BinOp<Ty> B, C
    // into:
    // B1, ..., BN = G_UNMERGE_VALUES B
    // C1, ..., CN = G_UNMERGE_VALUES C
    // A1 = BinOp<Ty/N> B1, C1
    // ...
    // AN = BinOp<Ty/N> BN, CN
    // A = G_MERGE_VALUES A1, ..., AN
    return narrowScalarBasic(MI, TypeIdx, NarrowTy);
  }
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
    return narrowScalarShift(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP:
    if (TypeIdx == 1)
      switch (MI.getOpcode()) {
      case TargetOpcode::G_CTLZ:
      case TargetOpcode::G_CTLZ_ZERO_UNDEF:
        return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
      case TargetOpcode::G_CTTZ:
      case TargetOpcode::G_CTTZ_ZERO_UNDEF:
        return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
      case TargetOpcode::G_CTPOP:
        return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
      default:
        return UnableToLegalize;
      }

    return UnableToLegalize;
  case TargetOpcode::G_INTTOPTR:
    if (TypeIdx != 1)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    narrowScalarSrc(MI, NarrowTy, 1);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PTRTOINT:
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PHI: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    unsigned NumParts = SizeOp0 / NarrowSize;
    SmallVector<Register, 2> DstRegs(NumParts);
    SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
    Observer.changingInstr(MI);
    for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
      MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
      MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
      extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
                   SrcRegs[i / 2]);
    }
    MachineBasicBlock &MBB = *MI.getParent();
    MIRBuilder.setInsertPt(MBB, MI);
    for (unsigned i = 0; i < NumParts; ++i) {
      DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
      MachineInstrBuilder MIB =
          MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
      for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
        MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
    }
    MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
    MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
    Observer.changedInstr(MI);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
  case TargetOpcode::G_INSERT_VECTOR_ELT: {
    if (TypeIdx != 2)
      return UnableToLegalize;

    int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
    Observer.changingInstr(MI);
    narrowScalarSrc(MI, NarrowTy, OpIdx);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_ICMP: {
    Register Dst = MI.getOperand(0).getReg();
    Register LHS = MI.getOperand(2).getReg();
    LLT SrcTy = MRI.getType(LHS);
    CmpInst::Predicate Pred =
        static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());

    SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
    LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
    if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
                      LHSLeftoverRegs))
      return UnableToLegalize;

    LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
    SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
    if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
                      RHSPartRegs, RHSLeftoverRegs))
      return UnableToLegalize;

    // We now have the LHS and RHS of the compare split into narrow-type
    // registers, plus potentially some leftover type.
    if (ICmpInst::isEquality(Pred)) {
      // For each part, XOR the LHS and RHS pieces together; the pieces are
      // equal exactly when the XOR is zero.
      SmallVector<Register, 4> Xors;
      for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
        auto LHS = std::get<0>(LHSAndRHS);
        auto RHS = std::get<1>(LHSAndRHS);
        auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
        Xors.push_back(Xor);
      }

      // Build a G_XOR for each leftover register, widened to the narrow type
      // so that everything can be ORed together later.
      SmallVector<Register, 4> WidenedXors;
      for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
        auto LHS = std::get<0>(LHSAndRHS);
        auto RHS = std::get<1>(LHSAndRHS);
        auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
        LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
        buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
                            TargetOpcode::G_ZEXT);
        Xors.insert(Xors.end(), WidenedXors.begin(), WidenedXors.end());
      }

      // Now, for each part we broke up, we know if they are equal/not equal
      // based off the G_XOR. OR these values together and compare the result
      // against zero.
      assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
      auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
      for (unsigned I = 2, E = Xors.size(); I < E; ++I)
        Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
      MIRBuilder.buildICmp(Pred, Dst, Or,
                           MIRBuilder.buildConstant(NarrowTy, 0));
    } else {
      // TODO: Handle non-power-of-two types.
      assert(LHSPartRegs.size() == 2 && "Expected exactly 2 LHS part regs?");
      assert(RHSPartRegs.size() == 2 && "Expected exactly 2 RHS part regs?");
      // ...(compare the high halves, and fall back to the low halves when
      // the high halves are equal)...
    }
    MI.eraseFromParent();
    return Legalized;
  }
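  // The equality path above is the classic multi-word compare: a == b iff
  // (a0 ^ b0) | (a1 ^ b1) | ... == 0, so narrowing an s128 equality G_ICMP
  // to s64 costs two XORs, one OR, and a single s64 compare against zero.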
  case TargetOpcode::G_SEXT_INREG: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    int64_t SizeInBits = MI.getOperand(2).getImm();

    // So long as the new type has more bits than the bits we're extending we
    // don't need to break it apart: truncate the source and sign-extend the
    // destination without losing any non-extension bits.
    if (NarrowTy.getScalarSizeInBits() > SizeInBits) {
      Observer.changingInstr(MI);
      MachineOperand &MO1 = MI.getOperand(1);
      auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
      MO1.setReg(TruncMIB.getReg(0));
      // ...
      Observer.changedInstr(MI);
      return Legalized;
    }

    // Break it apart. Components below the extension point are unmodified.
    // The component containing the extension point becomes a narrower
    // G_SEXT_INREG. Components above it are ashr'd from the component
    // containing the extension point.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp0 / NarrowSize;

    // Break the source into narrow pieces.
    SmallVector<Register, 2> SrcRegs, DstRegs;
    for (int i = 0; i < NumParts; ++i) {
      // ...
    }

    Register PartialExtensionReg;
    Register FullExtensionReg;
    for (int i = 0; i < NumParts; ++i) {
      // Pieces above the extension point are all sign bits.
      if (/* this piece is entirely above the extension point */ false) {
        assert(PartialExtensionReg &&
               "Expected to visit partial extension before full");
        if (FullExtensionReg) {
          DstRegs.push_back(FullExtensionReg);
          continue;
        }
        // ...
        FullExtensionReg = DstRegs.back();
      } else {
        DstRegs.push_back(
            MIRBuilder
                .buildInstr(TargetOpcode::G_SEXT_INREG, {NarrowTy},
                            {SrcRegs[i], SizeInBits % NarrowSize})
                .getReg(0));
        PartialExtensionReg = DstRegs.back();
      }
    }

    MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_BSWAP:
  case TargetOpcode::G_BITREVERSE: {
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    SmallVector<Register, 2> SrcRegs, DstRegs;
    unsigned NumParts = SizeOp0 / NarrowSize;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

    // Swap or reverse each narrow piece, reversing the order of the pieces
    // at the same time.
    for (unsigned i = 0; i < NumParts; ++i) {
      auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
                                           {SrcRegs[NumParts - 1 - i]});
      DstRegs.push_back(DstPart.getReg(0));
    }

    MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);

    Observer.changedInstr(MI);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_PTR_ADD:
  case TargetOpcode::G_PTRMASK: {
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    narrowScalarSrc(MI, NarrowTy, 2);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_FPTOSI:
    return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_FPEXT:
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_FLDEXP:
  case TargetOpcode::G_STRICT_FLDEXP:
    return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy);
  }
}
void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned ExtOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
  MO.setReg(ExtB.getReg(0));
}

void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
                                      unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
  MO.setReg(ExtB.getReg(0));
}

void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned TruncOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
  MO.setReg(DstExt);
}

void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
                                      unsigned OpIdx, unsigned ExtOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
  MO.setReg(DstTrunc);
}
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
                                        LLT WideTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs();
  if (DstTy.isVector())
    return UnableToLegalize;

  const int DstSize = DstTy.getSizeInBits();
  const int SrcSize = Src1Ty.getSizeInBits();
  const int WideSize = WideTy.getSizeInBits();
  const int NumMerge = (DstSize + WideSize - 1) / WideSize;

  unsigned NumOps = MI.getNumOperands();
  unsigned NumSrc = MI.getNumOperands() - 1;
  unsigned PartSize = DstTy.getSizeInBits() / NumSrc;

  if (WideSize >= DstSize) {
    // Directly pack the bits in the target type.
    Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1Reg).getReg(0);

    for (unsigned I = 2; I != NumOps; ++I) {
      const unsigned Offset = (I - 1) * PartSize;

      Register SrcReg = MI.getOperand(I).getReg();
      auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);

      Register NextResult = I + 1 == NumOps && WideTy == DstTy
                                ? DstReg
                                : MRI.createGenericVirtualRegister(WideTy);

      auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
      auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
      MIRBuilder.buildOr(NextResult, ResultReg, Shl);
      ResultReg = NextResult;
    }

    if (WideSize > DstSize)
      MIRBuilder.buildTrunc(DstReg, ResultReg);
    else if (DstTy.isPointer())
      MIRBuilder.buildIntToPtr(DstReg, ResultReg);

    MI.eraseFromParent();
    return Legalized;
  }
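  // When the wide type covers the whole destination, G_MERGE_VALUES is just
  // zext/shift/or packing: e.g. merging four s8 sources into s32 with WideTy
  // s32 emits three shifts by 8, 16 and 24 plus ORs, leaving no merge
  // instruction behind.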
  // Unmerge the original values to the GCD type, and recombine to the next
  // multiple greater than the requested size.
  const int GCD = std::gcd(SrcSize, WideSize);
  LLT GCDTy = LLT::scalar(GCD);

  SmallVector<Register, 8> Unmerges;
  for (int I = 1, E = MI.getNumOperands(); I != E; ++I) {
    Register SrcReg = MI.getOperand(I).getReg();
    if (GCD == SrcSize) {
      Unmerges.push_back(SrcReg);
    } else {
      auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
      for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
        Unmerges.push_back(Unmerge.getReg(J));
    }
  }

  // Pad with undef to the next size that is a multiple of the requested size.
  if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
    Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
    for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
      Unmerges.push_back(UndefReg);
  }

  const int PartsPerGCD = WideSize / GCD;

  // Build merges of each piece.
  ArrayRef<Register> Slicer(Unmerges);
  for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
    auto Merge =
        MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));
    // ...
  }

  // ...
  MI.eraseFromParent();
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
                                          LLT WideTy) {
  int NumDst = MI.getNumOperands() - 1;
  Register SrcReg = MI.getOperand(NumDst).getReg();
  LLT SrcTy = MRI.getType(SrcReg);

  Register Dst0Reg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst0Reg);

  if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
    if (SrcTy.isPointer()) {
      const DataLayout &DL = MIRBuilder.getDataLayout();
      if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
        LLVM_DEBUG(
            dbgs() << "Not casting non-integral address space integer\n");
        return UnableToLegalize;
      }

      SrcTy = LLT::scalar(SrcTy.getSizeInBits());
      SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
    }

    // There's no unmerge type to target. Directly extract the bits from the
    // source type by shifting and truncating.
    for (int I = 1; I != NumDst; ++I) {
      auto ShiftAmt =
          MIRBuilder.buildConstant(SrcTy, DstTy.getSizeInBits() * I);
      auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
      MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  // Extend the source to a wider type.
  if (SrcTy.isPointer()) {
    LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
    return UnableToLegalize;
  }

  // Create a sequence of unmerges and merges to the original results. Since
  // the source may have been widened, the results may need padding with dead
  // defs to cover the source register.
  const int NumUnmerge = Unmerge->getNumOperands() - 1;

  if (PartsPerRemerge == 1) {
    for (int I = 0; I != NumUnmerge; ++I) {
      auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);

      for (int J = 0; J != PartsPerUnmerge; ++J) {
        int Idx = I * PartsPerUnmerge + J;
        if (Idx < NumDst)
          MIB.addDef(MI.getOperand(Idx).getReg());
        else
          MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
      }

      MIB.addUse(Unmerge.getReg(I));
    }
  } else {
    SmallVector<Register, 16> Parts;
    for (int J = 0; J != NumUnmerge; ++J)
      extractGCDType(Parts, GCDTy, Unmerge.getReg(J));

    SmallVector<Register, 8> RemergeParts;
    for (int I = 0; I != NumDst; ++I) {
      for (int J = 0; J < PartsPerRemerge; ++J) {
        const int Idx = I * PartsPerRemerge + J;
        RemergeParts.emplace_back(Parts[Idx]);
      }

      MIRBuilder.buildMergeLikeInstr(MI.getOperand(I).getReg(), RemergeParts);
      RemergeParts.clear();
    }
  }

  MI.eraseFromParent();
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
                                    LLT WideTy) {
  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  unsigned Offset = MI.getOperand(2).getImm();

  if (TypeIdx == 0) {
    if (SrcTy.isVector() || DstTy.isVector())
      return UnableToLegalize;
    // ...
    if (DstTy.isPointer())
      return UnableToLegalize;
    // ...
    MI.eraseFromParent();
    return Legalized;
  }

  // Shift the relevant bits into position, then truncate into the result.
  LLT ShiftTy = SrcTy;
  // ...
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
                                   LLT WideTy) {
  if (TypeIdx != 0 || WideTy.isVector())
    return UnableToLegalize;
  Observer.changingInstr(MI);
  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
  widenScalarDst(MI, WideTy);
  Observer.changedInstr(MI);
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
                                           LLT WideTy) {
  unsigned Opcode;
  unsigned ExtOpcode;
  std::optional<Register> CarryIn;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode!");
  case TargetOpcode::G_SADDO:
    Opcode = TargetOpcode::G_ADD;
    ExtOpcode = TargetOpcode::G_SEXT;
    break;
  case TargetOpcode::G_SSUBO:
    Opcode = TargetOpcode::G_SUB;
    ExtOpcode = TargetOpcode::G_SEXT;
    break;
  case TargetOpcode::G_UADDO:
    Opcode = TargetOpcode::G_ADD;
    ExtOpcode = TargetOpcode::G_ZEXT;
    break;
  case TargetOpcode::G_USUBO:
    Opcode = TargetOpcode::G_SUB;
    ExtOpcode = TargetOpcode::G_ZEXT;
    break;
  case TargetOpcode::G_SADDE:
    Opcode = TargetOpcode::G_UADDE;
    ExtOpcode = TargetOpcode::G_SEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  case TargetOpcode::G_SSUBE:
    Opcode = TargetOpcode::G_USUBE;
    ExtOpcode = TargetOpcode::G_SEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  case TargetOpcode::G_UADDE:
    Opcode = TargetOpcode::G_UADDE;
    ExtOpcode = TargetOpcode::G_ZEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  case TargetOpcode::G_USUBE:
    Opcode = TargetOpcode::G_USUBE;
    ExtOpcode = TargetOpcode::G_ZEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  }

  auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
  auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});

  // Do the arithmetic in the larger type.
  Register NewOp;
  if (CarryIn) {
    LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
    NewOp = MIRBuilder
                .buildInstr(Opcode, {WideTy, CarryOutTy},
                            {LHSExt, RHSExt, *CarryIn})
                .getReg(0);
  } else {
    NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
  }

  // ...
  MI.eraseFromParent();
  return Legalized;
}
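// The overflow-widening trick above: extend both operands (sign- or zero-,
// matching the signedness of the check), do the plain wide op, and the
// overflow bit becomes "the wide result does not round-trip through the
// narrow type". E.g. a G_UADDO on s8 widened to s32 is a 32-bit add followed
// by comparing the result against its own low 8 bits zero-extended back.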
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
                                         LLT WideTy) {
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
                  MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
                  MI.getOpcode() == TargetOpcode::G_SSHLSAT;
  bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
                 MI.getOpcode() == TargetOpcode::G_USHLSAT;

  // Saturating ops can be widened as:
  //   1. [Any|Sign|Zero] extend iN to iM
  //   2. SHL by M-N so the operands sit in the high bits
  //   3. [US][ADD|SUB|SHL]SAT at the wide width
  //   4. L/ASHR by M-N to move the result back down
  // The saturation logic then observes the same top bits it would have seen
  // at the narrow width.
  // ...
  auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
                                        {ShiftL, ShiftR}, MI.getFlags());
  // ...
  MI.eraseFromParent();
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
                                 LLT WideTy) {
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;

  Register OriginalOverflow = MI.getOperand(1).getReg();
  LLT OverflowTy = MRI.getType(OriginalOverflow);

  // Extend the inputs, do the multiply in the wide type, then check both the
  // wide overflow output and the high bits of the wide result.
  unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;

  auto Mulo = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy, OverflowTy},
                                    {LeftOperand, RightOperand});
  auto Mul = Mulo->getOperand(0);
  // ...
  MI.eraseFromParent();
  return Legalized;
}
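// For multiplication the wide result has to be checked explicitly: after the
// widened G_[SU]MULO, the overflow output is combined with a check that the
// wide product still fits in the original bit width, because the narrow
// multiply can overflow even when the wide one does not.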
  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR:
  case TargetOpcode::G_ATOMICRMW_MIN:
  case TargetOpcode::G_ATOMICRMW_MAX:
  case TargetOpcode::G_ATOMICRMW_UMIN:
  case TargetOpcode::G_ATOMICRMW_UMAX:
    assert(TypeIdx == 0 && "atomicrmw with second scalar type");
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_ATOMIC_CMPXCHG:
    assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
    widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
    assert(TypeIdx == 1 &&
           "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy, 1);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_EXTRACT:
    return widenScalarExtract(MI, TypeIdx, WideTy);
  case TargetOpcode::G_INSERT:
    return widenScalarInsert(MI, TypeIdx, WideTy);
  case TargetOpcode::G_MERGE_VALUES:
    return widenScalarMergeValues(MI, TypeIdx, WideTy);
  case TargetOpcode::G_UNMERGE_VALUES:
    return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
    return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
  case TargetOpcode::G_UMULO:
  case TargetOpcode::G_SMULO:
    return widenScalarMulo(MI, TypeIdx, WideTy);
  case TargetOpcode::G_SADDSAT:
  case TargetOpcode::G_SSUBSAT:
  case TargetOpcode::G_SSHLSAT:
  case TargetOpcode::G_UADDSAT:
  case TargetOpcode::G_USUBSAT:
  case TargetOpcode::G_USHLSAT:
    return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP: {
    // First extend the input.
    unsigned ExtOpc = MI.getOpcode() == TargetOpcode::G_CTTZ ||
                              MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF
                          ? TargetOpcode::G_ANYEXT
                          : TargetOpcode::G_ZEXT;

    unsigned NewOpc = MI.getOpcode();
    if (NewOpc == TargetOpcode::G_CTTZ) {
      // The count is the same in the larger type except if the original
      // value was zero. This can be handled by setting the bit just off
      // the top of the original type.
      // ...
      NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
    }

    // ...
    if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
        MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
      // The correct result is NewOp - (WideSize - OrigSize).
      // ...
    }

    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_BSWAP: {
    Observer.changingInstr(MI);
    Register DstReg = MI.getOperand(0).getReg();

    Register DstExt = MRI.createGenericVirtualRegister(WideTy);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    MI.getOperand(0).setReg(DstExt);

    // The byte-swapped value lands in the high bytes of the wide register,
    // so shift it back down to the destination width.
    // ...
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_BITREVERSE: {
    Observer.changingInstr(MI);

    Register DstExt = MRI.createGenericVirtualRegister(WideTy);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    MI.getOperand(0).setReg(DstExt);

    // As with G_BSWAP, the reversed bits sit in the high part of the wide
    // register and are shifted back down.
    // ...
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FREEZE:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_ABS:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_AND:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_SUB:
    // Perform operation at larger width (any extension is fine here, high
    // bits don't affect the result) and then truncate the result back to the
    // original type.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_SBFX:
  case TargetOpcode::G_UBFX:
    // ...
  case TargetOpcode::G_SHL:
    // ...
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_SDIVREM:
    // ...
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
    Observer.changingInstr(MI);

    if (TypeIdx == 0) {
      unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR
                           ? TargetOpcode::G_SEXT
                           : TargetOpcode::G_ZEXT;
      widenScalarSrc(MI, WideTy, 1, CvtOp);
      // ...
    }
    // ...
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_UDIVREM:
    // ...
  case TargetOpcode::G_SELECT:
    // ...
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
    // ...
  case TargetOpcode::G_SITOFP:
    // ...
  case TargetOpcode::G_UITOFP:
    // ...
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD:
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_STORE: {
    // A 1-bit store needs its known zero bits preserved; everything else can
    // be any-extended.
    unsigned ExtType = Ty.getScalarSizeInBits() == 1 ? TargetOpcode::G_ZEXT
                                                     : TargetOpcode::G_ANYEXT;
    // ...
  }
  case TargetOpcode::G_CONSTANT: {
    MachineOperand &SrcMO = MI.getOperand(1);
    unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
        MRI.getType(MI.getOperand(0).getReg()));
    assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
            ExtOpc == TargetOpcode::G_ANYEXT) &&
           "Illegal Extend");
    const APInt &SrcVal = SrcMO.getCImm()->getValue();
    const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
                           ? SrcVal.sext(WideTy.getSizeInBits())
                           : SrcVal.zext(WideTy.getSizeInBits());
    // ...
  }
  case TargetOpcode::G_FCONSTANT: {
    // ...
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_IMPLICIT_DEF: {
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_BRCOND:
    // ...
  case TargetOpcode::G_FCMP:
    // ...
  case TargetOpcode::G_ICMP: {
    // Sign-extend for signed predicates so the compare sees the same ordering
    // at the wide width.
    unsigned ExtOpcode =
        CmpInst::isSigned(static_cast<CmpInst::Predicate>(
            MI.getOperand(1).getPredicate()))
            ? TargetOpcode::G_SEXT
            : TargetOpcode::G_ZEXT;
    // ...
  }
  case TargetOpcode::G_PTR_ADD:
    assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PHI: {
    assert(TypeIdx == 0 && "Expecting only Idx 0");

    Observer.changingInstr(MI);
    for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
      MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
      MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
    }
    // ...
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
    if (TypeIdx == 2) {
      Observer.changingInstr(MI);
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
      Observer.changedInstr(MI);
      return Legalized;
    }
    // ...
  }
  case TargetOpcode::G_INSERT_VECTOR_ELT: {
    // ...
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FMAD:
  case TargetOpcode::G_FNEG:
  case TargetOpcode::G_FABS:
  case TargetOpcode::G_FCANONICALIZE:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM_IEEE:
  case TargetOpcode::G_FMAXNUM_IEEE:
  case TargetOpcode::G_FMINIMUM:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FEXP10:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_INTRINSIC_TRUNC:
  case TargetOpcode::G_INTRINSIC_ROUND:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
    assert(TypeIdx == 0);
    Observer.changingInstr(MI);

    for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);

    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_FPOWI:
  case TargetOpcode::G_FLDEXP:
  case TargetOpcode::G_STRICT_FLDEXP: {
    if (TypeIdx == 0) {
      if (MI.getOpcode() == TargetOpcode::G_STRICT_FLDEXP)
        return UnableToLegalize;
      // ...
    }
    // ...
  }
  case TargetOpcode::G_FFREXP: {
    // ...
  }
  case TargetOpcode::G_INTTOPTR:
    // ...
  case TargetOpcode::G_PTRTOINT:
    // ...
  case TargetOpcode::G_BUILD_VECTOR: {
    Observer.changingInstr(MI);

    for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);

    // ...
  }
  case TargetOpcode::G_SEXT_INREG:
    // ...
  case TargetOpcode::G_PTRMASK: {
    // ...
  }
  case TargetOpcode::G_VECREDUCE_FMIN:
  case TargetOpcode::G_VECREDUCE_FMAX:
  case TargetOpcode::G_VECREDUCE_FMINIMUM:
  case TargetOpcode::G_VECREDUCE_FMAXIMUM:
    // ...
static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
                             MachineIRBuilder &B, Register Src, LLT Ty) {
  auto Unmerge = B.buildUnmerge(Ty, Src);
  for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
    Pieces.push_back(Unmerge.getReg(I));
}

  // Emit the G_FCONSTANT to the constant pool and load it back.
  unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
  // ...(materialize the pool address and memory operand)...
      MI.getOperand(1).getFPImm(), Alignment));

  MI.eraseFromParent();
LegalizerHelper::LegalizeResult LegalizerHelper::lowerBitcast(MachineInstr &MI) {
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();

  if (SrcTy.isVector()) {
    LLT SrcEltTy = SrcTy.getElementType();
    SmallVector<Register, 8> SrcRegs;

    if (DstTy.isVector()) {
      int NumDstElt = DstTy.getNumElements();
      int NumSrcElt = SrcTy.getNumElements();

      LLT DstEltTy = DstTy.getElementType();
      LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
      LLT SrcPartTy = SrcEltTy; // Original unmerge result type

      // If there's an element size mismatch, insert intermediate casts to
      // match the result element type.
      if (NumSrcElt < NumDstElt) { // Source element type is larger.
        DstCastTy = LLT::fixed_vector(NumDstElt / NumSrcElt, DstEltTy);
        SrcPartTy = SrcEltTy;
      } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
        SrcPartTy = LLT::fixed_vector(NumSrcElt / NumDstElt, SrcEltTy);
        DstCastTy = DstEltTy;
      }

      getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
      for (Register &SrcReg : SrcRegs)
        SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
    } else
      getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);

    MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
    MI.eraseFromParent();
    return Legalized;
  }

  if (DstTy.isVector()) {
    SmallVector<Register, 8> SrcRegs;
    getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
    MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
    MI.eraseFromParent();
    return Legalized;
  }
/// Figure out the bit offset into a register when coercing a vector index
/// for the wide element type.
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
                                                   Register Idx,
                                                   unsigned NewEltSize,
                                                   unsigned OldEltSize) {
  const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
  LLT IdxTy = B.getMRI()->getType(Idx);

  // Now figure out the amount we need to shift to get the target bits.
  auto OffsetMask = B.buildConstant(
      IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
  auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
  return B.buildShl(IdxTy, OffsetIdx,
                    B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
}
  auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs();

  unsigned OldNumElts = SrcVecTy.getNumElements();

  // Casting to a wider-element vector: each wide element packs several
  // original elements, so the extract becomes extract + shift + trunc.
  if (NewNumElts > OldNumElts) {
    // ...
    if (NewNumElts % OldNumElts != 0)
      return UnableToLegalize;

    // Type of the intermediate result vector.
    const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;

    SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
    for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
      // ...
      NewOps[I] = Elt.getReg(0);
    }

    // ...
    MI.eraseFromParent();
    return Legalized;
  }

  if (NewNumElts < OldNumElts) {
    if (NewEltSize % OldEltSize != 0)
      return UnableToLegalize;

    // This only depends on powers of 2 because we use bit tricks to figure
    // out the bit offset we need to shift to get the target element.
    const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
    // ...
    MI.eraseFromParent();
    return Legalized;
  }
/// Insert \p InsertReg into \p TargetReg at the bit offset \p OffsetBits,
/// using shift/mask arithmetic.
static Register buildBitFieldInsert(MachineIRBuilder &B, Register TargetReg,
                                    Register InsertReg, Register OffsetBits) {
  LLT TargetTy = B.getMRI()->getType(TargetReg);
  LLT InsertTy = B.getMRI()->getType(InsertReg);
  auto ZextVal = B.buildZExt(TargetTy, InsertReg);
  auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);

  // Produce a bitmask of the value to insert.
  auto EltMask = B.buildConstant(
      TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
                                     InsertTy.getSizeInBits()));
  // Shift it into position.
  auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
  auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);

  // Clear out the bits in the wide element.
  auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);

  // The value to insert has all zeros already, so stick it into the masked
  // wide element.
  return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
}
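// In effect this is the scalar read-modify-write for a sub-element store:
//   result = (Target & ~(Mask << Offset)) | (ZExt(Insert) << Offset)
// The same pattern shows up whenever a small element is spliced into a wider
// one, e.g. inserting an s8 into an s32 lane after a vector bitcast.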
  auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
      MI.getFirst4RegLLTs();

  if (NewNumElts < OldNumElts) {
    if (NewEltSize % OldEltSize != 0)
      return UnableToLegalize;

    // This only depends on powers of 2 because we use bit tricks to figure
    // out the bit offset we need to shift to get the target element.
    const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
    // ...
    Register NewCastVec =
        buildBitFieldInsert(MIRBuilder, CastTy, CastVec, InsertedElt,
                            ScaledIdx).getReg(0);
    // ...
    MI.eraseFromParent();
    return Legalized;
  }
  if (MemSizeInBits != MemStoreSizeInBits) {
    // Promote to a byte-sized load if not loading an integral number of
    // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
    // ...
    if (isa<GSExtLoad>(LoadMI)) {
      // ...
    } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
      // ...
    }

    if (DstTy != LoadTy)
      // ...
    // ...
  }

  uint64_t LargeSplitSize, SmallSplitSize;

  if (!isPowerOf2_32(MemSizeInBits)) {
    // This load needs splitting into power-of-2-sized loads.
    // ...
    SmallSplitSize = MemSizeInBits - LargeSplitSize;
  } else {
    // Assume we're being asked to split an unaligned load into two halves.
    SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
  }

  // ...
  MachineMemOperand *LargeMMO =
      MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
  // ...
  auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
                                             SmallPtr, *SmallMMO);
  // ...
  if (AnyExtTy == DstTy)
    // ...

  // The store path mirrors this:
  if (StoreWidth != StoreSizeInBits) {
    // Promote to a byte-sized store if not storing an integral number of
    // bytes.
    // ...
  }

  uint64_t LargeSplitSize, SmallSplitSize;
  if (!isPowerOf2_32(MemTy.getSizeInBits())) {
    LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.getSizeInBits());
    SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
  } else {
    SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
  }
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
  switch (MI.getOpcode()) {
  case TargetOpcode::G_LOAD: {
    // ...
  }
  case TargetOpcode::G_STORE: {
    // ...
  }
  case TargetOpcode::G_SELECT: {
    // ...
    LLVM_DEBUG(
        dbgs() << "bitcast action not implemented for vector select\n");
    // ...
  }
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    // ...
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
  Observer.changingInstr(MI);
  MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
  Observer.changedInstr(MI);
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
  using namespace TargetOpcode;

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_FCONSTANT:
    return lowerFConstant(MI);
  case TargetOpcode::G_BITCAST:
    return lowerBitcast(MI);
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM: {
    // Lower s/urem as s/udiv + mul + sub.
    auto Quot =
        MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
                              {MI.getOperand(1), MI.getOperand(2)});

    auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
    MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SSUBO:
    return lowerSADDO_SSUBO(MI);
  case TargetOpcode::G_UMULH:
  case TargetOpcode::G_SMULH:
    return lowerSMULH_UMULH(MI);
  case TargetOpcode::G_SMULO:
  case TargetOpcode::G_UMULO: {
    // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for
    // the result.
    auto [Res, Overflow, LHS, RHS] = MI.getFirst4Regs();
    LLT Ty = MRI.getType(Res);

    unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
                          ? TargetOpcode::G_SMULH
                          : TargetOpcode::G_UMULH;

    Observer.changingInstr(MI);
    const auto &TII = MIRBuilder.getTII();
    MI.setDesc(TII.get(TargetOpcode::G_MUL));
    MI.removeOperand(1);
    Observer.changedInstr(MI);

    auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
    auto Zero = MIRBuilder.buildConstant(Ty, 0);

    // For signed multiply, overflow occurred iff the high part is not the
    // sign-extension of the low part.
    if (Opcode == TargetOpcode::G_SMULH) {
      auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
      auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
      MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
    } else {
      MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
    }
    return Legalized;
  }
  case TargetOpcode::G_FNEG: {
    auto [Res, SubByReg] = MI.getFirst2Regs();
    LLT Ty = MRI.getType(Res);

    // Lower fneg by flipping the sign bit.
    auto SignMask = MIRBuilder.buildConstant(
        Ty, APInt::getSignMask(Ty.getScalarSizeInBits()));
    MIRBuilder.buildXor(Res, SubByReg, SignMask);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_STRICT_FSUB: {
    auto [Res, LHS, RHS] = MI.getFirst3Regs();
    LLT Ty = MRI.getType(Res);

    // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
    auto Neg = MIRBuilder.buildFNeg(Ty, RHS);

    if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
      MIRBuilder.buildStrictFAdd(Res, LHS, Neg);
    else
      MIRBuilder.buildFAdd(Res, LHS, Neg);

    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FMAD:
    return lowerFMad(MI);
  case TargetOpcode::G_FFLOOR:
    return lowerFFloor(MI);
  case TargetOpcode::G_INTRINSIC_ROUND:
    return lowerIntrinsicRound(MI);
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
    // Since round-to-even is the assumed rounding mode for unconstrained FP
    // operations, rint and roundeven are the same operation.
    changeOpcode(MI, TargetOpcode::G_FRINT);
    return Legalized;
  }
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
    auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
    MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
                                  **MI.memoperands_begin());
    MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD:
    return lowerLoad(cast<GAnyLoad>(MI));
  case TargetOpcode::G_STORE:
    return lowerStore(cast<GStore>(MI));
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTPOP:
    return lowerBitCount(MI);
  case G_UADDO: {
    auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs();

    MIRBuilder.buildAdd(Res, LHS, RHS);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);

    MI.eraseFromParent();
    return Legalized;
  }
  case G_UADDE: {
    auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
    // ...(add, then add the zero-extended carry-in and OR the two carry
    // compares together)...
    MI.eraseFromParent();
    return Legalized;
  }
  case G_USUBO: {
    auto [Res, BorrowOut, LHS, RHS] = MI.getFirst4Regs();

    MIRBuilder.buildSub(Res, LHS, RHS);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);

    MI.eraseFromParent();
    return Legalized;
  }
  case G_USUBE: {
    auto [Res, BorrowOut, LHS, RHS, BorrowIn] = MI.getFirst5Regs();
    // ...
    MI.eraseFromParent();
    return Legalized;
  }
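  // The carry-less forms above rely on unsigned wrap-around: for G_UADDO the
  // carry is Res < RHS, and for G_USUBO the borrow is LHS < RHS. The *E
  // variants additionally fold in the incoming carry/borrow with a second
  // compare (elided in this excerpt).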
  case G_MERGE_VALUES:
    return lowerMergeValues(MI);
  case G_UNMERGE_VALUES:
    return lowerUnmergeValues(MI);
  case TargetOpcode::G_SEXT_INREG: {
    assert(MI.getOperand(2).isImm() && "Expected immediate");
    int64_t SizeInBits = MI.getOperand(2).getImm();

    auto [DstReg, SrcReg] = MI.getFirst2Regs();
    LLT DstTy = MRI.getType(DstReg);

    // Lower as a pair of shifts: shl to move the sign bit to the top, then
    // an arithmetic shift right back down.
    auto MIBSz = MIRBuilder.buildConstant(
        DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
    auto Shl = MIRBuilder.buildShl(DstTy, SrcReg, MIBSz);
    MIRBuilder.buildAShr(DstReg, Shl, MIBSz);
    MI.eraseFromParent();
    return Legalized;
  }
  case G_EXTRACT_VECTOR_ELT:
  case G_INSERT_VECTOR_ELT:
    return lowerExtractInsertVectorElt(MI);
  case G_SHUFFLE_VECTOR:
    return lowerShuffleVector(MI);
  case G_DYN_STACKALLOC:
    return lowerDynStackAlloc(MI);
  case G_STACKRESTORE:
    return lowerStackRestore(MI);
  // ...
  case G_READ_REGISTER:
  case G_WRITE_REGISTER:
    return lowerReadWriteRegister(MI);
  // ...
  case G_MEMCPY_INLINE:
    return lowerMemcpyInline(MI);
  unsigned AddrSpace = DL.getAllocaAddrSpace();
  // ...
         "Converting bits to bytes lost precision");

static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg,
                                 LLT VecTy) {
  LLT IdxTy = B.getMRI()->getType(IdxReg);
  unsigned NElts = VecTy.getNumElements();

  // For a power-of-2 element count the index can be clamped with a mask;
  // otherwise use an unsigned min against the last valid index.
  if (isPowerOf2_32(NElts)) {
    APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
    return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
  }

  return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
      .getReg(0);
}
static bool hasSameNumEltsOnAllVectorOperands(
    GenericMachineInstr &MI, MachineRegisterInfo &MRI,
    std::initializer_list<unsigned> NonVecOpIndices) {
  if (MI.getNumMemOperands() != 0)
    return false;

  LLT VecTy = MRI.getType(MI.getReg(0));
  unsigned NumElts = VecTy.getNumElements();

  for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
    // ...
  }

  return true;
}

// ...

  int NumParts, NumLeftover;
  std::tie(NumParts, NumLeftover) =
      getNarrowTypeBreakDown(OrigTy, NarrowTy, LeftoverTy);

  assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
  for (int i = 0; i < NumParts; ++i) {
    // ...
  }

  assert(NumLeftover == 1 && "expected exactly one leftover");

// ...

  for (unsigned i = 0; i < N; ++i) {
    if (Op.isReg())
      // ...
    else if (Op.isImm())
      // ...
    else if (Op.isPredicate())
      // ...
  }
3783 "Non-compatible opcode or not specified non-vector operands");
3786 unsigned NumInputs =
MI.getNumOperands() -
MI.getNumDefs();
3787 unsigned NumDefs =
MI.getNumDefs();
3795 for (
unsigned i = 0; i < NumDefs; ++i) {
3804 for (
unsigned UseIdx = NumDefs, UseNo = 0; UseIdx <
MI.getNumOperands();
3805 ++UseIdx, ++UseNo) {
3808 MI.getOperand(UseIdx));
3811 extractVectorParts(
MI.getReg(UseIdx), NumElts, SplitPieces);
3812 for (
auto Reg : SplitPieces)
3817 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
3821 for (
unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
3823 for (
unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
3824 Defs.
push_back(OutputOpsPieces[DstNo][i]);
3827 for (
unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
3828 Uses.push_back(InputOpsPieces[InputNo][i]);
3831 for (
unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
3832 OutputRegs[DstNo].push_back(
I.getReg(DstNo));
3837 for (
unsigned i = 0; i < NumDefs; ++i)
3838 mergeMixedSubvectors(
MI.getReg(i), OutputRegs[i]);
3840 for (
unsigned i = 0; i < NumDefs; ++i)
3844 MI.eraseFromParent();
  unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
  unsigned NumDefs = MI.getNumDefs();

  SmallVector<DstOp, 8> OutputOpsPieces;
  SmallVector<Register, 8> OutputRegs;
  makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);

  // Split the vector input operands into sub-vectors with NumElts elements
  // plus a leftover, inserting the splits in each predecessor block.
  SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
  for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
       UseIdx += 2, ++UseNo) {
    MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
    MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
    extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo]);
  }

  // Build the PHIs with fewer elements.
  unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
  MIRBuilder.setInsertPt(*MI.getParent(), MI);
  for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
    auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
    Phi.addDef(
        MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
    OutputRegs.push_back(Phi.getReg(0));

    for (unsigned j = 0; j < NumInputs / 2; ++j) {
      Phi.addUse(InputOpsPieces[j][i]);
      Phi.add(MI.getOperand(1 + j * 2 + 1));
    }
  }

  // Merge the small outputs into MI's def.
  if (NumLeftovers) {
    mergeMixedSubvectors(MI.getReg(0), OutputRegs);
  } else {
    MIRBuilder.buildMergeLikeInstr(MI.getReg(0), OutputRegs);
  }

  MI.eraseFromParent();
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
                                                  unsigned TypeIdx,
                                                  LLT NarrowTy) {
  const int NumDst = MI.getNumOperands() - 1;
  const Register SrcReg = MI.getOperand(NumDst).getReg();
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());

  if (TypeIdx != 1 || NarrowTy == DstTy)
    return UnableToLegalize;

  // Unmerge the source to NarrowTy pieces, then unmerge each piece to the
  // original destinations.
  auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
  const int NumUnmerge = Unmerge->getNumOperands() - 1;
  const int PartsPerUnmerge = NumDst / NumUnmerge;

  for (int I = 0; I != NumUnmerge; ++I) {
    auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);

    for (int J = 0; J != PartsPerUnmerge; ++J)
      MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
    MIB.addUse(Unmerge.getReg(I));
  }

  MI.eraseFromParent();
  return Legalized;
}
  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  // Requires compatible types. Otherwise SrcReg should have been defined by
  // a merge-like instruction that would get artifact-combined.
  assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");

  if (NarrowTy == SrcTy)
    return UnableToLegalize;

  if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
      (NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0))
    return UnableToLegalize;

  // Collect all the individual source elements.
  SmallVector<Register, 8> Elts;
  for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
    auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
    for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
      Elts.push_back(Unmerge.getReg(j));
  }

  // Re-pack them into NarrowTy-sized pieces and merge into the destination.
  SmallVector<Register, 8> NarrowTyElts;
  unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
  for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
       ++i, Offset += NumNarrowTyElts) {
    ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
    NarrowTyElts.push_back(
        MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
  }

  MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
  MI.eraseFromParent();
  return Legalized;
  assert(TypeIdx == 0 && "Bad type index");
  // ...
  unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
  unsigned NumElts = NarrowTy.getNumElements();

  SmallVector<Register, 8> NarrowTyElts;
  for (unsigned i = 0; i < NumParts; ++i) {
    SmallVector<Register, 8> Sources;
    for (unsigned j = 0; j < NumElts; ++j)
      Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
    NarrowTyElts.push_back(
        MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
  }

  MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), NarrowTyElts);
  MI.eraseFromParent();
  return Legalized;
  auto [DstReg, SrcVec] = MI.getFirst2Regs();
  Register InsertVal;
  bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;

  assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
  if (IsInsert)
    InsertVal = MI.getOperand(2).getReg();

  // If the index is a constant, we can really break this down as you would
  // expect, and index into the target-size pieces.
  int64_t IdxVal;
  if (MaybeCst) {
    IdxVal = MaybeCst->Value.getSExtValue();
    // Avoid out-of-bounds indexing the pieces.
    if (IdxVal >= VecTy.getNumElements()) {
      MIRBuilder.buildUndef(DstReg);
      MI.eraseFromParent();
      return Legalized;
    }

    SmallVector<Register, 8> VecParts;
    LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);

    // Build a sequence of NarrowTy pieces in VecParts for this operand.
    LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
                                    TargetOpcode::G_ANYEXT);

    unsigned NewNumElts = NarrowVecTy.getNumElements();
    int64_t PartIdx = IdxVal / NewNumElts;
    auto NewIdx =
        MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);

    if (IsInsert) {
      LLT PartTy = MRI.getType(VecParts[PartIdx]);

      // Use the adjusted index to insert into one of the subvectors.
      auto InsertPart = MIRBuilder.buildInsertVectorElement(
          PartTy, VecParts[PartIdx], InsertVal, NewIdx);
      VecParts[PartIdx] = InsertPart.getReg(0);

      // Recombine the inserted subvector with the others to reform the
      // result vector.
      buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
    } else {
      MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
    }

    MI.eraseFromParent();
    return Legalized;
  }
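  // With a constant index, narrowing is pure bookkeeping: for a <8 x s32>
  // insert at index 5 split into <4 x s32> halves, PartIdx = 1 and the
  // sub-index is 5 - 4 = 1, so only the second half is modified and the
  // halves are remerged afterwards.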
  bool IsLoad = isa<GLoad>(LdStMI);
  Register ValReg = LdStMI.getReg(0);
  LLT ValTy = MRI.getType(ValReg);

  int NumParts = -1;
  int NumLeftover = -1;
  LLT LeftoverTy;
  SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
  if (IsLoad) {
    std::tie(NumParts, NumLeftover) =
        getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
  } else {
    if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
                     NarrowLeftoverRegs)) {
      NumParts = NarrowRegs.size();
      NumLeftover = NarrowLeftoverRegs.size();
    }
  }

  if (NumParts == -1)
    return UnableToLegalize;

  // Split one part at a time, adjusting the pointer and memory operand for
  // each piece.
  auto MMO = LdStMI.getMMO();
  auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
                             unsigned NumParts, unsigned Offset) -> unsigned {
    unsigned PartSize = PartTy.getSizeInBits();
    for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
         ++Idx) {
      unsigned ByteOffset = Offset / 8;
      // ...(build the offset pointer and the narrowed memory operand)...
      if (IsLoad) {
        Register Dst = MRI.createGenericVirtualRegister(PartTy);
        ValRegs.push_back(Dst);
        // ...
      } else {
        // ...
      }
      Offset += PartSize;
    }

    return Offset;
  };

  unsigned HandledOffset =
      splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);

  // Handle the rest of the register if this isn't an even type breakdown.
  if (LeftoverTy.isValid())
    splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);

  if (IsLoad) {
    insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
                LeftoverTy, NarrowLeftoverRegs);
  }
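// Example: narrowing an s96 load with NarrowTy s32 issues three s32 loads at
// byte offsets 0, 4 and 8, each with a correspondingly narrowed memory
// operand, then remerges the pieces; a store runs the same loop in reverse,
// storing the extracted parts instead.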
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                     LLT NarrowTy) {
  using namespace TargetOpcode;
  GenericMachineInstr &GMI = cast<GenericMachineInstr>(MI);
  unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;

  switch (MI.getOpcode()) {
  case G_IMPLICIT_DEF:
  // ...
  case G_FCANONICALIZE:
  // ...
  case G_INTRINSIC_ROUND:
  case G_INTRINSIC_ROUNDEVEN:
  case G_INTRINSIC_TRUNC:
  // ...
  case G_FMINNUM_IEEE:
  case G_FMAXNUM_IEEE:
  // ...
  case G_CTLZ_ZERO_UNDEF:
  case G_CTTZ_ZERO_UNDEF:
  // ...
  case G_ADDRSPACE_CAST:
  // ...
  case G_STRICT_FLDEXP:
    return fewerElementsVectorMultiEltType(GMI, NumElts);
  // ...
  case G_UNMERGE_VALUES:
    return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
  case G_BUILD_VECTOR:
    assert(TypeIdx == 0 && "not a vector type index");
    return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
  case G_CONCAT_VECTORS:
    return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
  case G_EXTRACT_VECTOR_ELT:
  case G_INSERT_VECTOR_ELT:
    return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
  // ...
  case G_SHUFFLE_VECTOR:
    return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);

  auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
      MI.getFirst3RegLLTs();
  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
  // The shuffle should be canonicalized by now.
  if (DstTy != Src1Ty)
    return UnableToLegalize;
  if (DstTy != Src2Ty)
    return UnableToLegalize;

  const unsigned NewElts = NarrowTy.getNumElements();

  // Split both inputs in half; further splitting is left to later
  // legalization attempts.
  SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
  extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs);
  extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs);
  Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
                        SplitSrc2Regs[1]};

  Register Hi, Lo;

  // If Lo or Hi uses elements from at most two of the four input vectors,
  // express it as a shuffle of those two inputs. Otherwise extract the input
  // elements by hand and construct the output with a G_BUILD_VECTOR.
  SmallVector<int, 16> Ops;
  for (unsigned High = 0; High < 2; ++High) {
    Register &Output = High ? Hi : Lo;

    // InputUsed[0] and InputUsed[1] record which of the four inputs this
    // half of the mask actually reads; -1U marks an unused slot.
    unsigned InputUsed[2] = {-1U, -1U};
    unsigned FirstMaskIdx = High * NewElts;
    bool UseBuildVector = false;
    for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
      // The mask element. This indexes into the input.
      int Idx = Mask[FirstMaskIdx + MaskOffset];

      // The input vector this mask element indexes into.
      unsigned Input = (unsigned)Idx / NewElts;

      if (Input >= std::size(Inputs)) {
        // The mask element does not index into any input vector.
        Ops.push_back(-1);
        continue;
      }

      // Turn the index into an offset from the start of the input vector.
      Idx -= Input * NewElts;

      // Find or create a shuffle vector operand to hold this input.
      unsigned OpNo;
      for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
        if (InputUsed[OpNo] == Input) {
          // This input vector is already an operand.
          break;
        } else if (InputUsed[OpNo] == -1U) {
          // Create a new operand for this input vector.
          InputUsed[OpNo] = Input;
          break;
        }
      }

      if (OpNo >= std::size(InputUsed)) {
        // More than two input vectors used! Give up on trying to create a
        // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
        UseBuildVector = true;
        break;
      }

      // Add the mask index for the new shuffle vector.
      Ops.push_back(Idx + OpNo * NewElts);
    }

    if (UseBuildVector) {
      LLT EltTy = NarrowTy.getElementType();
      SmallVector<Register, 16> SVOps;

      // Extract the input elements by hand.
      for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
        // The mask element. This indexes into the input.
        int Idx = Mask[FirstMaskIdx + MaskOffset];

        // The input vector this mask element indexes into.
        unsigned Input = (unsigned)Idx / NewElts;

        if (Input >= std::size(Inputs)) {
          // The mask element is "undef" or indexes off the end of the input.
          SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
          continue;
        }

        // Turn the index into an offset from the start of the input vector.
        Idx -= Input * NewElts;

        // Extract the vector element by hand.
        SVOps.push_back(MIRBuilder
                            .buildExtractVectorElement(
                                EltTy, Inputs[Input],
                                MIRBuilder.buildConstant(LLT::scalar(32), Idx))
                            .getReg(0));
      }

      // Construct the Lo/Hi output using a G_BUILD_VECTOR.
      Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
    } else if (InputUsed[0] == -1U) {
      // No input vectors were used! The result is undefined.
      Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
    } else {
      Register Op0 = Inputs[InputUsed[0]];
      // If only one input was used, use an undefined vector for the other.
      Register Op1 = InputUsed[1] == -1U
                         ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
                         : Inputs[InputUsed[1]];
      // At least one input vector was used. Create a new shuffle vector.
      Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
    }

    Ops.clear();
  }

  MIRBuilder.buildConcatVectors(DstReg, {Lo, Hi});
  MI.eraseFromParent();