#define DEBUG_TYPE "legalizer"

using namespace LegalizeActions;
using namespace MIPatternMatch;
static std::pair<int, int>
  unsigned NumParts = Size / NarrowSize;
  unsigned LeftoverSize = Size - NumParts * NarrowSize;

  if (LeftoverSize == 0)

  if (LeftoverSize % EltSize != 0)

  return std::make_pair(NumParts, NumLeftover);
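// Note (explanatory comment, not from the upstream source): the breakdown
// above splits a wide type into NumParts pieces of the narrow type plus an
// optional leftover. For example, breaking a 96-bit value into 32-bit parts
// gives NumParts = 96 / 32 = 3 and LeftoverSize = 96 - 3 * 32 = 0; breaking
// 100 bits the same way gives NumParts = 3 and a 4-bit leftover, which must
// still be expressible in whole elements (hence the LeftoverSize % EltSize
// check for vector narrow types).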
    : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
      LI(*MF.getSubtarget().getLegalizerInfo()),
      TLI(*MF.getSubtarget().getTargetLowering()), KB(nullptr) {}

    : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
      TLI(*MF.getSubtarget().getTargetLowering()), KB(KB) {}
  if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
      MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)

  switch (Step.Action) {

    return bitcast(MI, Step.TypeIdx, Step.NewType);

    return lower(MI, Step.TypeIdx, Step.NewType);
void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
  for (int i = 0; i < NumParts; ++i)

bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
                                   LLT MainTy, LLT &LeftoverTy,
  unsigned NumParts = RegSize / MainSize;
  unsigned LeftoverSize = RegSize - NumParts * MainSize;

  if (LeftoverSize == 0) {
    for (unsigned I = 0; I < NumParts; ++I)

  for (unsigned i = 0; i < RegPieces.size() - 1; ++i)

  LeftoverTy = MRI.getType(LeftoverRegs[0]);

  for (unsigned I = 0; I != NumParts; ++I) {
void LegalizerHelper::extractVectorParts(Register Reg, unsigned NumElts,
  unsigned LeftoverNumElts = RegNumElts % NumElts;
  unsigned NumNarrowTyPieces = RegNumElts / NumElts;

  if (LeftoverNumElts == 0)
    return extractParts(Reg, NarrowTy, NumNarrowTyPieces, VRegs);

  extractParts(Reg, EltTy, RegNumElts, Elts);

  for (unsigned i = 0; i < NumNarrowTyPieces; ++i, Offset += NumElts) {

  if (LeftoverNumElts == 1) {
void LegalizerHelper::insertParts(Register DstReg,
    assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
    for (auto Reg : concat<const Register>(PartRegs, LeftoverRegs))
    return mergeMixedSubvectors(DstReg, AllRegs);

  for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
    extractGCDType(GCDRegs, GCDTy, PartReg);
  LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
  buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
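// Note (explanatory comment, not from the upstream source): when the parts
// and the leftover have different types, insertParts re-merges them by first
// unmerging everything into pieces of a common GCD type, merging those pieces
// up to an LCM type with buildLCMMergePieces, and finally narrowing the LCM
// value back into DstReg with buildWidenedRemergeToDst.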
void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
  for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
    appendVectorElts(AllElts, PartRegs[i]);

  appendVectorElts(AllElts, Leftover);
  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);

  const int StartIdx = Regs.size();
  const int NumResults = MI.getNumOperands() - 1;

  for (int I = 0; I != NumResults; ++I)
    Regs[StartIdx + I] = MI.getOperand(I).getReg();

  if (SrcTy == GCDTy) {

  extractGCDType(Parts, GCDTy, SrcReg);
LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
                                         unsigned PadStrategy) {
  int NumOrigSrc = VRegs.size();

  if (NumOrigSrc < NumParts * NumSubParts) {
    if (PadStrategy == TargetOpcode::G_ZEXT)
    else if (PadStrategy == TargetOpcode::G_ANYEXT)
      assert(PadStrategy == TargetOpcode::G_SEXT);

  for (int I = 0; I != NumParts; ++I) {
    bool AllMergePartsArePadding = true;

    for (int J = 0; J != NumSubParts; ++J) {
      int Idx = I * NumSubParts + J;
      if (Idx >= NumOrigSrc) {
        SubMerge[J] = PadReg;

      SubMerge[J] = VRegs[Idx];
      AllMergePartsArePadding = false;

    if (AllMergePartsArePadding && !AllPadReg) {
      if (PadStrategy == TargetOpcode::G_ANYEXT)
      else if (PadStrategy == TargetOpcode::G_ZEXT)

      Remerge[I] = AllPadReg;

    if (NumSubParts == 1)
      Remerge[I] = SubMerge[0];

    if (AllMergePartsArePadding && !AllPadReg)
      AllPadReg = Remerge[I];

  VRegs = std::move(Remerge);
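// Note (explanatory comment, not from the upstream source): PadStrategy names
// the extension used to fill the sources up to a whole number of parts:
// G_ZEXT pads with zero, G_ANYEXT with undef, and G_SEXT requires shifting in
// copies of the sign bit. Parts that consist entirely of padding are reused
// via AllPadReg rather than rebuilt on every iteration.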
void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
  if (DstTy == LCMTy) {

  UnmergeDefs[0] = DstReg;
  for (unsigned I = 1; I != NumDefs; ++I)
#define RTLIBCASE_INT(LibcallPrefix) \
      return RTLIB::LibcallPrefix##32; \
      return RTLIB::LibcallPrefix##64; \
      return RTLIB::LibcallPrefix##128; \
      llvm_unreachable("unexpected size"); \

#define RTLIBCASE(LibcallPrefix) \
      return RTLIB::LibcallPrefix##32; \
      return RTLIB::LibcallPrefix##64; \
      return RTLIB::LibcallPrefix##80; \
      return RTLIB::LibcallPrefix##128; \
      llvm_unreachable("unexpected size"); \
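// Note (explanatory comment, not from the upstream source): each macro
// expands to a switch over the scalar size and yields the matching
// runtime-library enumerator; for example (usage assumed from the macro's
// shape) RTLIBCASE(SQRT_F) selects RTLIB::SQRT_F32, SQRT_F64, SQRT_F80 or
// SQRT_F128 depending on the type, while the _INT variant omits the
// 80-bit case.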
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
  if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
  switch (MI.getOpcode()) {
  case TargetOpcode::G_BZERO:
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMMOVE:
  case TargetOpcode::G_MEMSET:

  if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())

  Register PReg = Next->getOperand(0).getReg();

  if (Ret->getNumImplicitOperands() != 1)

  if (PReg != Ret->getOperand(0).getReg())
  Info.OrigRet = Result;
  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))

    Args.push_back({MO.getReg(), OpType, 0});
                       {MI.getOperand(0).getReg(), OpType, 0}, Args);
  for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {

    LLT OpLLT = MRI.getType(Reg);
    Type *OpTy = nullptr;

    Args.push_back({Reg, OpTy, 0});

  unsigned Opc = MI.getOpcode();
  case TargetOpcode::G_BZERO:
    RTLibcall = RTLIB::BZERO;
  case TargetOpcode::G_MEMCPY:
    RTLibcall = RTLIB::MEMCPY;
    Args[0].Flags[0].setReturned();
  case TargetOpcode::G_MEMMOVE:
    RTLibcall = RTLIB::MEMMOVE;
    Args[0].Flags[0].setReturned();
  case TargetOpcode::G_MEMSET:
    RTLibcall = RTLIB::MEMSET;
    Args[0].Flags[0].setReturned();

  const char *Name = TLI.getLibcallName(RTLibcall);

  Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
  Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() &&
  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))

  if (Info.LoweredTailCall) {
    assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
           "Expected instr following MI to be return or debug inst?");
    } while (MI.getNextNode());
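// Note (explanatory comment, not from the upstream source): the memory
// libcall path maps G_BZERO/G_MEMCPY/G_MEMMOVE/G_MEMSET onto the matching
// RTLIB entry, marks the destination pointer argument as "returned" where the
// C runtime contract guarantees it (memcpy/memmove/memset return the
// destination, bzero does not), and, when the call is lowered as a tail call,
// removes the now-dead return instruction that followed the original memory
// instruction.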
  case TargetOpcode::G_FPEXT:
  case TargetOpcode::G_FPTRUNC:
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP:

                       {MI.getOperand(0).getReg(), ToType, 0},
                       {{MI.getOperand(1).getReg(), FromType, 0}});
  switch (MI.getOpcode()) {
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {

  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
      LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");

  case TargetOpcode::G_FPEXT:
  case TargetOpcode::G_FPTRUNC: {
    if (!FromTy || !ToTy)

  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))

  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))

  case TargetOpcode::G_BZERO:
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMMOVE:
  case TargetOpcode::G_MEMSET: {
    MI.eraseFromParent();

  MI.eraseFromParent();
  switch (MI.getOpcode()) {

  case TargetOpcode::G_IMPLICIT_DEF: {
    if (SizeOp0 % NarrowSize != 0) {
      LLT ImplicitTy = NarrowTy;

      MI.eraseFromParent();

    int NumParts = SizeOp0 / NarrowSize;

    for (int i = 0; i < NumParts; ++i)

    MI.eraseFromParent();

  case TargetOpcode::G_CONSTANT: {
    const APInt &Val = MI.getOperand(1).getCImm()->getValue();
    int NumParts = TotalSize / NarrowSize;

    for (int I = 0; I != NumParts; ++I) {
      unsigned Offset = I * NarrowSize;

    unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
    if (LeftoverBits != 0) {
          Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));

    insertParts(MI.getOperand(0).getReg(),
                Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);

    MI.eraseFromParent();
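// Note (explanatory comment, not from the upstream source): narrowing a
// G_CONSTANT slices the APInt into NarrowSize-bit chunks; e.g. a 96-bit
// constant narrowed to s32 becomes three parts taken at bit offsets 0, 32
// and 64 (Val.lshr(Offset) truncated to 32 bits), plus a truncated leftover
// chunk when TotalSize is not a multiple of NarrowSize. insertParts then
// stitches the pieces back into the original wide register.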
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:

  case TargetOpcode::G_TRUNC: {
      LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");

    MI.eraseFromParent();

  case TargetOpcode::G_FREEZE: {
    for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {

    MI.eraseFromParent();
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:

  case TargetOpcode::G_MUL:
  case TargetOpcode::G_UMULH:

  case TargetOpcode::G_EXTRACT:

  case TargetOpcode::G_INSERT:
  case TargetOpcode::G_LOAD: {
    auto &LoadMI = cast<GLoad>(MI);
    Register DstReg = LoadMI.getDstReg();

    LoadMI.eraseFromParent();

  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_SEXTLOAD: {
    auto &LoadMI = cast<GExtLoad>(MI);
    Register DstReg = LoadMI.getDstReg();
    Register PtrReg = LoadMI.getPointerReg();

    auto &MMO = LoadMI.getMMO();
    unsigned MemSize = MMO.getSizeInBits();

    if (MemSize == NarrowSize) {
    } else if (MemSize < NarrowSize) {
    } else if (MemSize > NarrowSize) {

    if (isa<GZExtLoad>(LoadMI))

    LoadMI.eraseFromParent();
  case TargetOpcode::G_STORE: {
    auto &StoreMI = cast<GStore>(MI);

    Register SrcReg = StoreMI.getValueReg();

    int NumParts = SizeOp0 / NarrowSize;

    unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
    if (SrcTy.isVector() && LeftoverBits != 0)

    StoreMI.eraseFromParent();
  case TargetOpcode::G_SELECT:

  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {

  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:

  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP:
    switch (MI.getOpcode()) {
    case TargetOpcode::G_CTLZ:
    case TargetOpcode::G_CTLZ_ZERO_UNDEF:
    case TargetOpcode::G_CTTZ:
    case TargetOpcode::G_CTTZ_ZERO_UNDEF:
    case TargetOpcode::G_CTPOP:
  case TargetOpcode::G_INTTOPTR:

  case TargetOpcode::G_PTRTOINT:

  case TargetOpcode::G_PHI: {
    if (SizeOp0 % NarrowSize != 0)

    unsigned NumParts = SizeOp0 / NarrowSize;

    for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
      extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,

    for (unsigned i = 0; i < NumParts; ++i) {
      for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
        MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));

    MI.eraseFromParent();
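// Note (explanatory comment, not from the upstream source): a G_PHI is
// narrowed by splitting each incoming value (operands come in value /
// predecessor-block pairs) into NumParts narrow registers, then emitting one
// new PHI per part that takes the i-th piece of every incoming value together
// with the original predecessor block operand.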
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
  case TargetOpcode::G_INSERT_VECTOR_ELT: {

    int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;

  case TargetOpcode::G_ICMP: {
    if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,

    if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
                      RHSPartRegs, RHSLeftoverRegs))

      for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
        auto LHS = std::get<0>(LHSAndRHS);
        auto RHS = std::get<1>(LHSAndRHS);

      for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
        auto LHS = std::get<0>(LHSAndRHS);
        auto RHS = std::get<1>(LHSAndRHS);
        LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
        buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
                            TargetOpcode::G_ZEXT);

      assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
      for (unsigned I = 2, E = Xors.size(); I < E; ++I)

      assert(LHSPartRegs.size() == 2 && "Expected exactly 2 LHS part regs?");
      assert(RHSPartRegs.size() == 2 && "Expected exactly 2 RHS part regs?");

    MI.eraseFromParent();
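// Note (explanatory comment, not from the upstream source): for an equality
// G_ICMP the narrowed parts are compared by XOR-ing matching LHS/RHS pieces,
// OR-ing all the XOR results together and testing the accumulated value
// against zero; other predicates take the two-part path that the paired
// asserts above guard.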
  case TargetOpcode::G_SEXT_INREG: {
    int64_t SizeInBits = MI.getOperand(2).getImm();

      MO1.setReg(TruncMIB.getReg(0));

    if (SizeOp0 % NarrowSize != 0)

    int NumParts = SizeOp0 / NarrowSize;

    for (int i = 0; i < NumParts; ++i) {

    for (int i = 0; i < NumParts; ++i) {
        assert(PartialExtensionReg &&
               "Expected to visit partial extension before full");
        if (FullExtensionReg) {

          FullExtensionReg = DstRegs.back();

              TargetOpcode::G_SEXT_INREG, {NarrowTy},

          PartialExtensionReg = DstRegs.back();

    MI.eraseFromParent();
  case TargetOpcode::G_BSWAP:
  case TargetOpcode::G_BITREVERSE: {
    if (SizeOp0 % NarrowSize != 0)

    unsigned NumParts = SizeOp0 / NarrowSize;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

    for (unsigned i = 0; i < NumParts; ++i) {
                                        {SrcRegs[NumParts - 1 - i]});

    MI.eraseFromParent();

  case TargetOpcode::G_PTR_ADD:
  case TargetOpcode::G_PTRMASK: {

  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_FPTOSI:

  case TargetOpcode::G_FPEXT:
                                     unsigned OpIdx, unsigned ExtOpcode) {
  MO.setReg(ExtB.getReg(0));

  MO.setReg(ExtB.getReg(0));

                                     unsigned OpIdx, unsigned TruncOpcode) {

                                     unsigned OpIdx, unsigned ExtOpcode) {
LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
  const int NumMerge = (DstSize + WideSize - 1) / WideSize;

  unsigned NumOps = MI.getNumOperands();
  unsigned NumSrc = MI.getNumOperands() - 1;

  if (WideSize >= DstSize) {
    for (unsigned I = 2; I != NumOps; ++I) {
      const unsigned Offset = (I - 1) * PartSize;

      Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :

      ResultReg = NextResult;

    if (WideSize > DstSize)

    MI.eraseFromParent();

  const int GCD = std::gcd(SrcSize, WideSize);

  if (GCD == SrcSize) {

    for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)

  if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
    for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)

  const int PartsPerGCD = WideSize / GCD;

  for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {

  MI.eraseFromParent();
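// Note (explanatory comment, not from the upstream source): when the wide
// type does not evenly cover the destination, the merge is rebuilt around the
// GCD of the source and wide sizes. For example, merging s16 sources while
// widening to s24 uses GCD = gcd(16, 24) = 8: every source is unmerged into
// s8 pieces, the piece list is padded up to a whole number of wide parts, and
// the pieces are re-merged PartsPerGCD = 24 / 8 = 3 at a time.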
LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
  int NumDst = MI.getNumOperands() - 1;
  Register SrcReg = MI.getOperand(NumDst).getReg();

  Register Dst0Reg = MI.getOperand(0).getReg();

      dbgs() << "Not casting non-integral address space integer\n");

    for (int I = 1; I != NumDst; ++I) {

    MI.eraseFromParent();

    LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");

  const int NumUnmerge = Unmerge->getNumOperands() - 1;

  if (PartsPerRemerge == 1) {
    for (int I = 0; I != NumUnmerge; ++I) {

      for (int J = 0; J != PartsPerUnmerge; ++J) {
        int Idx = I * PartsPerUnmerge + J;
          MIB.addDef(MI.getOperand(Idx).getReg());

      MIB.addUse(Unmerge.getReg(I));

    for (int J = 0; J != NumUnmerge; ++J)
      extractGCDType(Parts, GCDTy, Unmerge.getReg(J));

    for (int I = 0; I != NumDst; ++I) {
      for (int J = 0; J < PartsPerRemerge; ++J) {
        const int Idx = I * PartsPerRemerge + J;

      RemergeParts.clear();

  MI.eraseFromParent();
LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
  unsigned Offset = MI.getOperand(2).getImm();

    MI.eraseFromParent();

  LLT ShiftTy = SrcTy;

  MI.eraseFromParent();
LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
  if (TypeIdx != 0 || WideTy.isVector())
LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
  std::optional<Register> CarryIn;
  switch (MI.getOpcode()) {
  case TargetOpcode::G_SADDO:
    Opcode = TargetOpcode::G_ADD;
    ExtOpcode = TargetOpcode::G_SEXT;
  case TargetOpcode::G_SSUBO:
    Opcode = TargetOpcode::G_SUB;
    ExtOpcode = TargetOpcode::G_SEXT;
  case TargetOpcode::G_UADDO:
    Opcode = TargetOpcode::G_ADD;
    ExtOpcode = TargetOpcode::G_ZEXT;
  case TargetOpcode::G_USUBO:
    Opcode = TargetOpcode::G_SUB;
    ExtOpcode = TargetOpcode::G_ZEXT;
  case TargetOpcode::G_SADDE:
    Opcode = TargetOpcode::G_UADDE;
    ExtOpcode = TargetOpcode::G_SEXT;
    CarryIn = MI.getOperand(4).getReg();
  case TargetOpcode::G_SSUBE:
    Opcode = TargetOpcode::G_USUBE;
    ExtOpcode = TargetOpcode::G_SEXT;
    CarryIn = MI.getOperand(4).getReg();
  case TargetOpcode::G_UADDE:
    Opcode = TargetOpcode::G_UADDE;
    ExtOpcode = TargetOpcode::G_ZEXT;
    CarryIn = MI.getOperand(4).getReg();
  case TargetOpcode::G_USUBE:
    Opcode = TargetOpcode::G_USUBE;
    ExtOpcode = TargetOpcode::G_ZEXT;
    CarryIn = MI.getOperand(4).getReg();

  LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());

                           {LHSExt, RHSExt, *CarryIn})

  MI.eraseFromParent();
LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
                  MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
                  MI.getOpcode() == TargetOpcode::G_SSHLSAT;
  bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
                 MI.getOpcode() == TargetOpcode::G_USHLSAT;

                               {ShiftL, ShiftR}, MI.getFlags());

  MI.eraseFromParent();
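// Note (explanatory comment, not from the upstream source): saturating ops
// are widened by shifting the operands into the high bits of the wide type,
// performing the wide saturating operation there (so overflow saturates at
// the same bit position as in the narrow type), and shifting the result back
// down; for the shift variants only the left operand is moved, since the
// shift amount keeps its meaning.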
LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;

  Register OriginalOverflow = MI.getOperand(1).getReg();

  LLT OverflowTy = MRI.getType(OriginalOverflow);

  unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;

                              {LeftOperand, RightOperand});
  auto Mul = Mulo->getOperand(0);

  MI.eraseFromParent();
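// Note (explanatory comment, not from the upstream source): to widen
// G_SMULO/G_UMULO the operands are sign- or zero-extended, the multiply is
// performed in the wide type, and overflow is detected by checking that
// truncating the wide product to the original width and re-extending it
// reproduces the same value (combined with any overflow reported by the wide
// multiply itself).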
  switch (MI.getOpcode()) {

  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR:
  case TargetOpcode::G_ATOMICRMW_MIN:
  case TargetOpcode::G_ATOMICRMW_MAX:
  case TargetOpcode::G_ATOMICRMW_UMIN:
  case TargetOpcode::G_ATOMICRMW_UMAX:
    assert(TypeIdx == 0 && "atomicrmw with second scalar type");

  case TargetOpcode::G_ATOMIC_CMPXCHG:
    assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");

  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
           "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
  case TargetOpcode::G_EXTRACT:
    return widenScalarExtract(MI, TypeIdx, WideTy);
  case TargetOpcode::G_INSERT:
    return widenScalarInsert(MI, TypeIdx, WideTy);
  case TargetOpcode::G_MERGE_VALUES:
    return widenScalarMergeValues(MI, TypeIdx, WideTy);
  case TargetOpcode::G_UNMERGE_VALUES:
    return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
    return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
  case TargetOpcode::G_UMULO:
  case TargetOpcode::G_SMULO:
    return widenScalarMulo(MI, TypeIdx, WideTy);
  case TargetOpcode::G_SADDSAT:
  case TargetOpcode::G_SSUBSAT:
  case TargetOpcode::G_SSHLSAT:
  case TargetOpcode::G_UADDSAT:
  case TargetOpcode::G_USUBSAT:
  case TargetOpcode::G_USHLSAT:
    return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP: {

    unsigned ExtOpc =
        MI.getOpcode() == TargetOpcode::G_CTTZ ||
                MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF
            ? TargetOpcode::G_ANYEXT
            : TargetOpcode::G_ZEXT;

    unsigned NewOpc = MI.getOpcode();
    if (NewOpc == TargetOpcode::G_CTTZ) {

      NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;

    if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
        MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {

    MI.eraseFromParent();
  case TargetOpcode::G_BSWAP: {
    MI.getOperand(0).setReg(DstExt);

  case TargetOpcode::G_BITREVERSE: {
    MI.getOperand(0).setReg(DstExt);

  case TargetOpcode::G_FREEZE:

  case TargetOpcode::G_ABS:

  case TargetOpcode::G_ADD:
  case TargetOpcode::G_AND:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_SUB:

  case TargetOpcode::G_SBFX:
  case TargetOpcode::G_UBFX:
  case TargetOpcode::G_SHL:

  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:

  case TargetOpcode::G_SDIVREM:

  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
    unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
        TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;

  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX:

  case TargetOpcode::G_UDIVREM:

  case TargetOpcode::G_SELECT:
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:

  case TargetOpcode::G_SITOFP:

  case TargetOpcode::G_UITOFP:

  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD:

  case TargetOpcode::G_STORE: {
        TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
  case TargetOpcode::G_CONSTANT: {
        MRI.getType(MI.getOperand(0).getReg()));
    assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
            ExtOpc == TargetOpcode::G_ANYEXT) &&
    const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)

  case TargetOpcode::G_FCONSTANT: {
    MI.eraseFromParent();

  case TargetOpcode::G_IMPLICIT_DEF: {

  case TargetOpcode::G_BRCOND:

  case TargetOpcode::G_FCMP:

  case TargetOpcode::G_ICMP:
                        MI.getOperand(1).getPredicate()))
            ? TargetOpcode::G_SEXT
            : TargetOpcode::G_ZEXT;

  case TargetOpcode::G_PTR_ADD:
    assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
  case TargetOpcode::G_PHI: {
    assert(TypeIdx == 0 && "Expecting only Idx 0");

    for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {

  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
                     TargetOpcode::G_ANYEXT);

  case TargetOpcode::G_INSERT_VECTOR_ELT: {
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FMAD:
  case TargetOpcode::G_FNEG:
  case TargetOpcode::G_FABS:
  case TargetOpcode::G_FCANONICALIZE:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM_IEEE:
  case TargetOpcode::G_FMAXNUM_IEEE:
  case TargetOpcode::G_FMINIMUM:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_INTRINSIC_TRUNC:
  case TargetOpcode::G_INTRINSIC_ROUND:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:

    for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
  case TargetOpcode::G_FPOWI: {

  case TargetOpcode::G_INTTOPTR:

  case TargetOpcode::G_PTRTOINT:

  case TargetOpcode::G_BUILD_VECTOR: {

    for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
  case TargetOpcode::G_SEXT_INREG:

  case TargetOpcode::G_PTRMASK: {

  auto Unmerge = B.buildUnmerge(Ty, Src);
  for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)

  unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();

      MI.getOperand(1).getFPImm(), Alignment));

  MI.eraseFromParent();
  LLT DstCastTy = DstEltTy;
  LLT SrcPartTy = SrcEltTy;

  if (NumSrcElt < NumDstElt) {

    SrcPartTy = SrcEltTy;
  } else if (NumSrcElt > NumDstElt) {

    DstCastTy = DstEltTy;

  MI.eraseFromParent();

  MI.eraseFromParent();

                                       unsigned NewEltSize,
                                       unsigned OldEltSize) {
  const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
  LLT IdxTy = B.getMRI()->getType(Idx);

  auto OffsetMask = B.buildConstant(
  auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
  return B.buildShl(IdxTy, OffsetIdx,
                    B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
  if (NewNumElts > OldNumElts) {

    if (NewNumElts % OldNumElts != 0)

    const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;

    for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {

      NewOps[I] = Elt.getReg(0);

    MI.eraseFromParent();

  if (NewNumElts < OldNumElts) {
    if (NewEltSize % OldEltSize != 0)

    const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);

    MI.eraseFromParent();

  LLT TargetTy = B.getMRI()->getType(TargetReg);
  LLT InsertTy = B.getMRI()->getType(InsertReg);
  auto ZextVal = B.buildZExt(TargetTy, InsertReg);
  auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);

  auto EltMask = B.buildConstant(

  auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
  auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);

  auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);

  return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
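// Note (explanatory comment, not from the upstream source): the helper above
// performs a bit-field insert with plain scalar ops: the new element is
// zero-extended and shifted to its bit offset, an element-wide mask is
// shifted to the same offset and inverted to clear the old bits, and the two
// halves are OR-ed together to form the updated scalar.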
  if (NewNumElts < OldNumElts) {
    if (NewEltSize % OldEltSize != 0)

    const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);

        CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);

  MI.eraseFromParent();
  if (MemSizeInBits != MemStoreSizeInBits) {

    if (isa<GSExtLoad>(LoadMI)) {
    } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {

    if (DstTy != LoadTy)

  uint64_t LargeSplitSize, SmallSplitSize;

    SmallSplitSize = MemSizeInBits - LargeSplitSize;

    SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;

                                LargeSplitSize / 8);
                                          SmallPtr, *SmallMMO);

  if (AnyExtTy == DstTy)

  if (StoreWidth != StoreSizeInBits) {

  uint64_t LargeSplitSize, SmallSplitSize;
    LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.getSizeInBits());

    SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
  switch (MI.getOpcode()) {
  case TargetOpcode::G_LOAD: {

  case TargetOpcode::G_STORE: {

  case TargetOpcode::G_SELECT: {
          dbgs() << "bitcast action not implemented for vector select\n");

  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {

  case TargetOpcode::G_EXTRACT_VECTOR_ELT:

  case TargetOpcode::G_INSERT_VECTOR_ELT:

void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
  using namespace TargetOpcode;

  switch (MI.getOpcode()) {
  case TargetOpcode::G_FCONSTANT:

  case TargetOpcode::G_BITCAST:

  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM: {
                              {MI.getOperand(1), MI.getOperand(2)});

    MI.eraseFromParent();

  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SSUBO:

  case TargetOpcode::G_UMULH:
  case TargetOpcode::G_SMULH:

  case TargetOpcode::G_SMULO:
  case TargetOpcode::G_UMULO: {

    Register Overflow = MI.getOperand(1).getReg();

    unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
                          ? TargetOpcode::G_SMULH
                          : TargetOpcode::G_UMULH;

    MI.setDesc(TII.get(TargetOpcode::G_MUL));
    MI.removeOperand(1);

    if (Opcode == TargetOpcode::G_SMULH) {

  case TargetOpcode::G_FNEG: {

    Register SubByReg = MI.getOperand(1).getReg();

    MI.eraseFromParent();
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_STRICT_FSUB: {

    if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)

    MI.eraseFromParent();

  case TargetOpcode::G_FMAD:

  case TargetOpcode::G_FFLOOR:

  case TargetOpcode::G_INTRINSIC_ROUND:

  case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
      changeOpcode(MI, TargetOpcode::G_FRINT);

  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
    Register OldValRes = MI.getOperand(0).getReg();
    Register SuccessRes = MI.getOperand(1).getReg();
        **MI.memoperands_begin());
    MI.eraseFromParent();
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD:

  case TargetOpcode::G_STORE:

  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTPOP:

    Register CarryOut = MI.getOperand(1).getReg();

    MI.eraseFromParent();

    Register CarryOut = MI.getOperand(1).getReg();
    Register CarryIn = MI.getOperand(4).getReg();

    MI.eraseFromParent();

    Register BorrowOut = MI.getOperand(1).getReg();

    MI.eraseFromParent();

    Register BorrowOut = MI.getOperand(1).getReg();
    Register BorrowIn = MI.getOperand(4).getReg();

    MI.eraseFromParent();
  case G_MERGE_VALUES:

  case G_UNMERGE_VALUES:

  case TargetOpcode::G_SEXT_INREG: {
    assert(MI.getOperand(2).isImm() && "Expected immediate");
    int64_t SizeInBits = MI.getOperand(2).getImm();

    MI.eraseFromParent();

  case G_EXTRACT_VECTOR_ELT:
  case G_INSERT_VECTOR_ELT:

  case G_SHUFFLE_VECTOR:

  case G_DYN_STACKALLOC:

  case G_READ_REGISTER:
  case G_WRITE_REGISTER:

  case G_MEMCPY_INLINE:
    return lowerMemcpyInline(MI);
  unsigned AddrSpace = DL.getAllocaAddrSpace();

  LLT IdxTy = B.getMRI()->getType(IdxReg);

    return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);

  return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))

         "Converting bits to bytes lost precision");
                         std::initializer_list<unsigned> NonVecOpIndices) {
  if (MI.getNumMemOperands() != 0)

  LLT VecTy = MRI.getType(MI.getReg(0));

  for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {

    LLT Ty = MRI.getType(Op.getReg());

  int NumParts, NumLeftover;
  std::tie(NumParts, NumLeftover) =

  assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
  for (int i = 0; i < NumParts; ++i) {

    assert(NumLeftover == 1 && "expected exactly one leftover");

  for (unsigned i = 0; i < N; ++i) {
    else if (Op.isImm())
    else if (Op.isPredicate())
                         std::initializer_list<unsigned> NonVecOpIndices) {
         "Non-compatible opcode or not specified non-vector operands");

  unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
  unsigned NumDefs = MI.getNumDefs();

  for (unsigned i = 0; i < NumDefs; ++i) {

  for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
       ++UseIdx, ++UseNo) {
                    MI.getOperand(UseIdx));
      extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces);
      for (auto Reg : SplitPieces)

  unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;

  for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
    for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
      Defs.push_back(OutputOpsPieces[DstNo][i]);

    for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
      Uses.push_back(InputOpsPieces[InputNo][i]);

    for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
      OutputRegs[DstNo].push_back(I.getReg(DstNo));

  for (unsigned i = 0; i < NumDefs; ++i)
    mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);

  for (unsigned i = 0; i < NumDefs; ++i)

  MI.eraseFromParent();
  unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
  unsigned NumDefs = MI.getNumDefs();

  for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
       UseIdx += 2, ++UseNo) {
    extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo]);

  unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;

  for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {

    for (unsigned j = 0; j < NumInputs / 2; ++j) {
      Phi.addUse(InputOpsPieces[j][i]);
      Phi.add(MI.getOperand(1 + j * 2 + 1));

    mergeMixedSubvectors(MI.getReg(0), OutputRegs);

  MI.eraseFromParent();
  const int NumDst = MI.getNumOperands() - 1;
  const Register SrcReg = MI.getOperand(NumDst).getReg();

  if (TypeIdx != 1 || NarrowTy == DstTy)

  const int PartsPerUnmerge = NumDst / NumUnmerge;

  for (int I = 0; I != NumUnmerge; ++I) {

    for (int J = 0; J != PartsPerUnmerge; ++J)
      MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
    MIB.addUse(Unmerge.getReg(I));

  MI.eraseFromParent();
  if (NarrowTy == SrcTy)

  for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
    for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)

  unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
  for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
       ++i, Offset += NumNarrowTyElts) {

  MI.eraseFromParent();

  assert(TypeIdx == 0 && "Bad type index");

  for (unsigned i = 0; i < NumParts; ++i) {
    for (unsigned j = 0; j < NumElts; ++j)
      Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());

  MI.eraseFromParent();
  bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;

  assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
    InsertVal = MI.getOperand(2).getReg();

  IdxVal = MaybeCst->Value.getSExtValue();
    MI.eraseFromParent();

  LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);

  LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
                                  TargetOpcode::G_ANYEXT);

  int64_t PartIdx = IdxVal / NewNumElts;
        PartTy, VecParts[PartIdx], InsertVal, NewIdx);
    VecParts[PartIdx] = InsertPart.getReg(0);

  buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
  MI.eraseFromParent();
  bool IsLoad = isa<GLoad>(LdStMI);

  int NumLeftover = -1;
    if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
                     NarrowLeftoverRegs)) {
      NumParts = NarrowRegs.size();
      NumLeftover = NarrowLeftoverRegs.size();

  auto MMO = LdStMI.getMMO();
                           unsigned NumParts, unsigned Offset) -> unsigned {
    for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
      unsigned ByteOffset = Offset / 8;

        ValRegs.push_back(Dst);

  unsigned HandledOffset =
      splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);

  splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);

    insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
                LeftoverTy, NarrowLeftoverRegs);
  using namespace TargetOpcode;

  switch (MI.getOpcode()) {
  case G_IMPLICIT_DEF:

  case G_FCANONICALIZE:

  case G_INTRINSIC_ROUND:
  case G_INTRINSIC_ROUNDEVEN:
  case G_INTRINSIC_TRUNC:

  case G_FMINNUM_IEEE:
  case G_FMAXNUM_IEEE:

  case G_CTLZ_ZERO_UNDEF:
  case G_CTTZ_ZERO_UNDEF:

  case G_ADDRSPACE_CAST:

  case G_UNMERGE_VALUES:

  case G_BUILD_VECTOR:
    assert(TypeIdx == 0 && "not a vector type index");

  case G_CONCAT_VECTORS:

  case G_EXTRACT_VECTOR_ELT:
  case G_INSERT_VECTOR_ELT:

  case G_SHUFFLE_VECTOR:

  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  Register Src1Reg = MI.getOperand(1).getReg();
  Register Src2Reg = MI.getOperand(2).getReg();

  if (DstTy != Src1Ty)
  if (DstTy != Src2Ty)

  extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs);
  extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs);
  Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],

    unsigned InputUsed[2] = {-1U, -1U};
    unsigned FirstMaskIdx = High * NewElts;
    bool UseBuildVector = false;
    for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
      int Idx = Mask[FirstMaskIdx + MaskOffset];

      if (Input >= std::size(Inputs)) {

      Idx -= Input * NewElts;

      for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
        if (InputUsed[OpNo] == Input) {
        } else if (InputUsed[OpNo] == -1U) {
          InputUsed[OpNo] = Input;

      if (OpNo >= std::size(InputUsed)) {
        UseBuildVector = true;

    if (UseBuildVector) {
      for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
        int Idx = Mask[FirstMaskIdx + MaskOffset];

        if (Input >= std::size(Inputs)) {

        Idx -= Input * NewElts;

                .buildExtractVectorElement(
                    EltTy, Inputs[Input],

    } else if (InputUsed[0] == -1U) {

      Register Op0 = Inputs[InputUsed[0]];
                         : Inputs[InputUsed[1]];

  MI.eraseFromParent();
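// Note (explanatory comment, not from the upstream source): the shuffle is
// split into a low and a high half. For each half the mask is scanned to see
// which of the four input half-vectors it actually reads; if at most two are
// used, the half stays a smaller G_SHUFFLE_VECTOR of those two inputs,
// otherwise UseBuildVector falls back to extracting the referenced elements
// individually and rebuilding the half with a build-vector.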
  case TargetOpcode::G_VECREDUCE_FADD:
    ScalarOpc = TargetOpcode::G_FADD;
  case TargetOpcode::G_VECREDUCE_FMUL:
    ScalarOpc = TargetOpcode::G_FMUL;
  case TargetOpcode::G_VECREDUCE_FMAX:
    ScalarOpc = TargetOpcode::G_FMAXNUM;
  case TargetOpcode::G_VECREDUCE_FMIN:
    ScalarOpc = TargetOpcode::G_FMINNUM;
  case TargetOpcode::G_VECREDUCE_ADD:
    ScalarOpc = TargetOpcode::G_ADD;
  case TargetOpcode::G_VECREDUCE_MUL:
    ScalarOpc = TargetOpcode::G_MUL;
  case TargetOpcode::G_VECREDUCE_AND:
    ScalarOpc = TargetOpcode::G_AND;
  case TargetOpcode::G_VECREDUCE_OR: