29 #define DEBUG_TYPE "legalizer" 32 using namespace LegalizeActions;
33 using namespace MIPatternMatch;
42 static std::pair<int, int>
48 unsigned NumParts =
Size / NarrowSize;
49 unsigned LeftoverSize =
Size - NumParts * NarrowSize;
52 if (LeftoverSize == 0)
57 if (LeftoverSize % EltSize != 0)
65 return std::make_pair(NumParts, NumLeftover);
92 : MIRBuilder(
Builder), Observer(Observer),
MRI(MF.getRegInfo()),
93 LI(*MF.getSubtarget().getLegalizerInfo()),
94 TLI(*MF.getSubtarget().getTargetLowering()) { }
99 : MIRBuilder(
B), Observer(Observer),
MRI(MF.getRegInfo()), LI(LI),
100 TLI(*MF.getSubtarget().getTargetLowering()) { }
108 if (
MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
109 MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
112 switch (Step.Action) {
127 return bitcast(
MI, Step.TypeIdx, Step.NewType);
130 return lower(
MI, Step.TypeIdx, Step.NewType);
146 void LegalizerHelper::extractParts(
Register Reg,
LLT Ty,
int NumParts,
148 for (
int i = 0; i < NumParts; ++i)
154 LLT MainTy,
LLT &LeftoverTy,
161 unsigned NumParts = RegSize / MainSize;
162 unsigned LeftoverSize = RegSize - NumParts * MainSize;
165 if (LeftoverSize == 0) {
166 for (
unsigned I = 0;
I < NumParts; ++
I)
174 if (LeftoverSize % EltSize != 0)
182 for (
unsigned I = 0;
I != NumParts; ++
I) {
188 for (
unsigned Offset = MainSize * NumParts;
Offset < RegSize;
198 void LegalizerHelper::insertParts(
Register DstReg,
228 CurResultReg = NewResultReg;
232 for (
unsigned I = 0,
E = LeftoverRegs.
size();
I !=
E; ++
I) {
238 CurResultReg = NewResultReg;
239 Offset += LeftoverPartSize;
246 assert(
MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
248 const int StartIdx = Regs.
size();
249 const int NumResults =
MI.getNumOperands() - 1;
251 for (
int I = 0;
I != NumResults; ++
I)
252 Regs[StartIdx +
I] =
MI.getOperand(
I).getReg();
258 if (SrcTy == GCDTy) {
273 extractGCDType(Parts, GCDTy, SrcReg);
277 LLT LegalizerHelper::buildLCMMergePieces(
LLT DstTy,
LLT NarrowTy,
LLT GCDTy,
279 unsigned PadStrategy) {
284 int NumOrigSrc = VRegs.
size();
290 if (NumOrigSrc < NumParts * NumSubParts) {
291 if (PadStrategy == TargetOpcode::G_ZEXT)
293 else if (PadStrategy == TargetOpcode::G_ANYEXT)
296 assert(PadStrategy == TargetOpcode::G_SEXT);
317 for (
int I = 0;
I != NumParts; ++
I) {
318 bool AllMergePartsArePadding =
true;
321 for (
int J = 0; J != NumSubParts; ++J) {
322 int Idx =
I * NumSubParts + J;
323 if (Idx >= NumOrigSrc) {
324 SubMerge[J] = PadReg;
328 SubMerge[J] = VRegs[Idx];
331 AllMergePartsArePadding =
false;
337 if (AllMergePartsArePadding && !AllPadReg) {
338 if (PadStrategy == TargetOpcode::G_ANYEXT)
340 else if (PadStrategy == TargetOpcode::G_ZEXT)
350 Remerge[
I] = AllPadReg;
354 if (NumSubParts == 1)
355 Remerge[
I] = SubMerge[0];
360 if (AllMergePartsArePadding && !AllPadReg)
361 AllPadReg = Remerge[
I];
368 void LegalizerHelper::buildWidenedRemergeToDst(
Register DstReg,
LLT LCMTy,
375 if (DstTy == LCMTy) {
389 UnmergeDefs[0] = DstReg;
390 for (
unsigned I = 1;
I != NumDefs; ++
I)
402 #define RTLIBCASE_INT(LibcallPrefix) \ 406 return RTLIB::LibcallPrefix##32; \ 408 return RTLIB::LibcallPrefix##64; \ 410 return RTLIB::LibcallPrefix##128; \ 412 llvm_unreachable("unexpected size"); \ 416 #define RTLIBCASE(LibcallPrefix) \ 420 return RTLIB::LibcallPrefix##32; \ 422 return RTLIB::LibcallPrefix##64; \ 424 return RTLIB::LibcallPrefix##80; \ 426 return RTLIB::LibcallPrefix##128; \ 428 llvm_unreachable("unexpected size"); \ 433 case TargetOpcode::G_SDIV:
435 case TargetOpcode::G_UDIV:
437 case TargetOpcode::G_SREM:
439 case TargetOpcode::G_UREM:
441 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
443 case TargetOpcode::G_FADD:
445 case TargetOpcode::G_FSUB:
447 case TargetOpcode::G_FMUL:
449 case TargetOpcode::G_FDIV:
451 case TargetOpcode::G_FEXP:
453 case TargetOpcode::G_FEXP2:
455 case TargetOpcode::G_FREM:
457 case TargetOpcode::G_FPOW:
459 case TargetOpcode::G_FMA:
461 case TargetOpcode::G_FSIN:
463 case TargetOpcode::G_FCOS:
465 case TargetOpcode::G_FLOG10:
467 case TargetOpcode::G_FLOG:
469 case TargetOpcode::G_FLOG2:
471 case TargetOpcode::G_FCEIL:
473 case TargetOpcode::G_FFLOOR:
475 case TargetOpcode::G_FMINNUM:
477 case TargetOpcode::G_FMAXNUM:
479 case TargetOpcode::G_FSQRT:
481 case TargetOpcode::G_FRINT:
483 case TargetOpcode::G_FNEARBYINT:
485 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
504 .removeAttribute(Attribute::NonNull)
531 Info.OrigRet = Result;
533 if (!CLI.lowerCall(MIRBuilder,
Info))
556 for (
unsigned i = 1; i <
MI.getNumOperands(); i++)
557 Args.push_back({MI.getOperand(i).getReg(), OpType});
569 for (
unsigned i = 0; i <
MI.getNumOperands() - 1; ++i) {
574 Type *OpTy =
nullptr;
585 switch (
MI.getOpcode()) {
586 case TargetOpcode::G_MEMCPY:
589 case TargetOpcode::G_MEMMOVE:
590 RTLibcall = RTLIB::MEMMOVE;
592 case TargetOpcode::G_MEMSET:
593 RTLibcall = RTLIB::MEMSET;
598 const char *
Name = TLI.getLibcallName(RTLibcall);
601 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
604 Info.IsTailCall =
MI.getOperand(
MI.getNumOperands() - 1).getImm() &&
608 if (!CLI.lowerCall(MIRBuilder,
Info))
611 if (
Info.LoweredTailCall) {
612 assert(
Info.IsTailCall &&
"Lowered tail call when it wasn't a tail call?");
618 "Expected instr following MI to be return or debug inst?");
622 }
while (
MI.getNextNode());
634 case TargetOpcode::G_FPEXT:
636 case TargetOpcode::G_FPTRUNC:
638 case TargetOpcode::G_FPTOSI:
640 case TargetOpcode::G_FPTOUI:
642 case TargetOpcode::G_SITOFP:
644 case TargetOpcode::G_UITOFP:
664 switch (
MI.getOpcode()) {
667 case TargetOpcode::G_SDIV:
668 case TargetOpcode::G_UDIV:
669 case TargetOpcode::G_SREM:
670 case TargetOpcode::G_UREM:
671 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
678 case TargetOpcode::G_FADD:
679 case TargetOpcode::G_FSUB:
680 case TargetOpcode::G_FMUL:
681 case TargetOpcode::G_FDIV:
682 case TargetOpcode::G_FMA:
683 case TargetOpcode::G_FPOW:
684 case TargetOpcode::G_FREM:
685 case TargetOpcode::G_FCOS:
686 case TargetOpcode::G_FSIN:
687 case TargetOpcode::G_FLOG10:
688 case TargetOpcode::G_FLOG:
689 case TargetOpcode::G_FLOG2:
690 case TargetOpcode::G_FEXP:
691 case TargetOpcode::G_FEXP2:
692 case TargetOpcode::G_FCEIL:
693 case TargetOpcode::G_FFLOOR:
694 case TargetOpcode::G_FMINNUM:
695 case TargetOpcode::G_FMAXNUM:
696 case TargetOpcode::G_FSQRT:
697 case TargetOpcode::G_FRINT:
698 case TargetOpcode::G_FNEARBYINT:
699 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
702 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
710 case TargetOpcode::G_FPEXT:
711 case TargetOpcode::G_FPTRUNC: {
714 if (!FromTy || !ToTy)
721 case TargetOpcode::G_FPTOSI:
722 case TargetOpcode::G_FPTOUI: {
726 if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
736 case TargetOpcode::G_SITOFP:
737 case TargetOpcode::G_UITOFP: {
741 if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
751 case TargetOpcode::G_MEMCPY:
752 case TargetOpcode::G_MEMMOVE:
753 case TargetOpcode::G_MEMSET: {
755 MI.eraseFromParent();
760 MI.eraseFromParent();
770 switch (
MI.getOpcode()) {
773 case TargetOpcode::G_IMPLICIT_DEF: {
783 if (SizeOp0 % NarrowSize != 0) {
784 LLT ImplicitTy = NarrowTy;
791 MI.eraseFromParent();
795 int NumParts = SizeOp0 / NarrowSize;
798 for (
int i = 0; i < NumParts; ++i)
805 MI.eraseFromParent();
808 case TargetOpcode::G_CONSTANT: {
810 const APInt &Val =
MI.getOperand(1).getCImm()->getValue();
813 int NumParts = TotalSize / NarrowSize;
816 for (
int I = 0;
I != NumParts; ++
I) {
817 unsigned Offset =
I * NarrowSize;
824 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
826 if (LeftoverBits != 0) {
830 Val.
lshr(NumParts * NarrowSize).
trunc(LeftoverBits));
834 insertParts(
MI.getOperand(0).getReg(),
835 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
837 MI.eraseFromParent();
840 case TargetOpcode::G_SEXT:
841 case TargetOpcode::G_ZEXT:
842 case TargetOpcode::G_ANYEXT:
844 case TargetOpcode::G_TRUNC: {
850 LLVM_DEBUG(
dbgs() <<
"Can't narrow trunc to type " << NarrowTy <<
"\n");
856 MI.eraseFromParent();
860 case TargetOpcode::G_FREEZE:
863 case TargetOpcode::G_ADD: {
866 if (SizeOp0 % NarrowSize != 0)
872 extractParts(
MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
873 extractParts(
MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
876 for (
int i = 0; i < NumParts; ++i) {
884 Src2Regs[i], CarryIn);
895 MI.eraseFromParent();
898 case TargetOpcode::G_SUB: {
901 if (SizeOp0 % NarrowSize != 0)
907 extractParts(
MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
908 extractParts(
MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
913 {Src1Regs[0], Src2Regs[0]});
916 for (
int i = 1; i < NumParts; ++i) {
921 {Src1Regs[i], Src2Regs[i], BorrowIn});
924 BorrowIn = BorrowOut;
927 MI.eraseFromParent();
930 case TargetOpcode::G_MUL:
931 case TargetOpcode::G_UMULH:
933 case TargetOpcode::G_EXTRACT:
935 case TargetOpcode::G_INSERT:
937 case TargetOpcode::G_LOAD: {
938 auto &MMO = **
MI.memoperands_begin();
948 MI.eraseFromParent();
954 case TargetOpcode::G_ZEXTLOAD:
955 case TargetOpcode::G_SEXTLOAD: {
956 bool ZExt =
MI.getOpcode() == TargetOpcode::G_ZEXTLOAD;
961 auto &MMO = **
MI.memoperands_begin();
962 unsigned MemSize = MMO.getSizeInBits();
964 if (MemSize == NarrowSize) {
966 }
else if (MemSize < NarrowSize) {
968 }
else if (MemSize > NarrowSize) {
978 MI.eraseFromParent();
981 case TargetOpcode::G_STORE: {
982 const auto &MMO = **
MI.memoperands_begin();
989 int NumParts = SizeOp0 / NarrowSize;
992 if (SrcTy.
isVector() && LeftoverBits != 0)
997 auto &MMO = **
MI.memoperands_begin();
1000 MI.eraseFromParent();
1006 case TargetOpcode::G_SELECT:
1008 case TargetOpcode::G_AND:
1009 case TargetOpcode::G_OR:
1010 case TargetOpcode::G_XOR: {
1022 case TargetOpcode::G_SHL:
1023 case TargetOpcode::G_LSHR:
1024 case TargetOpcode::G_ASHR:
1026 case TargetOpcode::G_CTLZ:
1027 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1028 case TargetOpcode::G_CTTZ:
1029 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1030 case TargetOpcode::G_CTPOP:
1032 switch (
MI.getOpcode()) {
1033 case TargetOpcode::G_CTLZ:
1034 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1036 case TargetOpcode::G_CTTZ:
1037 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1039 case TargetOpcode::G_CTPOP:
1049 case TargetOpcode::G_INTTOPTR:
1057 case TargetOpcode::G_PTRTOINT:
1065 case TargetOpcode::G_PHI: {
1066 unsigned NumParts = SizeOp0 / NarrowSize;
1070 for (
unsigned i = 1; i <
MI.getNumOperands(); i += 2) {
1073 extractParts(
MI.getOperand(i).getReg(), NarrowTy, NumParts,
1078 for (
unsigned i = 0; i < NumParts; ++i) {
1082 for (
unsigned j = 1; j <
MI.getNumOperands(); j += 2)
1083 MIB.
addUse(SrcRegs[j / 2][i]).
add(
MI.getOperand(j + 1));
1088 MI.eraseFromParent();
1091 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1092 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1096 int OpIdx =
MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1102 case TargetOpcode::G_ICMP: {
1104 if (NarrowSize * 2 != SrcSize)
1117 static_cast<CmpInst::Predicate>(
MI.getOperand(1).getPredicate());
1135 MI.eraseFromParent();
1138 case TargetOpcode::G_SEXT_INREG: {
1142 int64_t SizeInBits =
MI.getOperand(2).getImm();
1152 MO1.
setReg(TruncMIB.getReg(0));
1167 if (SizeOp0 % NarrowSize != 0)
1169 int NumParts = SizeOp0 / NarrowSize;
1177 for (
int i = 0; i < NumParts; ++i) {
1193 for (
int i = 0; i < NumParts; ++i) {
1197 assert(PartialExtensionReg &&
1198 "Expected to visit partial extension before full");
1199 if (FullExtensionReg) {
1206 FullExtensionReg = DstRegs.
back();
1211 TargetOpcode::G_SEXT_INREG, {NarrowTy},
1214 PartialExtensionReg = DstRegs.
back();
1221 MI.eraseFromParent();
1224 case TargetOpcode::G_BSWAP:
1225 case TargetOpcode::G_BITREVERSE: {
1226 if (SizeOp0 % NarrowSize != 0)
1231 unsigned NumParts = SizeOp0 / NarrowSize;
1232 extractParts(
MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
1234 for (
unsigned i = 0; i < NumParts; ++i) {
1236 {SrcRegs[NumParts - 1 - i]});
1243 MI.eraseFromParent();
1246 case TargetOpcode::G_PTR_ADD:
1247 case TargetOpcode::G_PTRMASK: {
1255 case TargetOpcode::G_FPTOUI: {
1263 case TargetOpcode::G_FPTOSI: {
1271 case TargetOpcode::G_FPEXT:
1298 if (EltTy.isPointer())
1304 unsigned OpIdx,
unsigned ExtOpcode) {
1307 MO.
setReg(ExtB.getReg(0));
1314 MO.
setReg(ExtB.getReg(0));
1318 unsigned OpIdx,
unsigned TruncOpcode) {
1327 unsigned OpIdx,
unsigned ExtOpcode) {
1350 unsigned NumParts = NewElts / OldElts;
1353 if (NumParts * OldElts == NewElts) {
1358 for (
unsigned I = 1;
I != NumParts; ++
I)
1386 LegalizerHelper::widenScalarMergeValues(
MachineInstr &
MI,
unsigned TypeIdx,
1401 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
1403 unsigned NumOps =
MI.getNumOperands();
1404 unsigned NumSrc =
MI.getNumOperands() - 1;
1407 if (WideSize >= DstSize) {
1411 for (
unsigned I = 2;
I != NumOps; ++
I) {
1412 const unsigned Offset = (
I - 1) * PartSize;
1419 Register NextResult =
I + 1 == NumOps && WideTy == DstTy ? DstReg :
1425 ResultReg = NextResult;
1428 if (WideSize > DstSize)
1433 MI.eraseFromParent();
1467 for (
int I = 1,
E =
MI.getNumOperands();
I !=
E; ++
I) {
1469 if (GCD == SrcSize) {
1473 for (
int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
1479 if (static_cast<int>(Unmerges.
size()) != NumMerge * WideSize) {
1481 for (
int I = Unmerges.
size();
I != NumMerge * WideSize; ++
I)
1485 const int PartsPerGCD = WideSize / GCD;
1489 for (
int I = 0;
I != NumMerge; ++
I, Slicer = Slicer.drop_front(PartsPerGCD)) {
1503 MI.eraseFromParent();
1524 if (NumMergeParts > 1) {
1527 MergeParts[0] = WideReg;
1533 UnmergeResults[0] = OrigReg;
1534 for (
int I = 1;
I != NumUnmergeParts; ++
I)
1542 LegalizerHelper::widenScalarUnmergeValues(
MachineInstr &
MI,
unsigned TypeIdx,
1547 int NumDst =
MI.getNumOperands() - 1;
1548 Register SrcReg =
MI.getOperand(NumDst).getReg();
1553 Register Dst0Reg =
MI.getOperand(0).getReg();
1563 dbgs() <<
"Not casting non-integral address space integer\n");
1584 for (
int I = 1;
I != NumDst; ++
I) {
1590 MI.eraseFromParent();
1601 LLVM_DEBUG(
dbgs() <<
"Widening pointer source types not implemented\n");
1626 const int NumUnmerge = Unmerge->getNumOperands() - 1;
1631 if (PartsPerRemerge == 1) {
1634 for (
int I = 0;
I != NumUnmerge; ++
I) {
1637 for (
int J = 0; J != PartsPerUnmerge; ++J) {
1638 int Idx =
I * PartsPerUnmerge + J;
1640 MIB.addDef(
MI.getOperand(Idx).getReg());
1647 MIB.addUse(Unmerge.getReg(
I));
1651 for (
int J = 0; J != NumUnmerge; ++J)
1652 extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
1655 for (
int I = 0;
I != NumDst; ++
I) {
1656 for (
int J = 0; J < PartsPerRemerge; ++J) {
1657 const int Idx =
I * PartsPerRemerge + J;
1662 RemergeParts.
clear();
1666 MI.eraseFromParent();
1671 LegalizerHelper::widenScalarExtract(
MachineInstr &
MI,
unsigned TypeIdx,
1678 unsigned Offset =
MI.getOperand(2).getImm();
1704 MI.eraseFromParent();
1709 LLT ShiftTy = SrcTy;
1718 MI.eraseFromParent();
1749 LegalizerHelper::widenScalarInsert(
MachineInstr &
MI,
unsigned TypeIdx,
1751 if (TypeIdx != 0 || WideTy.
isVector())
1761 LegalizerHelper::widenScalarAddSubShlSat(
MachineInstr &
MI,
unsigned TypeIdx,
1763 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SADDSAT ||
1764 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
1765 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
1766 bool IsShift =
MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
1767 MI.getOpcode() == TargetOpcode::G_USHLSAT;
1792 {ShiftL, ShiftR},
MI.getFlags());
1800 MI.eraseFromParent();
1806 switch (
MI.getOpcode()) {
1809 case TargetOpcode::G_EXTRACT:
1810 return widenScalarExtract(
MI, TypeIdx, WideTy);
1811 case TargetOpcode::G_INSERT:
1812 return widenScalarInsert(
MI, TypeIdx, WideTy);
1813 case TargetOpcode::G_MERGE_VALUES:
1814 return widenScalarMergeValues(
MI, TypeIdx, WideTy);
1815 case TargetOpcode::G_UNMERGE_VALUES:
1816 return widenScalarUnmergeValues(
MI, TypeIdx, WideTy);
1817 case TargetOpcode::G_UADDO:
1818 case TargetOpcode::G_USUBO: {
1823 unsigned Opcode =
MI.getOpcode() == TargetOpcode::G_UADDO
1824 ? TargetOpcode::G_ADD
1825 : TargetOpcode::G_SUB;
1837 MI.eraseFromParent();
1840 case TargetOpcode::G_SADDSAT:
1841 case TargetOpcode::G_SSUBSAT:
1842 case TargetOpcode::G_SSHLSAT:
1843 case TargetOpcode::G_UADDSAT:
1844 case TargetOpcode::G_USUBSAT:
1845 case TargetOpcode::G_USHLSAT:
1846 return widenScalarAddSubShlSat(
MI, TypeIdx, WideTy);
1847 case TargetOpcode::G_CTTZ:
1848 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1849 case TargetOpcode::G_CTLZ:
1850 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1851 case TargetOpcode::G_CTPOP: {
1864 if (
MI.getOpcode() == TargetOpcode::G_CTTZ) {
1877 if (
MI.getOpcode() == TargetOpcode::G_CTLZ ||
1878 MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
1886 MI.eraseFromParent();
1889 case TargetOpcode::G_BSWAP: {
1898 MI.getOperand(0).setReg(DstExt);
1911 case TargetOpcode::G_BITREVERSE: {
1920 MI.getOperand(0).setReg(DstExt);
1929 case TargetOpcode::G_FREEZE:
1936 case TargetOpcode::G_ADD:
1937 case TargetOpcode::G_AND:
1938 case TargetOpcode::G_MUL:
1939 case TargetOpcode::G_OR:
1940 case TargetOpcode::G_XOR:
1941 case TargetOpcode::G_SUB:
1952 case TargetOpcode::G_SHL:
1968 case TargetOpcode::G_SDIV:
1969 case TargetOpcode::G_SREM:
1970 case TargetOpcode::G_SMIN:
1971 case TargetOpcode::G_SMAX:
1979 case TargetOpcode::G_ASHR:
1980 case TargetOpcode::G_LSHR:
1984 unsigned CvtOp =
MI.getOpcode() == TargetOpcode::G_ASHR ?
1985 TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
1998 case TargetOpcode::G_UDIV:
1999 case TargetOpcode::G_UREM:
2000 case TargetOpcode::G_UMIN:
2001 case TargetOpcode::G_UMAX:
2009 case TargetOpcode::G_SELECT:
2026 case TargetOpcode::G_FPTOSI:
2027 case TargetOpcode::G_FPTOUI:
2037 case TargetOpcode::G_SITOFP:
2047 case TargetOpcode::G_UITOFP:
2057 case TargetOpcode::G_LOAD:
2058 case TargetOpcode::G_SEXTLOAD:
2059 case TargetOpcode::G_ZEXTLOAD:
2065 case TargetOpcode::G_STORE: {
2076 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
2082 case TargetOpcode::G_CONSTANT: {
2086 MRI.
getType(
MI.getOperand(0).getReg()));
2087 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
2088 ExtOpc == TargetOpcode::G_ANYEXT) &&
2091 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
2101 case TargetOpcode::G_FCONSTANT: {
2119 assert(!LosesInfo &&
"extend should always be lossless");
2128 case TargetOpcode::G_IMPLICIT_DEF: {
2134 case TargetOpcode::G_BRCOND:
2140 case TargetOpcode::G_FCMP:
2151 case TargetOpcode::G_ICMP:
2157 MI.getOperand(1).getPredicate()))
2158 ? TargetOpcode::G_SEXT
2159 : TargetOpcode::G_ZEXT;
2166 case TargetOpcode::G_PTR_ADD:
2167 assert(TypeIdx == 1 &&
"unable to legalize pointer of G_PTR_ADD");
2173 case TargetOpcode::G_PHI: {
2174 assert(TypeIdx == 0 &&
"Expecting only Idx 0");
2177 for (
unsigned I = 1;
I <
MI.getNumOperands();
I += 2) {
2189 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
2197 1, TargetOpcode::G_SEXT);
2212 case TargetOpcode::G_INSERT_VECTOR_ELT: {
2237 case TargetOpcode::G_FADD:
2238 case TargetOpcode::G_FMUL:
2239 case TargetOpcode::G_FSUB:
2240 case TargetOpcode::G_FMA:
2241 case TargetOpcode::G_FMAD:
2242 case TargetOpcode::G_FNEG:
2243 case TargetOpcode::G_FABS:
2244 case TargetOpcode::G_FCANONICALIZE:
2245 case TargetOpcode::G_FMINNUM:
2246 case TargetOpcode::G_FMAXNUM:
2247 case TargetOpcode::G_FMINNUM_IEEE:
2248 case TargetOpcode::G_FMAXNUM_IEEE:
2249 case TargetOpcode::G_FMINIMUM:
2250 case TargetOpcode::G_FMAXIMUM:
2251 case TargetOpcode::G_FDIV:
2252 case TargetOpcode::G_FREM:
2253 case TargetOpcode::G_FCEIL:
2254 case TargetOpcode::G_FFLOOR:
2255 case TargetOpcode::G_FCOS:
2256 case TargetOpcode::G_FSIN:
2257 case TargetOpcode::G_FLOG10:
2258 case TargetOpcode::G_FLOG:
2259 case TargetOpcode::G_FLOG2:
2260 case TargetOpcode::G_FRINT:
2261 case TargetOpcode::G_FNEARBYINT:
2262 case TargetOpcode::G_FSQRT:
2263 case TargetOpcode::G_FEXP:
2264 case TargetOpcode::G_FEXP2:
2265 case TargetOpcode::G_FPOW:
2266 case TargetOpcode::G_INTRINSIC_TRUNC:
2267 case TargetOpcode::G_INTRINSIC_ROUND:
2268 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
2272 for (
unsigned I = 1,
E =
MI.getNumOperands();
I !=
E; ++
I)
2278 case TargetOpcode::G_FPOWI: {
2287 case TargetOpcode::G_INTTOPTR:
2295 case TargetOpcode::G_PTRTOINT:
2303 case TargetOpcode::G_BUILD_VECTOR: {
2307 for (
int I = 1,
E =
MI.getNumOperands();
I !=
E; ++
I)
2321 case TargetOpcode::G_SEXT_INREG:
2330 case TargetOpcode::G_PTRMASK: {
2343 auto Unmerge =
B.buildUnmerge(Ty, Src);
2344 for (
int I = 0,
E = Unmerge->getNumOperands() - 1;
I !=
E; ++
I)
2364 LLT DstCastTy = DstEltTy;
2365 LLT SrcPartTy = SrcEltTy;
2369 if (NumSrcElt < NumDstElt) {
2378 DstCastTy =
LLT::vector(NumDstElt / NumSrcElt, DstEltTy);
2379 SrcPartTy = SrcEltTy;
2380 }
else if (NumSrcElt > NumDstElt) {
2390 SrcPartTy =
LLT::vector(NumSrcElt / NumDstElt, SrcEltTy);
2391 DstCastTy = DstEltTy;
2401 MI.eraseFromParent();
2409 MI.eraseFromParent();
2425 unsigned NewEltSize,
2426 unsigned OldEltSize) {
2427 const unsigned Log2EltRatio =
Log2_32(NewEltSize / OldEltSize);
2428 LLT IdxTy =
B.getMRI()->getType(Idx);
2431 auto OffsetMask =
B.buildConstant(
2433 auto OffsetIdx =
B.buildAnd(IdxTy, Idx, OffsetMask);
2434 return B.buildShl(IdxTy, OffsetIdx,
2465 if (NewNumElts > OldNumElts) {
2476 if (NewNumElts % OldNumElts != 0)
2480 const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
2488 for (
unsigned I = 0;
I < NewEltsPerOldElt; ++
I) {
2492 NewOps[
I] = Elt.getReg(0);
2497 MI.eraseFromParent();
2501 if (NewNumElts < OldNumElts) {
2502 if (NewEltSize % OldEltSize != 0)
2524 const unsigned Log2EltRatio =
Log2_32(NewEltSize / OldEltSize);
2543 MI.eraseFromParent();
2557 LLT TargetTy =
B.getMRI()->getType(TargetReg);
2558 LLT InsertTy =
B.getMRI()->getType(InsertReg);
2559 auto ZextVal =
B.buildZExt(TargetTy, InsertReg);
2560 auto ShiftedInsertVal =
B.buildShl(TargetTy, ZextVal, OffsetBits);
2563 auto EltMask =
B.buildConstant(
2567 auto ShiftedMask =
B.buildShl(TargetTy, EltMask, OffsetBits);
2568 auto InvShiftedMask =
B.buildNot(TargetTy, ShiftedMask);
2571 auto MaskedOldElt =
B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
2575 return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
2606 if (NewNumElts < OldNumElts) {
2607 if (NewEltSize % OldEltSize != 0)
2616 const unsigned Log2EltRatio =
Log2_32(NewEltSize / OldEltSize);
2636 CastTy, CastVec, InsertedElt, ScaledIdx).
getReg(0);
2640 MI.eraseFromParent();
2653 auto &MMO = **
MI.memoperands_begin();
2656 if (
MI.getOpcode() == TargetOpcode::G_LOAD) {
2676 uint64_t SmallSplitSize = DstTy.
getSizeInBits() - LargeSplitSize;
2682 &MMO, LargeSplitSize / 8, SmallSplitSize / 8);
2690 TargetOpcode::G_ZEXTLOAD, LargeLdReg, PtrReg, *LargeMMO);
2704 MI.eraseFromParent();
2709 MI.eraseFromParent();
2717 switch (
MI.getOpcode()) {
2720 case TargetOpcode::G_LOAD:
2723 case TargetOpcode::G_SEXTLOAD:
2726 case TargetOpcode::G_ZEXTLOAD:
2731 MI.eraseFromParent();
2763 uint64_t SmallSplitSize = SrcTy.
getSizeInBits() - LargeSplitSize;
2782 MI.eraseFromParent();
2788 switch (
MI.getOpcode()) {
2789 case TargetOpcode::G_LOAD: {
2798 case TargetOpcode::G_STORE: {
2807 case TargetOpcode::G_SELECT: {
2813 dbgs() <<
"bitcast action not implemented for vector select\n");
2824 case TargetOpcode::G_AND:
2825 case TargetOpcode::G_OR:
2826 case TargetOpcode::G_XOR: {
2834 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
2836 case TargetOpcode::G_INSERT_VECTOR_ELT:
2844 void LegalizerHelper::changeOpcode(
MachineInstr &
MI,
unsigned NewOpcode) {
2852 using namespace TargetOpcode;
2854 switch(
MI.getOpcode()) {
2857 case TargetOpcode::G_BITCAST:
2859 case TargetOpcode::G_SREM:
2860 case TargetOpcode::G_UREM: {
2861 LLT Ty =
MRI.getType(
MI.getOperand(0).getReg());
2864 {
MI.getOperand(1),
MI.getOperand(2)});
2868 MI.eraseFromParent();
2871 case TargetOpcode::G_SADDO:
2872 case TargetOpcode::G_SSUBO:
2874 case TargetOpcode::G_UMULH:
2875 case TargetOpcode::G_SMULH:
2877 case TargetOpcode::G_SMULO:
2878 case TargetOpcode::G_UMULO: {
2882 Register Overflow =
MI.getOperand(1).getReg();
2885 LLT Ty =
MRI.getType(Res);
2887 unsigned Opcode =
MI.getOpcode() == TargetOpcode::G_SMULO
2888 ? TargetOpcode::G_SMULH
2889 : TargetOpcode::G_UMULH;
2893 MI.setDesc(
TII.get(TargetOpcode::G_MUL));
2894 MI.RemoveOperand(1);
2905 if (Opcode == TargetOpcode::G_SMULH) {
2914 case TargetOpcode::G_FNEG: {
2916 LLT Ty =
MRI.getType(Res);
2924 Register SubByReg =
MI.getOperand(1).getReg();
2926 MI.eraseFromParent();
2929 case TargetOpcode::G_FSUB: {
2931 LLT Ty =
MRI.getType(Res);
2936 if (LI.getAction({G_FNEG, {Ty}}).Action ==
Lower)
2940 Register Neg =
MRI.createGenericVirtualRegister(Ty);
2943 MI.eraseFromParent();
2946 case TargetOpcode::G_FMAD:
2948 case TargetOpcode::G_FFLOOR:
2950 case TargetOpcode::G_INTRINSIC_ROUND:
2952 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
2955 changeOpcode(
MI, TargetOpcode::G_FRINT);
2958 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
2959 Register OldValRes =
MI.getOperand(0).getReg();
2960 Register SuccessRes =
MI.getOperand(1).getReg();
2965 **
MI.memoperands_begin());
2967 MI.eraseFromParent();
2970 case TargetOpcode::G_LOAD:
2971 case TargetOpcode::G_SEXTLOAD:
2972 case TargetOpcode::G_ZEXTLOAD:
2974 case TargetOpcode::G_STORE:
2976 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2977 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2978 case TargetOpcode::G_CTLZ:
2979 case TargetOpcode::G_CTTZ:
2980 case TargetOpcode::G_CTPOP:
2984 Register CarryOut =
MI.getOperand(1).getReg();
2991 MI.eraseFromParent();
2996 Register CarryOut =
MI.getOperand(1).getReg();
2999 Register CarryIn =
MI.getOperand(4).getReg();
3000 LLT Ty =
MRI.getType(Res);
3007 MI.eraseFromParent();
3012 Register BorrowOut =
MI.getOperand(1).getReg();
3019 MI.eraseFromParent();
3024 Register BorrowOut =
MI.getOperand(1).getReg();
3027 Register BorrowIn =
MI.getOperand(4).getReg();
3028 const LLT CondTy =
MRI.getType(BorrowOut);
3029 const LLT Ty =
MRI.getType(Res);
3039 MI.eraseFromParent();
3064 case G_MERGE_VALUES:
3066 case G_UNMERGE_VALUES:
3068 case TargetOpcode::G_SEXT_INREG: {
3069 assert(
MI.getOperand(2).isImm() &&
"Expected immediate");
3070 int64_t SizeInBits =
MI.getOperand(2).getImm();
3074 LLT DstTy =
MRI.getType(DstReg);
3075 Register TmpRes =
MRI.createGenericVirtualRegister(DstTy);
3080 MI.eraseFromParent();
3083 case G_EXTRACT_VECTOR_ELT:
3084 case G_INSERT_VECTOR_ELT:
3086 case G_SHUFFLE_VECTOR:
3088 case G_DYN_STACKALLOC:
3098 case G_READ_REGISTER:
3099 case G_WRITE_REGISTER:
3106 LLT Ty =
MRI.getType(
MI.getOperand(0).getReg());
3107 if (LI.isLegalOrCustom({G_UMIN, Ty}))
3113 LLT Ty =
MRI.getType(
MI.getOperand(0).getReg());
3118 if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
3130 LLT DstTy =
MRI.getType(
MI.getOperand(0).getReg());
3138 MI.eraseFromParent();
3163 unsigned AddrSpace =
DL.getAllocaAddrSpace();
3176 LLT IdxTy =
B.getMRI()->getType(IdxReg);
3180 return B.buildAnd(IdxTy, IdxReg,
B.buildConstant(IdxTy, Imm)).
getReg(0);
3183 return B.buildUMin(IdxTy, IdxReg,
B.buildConstant(IdxTy, NElts - 1))
3194 "Converting bits to bytes lost precision");
3217 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
3218 MI.eraseFromParent();
3240 const LLT NarrowTy0 = NarrowTyArg;
3241 const unsigned NewNumElts =
3244 const Register DstReg =
MI.getOperand(0).getReg();
3260 for (
unsigned I = 1,
E =
MI.getNumOperands();
I !=
E; ++
I) {
3268 if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs,
3275 for (
Register PartReg : PartRegs) {
3283 for (
Register LeftoverReg : LeftoverRegs) {
3296 unsigned InstCount = 0;
3297 for (
unsigned J = 0, JE = PartRegs.
size(); J != JE; ++J)
3298 NewInsts[InstCount++].addUse(PartRegs[J]);
3299 for (
unsigned J = 0, JE = LeftoverRegs.
size(); J != JE; ++J)
3300 NewInsts[InstCount++].addUse(LeftoverRegs[J]);
3304 LeftoverRegs.
clear();
3308 for (
auto &MIB : NewInsts)
3311 insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs);
3313 MI.eraseFromParent();
3328 LLT NarrowTy0 = NarrowTy;
3345 extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);
3347 for (
unsigned I = 0;
I < NumParts; ++
I) {
3361 MI.eraseFromParent();
3369 Register Src0Reg =
MI.getOperand(2).getReg();
3374 LLT NarrowTy0, NarrowTy1;
3380 NarrowTy0 = NarrowTy;
3390 NumParts = NarrowTy.
isVector() ? (OldElts / NewElts) :
3394 NarrowTy1 = NarrowTy;
3404 = static_cast<CmpInst::Predicate>(
MI.getOperand(1).getPredicate());
3407 extractParts(
MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
3408 extractParts(
MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);
3410 for (
unsigned I = 0;
I < NumParts; ++
I) {
3412 DstRegs.push_back(DstReg);
3414 if (
MI.getOpcode() == TargetOpcode::G_ICMP)
3428 MI.eraseFromParent();
3436 Register CondReg =
MI.getOperand(1).getReg();
3438 unsigned NumParts = 0;
3439 LLT NarrowTy0, NarrowTy1;
3448 NarrowTy0 = NarrowTy;
3454 if (
Size % NarrowSize != 0)
3457 NumParts =
Size / NarrowSize;
3477 NarrowTy1 = NarrowTy;
3483 extractParts(
MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);
3485 extractParts(
MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
3486 extractParts(
MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);
3488 for (
unsigned i = 0; i < NumParts; ++i) {
3491 Src1Regs[i], Src2Regs[i]);
3500 MI.eraseFromParent();
3507 const Register DstReg =
MI.getOperand(0).getReg();
3514 int NumParts, NumLeftover;
3515 std::tie(NumParts, NumLeftover)
3523 const int TotalNumParts = NumParts + NumLeftover;
3526 for (
int I = 0;
I != TotalNumParts; ++
I) {
3527 LLT Ty =
I < NumParts ? NarrowTy : LeftoverTy;
3539 insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs);
3544 for (
unsigned I = 1,
E =
MI.getNumOperands();
I !=
E;
I += 2) {
3546 LeftoverRegs.
clear();
3553 if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs,
3560 for (
int J = 0; J != TotalNumParts; ++J) {
3562 MIB.
addUse(J < NumParts ? PartRegs[J] : LeftoverRegs[J - NumParts]);
3567 MI.eraseFromParent();
3578 const int NumDst =
MI.getNumOperands() - 1;
3579 const Register SrcReg =
MI.getOperand(NumDst).getReg();
3585 if (DstTy == NarrowTy)
3589 if (DstTy == GCDTy) {
3597 const int PartsPerUnmerge = NumDst / NumUnmerge;
3599 for (
int I = 0;
I != NumUnmerge; ++
I) {
3602 for (
int J = 0; J != PartsPerUnmerge; ++J)
3603 MIB.
addDef(
MI.getOperand(
I * PartsPerUnmerge + J).getReg());
3604 MIB.
addUse(Unmerge.getReg(
I));
3607 MI.eraseFromParent();
3636 for (
unsigned I = 1,
E =
MI.getNumOperands();
I !=
E; ++
I)
3637 extractGCDType(Parts, GCDTy,
MI.getOperand(
I).getReg());
3640 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts,
3641 TargetOpcode::G_ANYEXT);
3644 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
3646 MI.eraseFromParent();
3657 bool IsInsert =
MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
3659 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) &&
"not a vector type index");
3661 InsertVal =
MI.getOperand(2).getReg();
3678 MI.eraseFromParent();
3683 LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
3686 LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
3687 TargetOpcode::G_ANYEXT);
3692 int64_t PartIdx = IdxVal / NewNumElts;
3701 PartTy, VecParts[PartIdx], InsertVal, NewIdx);
3702 VecParts[PartIdx] = InsertPart.getReg(0);
3706 buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
3711 MI.eraseFromParent();
// NOTE(review): excerpt only; the enclosing function's signature and several
// statements are not visible here.
// IsLoad distinguishes G_LOAD from whatever other opcode reaches this code.
3738 bool IsLoad =
MI.getOpcode() == TargetOpcode::G_LOAD;
// Operand 1 is read as the address register.
3740 Register AddrReg =
MI.getOperand(1).getReg();
// -1 is the initial value; it is overwritten below when extractParts succeeds.
3750 int NumLeftover = -1;
// On success the part counts are taken from the sizes of the two register
// lists filled in by extractParts.
3756 if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
3757 NarrowLeftoverRegs)) {
3758 NumParts = NarrowRegs.
size();
3759 NumLeftover = NarrowLeftoverRegs.
size();
// Tail of a lambda header taking a starting Offset and returning unsigned
// (presumably the splitTypePieces callable used below -- TODO confirm).
3776 unsigned Offset) ->
unsigned {
// Iterate at most NumParts times, stopping early once Offset reaches
// TotalSize; Offset advances by PartSize per iteration.
3779 for (
unsigned Idx = 0,
E = NumParts; Idx !=
E &&
Offset < TotalSize;
3780 Offset += PartSize, ++Idx) {
// Both quantities are divided by 8 (presumably bits to bytes -- TODO confirm).
3781 unsigned ByteSize = PartSize / 8;
3782 unsigned ByteOffset =
Offset / 8;
3792 ValRegs.push_back(Dst);
// The NarrowTy pieces are handled from offset 0; the value returned is used
// as the starting offset for the LeftoverTy pieces.
3802 unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);
3806 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);
// Main and leftover pieces are recombined into ValReg.
3809 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
3810 LeftoverTy, NarrowLeftoverRegs);
3813 MI.eraseFromParent();
// NOTE(review): excerpt only; the enclosing function's signature and many
// intermediate statements are not visible here.
3820 assert(TypeIdx == 0 &&
"only one type index expected");
// Cache the opcode, the operand count minus one (NumOps), the register of
// operand 0 and the instruction flags.
3822 const unsigned Opc =
MI.getOpcode();
3823 const int NumOps =
MI.getNumOperands() - 1;
3824 const Register DstReg =
MI.getOperand(0).getReg();
3825 const unsigned Flags =
MI.getFlags();
// At most three source operands are supported.
3829 assert(NumOps <= 3 &&
"expected instruction with 1 result and 1-3 sources");
// Per-source loop: each source is split into ExtractedRegs[I] and padded
// using G_ANYEXT as the pad strategy.
3843 for (
int I = 0;
I != NumOps; ++
I) {
3851 OpNarrowTy = NarrowScalarTy;
3864 LLT GCDTy = extractGCDType(ExtractedRegs[
I], SrcTy, OpNarrowTy, SrcReg);
3867 buildLCMMergePieces(SrcTy, OpNarrowTy, GCDTy, ExtractedRegs[
I],
3868 TargetOpcode::G_ANYEXT);
// The piece count is taken from the first source's register list.
3876 int NumParts = ExtractedRegs[0].
size();
3881 LLT DstLCMTy, NarrowDstTy;
3883 DstLCMTy =
getLCMType(DstScalarTy, NarrowScalarTy);
3884 NarrowDstTy = NarrowScalarTy;
3887 NarrowDstTy = NarrowTy;
// Ceiling division: number of NarrowSize pieces needed to cover DstSize.
3892 const int NumRealParts = (DstSize + NarrowSize - 1) / NarrowSize;
// For each real piece, gather the I-th register of every source.
3894 for (
int I = 0;
I != NumRealParts; ++
I) {
3896 for (
int J = 0; J != NumOps; ++J)
3897 InputRegs[J] = ExtractedRegs[J][
I];
// Pieces beyond NumRealParts are appended to ResultRegs separately (the
// appended value is not visible in this excerpt).
3905 int NumUndefParts = NumParts - NumRealParts;
3906 if (NumUndefParts != 0)
3907 ResultRegs.
append(NumUndefParts,
3916 MergeDstReg = DstReg;
// The result pieces are re-merged into MergeDstReg, then MI is erased.
3918 buildWidenedRemergeToDst(MergeDstReg, DstLCMTy, ResultRegs);
3924 MI.eraseFromParent();
// NOTE(review): excerpt only; the enclosing function's signature and the use
// of Imm are not visible here.
// Operand 2 is read as an immediate.
3933 int64_t Imm =
MI.getOperand(2).getImm();
// The source is split into Parts; buildLCMMergePieces is called here without
// an explicit pad strategy argument.
3938 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
3939 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts);
// Parts are re-merged into DstReg, then MI is erased.
3944 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
3946 MI.eraseFromParent();
// NOTE(review): excerpt only; the enclosing function's signature, most case
// labels and all case bodies except one assertion are not visible here.
// Pulling in TargetOpcode lets the case labels below be written unqualified.
3953 using namespace TargetOpcode;
// Dispatch on the opcode of MI.
3955 switch (
MI.getOpcode()) {
3956 case G_IMPLICIT_DEF:
3973 case G_FCANONICALIZE:
3988 case G_INTRINSIC_ROUND:
3989 case G_INTRINSIC_ROUNDEVEN:
3990 case G_INTRINSIC_TRUNC:
4006 case G_FMINNUM_IEEE:
4007 case G_FMAXNUM_IEEE:
4024 case G_CTLZ_ZERO_UNDEF:
4026 case G_CTTZ_ZERO_UNDEF:
4041 case G_ADDRSPACE_CAST:
4050 case G_UNMERGE_VALUES:
// G_BUILD_VECTOR requires type index 0.
4052 case G_BUILD_VECTOR:
4053 assert(TypeIdx == 0 &&
"not a vector type index");
4055 case G_CONCAT_VECTORS:
4059 case G_EXTRACT_VECTOR_ELT:
4060 case G_INSERT_VECTOR_ELT:
// NOTE(review): excerpt only; the start of the parameter list and the bodies
// of every branch are not visible here.
// Tail of the parameter list: HalfTy and AmtTy are both passed as const LLT.
4074 const LLT HalfTy,
const LLT AmtTy) {
4082 MI.eraseFromParent();
// VTBits is twice NVTBits.
4088 unsigned VTBits = 2 * NVTBits;
// Each opcode branch below classifies Amt with the same three visible tests:
// Amt.ugt(VTBits), Amt.ugt(NVTBits), and Amt == NVTBits.
// NOTE(review): the first test uses ugt (strictly greater), so
// Amt == VTBits falls through to the next test -- confirm that is intended.
4091 if (
MI.getOpcode() == TargetOpcode::G_SHL) {
4092 if (Amt.
ugt(VTBits)) {
4094 }
else if (Amt.
ugt(NVTBits)) {
4098 }
else if (Amt == NVTBits) {
4109 }
else if (
MI.getOpcode() == TargetOpcode::G_LSHR) {
4110 if (Amt.
ugt(VTBits)) {
4112 }
else if (Amt.
ugt(NVTBits)) {
4116 }
else if (Amt == NVTBits) {
// Third group of the same tests; the condition selecting it is not visible
// (presumably the remaining shift opcode -- TODO confirm).
4130 if (Amt.
ugt(VTBits)) {
4133 }
else if (Amt.
ugt(NVTBits)) {
4138 }
else if (Amt == NVTBits) {
4155 MI.eraseFromParent();
// NOTE(review): excerpt only; the enclosing function's signature and most
// statements are not visible here.
// Odd element sizes take a separate path (its body is not visible).
4179 if (DstEltSize % 2 != 0)
// The new width is half of the destination element size.
4185 const unsigned NewBitSize = DstEltSize / 2;
// Tail of a call that forwards the constant's value from KShiftAmt together
// with HalfTy and ShiftAmtTy (the callee name is not visible here).
4192 MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy);
// Per-opcode handling; both visible cases store Lo into ResultRegs[0] and Hi
// into ResultRegs[1].
4212 switch (
MI.getOpcode()) {
4213 case TargetOpcode::G_SHL: {
4229 ResultRegs[0] =
Lo.getReg(0);
4230 ResultRegs[1] =
Hi.getReg(0);
// G_LSHR and G_ASHR share a case; G_LSHR is singled out inside it.
4233 case TargetOpcode::G_LSHR:
4234 case TargetOpcode::G_ASHR: {
4244 if (
MI.getOpcode() == TargetOpcode::G_LSHR) {
4258 ResultRegs[0] =
Lo.getReg(0);
4259 ResultRegs[1] =
Hi.getReg(0);
4267 MI.eraseFromParent();
// NOTE(review): excerpt only; the enclosing function's signature and the loop
// body are not visible here.
4274 assert(TypeIdx == 0 &&
"Expecting only Idx 0");
// Visits every second operand starting at index 1. The loop ends on I == E
// with a step of 2, so it relies on getNumOperands() being odd.
4277 for (
unsigned I = 1,
E =
MI.getNumOperands();
I !=
E;
I += 2) {
// NOTE(review): excerpt only; the enclosing function's signature, the switch
// header and most case bodies are not visible here.
4293 unsigned Opc =
MI.getOpcode();
4295 case TargetOpcode::G_IMPLICIT_DEF:
4296 case TargetOpcode::G_LOAD: {
4304 case TargetOpcode::G_STORE:
// The following opcodes all fall into one shared case block.
4311 case TargetOpcode::G_AND:
4312 case TargetOpcode::G_OR:
4313 case TargetOpcode::G_XOR:
4314 case TargetOpcode::G_SMIN:
4315 case TargetOpcode::G_SMAX:
4316 case TargetOpcode::G_UMIN:
4317 case TargetOpcode::G_UMAX:
4318 case TargetOpcode::G_FMINNUM:
4319 case TargetOpcode::G_FMAXNUM:
4320 case TargetOpcode::G_FMINNUM_IEEE:
4321 case TargetOpcode::G_FMAXNUM_IEEE:
4322 case TargetOpcode::G_FMINIMUM:
4323 case TargetOpcode::G_FMAXIMUM: {
4331 case TargetOpcode::G_EXTRACT:
4338 case TargetOpcode::G_INSERT:
4339 case TargetOpcode::G_FREEZE:
4347 case TargetOpcode::G_SELECT:
4359 case TargetOpcode::G_UNMERGE_VALUES: {
// NumDst counts every operand except the last.
4364 int NumDst =
MI.getNumOperands() - 1;
// First loop covers indices [0, NumDst); second covers [NumDst, NewNumDst).
// The loop bodies are not visible in this excerpt.
4368 for (
int I = 0;
I != NumDst; ++
I)
4372 for (
int I = NumDst;
I != NewNumDst; ++
I)
// The operand at index NumDst is added to MIB as the use; MI is then erased.
4375 MIB.addUse(
MI.getOperand(NumDst).getReg());
4376 MI.eraseFromParent();
4379 case TargetOpcode::G_PHI:
// NOTE(review): excerpt only; the enclosing function's signature and several
// statements (including some branch headers) are not visible here.
// Part counts come from the sizes of the source and destination lists.
4391 unsigned SrcParts = Src1Regs.
size();
4392 unsigned DstParts = DstRegs.
size();
// Destination part 0 is the product of the two parts at index 0.
4394 unsigned DstIdx = 0;
4396 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
4397 DstRegs[DstIdx] = FactorSum;
// Declared without an initializer; assigned at the end of each iteration of
// the loop below.
4399 unsigned CarrySumPrevDstIdx;
// Remaining destination parts are produced one index at a time.
4402 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
// Products Src1Regs[DstIdx - i] * Src2Regs[i]; the bounds on i keep both
// indices below SrcParts.
4404 for (
unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
4405 i <=
std::min(DstIdx, SrcParts - 1); ++i) {
4407 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
// UMulH of Src1Regs[DstIdx - 1 - i] and Src2Regs[i], i.e. pairs belonging to
// the previous destination index.
4411 for (
unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
4412 i <=
std::min(DstIdx - 1, SrcParts - 1); ++i) {
4414 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
// For every part but the last, the factors are summed with UAddo so the
// carry-out (an LLT::scalar(1) result) can be zero-extended and accumulated
// in CarrySum.
4424 if (DstIdx != DstParts - 1) {
4426 B.buildUAddo(NarrowTy,
LLT::scalar(1), Factors[0], Factors[1]);
4427 FactorSum = Uaddo.
getReg(0);
4428 CarrySum =
B.buildZExt(NarrowTy, Uaddo.
getReg(1)).
getReg(0);
4429 for (
unsigned i = 2; i < Factors.
size(); ++i) {
4431 B.buildUAddo(NarrowTy,
LLT::scalar(1), FactorSum, Factors[i]);
4432 FactorSum = Uaddo.
getReg(0);
4434 CarrySum =
B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
// Plain additions with no carry tracking (presumably the else branch for the
// last destination part -- the branch header is not visible here).
4438 FactorSum =
B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
4439 for (
unsigned i = 2; i < Factors.
size(); ++i)
4440 FactorSum =
B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
// Remember this iteration's carry and store the summed part.
4443 CarrySumPrevDstIdx = CarrySum;
4444 DstRegs[DstIdx] = FactorSum;
// NOTE(review): excerpt only; the enclosing function's signature and several
// statements are not visible here.
// Sizes that are not whole multiples of NarrowSize take a separate path (its
// body is not visible).
4462 if (DstSize % NarrowSize != 0 || SrcSize % NarrowSize != 0)
4465 unsigned NumDstParts = DstSize / NarrowSize;
4466 unsigned NumSrcParts = SrcSize / NarrowSize;
// For G_UMULH twice as many temporary parts are requested.
4467 bool IsMulHigh =
MI.getOpcode() == TargetOpcode::G_UMULH;
4468 unsigned DstTmpParts = NumDstParts * (IsMulHigh ? 2 : 1);
// Both sources are split into NumSrcParts pieces and multiplied piecewise.
4472 extractParts(Src1, NarrowTy, NumSrcParts, Src1Parts);
4473 extractParts(Src2, NarrowTy, NumSrcParts, Src2Parts);
4474 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
// NumDstParts temporaries are selected: starting at the midpoint for
// G_UMULH, otherwise starting at index 0.
4478 IsMulHigh ? &DstTmpRegs[DstTmpParts / 2] : &DstTmpRegs[0], NumDstParts);
4480 MI.eraseFromParent();
// NOTE(review): excerpt only; the enclosing function's signature and several
// statements (including some branch headers) are not visible here.
// A size that is not a whole multiple of NarrowSize takes a separate path.
4495 if (SizeOp1 % NarrowSize != 0)
4497 int NumParts = SizeOp1 / NarrowSize;
// Operand 1 is split into NumParts pieces of NarrowTy.
4501 extractParts(
MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
// Operand 2 is read as the immediate start offset.
4504 uint64_t OpStart =
MI.getOperand(2).getImm();
// Each piece covers [SrcStart, SrcStart + NarrowSize).
4506 for (
int i = 0; i < NumParts; ++i) {
4507 unsigned SrcStart = i * NarrowSize;
// First test: the piece does not overlap [OpStart, OpStart + OpSize).
4509 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
// Second test: the piece starts exactly at OpStart and OpReg has type
// NarrowTy.
4512 }
else if (SrcStart == OpStart && NarrowTy == MRI.
getType(OpReg)) {
// Otherwise the overlapping segment is described by ExtractOffset and
// SegSize; the two SegSize assignments presumably sit in opposite branches
// of the OpStart < SrcStart test -- TODO confirm.
4520 int64_t ExtractOffset;
4522 if (OpStart < SrcStart) {
4524 SegSize =
std::min(NarrowSize, OpStart + OpSize - SrcStart);
4526 ExtractOffset = OpStart - SrcStart;
4527 SegSize =
std::min(SrcStart + NarrowSize - OpStart, OpSize);
// Further handling applies only when the segment is not a whole piece.
4531 if (ExtractOffset != 0 || SegSize != NarrowSize) {
4543 else if (DstRegs.
size() > 1)
4547 MI.eraseFromParent();
// NOTE(review): excerpt only; the enclosing function's signature and several
// statements (including some branch headers) are not visible here.
// A size that is not a whole multiple of NarrowSize takes a separate path.
4563 if (SizeOp0 % NarrowSize != 0)
4566 int NumParts = SizeOp0 / NarrowSize;
// Operand 1 is split into NumParts pieces of NarrowTy.
4570 extractParts(
MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
// Operand 3 is read as the immediate start offset.
4573 uint64_t OpStart =
MI.getOperand(3).getImm();
// Each piece covers [DstStart, DstStart + NarrowSize).
4575 for (
int i = 0; i < NumParts; ++i) {
4576 unsigned DstStart = i * NarrowSize;
// First test: the piece does not overlap [OpStart, OpStart + OpSize).
4578 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
// Second test: the piece starts exactly at OpStart and OpReg has type
// NarrowTy.
4582 }
else if (DstStart == OpStart && NarrowTy == MRI.
getType(OpReg)) {
// Otherwise the overlap is described by ExtractOffset, InsertOffset and
// SegSize; the assignments below presumably sit in opposite branches of the
// OpStart < DstStart test -- TODO confirm.
4591 int64_t ExtractOffset, InsertOffset;
4593 if (OpStart < DstStart) {
4595 ExtractOffset = DstStart - OpStart;
4596 SegSize =
std::min(NarrowSize, OpStart + OpSize - DstStart);
4598 InsertOffset = OpStart - DstStart;
4601 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
// Further handling applies only when the segment is not all of the operand.
4605 if (ExtractOffset != 0 || SegSize != OpSize) {
4622 MI.eraseFromParent();
// NOTE(review): excerpt only; the enclosing function's signature and the
// instruction-building calls' leading arguments are not visible here.
// Exactly three operands and type index 0 are required.
4632 assert(
MI.getNumOperands() == 3 && TypeIdx == 0);
// Operands 1 and 2 are each split into NarrowTy pieces plus leftover pieces;
// a failed split takes a separate path (its body is not visible).
4638 if (!extractParts(
MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
4639 Src0Regs, Src0LeftoverRegs))
4643 if (!extractParts(
MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
4644 Src1Regs, Src1LeftoverRegs))
// One instruction per main piece, taking the I-th piece of each source. The
// bound is Src1Regs.size(); presumably Src0Regs has the same length -- TODO
// confirm.
4647 for (
unsigned I = 0,
E = Src1Regs.size();
I !=
E; ++
I) {
4649 {Src0Regs[
I], Src1Regs[
I]});
4650 DstRegs.push_back(Inst.getReg(0));
// Same for the leftover pieces, using LeftoverTy as the result type.
4653 for (
unsigned I = 0,
E = Src1LeftoverRegs.
size();
I !=
E; ++
I) {
4656 {LeftoverTy}, {Src0LeftoverRegs[
I], Src1LeftoverRegs[
I]});
4657 DstLeftoverRegs.push_back(Inst.getReg(0));
// Main and leftover results are recombined into DstReg, then MI is erased.
4660 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
4661 LeftoverTy, DstLeftoverRegs);
4663 MI.eraseFromParent();
// NOTE(review): excerpt only; the enclosing function's signature is not
// visible here.
// The source is split into Parts; MI's own opcode is passed as the pad
// strategy argument of buildLCMMergePieces.
4681 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
4682 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts,
MI.getOpcode());
// Parts are re-merged into DstReg, then MI is erased.
4683 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
4685 MI.eraseFromParent();
// NOTE(review): excerpt only; the enclosing function's signature and the
// leading parts of the per-piece build calls are not visible here.
// Operand 1 is read as the condition register.
4695 Register CondReg =
MI.getOperand(1).getReg();
// Operands 2 and 3 are each split into NarrowTy pieces plus leftover pieces;
// a failed split takes a separate path (its body is not visible).
4707 if (!extractParts(
MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
4708 Src1Regs, Src1LeftoverRegs))
4712 if (!extractParts(
MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
4713 Src2Regs, Src2LeftoverRegs))
// Per main piece: the same CondReg is combined with the I-th piece of each
// source.
4716 for (
unsigned I = 0,
E = Src1Regs.size();
I !=
E; ++
I) {
4718 CondReg, Src1Regs[
I], Src2Regs[
I]);
// Per leftover piece: same pattern, using LeftoverTy.
4722 for (
unsigned I = 0,
E = Src1LeftoverRegs.
size();
I !=
E; ++
I) {
4724 LeftoverTy, CondReg, Src1LeftoverRegs[
I], Src2LeftoverRegs[
I]);
// Main and leftover results are recombined into DstReg, then MI is erased.
4728 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
4729 LeftoverTy, DstLeftoverRegs);
4731 MI.eraseFromParent();
4748 const bool IsUndef =
MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
4751 auto UnmergeSrc =
B.buildUnmerge(NarrowTy, SrcReg);
4753 auto C_0 =
B.buildConstant(NarrowTy, 0);
4755 UnmergeSrc.getReg(1), C_0);
4756 auto LoCTLZ = IsUndef ?
4757 B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
4758 B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
4759 auto C_NarrowSize =
B.buildConstant(DstTy, NarrowSize);
4760 auto HiIsZeroCTLZ =
B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
4761 auto HiCTLZ =
B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
4762 B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
4764 MI.eraseFromParent();